From 5cc40251d88272e856ad0074d346b8ac6834a2ab Mon Sep 17 00:00:00 2001 From: Leo Matak <78696884+matakleo@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:43:22 -0600 Subject: [PATCH 1/3] Classifier (#1353) * the best version so far * matche python mostly * before major change, kinda works * long one, all numpy stuff * classfier v1 * Seems to be working * stlye check * removed some not used stuff * add additional convert methods * some test fixes * rehaul * changes from code review comments --- .../src/main/java/ucar/nc2/constants/CDM.java | 1 + .../java/ucar/nc2/dataset/NetcdfDataset.java | 11 +- .../java/ucar/nc2/dataset/VariableDS.java | 8 ++ .../main/java/ucar/nc2/filter/Classifier.java | 70 +++++++++++ .../data/ncml/enhance/testClassifier.ncml | 54 ++++++++ .../java/ucar/nc2/filter/TestClassifier.java | 51 ++++++++ .../ucar/nc2/ncml/TestEnhanceClassifier.java | 115 ++++++++++++++++++ 7 files changed, 307 insertions(+), 3 deletions(-) create mode 100644 cdm/core/src/main/java/ucar/nc2/filter/Classifier.java create mode 100644 cdm/core/src/test/data/ncml/enhance/testClassifier.ncml create mode 100644 cdm/core/src/test/java/ucar/nc2/filter/TestClassifier.java create mode 100644 cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java diff --git a/cdm/core/src/main/java/ucar/nc2/constants/CDM.java b/cdm/core/src/main/java/ucar/nc2/constants/CDM.java index bf8c7ac393..9fa7b862da 100644 --- a/cdm/core/src/main/java/ucar/nc2/constants/CDM.java +++ b/cdm/core/src/main/java/ucar/nc2/constants/CDM.java @@ -58,6 +58,7 @@ public class CDM { public static final String RUNTIME_COORDINATE = "runtimeCoordinate"; public static final String STANDARDIZE = "standardize"; public static final String NORMALIZE = "normalize"; + public static final String CLASSIFY = "classify"; // Special attributes diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java b/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java index 830a330fca..fb0a38c505 100644 --- 
a/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java @@ -125,11 +125,16 @@ public enum Enhance { * If the enhanced data type is not {@code FLOAT} or {@code DOUBLE}, this has no effect. */ ApplyNormalizer, + /** + * Classify numeric values by sign into 1 or 0: + * {@code x < 0 --> 0, x >= 0 --> 1} + */ + ApplyClassifier, } - private static Set EnhanceAll = - Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums, Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, - Enhance.ConvertMissing, Enhance.CoordSystems, Enhance.ApplyStandardizer, Enhance.ApplyNormalizer)); + private static Set EnhanceAll = Collections.unmodifiableSet( + EnumSet.of(Enhance.ConvertEnums, Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, Enhance.ConvertMissing, + Enhance.CoordSystems, Enhance.ApplyStandardizer, Enhance.ApplyNormalizer, Enhance.ApplyClassifier)); private static Set EnhanceNone = Collections.unmodifiableSet(EnumSet.noneOf(Enhance.class)); private static Set defaultEnhanceMode = EnhanceAll; diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java index 0df7ab217c..5581a1faf4 100644 --- a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java @@ -289,6 +289,9 @@ Array convert(Array data, Set enhancements) { if (enhancements.contains(Enhance.ApplyNormalizer) && normalizer != null) { toApply.add(normalizer); } + if (enhancements.contains(Enhance.ApplyClassifier) && classifier != null) { + toApply.add(classifier); + } double[] dataArray = (double[]) data.get1DJavaArray(DataType.DOUBLE); @@ -865,6 +868,7 @@ public Array convert(Array in, boolean convertUnsigned, boolean applyScaleOffset private ScaleOffset scaleOffset; private Standardizer standardizer; private Normalizer normalizer; + private Classifier classifier; private ConvertMissing convertMissing; private
Set enhanceMode = EnumSet.noneOf(Enhance.class); // The set of enhancements that were made. @@ -939,6 +943,10 @@ private void createEnhancements() { if (normalizerAtt != null && this.enhanceMode.contains(Enhance.ApplyNormalizer) && dataType.isFloatingPoint()) { this.normalizer = Normalizer.createFromVariable(this); } + Attribute classifierAtt = findAttribute(CDM.CLASSIFY); + if (classifierAtt != null && this.enhanceMode.contains(Enhance.ApplyClassifier) && dataType.isNumeric()) { + this.classifier = Classifier.createFromVariable(this); + } } public Builder toBuilder() { diff --git a/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java b/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java new file mode 100644 index 0000000000..650b273d59 --- /dev/null +++ b/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java @@ -0,0 +1,70 @@ +package ucar.nc2.filter; + +import java.io.IOException; +import ucar.ma2.Array; +import ucar.ma2.DataType; +import ucar.ma2.IndexIterator; +import ucar.nc2.dataset.VariableDS; + +public class Classifier implements Enhancement { + private Classifier classifier = null; + private static Classifier emptyClassifier; + private int classifiedVal; + private int[] classifiedArray; + + public static Classifier createFromVariable(VariableDS var) { + try { + Array arr = var.read(); + // DataType type = var.getDataType(); + return emptyClassifier(); + } catch (IOException e) { + return emptyClassifier(); + } + } + + public static Classifier emptyClassifier() { + emptyClassifier = new Classifier(); + return emptyClassifier; + } + + /** Enough of a constructor */ + public Classifier() {} + + /** Classify double array */ + public int[] classifyDoubleArray(Array arr) { + int[] classifiedArray = new int[(int) arr.getSize()]; + int i = 0; + IndexIterator iterArr = arr.getIndexIterator(); + while (iterArr.hasNext()) { + Number value = (Number) iterArr.getObjectNext(); + if (!Double.isNaN(value.doubleValue())) { + + classifiedArray[i] = 
classifyArray(value.doubleValue()); + } + i++; + } + return classifiedArray; + } + + + + /** for a single double */ + public int classifyArray(double val) { + if (val >= 0) { + classifiedVal = 1; + } else { + classifiedVal = 0; + } + + return classifiedVal; + } + + @Override + public double convert(double val) { + return emptyClassifier.classifyArray(val); + } + + +} + + diff --git a/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml b/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml new file mode 100644 index 0000000000..b649675d99 --- /dev/null +++ b/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml @@ -0,0 +1,54 @@ + + + + + + + + 1.0 2.0 3.0 4.0 5.0 + + + + + -1.0 -2.0 -3.0 -4.0 -5.0 + + + + + 1.0 -2.0 0.0 4.0 -5.0 + + + + + 1.0 2.0 3.0 4.0 5.0 + + + + + -1.0 -2.0 -3.0 -4.0 -5.0 + + + + 1.0 -2.0 0.0 4.0 -5.0 + + + + + 1 2 3 4 5 + + + + + -1.0 -2.0 -3.0 -4.0 -5.0 + + + + 1.0 -2.0 0.0 4.0 -5.0 + + + + + diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestClassifier.java b/cdm/core/src/test/java/ucar/nc2/filter/TestClassifier.java new file mode 100644 index 0000000000..967861064e --- /dev/null +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestClassifier.java @@ -0,0 +1,51 @@ +package ucar.nc2.filter; + +import static org.junit.Assert.*; +import org.junit.Test; +import ucar.ma2.Array; + + +public class TestClassifier { + + + + /** test doubles */ + @Test + public void testClassifyDoubleArray_AllPositive() { + Classifier classifier = new Classifier(); + double[] input = {1.1, 2.2, 3.3}; + int[] expected = {1, 1, 1}; + Array DATA = Array.makeFromJavaArray(input); + assertArrayEquals(expected, classifier.classifyDoubleArray(DATA)); + } + + @Test + public void testClassifyDoubleArray_AllNegative() { + Classifier classifier = new Classifier(); + double[] input = {-1.1, -2.2, -3.3}; + int[] expected = {0, 0, 0}; + Array DATA = Array.makeFromJavaArray(input); + assertArrayEquals(expected, classifier.classifyDoubleArray(DATA)); + } + + @Test + public 
void testClassifyDoubleArray_Mixed() { + Classifier classifier = new Classifier(); + double[] input = {-1.1, 2.2, -3.3, 4.4}; + int[] expected = {0, 1, 0, 1}; + Array DATA = Array.makeFromJavaArray(input); + assertArrayEquals(expected, classifier.classifyDoubleArray(DATA)); + } + + @Test + public void testClassifyDoubleArray_WithZero() { + Classifier classifier = new Classifier(); + double[] input = {0.0, -1.1, 1.1, 0.0, 0.0, 0.0}; + int[] expected = {1, 0, 1, 1, 1, 1}; + Array DATA = Array.makeFromJavaArray(input); + assertArrayEquals(expected, classifier.classifyDoubleArray(DATA)); + } + + + +} diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java new file mode 100644 index 0000000000..00fac9b05d --- /dev/null +++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java @@ -0,0 +1,115 @@ +package ucar.nc2.ncml; + +import static com.google.common.truth.Truth.assertThat; +import static ucar.ma2.MAMath.nearlyEquals; + +import java.io.IOException; +import org.junit.Test; +import ucar.ma2.Array; +import ucar.ma2.DataType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.Variable; +import ucar.nc2.dataset.NetcdfDatasets; +import ucar.unidata.util.test.TestDir; + +public class TestEnhanceClassifier { + + private static String dataDir = TestDir.cdmLocalTestDataDir + "ncml/enhance/"; + + public static final int[] all_ones = {1, 1, 1, 1, 1}; + public static final Array DATA_all_ones = Array.makeFromJavaArray(all_ones); + public static final int[] all_zeroes = {0, 0, 0, 0, 0}; + public static final Array DATA_all_zeroes = Array.makeFromJavaArray(all_zeroes); + public static final int[] mixNumbers = {1, 0, 1, 1, 0}; + public static final Array DATA_mixNumbers = Array.makeFromJavaArray(mixNumbers); + + + /** test on doubles, all positives, all negatives and a mixed array */ + @Test + public void testEnhanceClassifier_doubles() throws IOException { + try (NetcdfFile ncfile = 
NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) { + Variable doublePositives = ncfile.findVariable("doublePositives"); + assertThat((Object) doublePositives).isNotNull(); + assertThat(doublePositives.getDataType()).isEqualTo(DataType.DOUBLE); + assertThat(doublePositives.attributes().hasAttribute("classify")).isTrue(); + Array dataDoubles = doublePositives.read(); + assertThat(nearlyEquals(dataDoubles, DATA_all_ones)).isTrue(); + + Variable doubleNegatives = ncfile.findVariable("doubleNegatives"); + assertThat((Object) doubleNegatives).isNotNull(); + assertThat(doubleNegatives.getDataType()).isEqualTo(DataType.DOUBLE); + assertThat(doubleNegatives.attributes().hasAttribute("classify")).isTrue(); + Array datadoubleNegatives = doubleNegatives.read(); + assertThat(nearlyEquals(datadoubleNegatives, DATA_all_zeroes)).isTrue(); + + Variable doubleMix = ncfile.findVariable("doubleMix"); + assertThat((Object) doubleMix).isNotNull(); + assertThat(doubleMix.getDataType()).isEqualTo(DataType.DOUBLE); + assertThat(doubleMix.attributes().hasAttribute("classify")).isTrue(); + Array datadoubleMix = doubleMix.read(); + assertThat(nearlyEquals(datadoubleMix, DATA_mixNumbers)).isTrue(); + + } + + + } + + /** test on floats, all positives, all negatives and a mixed array */ + @Test + public void testEnhanceClassifier_floats() throws IOException { + try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) { + + Variable floatPositives = ncfile.findVariable("floatPositives"); + assertThat((Object) floatPositives).isNotNull(); + assertThat(floatPositives.getDataType()).isEqualTo(DataType.FLOAT); + assertThat(floatPositives.attributes().hasAttribute("classify")).isTrue(); + Array datafloats = floatPositives.read(); + assertThat(nearlyEquals(datafloats, DATA_all_ones)).isTrue(); + + Variable floatNegatives = ncfile.findVariable("floatNegatives"); + assertThat((Object) floatNegatives).isNotNull(); + 
assertThat(floatNegatives.getDataType()).isEqualTo(DataType.FLOAT); + assertThat(floatNegatives.attributes().hasAttribute("classify")).isTrue(); + Array datafloatNegatives = floatNegatives.read(); + assertThat(nearlyEquals(datafloatNegatives, DATA_all_zeroes)).isTrue(); + + Variable floatMix = ncfile.findVariable("floatMix"); + assertThat((Object) floatMix).isNotNull(); + assertThat(floatMix.getDataType()).isEqualTo(DataType.FLOAT); + assertThat(floatMix.attributes().hasAttribute("classify")).isTrue(); + Array datafloatsMix = floatMix.read(); + assertThat(nearlyEquals(datafloatsMix, DATA_mixNumbers)).isTrue(); + + } + + } + + /** test on integers, all positives, all negatives and a mixed array; the classifier is applied to integer data as well */ + @Test + public void testEnhanceClassifier_integers() throws IOException { + + try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) { + Variable IntegerPositives = ncfile.findVariable("intPositives"); + assertThat((Object) IntegerPositives).isNotNull(); + assertThat(IntegerPositives.getDataType()).isEqualTo(DataType.INT); + assertThat(IntegerPositives.attributes().hasAttribute("classify")).isTrue(); + Array dataIntegers = IntegerPositives.read(); + assertThat(nearlyEquals(dataIntegers, DATA_all_ones)).isTrue(); + + Variable intNegatives = ncfile.findVariable("intNegatives"); + assertThat((Object) intNegatives).isNotNull(); + assertThat(intNegatives.getDataType()).isEqualTo(DataType.INT); + assertThat(intNegatives.attributes().hasAttribute("classify")).isTrue(); + Array dataintNegatives = intNegatives.read(); + assertThat(nearlyEquals(dataintNegatives, DATA_all_zeroes)).isTrue(); + + Variable intMix = ncfile.findVariable("intMix"); + assertThat((Object) intMix).isNotNull(); + assertThat(intMix.getDataType()).isEqualTo(DataType.INT); + assertThat(intMix.attributes().hasAttribute("classify")).isTrue(); + Array dataintMix = intMix.read(); + assertThat(nearlyEquals(dataintMix, DATA_mixNumbers)).isTrue(); + } + + }
+} From 55de4e1d5842728c7828e23b2e02c385dbc4eef0 Mon Sep 17 00:00:00 2001 From: Tara Drwenski Date: Wed, 12 Jun 2024 13:29:24 -0600 Subject: [PATCH 2/3] Update d4ts war file snapshot --- .../src/main/resources/ucar/unidata/util/test/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdm-test-utils/src/main/resources/ucar/unidata/util/test/Dockerfile b/cdm-test-utils/src/main/resources/ucar/unidata/util/test/Dockerfile index ed48604def..6b0b25cb00 100644 --- a/cdm-test-utils/src/main/resources/ucar/unidata/util/test/Dockerfile +++ b/cdm-test-utils/src/main/resources/ucar/unidata/util/test/Dockerfile @@ -6,7 +6,7 @@ USER root # TODO use release version of d4ts #ENV D4TS_WAR_URL https://artifacts.unidata.ucar.edu/repository/unidata-releases/edu/ucar/d4ts/5.4/d4ts-5.4.war -ENV D4TS_WAR_URL https://artifacts.unidata.ucar.edu/repository/unidata-snapshots/edu/ucar/d4ts/5.5-SNAPSHOT/d4ts-5.5-20240507.221412-201.war +ENV D4TS_WAR_URL https://artifacts.unidata.ucar.edu/repository/unidata-snapshots/edu/ucar/d4ts/5.5-SNAPSHOT/d4ts-5.5-20240603.195123-208.war ENV DTS_WAR_URL https://artifacts.unidata.ucar.edu/repository/unidata-releases/edu/ucar/dtswar/5.4/dtswar-5.4.war # Install necessary packages From 98545e9814e7671bef4d5eef20067c5395b946af Mon Sep 17 00:00:00 2001 From: Tara Drwenski Date: Thu, 13 Jun 2024 09:23:54 -0600 Subject: [PATCH 3/3] Fix nullptr exception (#1355) * Add test cases to url naming * Fix possible nullptr exception when parent does not exist --- cdm/core/src/main/java/thredds/filesystem/MFileOS.java | 4 +++- .../src/test/java/ucar/nc2/util/TestURLnaming.java | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cdm/core/src/main/java/thredds/filesystem/MFileOS.java b/cdm/core/src/main/java/thredds/filesystem/MFileOS.java index 3bb3e7ff1a..094a02f6ec 100644 --- a/cdm/core/src/main/java/thredds/filesystem/MFileOS.java +++ b/cdm/core/src/main/java/thredds/filesystem/MFileOS.java @@ -88,9 +88,11 @@ public 
String getName() { return file.getName(); } + @Nullable @Override public MFile getParent() { - return new MFileOS(file.getParentFile()); + File parent = file.getParentFile(); + return parent == null ? null : new MFileOS(parent); } @Override diff --git a/cdm/core/src/test/java/ucar/nc2/util/TestURLnaming.java b/cdm/core/src/test/java/ucar/nc2/util/TestURLnaming.java index a0f3771508..6444265fa2 100644 --- a/cdm/core/src/test/java/ucar/nc2/util/TestURLnaming.java +++ b/cdm/core/src/test/java/ucar/nc2/util/TestURLnaming.java @@ -5,6 +5,8 @@ package ucar.nc2.util; +import static com.google.common.truth.Truth.assertThat; + import java.lang.invoke.MethodHandles; import org.junit.Test; import org.slf4j.Logger; @@ -30,6 +32,14 @@ public void testResolve() { testResolve("file://test/me/", "file:/wanna", "file:/wanna"); testResolve("file://test/me/", "C:/wanna", "C:/wanna"); testResolve("http://test/me/", "file:wanna", "file:wanna"); + + testResolve("urlWithoutSlash", "file:///path/with/slash", "file:///path/with/slash"); + } + + @Test + public void testResolveFile() { + assertThat(URLnaming.resolveFile("urlWithoutSlash", "file:///path/with/slash")) + .isEqualTo("file:///path/with/slash"); } private void testResolve(String base, String rel, String result) {