diff --git a/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java b/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java
index 14d862e141..3f60ea6ca5 100644
--- a/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java
+++ b/cdm/core/src/main/java/ucar/nc2/filter/Classifier.java
@@ -1,7 +1,6 @@
package ucar.nc2.filter;
import java.io.IOException;
-import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import ucar.ma2.Array;
import ucar.ma2.DataType;
import ucar.nc2.dataset.VariableDS;
@@ -27,15 +26,14 @@ public double convert(double val) {
return classifyArray(val);
}
-
- // Custom exception for invalid values ?
+ /** Custom exception for invalid values */
public static class InvalidValueException extends Exception {
public InvalidValueException(String message) {
super(message);
}
}
- // Method to classify int array
+ /** for a single double */
public int classifyArray(double val) {
int classifiedVal;
if (val >= 0) {
@@ -47,7 +45,19 @@ public int classifyArray(double val) {
return classifiedVal;
}
- /** for a single value? */
+ /** for a single float */
+ public int classifyArray(float val) {
+ int classifiedVal;
+ if (val >= 0) {
+ classifiedVal = 1;
+ } else {
+ classifiedVal = 0;
+ }
+
+ return classifiedVal;
+ }
+
+ /** for a single int ? */
public int classifyArray(int val) {
int classifiedVal;
if (val >= 0) {
@@ -59,6 +69,12 @@ public int classifyArray(int val) {
return classifiedVal;
}
+ /**
+ * Method to classify int array
+ * maybe not needed if enhancement applied only to doubles and floats?
+ */
+
+ /** Classify integer array */
public int[] classifyArray(int[] inputArray) throws InvalidValueException {
int[] classifiedArray = new int[inputArray.length];
@@ -72,7 +88,7 @@ public int[] classifyArray(int[] inputArray) throws InvalidValueException {
return classifiedArray;
}
- // Method to classify double array
+ /** Classify double array */
public int[] classifyArray(double[] inputArray) throws InvalidValueException {
int[] classifiedArray = new int[inputArray.length];
@@ -90,7 +106,7 @@ public int[] classifyArray(double[] inputArray) throws InvalidValueException {
return classifiedArray;
}
- // Method to classify float array
+ /** Classify float array */
public int[] classifyArray(float[] inputArray) throws InvalidValueException {
int[] classifiedArray = new int[inputArray.length];
@@ -108,43 +124,6 @@ public int[] classifyArray(float[] inputArray) throws InvalidValueException {
return classifiedArray;
}
-
- public static void main(String[] args) {
- try {
- // Example usage
- Classifier classifier = new Classifier();
-
- int[] intArray = {10, -5, 0, 20, 5};
- double[] doubleArray = {10.5, -5.5, 0.0, 20.1, 5.0};
- float[] floatArray = {10.5f, -5.5f, 0.0f, 20.1f, 5.0f};
-
- int[] intResult = classifier.classifyArray(intArray);
- int[] doubleResult = classifier.classifyArray(doubleArray); // This will throw an exception
- int[] floatResult = classifier.classifyArray(floatArray); // This will throw an exception
-
- // Print the classified arrays
- System.out.print("Classified int array: ");
- for (int value : intResult) {
- System.out.print(value + " ");
- }
- System.out.println();
-
- System.out.print("Classified double array: ");
- for (int value : doubleResult) {
- System.out.print(value + " ");
- }
- System.out.println();
-
- System.out.print("Classified float array: ");
- for (int value : floatResult) {
- System.out.print(value + " ");
- }
- System.out.println();
-
- } catch (InvalidValueException e) {
- System.err.println(e.getMessage());
- }
- }
}
diff --git a/cdm/core/src/main/java/ucar/nc2/filter/QuantileTransformer1D.java b/cdm/core/src/main/java/ucar/nc2/filter/QuantileTransformer1D.java
deleted file mode 100644
index 87cc5121fa..0000000000
--- a/cdm/core/src/main/java/ucar/nc2/filter/QuantileTransformer1D.java
+++ /dev/null
@@ -1,246 +0,0 @@
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.Scanner;
-import org.apache.commons.math.MathException;
-import org.apache.commons.math.distribution.NormalDistribution;
-import org.apache.commons.math.distribution.NormalDistributionImpl; // Import the concrete class
-// import org.apache.commons.math3.analysis.interpolation.LinearInterpolator
-
-
-public class QuantileTransformer1D {
-
- /**
- * The actual number of quantiles used to discretize the cumulative
- * distribution function.
- */
- private int n_quantiles_;
- /**
- * ndarray of shape (n_quantiles, n_features)
- * The values corresponding the quantiles of reference.
- * But in my case now it's only 1D, so shape (n_quantiles)
- */
- private double[] quantiles_;
- /**
- * ndarray of shape (n_quantiles, )
- * Quantiles of references.
- */
- private double[] references_;
- /** to be used in constructor */
- private int n_quantiles;
- /** to chose wether normal or uniform */
- private String outputDistribution;
-
- // Default values
- /** Constructor with all parameters */
- public QuantileTransformer1D(int n_quantiles, String outputDistribution) {
- this.n_quantiles = n_quantiles;
- this.outputDistribution = outputDistribution;
- System.out.println("I am in the constructor");
-
- }
-
- /** Dense matrix fit ? */
- /** should compute percentiles for dense matrix (i.e. not sparse) */
- public void _dense_fit(double[] X) {
- // Initialize the references array with the same length as X
- double[] references = new double[references_.length];
- /** X --> The data used to scale along the features axis. */
- int n_samples = X.length;
-
- for (int i = 0; i < this.references_.length; i++) {
- references[i] = this.references_[i] * 100;
- }
- System.out.println("I am in the _dense_fit");
-
-
- this.quantiles_ = computePercentiles(X, references);
- /** make it monotonically increasing */
- ensureMonotonic(this.quantiles_);
-
- }
-
- /** Compute the quantiles used for transforming. */
- public void fit(double[] X) {
- /** Fit method --> Compute the quantiles used for transforming. */
- /**
- * X --> The data used to scale along the features axis.
- * shaoe (n_samples)
- */
-
- int n_samples = X.length;
- // Compute the number of quantiles to use
- this.n_quantiles_ = Math.max(1, Math.min(this.n_quantiles, n_samples));
- // Compute the references array
- this.references_ = linspace(0, 1, this.n_quantiles_);
- System.out.println("I am in the fit");
-
- _dense_fit(X);
-
- }
-
- public double[] _transform_col(double[] X_col, double[] quantiles) throws MathException {
- String output_distribution = this.outputDistribution;
- double BOUNDS_THRESHOLD = 1e-7;
-
- double lower_bound_x = quantiles[0];
- double upper_bound_x = quantiles[quantiles.length - 1];
- int lower_bound_y = 0;
- int upper_bound_y = 1;
-
- // Create lower and upper bounds indices
- boolean[] lowerBoundsIdx = new boolean[X_col.length];
- boolean[] upperBoundsIdx = new boolean[X_col.length];
- if ("normal".equals(output_distribution)) {
- for (int i = 0; i < X_col.length; i++) {
- lowerBoundsIdx[i] = X_col[i] - BOUNDS_THRESHOLD < lower_bound_x;
- upperBoundsIdx[i] = X_col[i] + BOUNDS_THRESHOLD > upper_bound_x;
- }
- } else if ("uniform".equals(output_distribution)) {
- for (int i = 0; i < X_col.length; i++) {
- lowerBoundsIdx[i] = X_col[i] == lower_bound_x;
- upperBoundsIdx[i] = X_col[i] == upper_bound_x;
- }
- }
-
- // Assuming X_col, quantiles, references_ are already defined variables of type double[]
-
- double[] interpolated1 = interpolate(X_col, quantiles, this.references_);
- double[] interpolated2 = interpolate(Arrays.stream(X_col).map(x -> -x).toArray(),
- Arrays.stream(reverseArray(quantiles)).map(x -> -x).toArray(),
- Arrays.stream(reverseArray(this.references_)).map(x -> -x).toArray());
-
- double[] result = new double[X_col.length];
- for (int i = 0; i < X_col.length; i++) {
- result[i] = 0.5 * (interpolated1[i] - interpolated2[i]);
- }
-
- /**
- * X_col[upper_bounds_idx] = upper_bound_y
- * X_col[lower_bounds_idx] = lower_bound_y
- */
-
- for (int i = 0; i < X_col.length; i++) {
- if (upperBoundsIdx[i]) {
- X_col[i] = upper_bound_y;
- }
- if (lowerBoundsIdx[i]) {
- X_col[i] = lower_bound_y;
- }
- }
- System.out.println("I am in the _transform_col");
- // Assuming X_col, BOUNDS_THRESHOLD are defined variables
-
- if ("normal".equals(outputDistribution)) {
- // Create a normal distribution object
- NormalDistribution normalDist = new NormalDistributionImpl();
-
- // Perform the inverse transform using the percent point function (ppf)
- for (int i = 0; i < X_col.length; i++) {
- X_col[i] = normalDist.inverseCumulativeProbability(X_col[i]);
- }
-
- // Find the values to clip the data to avoid mapping to infinity
- double clip_min = normalDist.inverseCumulativeProbability(BOUNDS_THRESHOLD - Double.MIN_VALUE);
- double clip_max = normalDist.inverseCumulativeProbability(1 - (BOUNDS_THRESHOLD - Double.MIN_VALUE));
-
- // Clip the data such that the inverse transform will be consistent
- for (int i = 0; i < X_col.length; i++) {
- X_col[i] = Math.max(clip_min, Math.min(clip_max, X_col[i]));
- }
- }
- return X_col;
- // For uniform distribution, the ppf is the identity function, so no transformation is needed
- // Else output distribution is uniform and we let X_col unchanged
-
- }
-
- public double[] _transform(double[] X) throws MathException {
- int numRows = X.length;
- double[] newX;
-
- newX = _transform_col(X, this.quantiles_);
- System.out.println("I am in the _transform");
-
- return newX;
- }
-
-
- public static double[] computePercentiles(double[] values, double[] percentiles) {
- Arrays.sort(values);
- double[] results = new double[percentiles.length];
- for (int i = 0; i < percentiles.length; i++) {
- double rank = percentiles[i] / 100 * (values.length - 1);
- int lowerIndex = (int) Math.floor(rank);
- int upperIndex = (int) Math.ceil(rank);
- if (lowerIndex == upperIndex) {
- results[i] = values[lowerIndex];
- } else {
- double weight = rank - lowerIndex;
- results[i] = (1.0 - weight) * values[lowerIndex] + weight * values[upperIndex];
- }
- }
- return results;
- }
-
- public static void ensureMonotonic(double[] array) {
- for (int i = 1; i < array.length; i++) {
- if (array[i] < array[i - 1]) {
- array[i] = array[i - 1];
- }
- }
- }
-
- public static double[] reverseArray(double[] array) {
- double[] reversed = new double[array.length];
- for (int i = 0; i < array.length; i++) {
- reversed[i] = array[array.length - 1 - i];
- }
- return reversed;
- }
-
- public static double[] interpolate(double[] x, double[] xp, double[] fp) {
- double[] interpolated = new double[x.length];
- for (int i = 0; i < x.length; i++) {
- double xi = x[i];
- if (xi < xp[0]) {
- interpolated[i] = fp[0];
- } else if (xi > xp[xp.length - 1]) {
- interpolated[i] = fp[fp.length - 1];
- } else {
- int j = 0;
- while (xi > xp[j + 1]) {
- j++;
- }
- double x0 = xp[j];
- double x1 = xp[j + 1];
- double y0 = fp[j];
- double y1 = fp[j + 1];
- interpolated[i] = y0 + (xi - x0) * (y1 - y0) / (x1 - x0);
- }
- }
- return interpolated;
- }
-
- public static double[] linspace(double min, double max, int points) {
- double[] d = new double[points];
- for (int i = 0; i < points; i++) {
- d[i] = min + i * (max - min) / (points - 1);
- }
- return d;
- }
-
- // public static double[] fit_transformer(double X)
-
- public static void main(String[] args) throws MathException {
- // Example usage
- double[] data = {1.8, 2, 2.3, 2.4, 2.5, 5, 10, 11, 12, 13.1, 14.4, 15, 16};
- QuantileTransformer1D transformer = new QuantileTransformer1D(10, "uniform");
- Arrays.sort(data);
- transformer.fit(data);
- double[] transformedData = transformer._transform(data);
- System.out.println(Arrays.toString(transformedData));
- }
-}
diff --git a/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml b/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml
index c11819d7f3..8d21e2f8f1 100644
--- a/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml
+++ b/cdm/core/src/test/data/ncml/enhance/testClassifier.ncml
@@ -40,15 +40,6 @@
1 2 3 4 5
-
-
- -1 -2 -3 -4 -5
-
-
-
-
- 1 -2 0 4 -5
-
diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java
index 5108bbae78..c324dbae64 100644
--- a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java
+++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceClassifier.java
@@ -25,7 +25,7 @@ public class TestEnhanceClassifier {
public static final int[] inttest = {1, 2, 3, 4, 5};
public static final Array DATA_inttest = Array.makeFromJavaArray(inttest);
-
+ /** test on doubles, all positives, all negatives and a mixed array */
@Test
public void testEnhanceClassifier_doubles() throws IOException {
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) {
@@ -55,6 +55,7 @@ public void testEnhanceClassifier_doubles() throws IOException {
}
+ /** test on floats, all positives, all negatives and a mixed array */
@Test
public void testEnhanceClassifier_floats() throws IOException {
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) {
@@ -84,12 +85,12 @@ public void testEnhanceClassifier_floats() throws IOException {
}
+ /** enhance is not applied to Integers, so we expect the same values after application */
@Test
public void testEnhanceClassifier_integers() throws IOException {
- /** enhance is not applied to Integers.. */
+
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testClassifier.ncml", true, null)) {
Variable IntegerPositives = ncfile.findVariable("intPositives");
- Array datafloatsz = IntegerPositives.read();
assertThat((Object) IntegerPositives).isNotNull();
assertThat(IntegerPositives.getDataType()).isEqualTo(DataType.INT);
assertThat(IntegerPositives.attributes().hasAttribute("classify")).isTrue();