diff --git a/CHANGELOG.md b/CHANGELOG.md index c2a4f1d7..bb7814c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,24 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased / Draft -### Changed - -- `clip`: Throw an exception if min > max [#472](https://github.com/Open-EO/openeo-processes/issues/472) - -### Fixed - -- `between`: Clarify that `null` is passed through. -- `eq` and `neq`: Explicitly set the minimum value for the `delta` parameter. -- `filter_bbox`, `load_collection`, `load_stac`: Clarified that the bounding box is reprojected to the CRS of the spatial data cube dimensions if required. -- `filter_spatial`: Clarified that masking is applied using the given geometries. [#469](https://github.com/Open-EO/openeo-processes/issues/469) -- `sqrt`: Clarified that NaN is returned for negative numbers. - ## [2.0.0-rc.1] - 2023-05-25 -### Fixed - -- `array_append`: Added `number` type for labels to be consistent with other processes. Default to numerical index instead of string. Clarify that the `label` parameter only applies to labeled arrays. - ### Added - New processes in proposal state: @@ -32,7 +16,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `filter_vector` - `flatten_dimensions` - `load_geojson` + - `load_ml_model` - `load_url` + - `ml_fit_class_random_forest` + - `ml_fit_regr_random_forest` + - `ml_fit_class-xgboost` + - `ml_predict` + - `save_ml_model` - `unflatten_dimension` - `vector_buffer` - `vector_reproject` diff --git a/README.md b/README.md index 621276b6..24c28899 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ This repository contains a set of files formally describing the openEO Processes * [implementation.md](meta/implementation.md) in the `meta` folder provide some additional implementation details for back-ends. For back-end implementors, it's highly recommended to read them. * [subtype-schemas.json](meta/subtype-schemas.json) in the `meta` folder defines common data types (`subtype`s) for JSON Schema used in openEO processes. * Previously, an `examples` folder contained examples of user-defined processes. These have been migrated to the [openEO Community Examples](https://github.com/Open-EO/openeo-community-examples/tree/main/processes) repository. -* The [`dev`](dev/) folder can be used to test the process specification for validity and consistent "style". It also allows rendering the processes in a web browser. Check the [development documentation](dev/README.md) for details. +* The [`tests`](tests/) folder can be used to test the process specification for validity and consistent "style". It also allows rendering the processes in a web browser. Check the [tests documentation](tests/README.md) for details. ## Process diff --git a/and.json b/and.json index 3b28c8ef..c24ce95b 100644 --- a/and.json +++ b/and.json @@ -1,7 +1,7 @@ { "id": "and", "summary": "Logical AND", - "description": "Checks if **both** values are true.\n\nEvaluates parameter `x` before `y` and stops once the outcome is unambiguous. If any argument is `null`, the result will be `null` if the outcome is ambiguous.\n\n**Truth table:**\n\n```\nx \\ y || null | false | true\n----- || ----- | ----- | -----\nnull || null | false | null\nfalse || false | false | false\ntrue || null | false | true\n```", + "description": "Checks if **both** values are true.\n\nEvaluates parameter `x` before `y` and stops once the outcome is unambiguous. 
If any argument is `null`, the result will be `null` if the outcome is ambiguous.\n\n**Truth table:**\n\n```\na \\ b || null | false | true\n----- || ----- | ----- | -----\nnull || null | false | null\nfalse || false | false | false\ntrue || null | false | true\n```", "categories": [ "logic" ], @@ -90,4 +90,4 @@ "result": true } } -} +} \ No newline at end of file diff --git a/array_append.json b/array_append.json index f09145d2..80b48d12 100644 --- a/array_append.json +++ b/array_append.json @@ -25,20 +25,15 @@ }, { "name": "label", - "description": "Provides a label for the new value. If not given or `null`, the natural next array index as number is used as the label. If in any case the label exists, a `LabelExists` exception is thrown.\n\nThis parameter only applies if the given array is a labeled array. If a non-null values is provided and the array is not labeled, an `ArrayNotLabeled` exception is thrown.", + "description": "If the given array is a labeled array, a new label for the new value should be given. If not given or `null`, the array index as string is used as the label. If in any case the label exists, a `LabelExists` exception is thrown.", "optional": true, "default": null, - "schema": [ - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "null" - } - ] + "schema": { + "type": [ + "string", + "null" + ] + } } ], "returns": { @@ -53,9 +48,6 @@ "exceptions": { "LabelExists": { "message": "An array element with the specified label already exists." - }, - "ArrayNotLabeled": { - "message": "A label can't be provided as the given array is not labeled." } }, "examples": [ diff --git a/between.json b/between.json index 12e37693..b2e59b92 100644 --- a/between.json +++ b/between.json @@ -8,7 +8,7 @@ "parameters": [ { "name": "x", - "description": "The value to check.\n\nThe no-data value `null` is passed through and therefore gets propagated.", + "description": "The value to check.", "schema": { "description": "Any data type is allowed." } @@ -38,7 +38,7 @@ } ], "returns": { - "description": "`true` if `x` is between the specified bounds, `null` if `x` is a no-data value, `false` otherwise.", + "description": "`true` if `x` is between the specified bounds, otherwise `false`.", "schema": { "type": [ "boolean", diff --git a/clip.json b/clip.json index de2a4d1a..adbf7eaa 100644 --- a/clip.json +++ b/clip.json @@ -1,7 +1,7 @@ { "id": "clip", "summary": "Clip a value between a minimum and a maximum", - "description": "Clips a number between specified minimum and maximum values. A value larger than the maximum value is set to the maximum value, a value lower than the minimum value is set to the minimum value. If the maximum value is smaller than the minimum number, the process throws a `MinMaxSwapped` exception.\n\nThe no-data value `null` is passed through and therefore gets propagated.", + "description": "Clips a number between specified minimum and maximum values. A value larger than the maximum value is set to the maximum value, a value lower than the minimum value is set to the minimum value.\n\nThe no-data value `null` is passed through and therefore gets propagated.", "categories": [ "math" ], @@ -40,11 +40,6 @@ ] } }, - "exceptions": { - "MinMaxSwapped": { - "message": "The minimum value should be lower than or equal to the maximum value." 
- } - }, "examples": [ { "arguments": { @@ -78,5 +73,34 @@ }, "returns": null } - ] -} + ], + "process_graph": { + "min": { + "process_id": "min", + "arguments": { + "data": [ + { + "from_parameter": "max" + }, + { + "from_parameter": "x" + } + ] + } + }, + "max": { + "process_id": "max", + "arguments": { + "data": [ + { + "from_parameter": "min" + }, + { + "from_node": "min" + } + ] + }, + "result": true + } + } +} \ No newline at end of file diff --git a/eq.json b/eq.json index 0c62b42c..e7712399 100644 --- a/eq.json +++ b/eq.json @@ -38,8 +38,7 @@ "type": [ "number", "null" - ], - "minimumExclusive": 0 + ] }, "default": null, "optional": true diff --git a/filter_bbox.json b/filter_bbox.json index b7335847..3e2a7485 100644 --- a/filter_bbox.json +++ b/filter_bbox.json @@ -1,7 +1,7 @@ { "id": "filter_bbox", "summary": "Spatial filter using a bounding box", - "description": "Limits the data cube to the specified bounding box.\n\n* For raster data cubes, the filter retains a pixel in the data cube if the point at the pixel center intersects with the bounding box (as defined in the Simple Features standard by the OGC). Alternatively, ``filter_spatial()`` can be used to filter by geometry.\n* For vector data cubes, the filter retains the geometry in the data cube if the geometry is fully within the bounding box (as defined in the Simple Features standard by the OGC). All geometries that were empty or not contained fully within the bounding box will be removed from the data cube.\n\nAlternatively, filter spatially with geometries using ``filter_spatial()`` (on a raster data cube) or ``filter_vector()`` (on a vector data cube).", + "description": "Limits the data cube to the specified bounding box.\n\n* For raster data cubes, the filter retains a pixel in the data cube if the point at the pixel center intersects with the bounding box (as defined in the Simple Features standard by the OGC). Alternatively, ``filter_spatial()`` can be used to filter by geometry.\n* For vector data cubes, the filter retains the geometry in the data cube if the geometry is fully within the bounding box (as defined in the Simple Features standard by the OGC). 
All geometries that were empty or not contained fully within the bounding box will be removed from the data cube.\n\nAlternatively, ``filter_vector()`` can be used to filter by geometry.", "categories": [ "cubes", "filter" @@ -39,7 +39,7 @@ }, { "name": "extent", - "description": "A bounding box, which may include a vertical axis (see `base` and `height`).\n\nIf the bounding box is not provided in the coordinate reference system (CRS) of the data cube, the bounding box is reprojected to the CRS of the spatial data cube dimensions.", + "description": "A bounding box, which may include a vertical axis (see `base` and `height`).", "schema": { "type": "object", "subtype": "bounding-box", diff --git a/filter_spatial.json b/filter_spatial.json index ed4f7c3f..c0c116cd 100644 --- a/filter_spatial.json +++ b/filter_spatial.json @@ -1,7 +1,7 @@ { "id": "filter_spatial", "summary": "Spatial filter raster data cubes using geometries", - "description": "Limits the raster data cube over the spatial dimensions to the specified geometries.\n\n- For **polygons**, the filter retains a pixel in the data cube if the point at the pixel center intersects with at least one of the polygons (as defined in the Simple Features standard by the OGC).\n- For **points**, the process considers the closest pixel center.\n- For **lines** (line strings), the process considers all the pixels whose centers are closest to at least one point on the line.\n\nMore specifically, pixels outside of the bounding box of the given geometries will not be available after filtering. All pixels inside the bounding box that are not retained will be set to `null` (no data).\n\n Alternatively, use ``filter_bbox()`` to filter with a bounding box or ``filter_vector()`` to filter a vector data cube based on geometries. Use ``mask_polygon()`` to mask without changing the spatial extent of your data cube.", + "description": "Limits the raster data cube over the spatial dimensions to the specified geometries.\n\n- For **polygons**, the filter retains a pixel in the data cube if the point at the pixel center intersects with at least one of the polygons (as defined in the Simple Features standard by the OGC).\n- For **points**, the process considers the closest pixel center.\n- For **lines** (line strings), the process considers all the pixels whose centers are closest to at least one point on the line.\n\nMore specifically, pixels outside of the bounding box of the given geometry will not be available after filtering. All pixels inside the bounding box that are not retained will be set to `null` (no data).\n\n Alternatively, use ``filter_bbox()`` to filter by bounding box.", "categories": [ "cubes", "filter" @@ -26,7 +26,7 @@ }, { "name": "geometries", - "description": "One or more geometries used for spatial filtering and masking, given as GeoJSON or vector data cube.", + "description": "One or more geometries used for filtering, given as GeoJSON or vector data cube. If multiple geometries are provided, the union of them is used. Empty geometries are ignored.\n\nLimits the data cube to the bounding box of the given geometries. No implicit masking gets applied. 
To mask the pixels of the data cube use ``mask_polygon()``.", "schema": [ { "title": "Vector Data Cube", diff --git a/linear_scale_range.json b/linear_scale_range.json index 01f09857..172027c9 100644 --- a/linear_scale_range.json +++ b/linear_scale_range.json @@ -1,7 +1,7 @@ { "id": "linear_scale_range", "summary": "Linear transformation between two ranges", - "description": "Performs a linear transformation between the input and output range.\n\nThe given number in `x` is clipped to the bounds specified in `inputMin` and `inputMax` so that the underlying formula *`((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin`* never returns a value outside of the range defined by `outputMin` and `outputMax`.\n\nPotential use case include\n\n* scaling values to the 8-bit range (0 - 255) often used for numeric representation of values in one of the channels of the [RGB colour model](https://en.wikipedia.org/wiki/RGB_color_model#Numeric_representations) or\n* calculating percentages (0 - 100).\n\nThe no-data value `null` is passed through and therefore gets propagated.", + "description": "Performs a linear transformation between the input and output range.\n\nThe given number in `x` is clipped to the bounds specified in `inputMin` and `inputMax` so that the underlying formula *`((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin`* never returns any value lower than `outputMin` or greater than `outputMax`.\n\nPotential use case include\n\n* scaling values to the 8-bit range (0 - 255) often used for numeric representation of values in one of the channels of the [RGB colour model](https://en.wikipedia.org/wiki/RGB_color_model#Numeric_representations) or\n* calculating percentages (0 - 100).\n\nThe no-data value `null` is passed through and therefore gets propagated.", "categories": [ "math" ], @@ -166,4 +166,4 @@ "result": true } } -} +} \ No newline at end of file diff --git a/load_collection.json b/load_collection.json index a6701cc3..b93c879c 100644 --- a/load_collection.json +++ b/load_collection.json @@ -64,7 +64,7 @@ "default": null }, "crs": { - "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system. If the bounding box is not provided in the coordinate reference system (CRS) of the data cube, the bounding box is reprojected to the CRS of the spatial data cube dimensions.", + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", "anyOf": [ { "title": "EPSG Code", diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index 347df234..83ce72ba 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -232,6 +232,12 @@ } } }, + "ml-model": { + "type": "object", + "subtype": "ml-model", + "title": "Machine Learning Model", + "description": "A machine learning model, accompanied with STAC metadata that implements the the STAC ml-model extension." 
+ }, "output-format": { "type": "string", "subtype": "output-format", diff --git a/proposals/ml_fit_class_xgboost.json b/ml_fit_class_xgboost.json similarity index 100% rename from proposals/ml_fit_class_xgboost.json rename to ml_fit_class_xgboost.json diff --git a/neq.json b/neq.json index 0e22b347..ff6bc9fd 100644 --- a/neq.json +++ b/neq.json @@ -38,8 +38,7 @@ "type": [ "number", "null" - ], - "minimumExclusive": 0 + ] }, "default": null, "optional": true diff --git a/or.json b/or.json index 4a83a63e..5964a341 100644 --- a/or.json +++ b/or.json @@ -1,7 +1,7 @@ { "id": "or", "summary": "Logical OR", - "description": "Checks if **at least one** of the values is true. Evaluates parameter `x` before `y` and stops once the outcome is unambiguous. If a component is `null`, the result will be `null` if the outcome is ambiguous.\n\n**Truth table:**\n\n```\nx \\ y || null | false | true\n----- || ---- | ----- | ----\nnull || null | null | true\nfalse || null | false | true\ntrue || true | true | true\n```", + "description": "Checks if **at least one** of the values is true. Evaluates parameter `x` before `y` and stops once the outcome is unambiguous. If a component is `null`, the result will be `null` if the outcome is ambiguous.\n\n**Truth table:**\n\n```\na \\ b || null | false | true\n----- || ---- | ----- | ----\nnull || null | null | true\nfalse || null | false | true\ntrue || true | true | true\n```", "categories": [ "logic" ], @@ -90,4 +90,4 @@ "result": true } } -} +} \ No newline at end of file diff --git a/proposals/filter_vector.json b/proposals/filter_vector.json index 1bb33c86..349f8d0f 100644 --- a/proposals/filter_vector.json +++ b/proposals/filter_vector.json @@ -1,7 +1,7 @@ { "id": "filter_vector", "summary": "Spatial vector filter using geometries", - "description": "Limits the vector data cube to the specified geometries. The process works on geometries as defined in the Simple Features standard by the OGC. All geometries that were empty or become empty will be removed from the data cube. Alternatively, use ``filter_bbox()`` to filter with a bounding box or ``filter_spatial()`` to filter a raster data cube based on geometries.", + "description": "Limits the vector data cube to the specified geometries. The process works on geometries as defined in the Simple Features standard by the OGC. All geometries that were empty or become empty will be removed from the data cube. Alternatively, use ``filter_bbox()`` to filter by bounding box.", "categories": [ "cubes", "filter", diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json new file mode 100644 index 00000000..7fa86d89 --- /dev/null +++ b/proposals/load_ml_model.json @@ -0,0 +1,46 @@ +{ + "id": "load_ml_model", + "summary": "Load a ML model", + "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``ml_fit_regr_random_forest()`` and ``save_ml_model()``.", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "uri", + "description": "The STAC Item to load the machine learning model from. 
The STAC Item must implement the `ml-model` extension.", + "schema": [ + { + "title": "URL", + "type": "string", + "format": "uri", + "subtype": "uri", + "pattern": "^https?://" + }, + { + "title": "User-uploaded File", + "type": "string", + "subtype": "file-path", + "pattern": "^[^\r\n\\:'\"]+$" + } + ] + } + ], + "returns": { + "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/load_stac.json b/proposals/load_stac.json index 262745fc..c71d3a80 100644 --- a/proposals/load_stac.json +++ b/proposals/load_stac.json @@ -67,7 +67,7 @@ "default": null }, "crs": { - "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system. If the bounding box is not provided in the coordinate reference system (CRS) of the data cube, the bounding box is reprojected to the CRS of the spatial data cube dimensions.", + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", "anyOf": [ { "title": "EPSG Code", diff --git a/proposals/ml_fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json new file mode 100644 index 00000000..63da48a1 --- /dev/null +++ b/proposals/ml_fit_class_random_forest.json @@ -0,0 +1,110 @@ +{ + "id": "ml_fit_class_random_forest", + "summary": "Train a random forest classification model", + "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest classification model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "predictors", + "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "schema": [ + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "bands" + } + ] + }, + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "other" + } + ] + } + ] + }, + { + "name": "target", + "description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. 
fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + } + }, + { + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] + }, + { + "name": "num_trees", + "description": "The number of trees build within the Random Forest classification.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/ml_fit_regr_random_forest.json b/proposals/ml_fit_regr_random_forest.json new file mode 100644 index 00000000..39207324 --- /dev/null +++ b/proposals/ml_fit_regr_random_forest.json @@ -0,0 +1,110 @@ +{ + "id": "ml_fit_regr_random_forest", + "summary": "Train a random forest regression model", + "description": "Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "predictors", + "description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "schema": [ + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "bands" + } + ] + }, + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "other" + } + ] + } + ] + }, + { + "name": "target", + "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. 
fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + } + }, + { + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split. This is often the default for regression.\n- `sqrt`: The square root of the number of variables are considered for each split.", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] + }, + { + "name": "num_trees", + "description": "The number of trees build within the Random Forest regression.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/ml_predict.json b/proposals/ml_predict.json new file mode 100644 index 00000000..87cd2500 --- /dev/null +++ b/proposals/ml_predict.json @@ -0,0 +1,49 @@ +{ + "id": "ml_predict", + "summary": "Predict using ML", + "description": "Applies a machine learning model to a data cube of input features and returns the predicted values.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The data cube containing the input features.", + "schema": { + "type": "object", + "subtype": "datacube" + } + }, + { + "name": "model", + "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + { + "name": "dimensions", + "description": "Zero or more dimensions that will be reduced by the model. Fails with a `DimensionNotAvailable` exception if one of the specified dimensions does not exist.", + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + ], + "returns": { + "description": "A data cube with the predicted values. It removes the specified dimensions and adds new dimension for the predicted values. It has the name `predictions` and is of type `other`. 
If a single value is returned, the dimension has a single label with name `0`.", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "other" + } + ] + } + } +} diff --git a/proposals/predict_curve.json b/proposals/predict_curve.json index 479b7fec..c4d78d99 100644 --- a/proposals/predict_curve.json +++ b/proposals/predict_curve.json @@ -1,7 +1,7 @@ { "id": "predict_curve", - "summary": "Predict values", - "description": "Predict values using a model function and pre-computed parameters. The process is intended to compute values for new labels.", + "summary": "Predict values using a model function", + "description": "Predict values using a model function and pre-computed parameters. The process is primarily intended to compute values for new labels, but it can also fill gaps where existing labels contain no-data (`null`) values.", "categories": [ "cubes", "math" diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json new file mode 100644 index 00000000..5e9ea8b0 --- /dev/null +++ b/proposals/save_ml_model.json @@ -0,0 +1,44 @@ +{ + "id": "save_ml_model", + "summary": "Save a ML model", + "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The data to store as a machine learning model.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + { + "name": "options", + "description": "Additional parameters to create the file(s).", + "schema": { + "type": "object", + "additionalParameters": false + }, + "default": {}, + "optional": true + } + ], + "returns": { + "description": "Returns `false` if the process failed to store the model, `true` otherwise.", + "schema": { + "type": "boolean" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file diff --git a/sqrt.json b/sqrt.json index b85caf94..bc1aeb6c 100644 --- a/sqrt.json +++ b/sqrt.json @@ -1,7 +1,7 @@ { "id": "sqrt", "summary": "Square root", - "description": "Computes the square root of a real number `x`, which is equal to calculating `x` to the power of *0.5*. For negative `x`, the process returns `NaN`.\n\nA square root of x is a number a such that *`a² = x`*. Therefore, the square root is the inverse function of a to the power of 2, but only for *a >= 0*.\n\nThe no-data value `null` is passed through and therefore gets propagated.", + "description": "Computes the square root of a real number `x`, which is equal to calculating `x` to the power of *0.5*.\n\nA square root of x is a number a such that *`a² = x`*. 
Therefore, the square root is the inverse function of a to the power of 2, but only for *a >= 0*.\n\nThe no-data value `null` is passed through and therefore gets propagated.", "categories": [ "math", "math > exponential & logarithmic" ], @@ -58,11 +58,6 @@ "rel": "about", "href": "http://mathworld.wolfram.com/SquareRoot.html", "title": "Square root explained by Wolfram MathWorld" - }, - { - "rel": "about", - "href": "https://ieeexplore.ieee.org/document/8766229", - "title": "IEEE Standard 754-2019 for Floating-Point Arithmetic" - } ], "process_graph": { @@ -77,4 +72,4 @@ "result": true } } -} +} \ No newline at end of file diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 00000000..e29b5fbe --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,3 @@ +/node_modules/ +/package-lock.json +/processes.json diff --git a/tests/.words b/tests/.words new file mode 100644 index 00000000..a50285ba --- /dev/null +++ b/tests/.words @@ -0,0 +1,49 @@ +0-to-9 +1-to-0 +anno +behavior +boolean +center +centers +dekad +DEM-based +Domini +gamma0 +GeoJSON +FeatureCollections +labeled +MathWorld +n-ary +neighbor +neighborhood +neighborhoods +openEO +orthorectification +orthorectified +radiometrically +reflectances +reproject +Reprojects +resample +resampled +resamples +Resamples +resampling +Sentinel-2 +Sentinel-2A +Sentinel-2B +signum +STAC +catalog +Catalog +summand +UDFs +gdalwarp +Lanczos +sinc +interpolants +Breiman +Hyndman +date1 +date2 +favor diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 00000000..fc2382fe --- /dev/null +++ b/tests/README.md @@ -0,0 +1,30 @@ +# Tests for openEO Processes + +To run the tests, follow these steps: + +1. Install [node and npm](https://nodejs.org) - should run with any recent version +2. Run `npm install` in this folder to install the dependencies +3. Run the tests with `npm test`. This will also lint the files and verify that they follow best practices. +4. To show the files nicely formatted in a web browser, run `npm start`. It starts a server and opens the corresponding page in a web browser. + +## Development processes + +All new processes must be added to the `proposals` folder. Each process must be declared to be `experimental`. +Processes must comply with best practices, which ensure a certain degree of consistency. +`npm test` will validate and lint the processes and also ensure the best practices are applied. + +The linting checks that the files are named correctly, that the content is correctly formatted and indented (JSON and embedded CommonMark). +The best practices ensure, for example, that the fields are neither too short nor too long. + +A spell check also checks the texts. It may report names and rarely used technical words as errors. +If you are sure that these are correct, you can add them to the `.words` file to exclude them from being reported as errors. +The file must contain one word per line. + +New processes should be added via GitHub Pull Requests. + +## Subtype schemas + +Sometimes it is useful to define a new "data type" on top of the JSON types (number, string, array, object, ...). +For example, a client could make a select box with all collections available by adding a subtype `collection-id` to the JSON type `string`. +If you think a new subtype should be added, you need to add it to the `meta/subtype-schemas.json` file. +It must be a valid JSON Schema. The tests mentioned above will also verify, to a certain degree, that the subtypes are defined correctly.
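Each entry in `meta/subtype-schemas.json` is simply a named JSON Schema with a `subtype` set. A minimal sketch of such an entry, modeled on the `ml-model` subtype added earlier in this changeset (the wording here is illustrative, not the authoritative definition):

```json
{
    "ml-model": {
        "type": "object",
        "subtype": "ml-model",
        "title": "Machine Learning Model",
        "description": "A machine learning model, accompanied by STAC metadata that implements the STAC ml-model extension."
    }
}
```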
diff --git a/tests/docs.html b/tests/docs.html new file mode 100644 index 00000000..04b1c192 --- /dev/null +++ b/tests/docs.html @@ -0,0 +1,125 @@ + openEO API Processes
+ + + + \ No newline at end of file diff --git a/tests/package.json b/tests/package.json new file mode 100644 index 00000000..1da8693f --- /dev/null +++ b/tests/package.json @@ -0,0 +1,30 @@ +{ + "name": "@openeo/processes", + "version": "2.0.0-rc.1", + "author": "openEO Consortium", + "contributors": [ + { + "name": "Matthias Mohr" + } + ], + "license": "Apache-2.0", + "description": "Validates the processes specified in this repository.", + "homepage": "http://openeo.org", + "bugs": { + "url": "https://github.com/Open-EO/openeo-processes/issues" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/Open-EO/openeo-processes.git" + }, + "devDependencies": { + "@openeo/processes-lint": "^0.1.5", + "concat-json-files": "^1.1.0", + "http-server": "^14.1.1" + }, + "scripts": { + "test": "openeo-processes-lint testConfig.json", + "generate": "concat-json-files \"../{*,proposals/*}.json\" -t \"processes.json\"", + "start": "npm run generate && http-server -p 9876 -o docs.html -c-1" + } +} diff --git a/tests/testConfig.json b/tests/testConfig.json new file mode 100644 index 00000000..9b5fbcb2 --- /dev/null +++ b/tests/testConfig.json @@ -0,0 +1,14 @@ +{ + "folder": "../", + "proposalsFolder": "../proposals/", + "ignoredWords": ".words", + "anyOfRequired": [ + "array_element", + "quantiles" + ], + "subtypeSchemas": "../meta/subtype-schemas.json", + "checkSubtypeSchemas": true, + "forbidDeprecatedTypes": false, + "checkProcessLinks": true, + "verbose": false +} diff --git a/xor.json b/xor.json index 6af7ae5e..d8dbde50 100644 --- a/xor.json +++ b/xor.json @@ -1,7 +1,7 @@ { "id": "xor", "summary": "Logical XOR (exclusive or)", - "description": "Checks if **exactly one** of the values is true. If a component is `null`, the result will be `null` if the outcome is ambiguous.\n\n**Truth table:**\n\n```\nx \\ y || null | false | true\n----- || ---- | ----- | -----\nnull || null | null | null\nfalse || null | false | true\ntrue || null | true | false\n```", + "description": "Checks if **exactly one** of the values is true. If a component is `null`, the result will be `null` if the outcome is ambiguous.\n\n**Truth table:**\n\n```\na \\ b || null | false | true\n----- || ---- | ----- | -----\nnull || null | null | null\nfalse || null | false | true\ntrue || null | true | false\n```", "categories": [ "logic" ], @@ -125,4 +125,4 @@ "result": true } } -} +} \ No newline at end of file
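The new machine-learning processes introduced in this changeset are designed to chain together: a fitting process such as ``ml_fit_class_random_forest()`` produces an `ml-model` object that ``save_ml_model()`` stores and ``ml_predict()`` consumes. The following is a minimal, hypothetical process-graph sketch; the parameter names `training_features`, `training_labels`, and `feature_cube`, the reduced `bands` dimension, and the seed are placeholders and not part of the specifications above:

```json
{
    "train": {
        "process_id": "ml_fit_class_random_forest",
        "arguments": {
            "predictors": {"from_parameter": "training_features"},
            "target": {"from_parameter": "training_labels"},
            "max_variables": "sqrt",
            "seed": 42
        }
    },
    "save": {
        "process_id": "save_ml_model",
        "arguments": {
            "data": {"from_node": "train"}
        }
    },
    "predict": {
        "process_id": "ml_predict",
        "arguments": {
            "data": {"from_parameter": "feature_cube"},
            "model": {"from_node": "train"},
            "dimensions": ["bands"]
        },
        "result": true
    }
}
```

In practice, training and prediction would typically run as separate batch jobs: the first job ends with ``save_ml_model()``, and the second restores the model from the resulting STAC Item via ``load_ml_model()`` before passing it to ``ml_predict()``, as described in the `load_ml_model` proposal above.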