diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index 941e6a48..498adf60 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -238,12 +238,6 @@ } } }, - "ml-model": { - "type": "object", - "subtype": "ml-model", - "title": "Machine Learning Model", - "description": "A machine learning model, accompanied with STAC metadata that implements the the STAC ml-model extension." - }, "output-format": { "type": "string", "subtype": "output-format", diff --git a/proposals/fit_class_random_forest.json b/proposals/fit_class_random_forest.json deleted file mode 100644 index 6eb874bf..00000000 --- a/proposals/fit_class_random_forest.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "id": "fit_class_random_forest", - "summary": "Train a random forest classification model", - "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest classification model is based on the approach by Breiman (2001).", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "predictors", - "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", - "schema": [ - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "bands" - } - ] - }, - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "other" - } - ] - } - ] - }, - { - "name": "target", - "description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. fractional forest canopy cover).", - "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] - } - }, - { - "name": "max_variables", - "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.", - "schema": [ - { - "type": "integer", - "minimum": 1 - }, - { - "type": "string", - "enum": [ - "all", - "log2", - "onethird", - "sqrt" - ] - } - ] - }, - { - "name": "num_trees", - "description": "The number of trees build within the Random Forest classification.", - "optional": true, - "default": 100, - "schema": { - "type": "integer", - "minimum": 1 - } - }, - { - "name": "seed", - "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", - "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } - } - ], - "returns": { - "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://doi.org/10.1023/A:1010933404324", - "title": "Breiman (2001): Random Forests", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/proposals/fit_regr_random_forest.json b/proposals/fit_regr_random_forest.json deleted file mode 100644 index 51191fa5..00000000 --- a/proposals/fit_regr_random_forest.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "id": "fit_regr_random_forest", - "summary": "Train a random forest regression model", - "description": "Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "predictors", - "description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", - "schema": [ - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "bands" - } - ] - }, - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "other" - } - ] - } - ] - }, - { - "name": "target", - "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. fractional forest canopy cover).", - "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] - } - }, - { - "name": "max_variables", - "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split. This is often the default for regression.\n- `sqrt`: The square root of the number of variables are considered for each split.", - "schema": [ - { - "type": "integer", - "minimum": 1 - }, - { - "type": "string", - "enum": [ - "all", - "log2", - "onethird", - "sqrt" - ] - } - ] - }, - { - "name": "num_trees", - "description": "The number of trees build within the Random Forest regression.", - "optional": true, - "default": 100, - "schema": { - "type": "integer", - "minimum": 1 - } - }, - { - "name": "seed", - "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", - "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } - } - ], - "returns": { - "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://doi.org/10.1023/A:1010933404324", - "title": "Breiman (2001): Random Forests", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json deleted file mode 100644 index 151513c8..00000000 --- a/proposals/load_ml_model.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "id": "load_ml_model", - "summary": "Load a ML model", - "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.", - "categories": [ - "machine learning", - "import" - ], - "experimental": true, - "parameters": [ - { - "name": "id", - "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.", - "schema": [ - { - "title": "URL", - "type": "string", - "format": "uri", - "subtype": "uri", - "pattern": "^https?://" - }, - { - "title": "Batch Job ID", - "description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. Otherwise, you have to load a specific model from a batch job by URL.", - "type": "string", - "subtype": "job-id", - "pattern": "^[\\w\\-\\.~]+$" - }, - { - "title": "User-uploaded File", - "type": "string", - "subtype": "file-path", - "pattern": "^[^\r\n\\:'\"]+$" - } - ] - } - ], - "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://github.com/stac-extensions/ml-model", - "title": "STAC ml-model extension", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/proposals/predict_random_forest.json b/proposals/predict_random_forest.json deleted file mode 100644 index 62c54e9f..00000000 --- a/proposals/predict_random_forest.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "id": "predict_random_forest", - "summary": "Predict values based on a Random Forest model", - "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", - "categories": [ - "machine learning", - "reducer" - ], - "experimental": true, - "parameters": [ - { - "name": "data", - "description": "An array of numbers.", - "schema": { - "type": "array", - "items": { - "type": [ - "number", - "null" - ] - } - } - }, - { - "name": "model", - "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", - "schema": { - "type": "object", - "subtype": "ml-model" - } - } - ], - "returns": { - "description": "The predicted value. Returns `null` if any of the given values in the array is a no-data value.", - "schema": { - "type": [ - "number", - "null" - ] - } - } -} diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json deleted file mode 100644 index 5e9ea8b0..00000000 --- a/proposals/save_ml_model.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "id": "save_ml_model", - "summary": "Save a ML model", - "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).", - "categories": [ - "machine learning", - "import" - ], - "experimental": true, - "parameters": [ - { - "name": "data", - "description": "The data to store as a machine learning model.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - { - "name": "options", - "description": "Additional parameters to create the file(s).", - "schema": { - "type": "object", - "additionalParameters": false - }, - "default": {}, - "optional": true - } - ], - "returns": { - "description": "Returns `false` if the process failed to store the model, `true` otherwise.", - "schema": { - "type": "boolean" - } - }, - "links": [ - { - "href": "https://github.com/stac-extensions/ml-model", - "title": "STAC ml-model extension", - "type": "text/html", - "rel": "about" - } - ] -} \ No newline at end of file