Skip to content

Commit

Permalink
Merge pull request #49 from tvdboom/dataengines2
Browse files Browse the repository at this point in the history
Dataengines2
  • Loading branch information
tvdboom authored Feb 26, 2024
2 parents ff9468e + a59a3b5 commit 7810077
Show file tree
Hide file tree
Showing 46 changed files with 3,057 additions and 2,472 deletions.
26 changes: 21 additions & 5 deletions atom/_show_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@
"atom",
"beartype",
"category_encoders",
"dagshub",
"dill",
"featuretools",
"gplearn",
"imblearn",
"ipywidgets",
"featuretools",
"joblib",
"matplotlib",
"mlflow",
Expand All @@ -35,14 +34,31 @@
"optuna",
"pandas",
"plotly",
"ray",
"requests",
"sklearn",
"sklearnex", # Has no __version__ attribute
"scipy",
"shap",
"sktime",
"statsmodels",
"zoofs", # Has no __version__ attribute
"botorch",
"catboost",
"dagshub",
"dask[distributed]",
"explainerdashboard",
"gradio",
"lightgbm",
"modin[ray]",
"polars",
"pyarrow",
"pyspark",
"ray[serve]",
"requests",
"sklearnex",
"schemdraw",
"statsforecast",
"sweetviz",
"wordcloud",
"xgboost",
]


Expand Down
80 changes: 52 additions & 28 deletions atom/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,20 @@ class ATOMClassifier(ATOM):
**X, train, test: dataframe-like**<br>
Feature set with shape=(n_samples, n_features).
**y: int, str or sequence**<br>
Target column corresponding to `X`.
**y: int, str, sequence or dataframe-like**<br>
Target column(s) corresponding to `X`.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If int: Position of the target column in `X`.
- If str: Name of the target column in `X`.
- If sequence: Target column with shape=(n_samples,) or
sequence of column names or positions for multioutput tasks.
- If dataframe: Target columns for multioutput tasks.
- If dataframe-like: Target columns for multioutput tasks.
y: int, str, dict, sequence or dataframe, default=-1
Target column corresponding to `X`.
y: int, str, sequence or dataframe-like, default=-1
Target column(s) corresponding to `X`.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If int: Position of the target column in `X`.
- If str: Name of the target column in `X`.
- If sequence: Target column with shape=(n_samples,) or
sequence of column names or positions for multioutput tasks.
- If dataframe-like: Target columns for multioutput tasks.
Expand Down Expand Up @@ -257,9 +257,16 @@ class ATOMClassifier(ATOM):
- "data":
- "numpy"
- "pandas" (default)
- "pandas-pyarrow"
- "polars"
- "polars-lazy"
- "pyarrow"
- "modin"
- "dask"
- "pyspark"
- "pyspark-pandas"
- "estimator":
Expand All @@ -276,6 +283,7 @@ class ATOMClassifier(ATOM):
parallelism. Less robust than `loky`.
- "threading": Single-node, thread-based parallelism.
- "ray": Multi-node, process-based parallelism.
- "dask": Multi-node, process-based parallelism.
memory: bool, str, Path or Memory, default=False
Enables caching for memory optimization. Read more in the
Expand Down Expand Up @@ -428,24 +436,24 @@ class ATOMForecaster(ATOM):
Exogenous feature set corresponding to y, with shape=(n_samples,
n_features).
**y: int, str or sequence**<br>
**y: int, str, sequence or dataframe-like**<br>
Time series.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If int: Position of the target column in `X`.
- If str: Name of the target column in `X`.
- If sequence: Target column with shape=(n_samples,) or
sequence of column names or positions for multioutput tasks.
- If dataframe: Target columns for multioutput tasks.
- If dataframe-like: Target columns for multioutput tasks.
y: int, str, dict, sequence or dataframe, default=-1
y: int, str, sequence or dataframe-like, default=-1
Time series.
- If None: y is ignored.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If None: `y` is ignored.
- If int: Position of the target column in `X`.
- If str: Name of the target column in `X`.
- If sequence: Target column with shape=(n_samples,) or
sequence of column names or positions for multioutput tasks.
- If dataframe: Target columns for multioutput tasks.
- If dataframe-like: Target columns for multioutput tasks.
This parameter is ignored if the time series is provided
through `arrays`.
Expand Down Expand Up @@ -526,9 +534,16 @@ class ATOMForecaster(ATOM):
- "data":
- "numpy"
- "pandas" (default)
- "pandas-pyarrow"
- "polars"
- "polars-lazy"
- "pyarrow"
- "modin"
- "dask"
- "pyspark"
- "pyspark-pandas"
- "estimator":
Expand All @@ -545,6 +560,7 @@ class ATOMForecaster(ATOM):
parallelism. Less robust than `loky`.
- "threading": Single-node, thread-based parallelism.
- "ray": Multi-node, process-based parallelism.
- "dask": Multi-node, process-based parallelism.
memory: bool, str, Path or Memory, default=False
Enables caching for memory optimization. Read more in the
Expand Down Expand Up @@ -689,24 +705,24 @@ class ATOMRegressor(ATOM):
**X, train, test: dataframe-like**<br>
Feature set with shape=(n_samples, n_features).
**y: int, str or sequence**<br>
Target column corresponding to `X`.
**y: int, str, sequence or dataframe-like**<br>
Target column(s) corresponding to `X`.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If int: Position of the target column in `X`.
- If str: Name of the target column in `X`.
- If sequence: Target column with shape=(n_samples,) or
sequence of column names or positions for multioutput tasks.
- If dataframe-like: Target columns for multioutput tasks.
y: int, str, dict, sequence or dataframe, default=-1
Target column corresponding to `X`.
y: int, str, sequence or dataframe-like, default=-1
Target column(s) corresponding to `X`.
- If None: y is ignored.
- If int: Position of the target column in X.
- If str: Name of the target column in X.
- If None: `y` is ignored.
- If int: Position of the target column in `X`.
- If str: Name of the target column in `X`.
- If sequence: Target column with shape=(n_samples,) or
sequence of column names or positions for multioutput tasks.
- If dataframe: Target columns for multioutput tasks.
- If dataframe-like: Target columns for multioutput tasks.
This parameter is ignored if the target column is provided
through `arrays`.
Expand Down Expand Up @@ -775,9 +791,16 @@ class ATOMRegressor(ATOM):
- "data":
- "numpy"
- "pandas" (default)
- "pandas-pyarrow"
- "polars"
- "polars-lazy"
- "pyarrow"
- "modin"
- "dask"
- "pyspark"
- "pyspark-pandas"
- "estimator":
Expand All @@ -794,6 +817,7 @@ class ATOMRegressor(ATOM):
parallelism. Less robust than `loky`.
- "threading": Single-node, thread-based parallelism.
- "ray": Multi-node, process-based parallelism.
- "dask": Multi-node, process-based parallelism.
memory: bool, str, Path or Memory, default=False
Enables caching for memory optimization. Read more in the
Expand Down
Loading

0 comments on commit 7810077

Please sign in to comment.