Skip to content

Commit

Permalink
feat: ExpandDateTime convenience
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-twhite committed Dec 13, 2023
1 parent 1055c37 commit 2b69ad5
Show file tree
Hide file tree
Showing 6 changed files with 276 additions and 84 deletions.
165 changes: 83 additions & 82 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,90 +94,91 @@ quartodoc:
- title: Core
package: ibisml
contents:
- kind: page
path: core
summary:
name: Common
desc: Core APIs
contents:
- Recipe
- RecipeTransform
- TransformResult

- kind: page
path: selectors
summary:
name: Selectors
desc: Select sets of columns by name, type, or other properties
contents:
- cols
- contains
- endswith
- startswith
- matches
- numeric
- nominal
- categorical
- string
- integer
- floating
- temporal
- date
- time
- timestamp
- has_type
- where
- everything
- selector
- kind: page
path: core
summary:
name: Common
desc: Core APIs
contents:
- Recipe
- RecipeTransform
- TransformResult

- kind: page
path: selectors
summary:
name: Selectors
desc: Select sets of columns by name, type, or other properties
contents:
- cols
- contains
- endswith
- startswith
- matches
- numeric
- nominal
- categorical
- string
- integer
- floating
- temporal
- date
- time
- timestamp
- has_type
- where
- everything
- selector

- title: Steps
desc: Define steps in a recipe
package: ibisml
contents:
- kind: page
path: steps-imputation
summary:
name: Imputation
desc: Imputation and handling of missing values
contents:
- ImputeMean
- ImputeMode
- ImputeMedian
- FillNA

- kind: page
path: steps-encoding
summary:
name: Encoding
desc: Encoding of categorical and string columns
contents:
- OneHotEncode
- CategoricalEncode

- kind: page
path: steps-standardization
summary:
name: Standardization
desc: Standardization and normalization of numeric columns
contents:
- ScaleStandard

- kind: page
path: steps-temporal
summary:
name: Temporal
desc: Feature extraction for temporal columns
contents:
- ExpandDate
- ExpandTime

- kind: page
path: steps-other
summary:
name: Other
desc: Other common tabular operations
contents:
- Cast
- Drop
- MutateAt
- Mutate
- kind: page
path: steps-imputation
summary:
name: Imputation
desc: Imputation and handling of missing values
contents:
- ImputeMean
- ImputeMode
- ImputeMedian
- FillNA

- kind: page
path: steps-encoding
summary:
name: Encoding
desc: Encoding of categorical and string columns
contents:
- OneHotEncode
- CategoricalEncode

- kind: page
path: steps-standardization
summary:
name: Standardization
desc: Standardization and normalization of numeric columns
contents:
- ScaleStandard

- kind: page
path: steps-temporal
summary:
name: Temporal
desc: Feature extraction for temporal columns
contents:
- ExpandDateTime
- ExpandDate
- ExpandTime

- kind: page
path: steps-other
summary:
name: Other
desc: Other common tabular operations
contents:
- Cast
- Drop
- MutateAt
- Mutate
3 changes: 2 additions & 1 deletion ibisml/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ibisml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode
from ibisml.steps.standardize import ScaleStandard
from ibisml.steps.encode import OneHotEncode, CategoricalEncode
from ibisml.steps.temporal import ExpandDate, ExpandTime
from ibisml.steps.temporal import ExpandDateTime, ExpandDate, ExpandTime


__all__ = (
Expand All @@ -17,6 +17,7 @@
"ScaleStandard",
"OneHotEncode",
"CategoricalEncode",
"ExpandDateTime",
"ExpandDate",
"ExpandTime",
)
111 changes: 111 additions & 0 deletions ibisml/steps/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,117 @@
from ibisml.select import SelectionType, selector


class ExpandDateTime(Step):
    """A step for expanding date and time columns into one or more features.

    New features will be named ``{input_column}_{component}``. For example, if
    expanding a ``"year"`` component from column ``"x"``, the feature column
    would be named ``"x_year"``.

    Parameters
    ----------
    inputs
        A selection of date and time columns to expand into new features.
    date_components
        A sequence of date components to expand. Options include

        - ``day``: the day of the month as a numeric value
        - ``week``: the week of the year as a numeric value
        - ``month``: the month of the year as a categorical value
        - ``year``: the year as a numeric value
        - ``dow``: the day of the week as a categorical value
        - ``doy``: the day of the year as a numeric value

        Defaults to ``["dow", "month", "year"]``.
    time_components
        A sequence of time components to expand. Options include ``hour``,
        ``minute``, ``second``, and ``millisecond``.

        Defaults to ``["hour", "minute", "second"]``.

    Examples
    --------
    >>> import ibisml as ml

    Expand timestamp columns using the default components

    >>> step = ml.ExpandDateTime(ml.timestamp())

    Expand specific columns using specific components for date and time

    >>> step = ml.ExpandDateTime(["x", "y"], ["day", "year"], ["hour", "minute"])
    """

    # Category labels registered for the ``month`` feature, in calendar order.
    _MONTH_NAMES = (
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    )

    # Category labels registered for the ``dow`` feature, Monday first.
    _DOW_NAMES = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )

    def __init__(
        self,
        inputs: SelectionType,
        date_components: Sequence[
            Literal["day", "week", "month", "year", "dow", "doy"]
        ] = (
            "dow",
            "month",
            "year",
        ),
        time_components: Sequence[
            Literal["hour", "minute", "second", "millisecond"]
        ] = (
            "hour",
            "minute",
            "second",
        ),
    ):
        self.inputs = selector(inputs)
        # Copy into lists so the step never aliases the caller's sequences.
        self.date_components = list(date_components)
        self.time_components = list(time_components)

    def _repr(self) -> Iterable[tuple[str, Any]]:
        """Yield ``(label, value)`` pairs describing this step's parameters."""
        yield ("", self.inputs)
        yield ("date_components", self.date_components)
        yield ("time_components", self.time_components)

    def fit(self, table: ir.Table, metadata: Metadata) -> Transform:
        """Resolve the selected columns and build the matching transform.

        For every selected column, category labels are recorded on
        ``metadata`` for the ``month`` and ``dow`` features when those
        components were requested.
        """
        date_columns = self.inputs.select_columns(table, metadata)

        if "month" in self.date_components:
            for col in date_columns:
                # A fresh list per column so metadata entries are not shared.
                metadata.set_categories(f"{col}_month", list(self._MONTH_NAMES))

        if "dow" in self.date_components:
            for col in date_columns:
                metadata.set_categories(f"{col}_dow", list(self._DOW_NAMES))

        # NOTE(review): the same selection is evaluated a second time for the
        # time columns; presumably it yields the same names as date_columns --
        # confirm before collapsing the two calls into one.
        time_columns = self.inputs.select_columns(table, metadata)
        return ml.transforms.ExpandDateTime(
            date_columns, self.date_components, time_columns, self.time_components
        )


class ExpandDate(Step):
"""A step for expanding date columns into one or more features.
Expand Down
3 changes: 2 additions & 1 deletion ibisml/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ibisml.transforms.impute import FillNA
from ibisml.transforms.standardize import ScaleStandard
from ibisml.transforms.encode import OneHotEncode, CategoricalEncode
from ibisml.transforms.temporal import ExpandDate, ExpandTime
from ibisml.transforms.temporal import ExpandDateTime, ExpandDate, ExpandTime

__all__ = (
"Cast",
Expand All @@ -13,6 +13,7 @@
"ScaleStandard",
"OneHotEncode",
"CategoricalEncode",
"ExpandDateTime",
"ExpandDate",
"ExpandTime",
)
53 changes: 53 additions & 0 deletions ibisml/transforms/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,59 @@
import ibis.expr.types as ir


class ExpandDateTime(Transform):
    """Add date and time component features to a table.

    Each requested component of each listed column becomes a new column
    named ``{column}_{component}``.

    Parameters
    ----------
    date_columns
        Names of the columns to extract date components from.
    date_components
        Date components to extract from every column in ``date_columns``.
    time_columns
        Names of the columns to extract time components from.
    time_components
        Time components to extract from every column in ``time_columns``.
    """

    def __init__(
        self,
        date_columns: list[str],
        date_components: list[Literal["day", "week", "month", "year", "dow", "doy"]],
        time_columns: list[str],
        time_components: list[Literal["hour", "minute", "second", "millisecond"]],
    ):
        self.date_columns = date_columns
        self.date_components = date_components
        self.time_columns = time_columns
        self.time_components = time_components

    @property
    def input_columns(self) -> list[str]:
        """Names of all columns read by this transform, without repeats."""
        # The same column may appear in both lists; dict.fromkeys removes the
        # repeats while keeping first-seen order.
        return list(dict.fromkeys([*self.date_columns, *self.time_columns]))

    @staticmethod
    def _date_feature(col, comp: str):
        """Return the expression extracting date component ``comp`` from ``col``."""
        if comp == "day":
            return col.day()
        if comp == "week":
            return col.week_of_year()
        if comp == "month":
            # Shift to a zero-based month index.
            return col.month() - 1
        if comp == "year":
            return col.year()
        if comp == "dow":
            return col.day_of_week.index()
        if comp == "doy":
            return col.day_of_year()
        raise ValueError(f"Unknown date component: {comp!r}")

    @staticmethod
    def _time_feature(col, comp: str):
        """Return the expression extracting time component ``comp`` from ``col``."""
        if comp == "hour":
            return col.hour()
        if comp == "minute":
            return col.minute()
        if comp == "second":
            return col.second()
        if comp == "millisecond":
            return col.millisecond()
        raise ValueError(f"Unknown time component: {comp!r}")

    def transform(self, table: ir.Table) -> ir.Table:
        """Return ``table`` with one new column per (column, component) pair.

        Raises
        ------
        ValueError
            If a configured date or time component is not recognised.
        """
        new_cols = []

        for name in self.date_columns:
            col = table[name]
            for comp in self.date_components:
                feat = self._date_feature(col, comp)
                new_cols.append(feat.name(f"{name}_{comp}"))

        for name in self.time_columns:
            col = table[name]
            for comp in self.time_components:
                feat = self._time_feature(col, comp)
                new_cols.append(feat.name(f"{name}_{comp}"))

        return table.mutate(new_cols)


class ExpandDate(Transform):
def __init__(
self,
Expand Down
Loading

0 comments on commit 2b69ad5

Please sign in to comment.