Skip to content

Commit 12d0945

Browse files
authored
Merge pull request #1276 from automl/966_output_trafos
966 output trafos
2 parents 5f47524 + a878522 commit 12d0945

File tree

3 files changed

+99
-0
lines changed

3 files changed

+99
-0
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
import scipy
5+
6+
from smac import constants
7+
from smac.runhistory.encoder.encoder import RunHistoryEncoder
8+
from smac.utils.logging import get_logger
9+
10+
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
11+
__license__ = "3-clause BSD"
12+
13+
14+
logger = get_logger(__name__)
15+
16+
17+
class RunHistoryGaussianCopulaEncoder(RunHistoryEncoder):
    """Run history encoder that maps costs to rank-based standard-normal scores."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform the response values with a rank-based Gaussian copula.

        The values are ranked, the ranks are rescaled to ``[0, 1]``, clipped
        away from the boundaries by a sample-size dependent cutoff and finally
        pushed through the inverse CDF of the standard normal distribution.

        Parameters
        ----------
        values : np.ndarray
            Response values. The array is flattened before ranking and is not
            modified by this method.

        Returns
        -------
        np.ndarray
            Column vector of shape ``(values.size, 1)`` holding the transformed
            values.
        """
        # Function-scope import: the module only does ``import scipy``, which
        # does not guarantee that the ``scipy.stats`` submodule is loaded.
        from scipy.stats import norm, rankdata

        min_log_cost = max(constants.MINIMAL_COST_FOR_LOG, 1e-10)

        if np.any(values <= 0):
            logger.warning(
                "Got cost of smaller/equal to 0. Replace by %f since we use log cost.",
                min_log_cost,
            )
            # Clamp into a new array so the caller's data is left untouched
            # (and integer inputs are not truncated back to 0).
            values = np.maximum(values, min_log_cost)

        flat = np.ravel(values)

        # Use at least two "samples" so that (n - 1) below is never zero and
        # log(n) is strictly positive.
        n = max(flat.size, 2)
        log_n = np.log(n)

        # Average ranks start at 1; rescale them to the unit interval.
        quants = (rankdata(flat) - 1) / (n - 1)

        # Keep the quantiles away from 0 and 1, where the normal inverse CDF
        # diverges; the margin shrinks as more samples become available.
        cutoff = min(0.1, 1 / (4 * np.power(n, 0.25) * np.sqrt(np.pi * log_n)))
        quants = np.clip(quants, a_min=cutoff, a_max=1 - cutoff)

        return norm.ppf(quants).reshape((-1, 1))
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
import scipy.stats
5+
6+
from smac import constants
7+
from smac.runhistory.encoder.encoder import RunHistoryEncoder
8+
from smac.utils.logging import get_logger
9+
10+
__copyright__ = "Copyright 2022, automl.org"
11+
__license__ = "3-clause BSD"
12+
13+
logger = get_logger(__name__)
14+
15+
16+
class RunHistoryPercentileEncoder(RunHistoryEncoder):
    """Run history encoder that maps costs to percentile-based normal scores."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform the response values via log, percentile ranks and the
        inverse CDF of the standard normal distribution.

        Parameters
        ----------
        values : np.ndarray
            Response values. Arrays with more than one dimension are averaged
            along ``axis=1`` first.

        Returns
        -------
        np.ndarray
            Column vector of shape ``(n, 1)`` with the transformed values.
        """
        if values.ndim > 1:
            # Only warn when several columns are actually being merged; a
            # single-column input is merely squeezed by the mean.
            if values.shape[1] > 1:
                logger.warning("Received values with shape %s, aggregating along axis=1.", values.shape)
            values = np.mean(values, axis=1)

        if np.any(values <= 0):
            logger.warning(
                "Got cost <= 0. Replacing by %f since we use log cost.",
                constants.MINIMAL_COST_FOR_LOG,
            )
            values = np.clip(values, constants.MINIMAL_COST_FOR_LOG, None)

        log_values = np.log(values)

        # The "rank" percentile of an element within its own sample equals its
        # average rank divided by the sample size, so one rankdata call
        # replaces a per-element percentileofscore loop (O(n log n) vs O(n^2)).
        n = max(log_values.size, 1)  # guard the division for empty input
        eps = 1e-6  # keep strictly within (0, 1) so ppf stays finite
        quants = np.clip(scipy.stats.rankdata(log_values) / n, eps, 1 - eps)

        output = scipy.stats.norm.ppf(quants).reshape((-1, 1))

        # Diagnostic output goes through the logger instead of stdout.
        logger.debug("Encoder output shape = %s", output.shape)

        return output
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
from sklearn.preprocessing import PowerTransformer
5+
6+
from smac.runhistory.encoder.encoder import RunHistoryEncoder
7+
from smac.utils.logging import get_logger
8+
9+
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
10+
__license__ = "3-clause BSD"
11+
12+
logger = get_logger(__name__)
13+
14+
15+
class RunHistoryPowerTransformEncoder(RunHistoryEncoder):
    """Run history encoder that applies a Yeo-Johnson power transform."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Apply a standardized Yeo-Johnson ``PowerTransformer`` to the values.

        Parameters
        ----------
        values : np.ndarray
            Response values; reshaped into a single column before fitting.

        Returns
        -------
        np.ndarray
            The transformed values as a column vector. Empty input is returned
            reshaped but otherwise untouched.
        """
        column = values.reshape(-1, 1)

        # Nothing to fit on: hand the (empty) column straight back.
        if values.size == 0:
            logger.debug("Received empty array for transformation.")
            return column

        power = PowerTransformer(method="yeo-johnson", standardize=True)
        transformed = power.fit_transform(column)
        return transformed

0 commit comments

Comments
 (0)