Skip to content

Commit b879c30

Browse files
achoum and copybara-github
authored and committed
Internal change
PiperOrigin-RevId: 387569213
1 parent b39de9e commit b879c30

File tree

5 files changed

+100
-45
lines changed

5 files changed

+100
-45
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Changelog
22

3-
## 0.1.8 - ???
3+
## 0.1.8 - 2021-07-28
44

55
### Features
66

configure/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from setuptools.command.install import install
2121
from setuptools.dist import Distribution
2222

23-
_VERSION = "0.1.7"
23+
_VERSION = "0.1.8"
2424

2525
with open("README.md", "r", encoding="utf-8") as fh:
2626
long_description = fh.read()

tensorflow_decision_forests/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
4646
"""
4747

48-
__version__ = "0.1.7"
48+
__version__ = "0.1.8"
4949
__author__ = "Mathieu Guillame-Bert"
5050

5151
from tensorflow_decision_forests import keras

tensorflow_decision_forests/keras/wrappers_pre_generated.py

Lines changed: 90 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,17 @@ class RandomForestModel(core.CoreModel):
8383
the raw input). Can be used to prepare the features or to stack multiple
8484
models on top of each other. Unlike preprocessing done in the tf.dataset,
8585
the operations in "preprocessing" are serialized with the model.
86+
postprocessing: Like "preprocessing" but applied on the model output.
8687
ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature that
8788
identifies queries in a query/document ranking task. The ranking group
8889
is not added automatically for the set of features if
8990
`exclude_non_specified_features=false`.
90-
temp_directory: Temporary directory used during the training. The space
91-
required depends on the learner. In many cases, only a temporary copy of a
92-
model will be there.
91+
temp_directory: Temporary directory used to store the model Assets after the
92+
training, and possibly as a work directory during the training. This
93+
temporary directory is necessary for the model to be exported after
94+
training e.g. `model.save(path)`. If not specified, `temp_directory` is
95+
set to a temporary directory using `tempfile.TemporaryDirectory`. This
96+
directory is deleted when the model python object is garbage-collected.
9397
verbose: If true, displays information about the training.
9498
hyperparameter_template: Override the default value of the hyper-parameters.
9599
If None (default) the default parameters of the library are used. If set,
@@ -110,6 +114,11 @@ class RandomForestModel(core.CoreModel):
110114
111115
advanced_arguments: Advanced control of the model that most users won't need
112116
to use. See `AdvancedArguments` for details.
117+
num_threads: Number of threads used to train the model. Different learning
118+
algorithms use multi-threading differently and with different degrees of
119+
efficiency. If specified, `num_threads` field of the
120+
`advanced_arguments.yggdrasil_deployment_config` has priority.
121+
name: The name of the model.
113122
adapt_bootstrap_size_ratio_for_maximum_training_duration: Control how the
114123
maximum training duration (if set) is applied. If false, the training
115124
stop when the time is used. If true, adapts the size of the sampled
@@ -254,11 +263,14 @@ def __init__(
254263
features: Optional[List[core.FeatureUsage]] = None,
255264
exclude_non_specified_features: Optional[bool] = False,
256265
preprocessing: Optional["tf.keras.models.Functional"] = None,
266+
postprocessing: Optional["tf.keras.models.Functional"] = None,
257267
ranking_group: Optional[str] = None,
258268
temp_directory: Optional[str] = None,
259269
verbose: Optional[bool] = True,
260270
hyperparameter_template: Optional[str] = None,
261271
advanced_arguments: Optional[AdvancedArguments] = None,
272+
num_threads: Optional[int] = 6,
273+
name: Optional[str] = None,
262274
adapt_bootstrap_size_ratio_for_maximum_training_duration: Optional[
263275
bool] = False,
264276
allow_na_conditions: Optional[bool] = False,
@@ -349,10 +361,13 @@ def __init__(
349361
features=features,
350362
exclude_non_specified_features=exclude_non_specified_features,
351363
preprocessing=preprocessing,
364+
postprocessing=postprocessing,
352365
ranking_group=ranking_group,
353366
temp_directory=temp_directory,
354367
verbose=verbose,
355-
advanced_arguments=advanced_arguments)
368+
advanced_arguments=advanced_arguments,
369+
num_threads=num_threads,
370+
name=name)
356371

357372
@staticmethod
358373
def predefined_hyperparameters() -> List[core.HyperParameterTemplate]:
@@ -418,13 +433,17 @@ class GradientBoostedTreesModel(core.CoreModel):
418433
the raw input). Can be used to prepare the features or to stack multiple
419434
models on top of each other. Unlike preprocessing done in the tf.dataset,
420435
the operations in "preprocessing" are serialized with the model.
436+
postprocessing: Like "preprocessing" but applied on the model output.
421437
ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature that
422438
identifies queries in a query/document ranking task. The ranking group
423439
is not added automatically for the set of features if
424440
`exclude_non_specified_features=false`.
425-
temp_directory: Temporary directory used during the training. The space
426-
required depends on the learner. In many cases, only a temporary copy of a
427-
model will be there.
441+
temp_directory: Temporary directory used to store the model Assets after the
442+
training, and possibly as a work directory during the training. This
443+
temporary directory is necessary for the model to be exported after
444+
training e.g. `model.save(path)`. If not specified, `temp_directory` is
445+
set to a temporary directory using `tempfile.TemporaryDirectory`. This
446+
directory is deleted when the model python object is garbage-collected.
428447
verbose: If true, displays information about the training.
429448
hyperparameter_template: Override the default value of the hyper-parameters.
430449
If None (default) the default parameters of the library are used. If set,
@@ -445,6 +464,11 @@ class GradientBoostedTreesModel(core.CoreModel):
445464
446465
advanced_arguments: Advanced control of the model that most users won't need
447466
to use. See `AdvancedArguments` for details.
467+
num_threads: Number of threads used to train the model. Different learning
468+
algorithms use multi-threading differently and with different degrees of
469+
efficiency. If specified, `num_threads` field of the
470+
`advanced_arguments.yggdrasil_deployment_config` has priority.
471+
name: The name of the model.
448472
adapt_subsample_for_maximum_training_duration: Control how the maximum
449473
training duration (if set) is applied. If false, the training stops when
450474
the time is used. If true, the size of the sampled datasets used train
@@ -644,11 +668,14 @@ def __init__(
644668
features: Optional[List[core.FeatureUsage]] = None,
645669
exclude_non_specified_features: Optional[bool] = False,
646670
preprocessing: Optional["tf.keras.models.Functional"] = None,
671+
postprocessing: Optional["tf.keras.models.Functional"] = None,
647672
ranking_group: Optional[str] = None,
648673
temp_directory: Optional[str] = None,
649674
verbose: Optional[bool] = True,
650675
hyperparameter_template: Optional[str] = None,
651676
advanced_arguments: Optional[AdvancedArguments] = None,
677+
num_threads: Optional[int] = 6,
678+
name: Optional[str] = None,
652679
adapt_subsample_for_maximum_training_duration: Optional[bool] = False,
653680
allow_na_conditions: Optional[bool] = False,
654681
apply_link_function: Optional[bool] = True,
@@ -780,10 +807,13 @@ def __init__(
780807
features=features,
781808
exclude_non_specified_features=exclude_non_specified_features,
782809
preprocessing=preprocessing,
810+
postprocessing=postprocessing,
783811
ranking_group=ranking_group,
784812
temp_directory=temp_directory,
785813
verbose=verbose,
786-
advanced_arguments=advanced_arguments)
814+
advanced_arguments=advanced_arguments,
815+
num_threads=num_threads,
816+
name=name)
787817

788818
@staticmethod
789819
def predefined_hyperparameters() -> List[core.HyperParameterTemplate]:
@@ -848,32 +878,44 @@ class CartModel(core.CoreModel):
848878
the raw input). Can be used to prepare the features or to stack multiple
849879
models on top of each other. Unlike preprocessing done in the tf.dataset,
850880
the operations in "preprocessing" are serialized with the model.
851-
ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature
852-
that identifies queries in a query/document ranking task. The ranking
853-
group is not added automatically for the set of features if
881+
postprocessing: Like "preprocessing" but applied on the model output.
882+
ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature that
883+
identifies queries in a query/document ranking task. The ranking group
884+
is not added automatically for the set of features if
854885
`exclude_non_specified_features=false`.
855-
temp_directory: Temporary directory used during the training. The space
856-
required depends on the learner. In many cases, only a temporary copy of a
857-
model will be there.
886+
temp_directory: Temporary directory used to store the model Assets after the
887+
training, and possibly as a work directory during the training. This
888+
temporary directory is necessary for the model to be exported after
889+
training e.g. `model.save(path)`. If not specified, `temp_directory` is
890+
set to a temporary directory using `tempfile.TemporaryDirectory`. This
891+
directory is deleted when the model python object is garbage-collected.
858892
verbose: If true, displays information about the training.
859893
hyperparameter_template: Override the default value of the hyper-parameters.
860894
If None (default) the default parameters of the library are used. If set,
861895
`default_hyperparameter_template` refers to one of the following
862896
preconfigured hyper-parameter sets. Those sets outperform the default
863-
hyper-parameters (either generally or in specific scenarios). You can omit
864-
the version (e.g. remove "@v5") to use the last version of the template.
865-
In this case, the hyper-parameter can change in between releases (not
866-
recommended for training in production).
897+
hyper-parameters (either generally or in specific scenarios).
898+
You can omit the version (e.g. remove "@v5") to use the last version of
899+
the template. In this case, the hyper-parameter can change in between
900+
releases (not recommended for training in production).
901+
902+
867903
advanced_arguments: Advanced control of the model that most users won't need
868904
to use. See `AdvancedArguments` for details.
905+
num_threads: Number of threads used to train the model. Different learning
906+
algorithms use multi-threading differently and with different degrees of
907+
efficiency. If specified, `num_threads` field of the
908+
`advanced_arguments.yggdrasil_deployment_config` has priority.
909+
name: The name of the model.
869910
allow_na_conditions: If true, the tree training evaluates conditions of the
870911
type `X is NA` i.e. `X is missing`. Default: False.
871912
categorical_algorithm: How to learn splits on categorical attributes.
872913
- `CART`: CART algorithm. Find categorical splits of the form "value \\in
873914
mask". The solution is exact for binary classification, regression and
874915
ranking. It is approximated for multi-class classification. This is a
875-
good first algorithm to use. In case of overfitting (very small dataset,
876-
large dictionary), the "random" algorithm is a good alternative.
916+
good first algorithm to use. In case of overfitting (very small
917+
dataset, large dictionary), the "random" algorithm is a good
918+
alternative.
877919
- `ONE_HOT`: One-hot encoding. Find the optimal categorical split of the
878920
form "attribute == param". This method is similar (but more efficient)
879921
than converting each possible categorical value into a
@@ -894,7 +936,7 @@ class CartModel(core.CoreModel):
894936
available, the least frequent items are ignored. Changing this value is
895937
similar to changing the "max_vocab_count" before loading the dataset, with
896938
the following exception: With `max_vocab_count`, all the remaining items
897-
are grouped in a special Out-of-vocabulary item. With `max_num_items`,
939+
are grouped in a special Out-of-vocabulary item. With `max_num_items`,
898940
this is not the case. Default: -1.
899941
categorical_set_split_min_item_frequency: For categorical set splits e.g.
900942
texts. Minimum number of occurrences of an item to be considered.
@@ -904,16 +946,16 @@ class CartModel(core.CoreModel):
904946
words, as long as a node satisfies the split constraints (e.g. maximum
905947
depth, minimum number of observations), the node will be split. This is
906948
the "classical" way to grow decision trees.
907-
- `BEST_FIRST_GLOBAL`: The node with the best loss reduction among all the
908-
nodes of the tree is selected for splitting. This method is also called
909-
"best first" or "leaf-wise growth". See "Best-first decision
949+
- `BEST_FIRST_GLOBAL`: The node with the best loss reduction among all
950+
the nodes of the tree is selected for splitting. This method is also
951+
called "best first" or "leaf-wise growth". See "Best-first decision
910952
tree learning", Shi and "Additive logistic regression : A statistical
911953
view of boosting", Friedman for more details. Default: "LOCAL".
912954
in_split_min_examples_check: Whether to check the `min_examples` constraint
913955
in the split search (i.e. splits leading to one child having less than
914-
`min_examples` examples are considered invalid) or before the split search
915-
(i.e. a node can be derived only if it contains more than `min_examples`
916-
examples). If false, there can be nodes with less than
956+
`min_examples` examples are considered invalid) or before the split
957+
search (i.e. a node can be derived only if it contains more than
958+
`min_examples` examples). If false, there can be nodes with less than
917959
`min_examples` training examples. Default: True.
918960
max_depth: Maximum depth of the tree. `max_depth=1` means that all trees
919961
will be roots. Negative values are ignored. Default: 16.
@@ -926,9 +968,9 @@ class CartModel(core.CoreModel):
926968
model training non-deterministic. Default: -1.0.
927969
min_examples: Minimum number of examples in a node. Default: 5.
928970
missing_value_policy: Method used to handle missing attribute values.
929-
- `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
930-
(in case of numerical attribute) or the most-frequent-item (in case of
931-
categorical attribute) computed on the entire dataset (i.e. the
971+
- `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the
972+
mean (in case of numerical attribute) or the most-frequent-item (in
973+
case of categorical attribute) computed on the entire dataset (i.e. the
932974
information contained in the data spec).
933975
- `LOCAL_IMPUTATION`: Missing attribute values are imputed with the mean
934976
(numerical attribute) or most-frequent-item (in the case of categorical
@@ -942,24 +984,24 @@ class CartModel(core.CoreModel):
942984
node. An attribute is valid if it has at least a valid split. If
943985
`num_candidate_attributes=0`, the value is set to the classical default
944986
value for Random Forest: `sqrt(number of input attributes)` in case of
945-
classification and `number_of_input_attributes / 3` in case of
946-
regression. If `num_candidate_attributes=-1`, all the attributes are
987+
classification and `number_of_input_attributes / 3` in case of
988+
regression. If `num_candidate_attributes=-1`, all the attributes are
947989
tested. Default: 0.
948990
num_candidate_attributes_ratio: Ratio of attributes tested at each node. If
949991
set, it is equivalent to `num_candidate_attributes =
950992
number_of_input_features x num_candidate_attributes_ratio`. The possible
951993
values are between ]0, and 1] as well as -1. If not set or equal to -1,
952994
the `num_candidate_attributes` is used. Default: -1.0.
953-
sorting_strategy: How are sorted the numerical features in order to find the
954-
splits
995+
sorting_strategy: How the numerical features are sorted in order to find
996+
the splits
955997
- PRESORT: The features are pre-sorted at the start of the training. This
956998
solution is faster but consumes much more memory than IN_NODE.
957999
- IN_NODE: The features are sorted just before being used in the node.
9581000
This solution is slow but consumes little amount of memory.
9591001
. Default: "PRESORT".
9601002
sparse_oblique_normalization: For sparse oblique splits i.e.
961-
`split_axis=SPARSE_OBLIQUE`. Normalization applied on the features, before
962-
applying the sparse oblique projections.
1003+
`split_axis=SPARSE_OBLIQUE`. Normalization applied on the features,
1004+
before applying the sparse oblique projections.
9631005
- `NONE`: No normalization.
9641006
- `STANDARD_DEVIATION`: Normalize the feature by the estimated standard
9651007
deviation on the entire train dataset. Also known as Z-Score
@@ -969,19 +1011,20 @@ class CartModel(core.CoreModel):
9691011
sparse_oblique_num_projections_exponent: For sparse oblique splits i.e.
9701012
`split_axis=SPARSE_OBLIQUE`. Controls the number of random projections
9711013
to test at each node as `num_features^num_projections_exponent`. Default:
972-
None.
1014+
None.
9731015
sparse_oblique_projection_density_factor: For sparse oblique splits i.e.
9741016
`split_axis=SPARSE_OBLIQUE`. Controls the number of random projections
9751017
to test at each node as `num_features^num_projections_exponent`. Default:
976-
None.
1018+
None.
9771019
split_axis: What structure of split to consider for numerical features.
978-
- `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
979-
is the "classical" way to train a tree. Default value.
1020+
- `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time).
1021+
This is the "classical" way to train a tree. Default value.
9801022
- `SPARSE_OBLIQUE`: Sparse oblique splits (i.e. splits on a small number
9811023
of features) from "Sparse Projection Oblique Random Forests", Tomita et
9821024
al., 2020. Default: "AXIS_ALIGNED".
9831025
validation_ratio: Ratio of the training dataset used to create the
9841026
validation dataset used to prune the tree. Default: 0.1.
1027+
9851028
"""
9861029

9871030
@core._list_explicit_arguments
@@ -990,11 +1033,14 @@ def __init__(self,
9901033
features: Optional[List[core.FeatureUsage]] = None,
9911034
exclude_non_specified_features: Optional[bool] = False,
9921035
preprocessing: Optional["tf.keras.models.Functional"] = None,
1036+
postprocessing: Optional["tf.keras.models.Functional"] = None,
9931037
ranking_group: Optional[str] = None,
9941038
temp_directory: Optional[str] = None,
9951039
verbose: Optional[bool] = True,
9961040
hyperparameter_template: Optional[str] = None,
9971041
advanced_arguments: Optional[AdvancedArguments] = None,
1042+
num_threads: Optional[int] = 6,
1043+
name: Optional[str] = None,
9981044
allow_na_conditions: Optional[bool] = False,
9991045
categorical_algorithm: Optional[str] = "CART",
10001046
categorical_set_split_greedy_sampling: Optional[float] = 0.1,
@@ -1072,10 +1118,13 @@ def __init__(self,
10721118
features=features,
10731119
exclude_non_specified_features=exclude_non_specified_features,
10741120
preprocessing=preprocessing,
1121+
postprocessing=postprocessing,
10751122
ranking_group=ranking_group,
10761123
temp_directory=temp_directory,
10771124
verbose=verbose,
1078-
advanced_arguments=advanced_arguments)
1125+
advanced_arguments=advanced_arguments,
1126+
num_threads=num_threads,
1127+
name=name)
10791128

10801129
@staticmethod
10811130
def predefined_hyperparameters() -> List[core.HyperParameterTemplate]:

0 commit comments

Comments
 (0)