@@ -83,13 +83,17 @@ class RandomForestModel(core.CoreModel):
8383 the raw input). Can be used to prepare the features or to stack multiple
8484 models on top of each other. Unlike preprocessing done in the tf.dataset,
8585 the operations in "preprocessing" are serialized with the model.
86+ postprocessing: Like "preprocessing" but applied on the model output.
8687 ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature that
8788 identifies queries in a query/document ranking task. The ranking group
8889 is not added automatically for the set of features if
8990 `exclude_non_specified_features=false`.
90- temp_directory: Temporary directory used during the training. The space
91- required depends on the learner. In many cases, only a temporary copy of a
92- model will be there.
91+ temp_directory: Temporary directory used to store the model Assets after the
92+ training, and possibly as a work directory during the training. This
93+ temporary directory is necessary for the model to be exported after
94+ training e.g. `model.save(path)`. If not specified, `temp_directory` is
95+ set to a temporary directory using `tempfile.TemporaryDirectory`. This
96+ directory is deleted when the model python object is garbage-collected.
9397 verbose: If true, displays information about the training.
9498 hyperparameter_template: Override the default value of the hyper-parameters.
9599 If None (default) the default parameters of the library are used. If set,
@@ -110,6 +114,11 @@ class RandomForestModel(core.CoreModel):
110114
111115 advanced_arguments: Advanced control of the model that most users won't need
112116 to use. See `AdvancedArguments` for details.
117+ num_threads: Number of threads used to train the model. Different learning
118+ algorithms use multi-threading differently and with different degree of
119+ efficiency. If specified, `num_threads` field of the
120+ `advanced_arguments.yggdrasil_deployment_config` has priority.
121+ name: The name of the model.
113122 adapt_bootstrap_size_ratio_for_maximum_training_duration: Control how the
114123 maximum training duration (if set) is applied. If false, the training
115124 stop when the time is used. If true, adapts the size of the sampled
@@ -254,11 +263,14 @@ def __init__(
254263 features : Optional [List [core .FeatureUsage ]] = None ,
255264 exclude_non_specified_features : Optional [bool ] = False ,
256265 preprocessing : Optional ["tf.keras.models.Functional" ] = None ,
266+ postprocessing : Optional ["tf.keras.models.Functional" ] = None ,
257267 ranking_group : Optional [str ] = None ,
258268 temp_directory : Optional [str ] = None ,
259269 verbose : Optional [bool ] = True ,
260270 hyperparameter_template : Optional [str ] = None ,
261271 advanced_arguments : Optional [AdvancedArguments ] = None ,
272+ num_threads : Optional [int ] = 6 ,
273+ name : Optional [str ] = None ,
262274 adapt_bootstrap_size_ratio_for_maximum_training_duration : Optional [
263275 bool ] = False ,
264276 allow_na_conditions : Optional [bool ] = False ,
@@ -349,10 +361,13 @@ def __init__(
349361 features = features ,
350362 exclude_non_specified_features = exclude_non_specified_features ,
351363 preprocessing = preprocessing ,
364+ postprocessing = postprocessing ,
352365 ranking_group = ranking_group ,
353366 temp_directory = temp_directory ,
354367 verbose = verbose ,
355- advanced_arguments = advanced_arguments )
368+ advanced_arguments = advanced_arguments ,
369+ num_threads = num_threads ,
370+ name = name )
356371
357372 @staticmethod
358373 def predefined_hyperparameters () -> List [core .HyperParameterTemplate ]:
@@ -418,13 +433,17 @@ class GradientBoostedTreesModel(core.CoreModel):
418433 the raw input). Can be used to prepare the features or to stack multiple
419434 models on top of each other. Unlike preprocessing done in the tf.dataset,
420435 the operations in "preprocessing" are serialized with the model.
436+ postprocessing: Like "preprocessing" but applied on the model output.
421437 ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature that
422438 identifies queries in a query/document ranking task. The ranking group
423439 is not added automatically for the set of features if
424440 `exclude_non_specified_features=false`.
425- temp_directory: Temporary directory used during the training. The space
426- required depends on the learner. In many cases, only a temporary copy of a
427- model will be there.
441+ temp_directory: Temporary directory used to store the model Assets after the
442+ training, and possibly as a work directory during the training. This
443+ temporary directory is necessary for the model to be exported after
444+ training e.g. `model.save(path)`. If not specified, `temp_directory` is
445+ set to a temporary directory using `tempfile.TemporaryDirectory`. This
446+ directory is deleted when the model python object is garbage-collected.
428447 verbose: If true, displays information about the training.
429448 hyperparameter_template: Override the default value of the hyper-parameters.
430449 If None (default) the default parameters of the library are used. If set,
@@ -445,6 +464,11 @@ class GradientBoostedTreesModel(core.CoreModel):
445464
446465 advanced_arguments: Advanced control of the model that most users won't need
447466 to use. See `AdvancedArguments` for details.
467+ num_threads: Number of threads used to train the model. Different learning
468+ algorithms use multi-threading differently and with different degree of
469+ efficiency. If specified, `num_threads` field of the
470+ `advanced_arguments.yggdrasil_deployment_config` has priority.
471+ name: The name of the model.
448472 adapt_subsample_for_maximum_training_duration: Control how the maximum
449473 training duration (if set) is applied. If false, the training stop when
450474 the time is used. If true, the size of the sampled datasets used to train
@@ -644,11 +668,14 @@ def __init__(
644668 features : Optional [List [core .FeatureUsage ]] = None ,
645669 exclude_non_specified_features : Optional [bool ] = False ,
646670 preprocessing : Optional ["tf.keras.models.Functional" ] = None ,
671+ postprocessing : Optional ["tf.keras.models.Functional" ] = None ,
647672 ranking_group : Optional [str ] = None ,
648673 temp_directory : Optional [str ] = None ,
649674 verbose : Optional [bool ] = True ,
650675 hyperparameter_template : Optional [str ] = None ,
651676 advanced_arguments : Optional [AdvancedArguments ] = None ,
677+ num_threads : Optional [int ] = 6 ,
678+ name : Optional [str ] = None ,
652679 adapt_subsample_for_maximum_training_duration : Optional [bool ] = False ,
653680 allow_na_conditions : Optional [bool ] = False ,
654681 apply_link_function : Optional [bool ] = True ,
@@ -780,10 +807,13 @@ def __init__(
780807 features = features ,
781808 exclude_non_specified_features = exclude_non_specified_features ,
782809 preprocessing = preprocessing ,
810+ postprocessing = postprocessing ,
783811 ranking_group = ranking_group ,
784812 temp_directory = temp_directory ,
785813 verbose = verbose ,
786- advanced_arguments = advanced_arguments )
814+ advanced_arguments = advanced_arguments ,
815+ num_threads = num_threads ,
816+ name = name )
787817
788818 @staticmethod
789819 def predefined_hyperparameters () -> List [core .HyperParameterTemplate ]:
@@ -848,32 +878,44 @@ class CartModel(core.CoreModel):
848878 the raw input). Can be used to prepare the features or to stack multiple
849879 models on top of each other. Unlike preprocessing done in the tf.dataset,
850880 the operations in "preprocessing" are serialized with the model.
851- ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature
852- that identifies queries in a query/document ranking task. The ranking
853- group is not added automatically for the set of features if
881+ postprocessing: Like "preprocessing" but applied on the model output.
882+ ranking_group: Only for `task=Task.RANKING`. Name of a tf.string feature that
883+ identifies queries in a query/document ranking task. The ranking group
884+ is not added automatically for the set of features if
854885 `exclude_non_specified_features=false`.
855- temp_directory: Temporary directory used during the training. The space
856- required depends on the learner. In many cases, only a temporary copy of a
857- model will be there.
886+ temp_directory: Temporary directory used to store the model Assets after the
887+ training, and possibly as a work directory during the training. This
888+ temporary directory is necessary for the model to be exported after
889+ training e.g. `model.save(path)`. If not specified, `temp_directory` is
890+ set to a temporary directory using `tempfile.TemporaryDirectory`. This
891+ directory is deleted when the model python object is garbage-collected.
858892 verbose: If true, displays information about the training.
859893 hyperparameter_template: Override the default value of the hyper-parameters.
860894 If None (default) the default parameters of the library are used. If set,
861895 `default_hyperparameter_template` refers to one of the following
862896 preconfigured hyper-parameter sets. Those sets outperform the default
863- hyper-parameters (either generally or in specific scenarios). You can omit
864- the version (e.g. remove "@v5") to use the last version of the template.
865- In this case, the hyper-parameter can change in between releases (not
866- recommended for training in production).
897+ hyper-parameters (either generally or in specific scenarios).
898+ You can omit the version (e.g. remove "@v5") to use the last version of
899+ the template. In this case, the hyper-parameter can change in between
900+ releases (not recommended for training in production).
901+
902+
867903 advanced_arguments: Advanced control of the model that most users won't need
868904 to use. See `AdvancedArguments` for details.
905+ num_threads: Number of threads used to train the model. Different learning
906+ algorithms use multi-threading differently and with different degree of
907+ efficiency. If specified, `num_threads` field of the
908+ `advanced_arguments.yggdrasil_deployment_config` has priority.
909+ name: The name of the model.
869910 allow_na_conditions: If true, the tree training evaluates conditions of the
870911 type `X is NA` i.e. `X is missing`. Default: False.
871912 categorical_algorithm: How to learn splits on categorical attributes.
872913 - `CART`: CART algorithm. Find categorical splits of the form "value \\in
873914 mask". The solution is exact for binary classification, regression and
874915 ranking. It is approximated for multi-class classification. This is a
875- good first algorithm to use. In case of overfitting (very small dataset,
876- large dictionary), the "random" algorithm is a good alternative.
916+ good first algorithm to use. In case of overfitting (very small
917+ dataset, large dictionary), the "random" algorithm is a good
918+ alternative.
877919 - `ONE_HOT`: One-hot encoding. Find the optimal categorical split of the
878920 form "attribute == param". This method is similar (but more efficient)
879921 than converting each possible categorical value into a
@@ -894,7 +936,7 @@ class CartModel(core.CoreModel):
894936 available, the least frequent items are ignored. Changing this value is
895937 similar to change the "max_vocab_count" before loading the dataset, with
896938 the following exception: With `max_vocab_count`, all the remaining items
897- are grouped in a special Out-of-vocabulary item. With `max_num_items`,
939+ are grouped in a special Out-of-vocabulary item. With `max_num_items`,
898940 this is not the case. Default: -1.
899941 categorical_set_split_min_item_frequency: For categorical set splits e.g.
900942 texts. Minimum number of occurrences of an item to be considered.
@@ -904,16 +946,16 @@ class CartModel(core.CoreModel):
904946 words, as long as a node satisfies the split constraints (e.g. maximum
905947 depth, minimum number of observations), the node will be split. This is
906948 the "classical" way to grow decision trees.
907- - `BEST_FIRST_GLOBAL`: The node with the best loss reduction among all the
908- nodes of the tree is selected for splitting. This method is also called
909- "best first" or "leaf-wise growth". See "Best-first decision
949+ - `BEST_FIRST_GLOBAL`: The node with the best loss reduction among all
950+ the nodes of the tree is selected for splitting. This method is also
951+ called "best first" or "leaf-wise growth". See "Best-first decision
910952 tree learning", Shi and "Additive logistic regression : A statistical
911953 view of boosting", Friedman for more details. Default: "LOCAL".
912954 in_split_min_examples_check: Whether to check the `min_examples` constraint
913955 in the split search (i.e. splits leading to one child having less than
914- `min_examples` examples are considered invalid) or before the split search
915- (i.e. a node can be derived only if it contains more than `min_examples`
916- examples). If false, there can be nodes with less than
956+ `min_examples` examples are considered invalid) or before the split
957+ search (i.e. a node can be derived only if it contains more than
958+ `min_examples` examples). If false, there can be nodes with less than
917959 `min_examples` training examples. Default: True.
918960 max_depth: Maximum depth of the tree. `max_depth=1` means that all trees
919961 will be roots. Negative values are ignored. Default: 16.
@@ -926,9 +968,9 @@ class CartModel(core.CoreModel):
926968 model training non-deterministic. Default: -1.0.
927969 min_examples: Minimum number of examples in a node. Default: 5.
928970 missing_value_policy: Method used to handle missing attribute values.
929- - `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
930- (in case of numerical attribute) or the most-frequent-item (in case of
931- categorical attribute) computed on the entire dataset (i.e. the
971+ - `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the
972+ mean (in case of numerical attribute) or the most-frequent-item (in
973+ case of categorical attribute) computed on the entire dataset (i.e. the
932974 information contained in the data spec).
933975 - `LOCAL_IMPUTATION`: Missing attribute values are imputed with the mean
934976 (numerical attribute) or most-frequent-item (in the case of categorical
@@ -942,24 +984,24 @@ class CartModel(core.CoreModel):
942984 node. An attribute is valid if it has at least a valid split. If
943985 `num_candidate_attributes=0`, the value is set to the classical default
944986 value for Random Forest: `sqrt(number of input attributes)` in case of
945- classification and `number_of_input_attributes / 3` in case of
946- regression. If `num_candidate_attributes=-1`, all the attributes are
987+ classification and `number_of_input_attributes / 3` in case of
988+ regression. If `num_candidate_attributes=-1`, all the attributes are
947989 tested. Default: 0.
948990 num_candidate_attributes_ratio: Ratio of attributes tested at each node. If
949991 set, it is equivalent to `num_candidate_attributes =
950992 number_of_input_features x num_candidate_attributes_ratio`. The possible
951993 values are between ]0, and 1] as well as -1. If not set or equal to -1,
952994 the `num_candidate_attributes` is used. Default: -1.0.
953- sorting_strategy: How are sorted the numerical features in order to find the
954- splits
995+ sorting_strategy: How the numerical features are sorted in order to find
996+ the splits
955997 - PRESORT: The features are pre-sorted at the start of the training. This
956998 solution is faster but consumes much more memory than IN_NODE.
957999 - IN_NODE: The features are sorted just before being used in the node.
9581000 This solution is slow but consumes little amount of memory.
9591001 . Default: "PRESORT".
9601002 sparse_oblique_normalization: For sparse oblique splits i.e.
961- `split_axis=SPARSE_OBLIQUE`. Normalization applied on the features, before
962- applying the sparse oblique projections.
1003+ `split_axis=SPARSE_OBLIQUE`. Normalization applied on the features,
1004+ before applying the sparse oblique projections.
9631005 - `NONE`: No normalization.
9641006 - `STANDARD_DEVIATION`: Normalize the feature by the estimated standard
9651007 deviation on the entire train dataset. Also known as Z-Score
@@ -969,19 +1011,20 @@ class CartModel(core.CoreModel):
9691011 sparse_oblique_num_projections_exponent: For sparse oblique splits i.e.
9701012 `split_axis=SPARSE_OBLIQUE`. Controls the number of random projections
9711013 to test at each node as `num_features^num_projections_exponent`. Default:
972- None.
1014+ None.
9731015 sparse_oblique_projection_density_factor: For sparse oblique splits i.e.
9741016 `split_axis=SPARSE_OBLIQUE`. Controls the density of the random projections:
9751017 independently for each projection, each feature has a probability `projection_density_factor / num_features` of being included. Default:
976- None.
1018+ None.
9771019 split_axis: What structure of split to consider for numerical features.
978- - `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
979- is the "classical" way to train a tree. Default value.
1020+ - `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time).
1021+ This is the "classical" way to train a tree. Default value.
9801022 - `SPARSE_OBLIQUE`: Sparse oblique splits (i.e. splits on a small number
9811023 of features) from "Sparse Projection Oblique Random Forests", Tomita et
9821024 al., 2020. Default: "AXIS_ALIGNED".
9831025 validation_ratio: Ratio of the training dataset used to create the
9841026 validation dataset used to prune the tree. Default: 0.1.
1027+
9851028 """
9861029
9871030 @core ._list_explicit_arguments
@@ -990,11 +1033,14 @@ def __init__(self,
9901033 features : Optional [List [core .FeatureUsage ]] = None ,
9911034 exclude_non_specified_features : Optional [bool ] = False ,
9921035 preprocessing : Optional ["tf.keras.models.Functional" ] = None ,
1036+ postprocessing : Optional ["tf.keras.models.Functional" ] = None ,
9931037 ranking_group : Optional [str ] = None ,
9941038 temp_directory : Optional [str ] = None ,
9951039 verbose : Optional [bool ] = True ,
9961040 hyperparameter_template : Optional [str ] = None ,
9971041 advanced_arguments : Optional [AdvancedArguments ] = None ,
1042+ num_threads : Optional [int ] = 6 ,
1043+ name : Optional [str ] = None ,
9981044 allow_na_conditions : Optional [bool ] = False ,
9991045 categorical_algorithm : Optional [str ] = "CART" ,
10001046 categorical_set_split_greedy_sampling : Optional [float ] = 0.1 ,
@@ -1072,10 +1118,13 @@ def __init__(self,
10721118 features = features ,
10731119 exclude_non_specified_features = exclude_non_specified_features ,
10741120 preprocessing = preprocessing ,
1121+ postprocessing = postprocessing ,
10751122 ranking_group = ranking_group ,
10761123 temp_directory = temp_directory ,
10771124 verbose = verbose ,
1078- advanced_arguments = advanced_arguments )
1125+ advanced_arguments = advanced_arguments ,
1126+ num_threads = num_threads ,
1127+ name = name )
10791128
10801129 @staticmethod
10811130 def predefined_hyperparameters () -> List [core .HyperParameterTemplate ]:
0 commit comments