From df01031cfe34fd056c97b51f191b35f419dd4713 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Tue, 1 Mar 2022 15:39:09 -0800 Subject: [PATCH] Zero-shot AutoML (#468) * Prepare for release Co-authored-by: Moe Kayali * bug fix * improve doc and code quality Co-authored-by: Qingyun Wu --- flaml/automl.py | 58 +- flaml/default/README.md | 184 +++ flaml/default/__init__.py | 18 + flaml/default/all/binary.json | 943 ++++++++++++ flaml/default/all/multiclass.json | 1325 +++++++++++++++++ flaml/default/all/regression.json | 882 +++++++++++ flaml/default/estimator.py | 192 +++ flaml/default/extra_tree/binary.json | 358 +++++ flaml/default/extra_tree/multiclass.json | 307 ++++ flaml/default/extra_tree/regression.json | 309 ++++ flaml/default/greedy.py | 97 ++ flaml/default/lgbm/binary.json | 367 +++++ flaml/default/lgbm/multiclass.json | 413 +++++ flaml/default/lgbm/regression.json | 278 ++++ flaml/default/portfolio.py | 234 +++ flaml/default/regret.py | 50 + flaml/default/rf/binary.json | 330 ++++ flaml/default/rf/multiclass.json | 325 ++++ flaml/default/rf/regression.json | 290 ++++ flaml/default/suggest.py | 223 +++ flaml/default/xgb_limitdepth/binary.json | 326 ++++ flaml/default/xgb_limitdepth/multiclass.json | 354 +++++ flaml/default/xgb_limitdepth/regression.json | 347 +++++ flaml/default/xgboost/binary.json | 372 +++++ flaml/default/xgboost/multiclass.json | 509 +++++++ flaml/default/xgboost/regression.json | 308 ++++ flaml/model.py | 8 +- test/default/all/metafeatures.csv | 13 + test/default/extra_tree/2dplanes.json | 1 + test/default/extra_tree/Airlines.json | 1 + test/default/extra_tree/Albert.json | 1 + .../extra_tree/Amazon_employee_access.json | 1 + test/default/extra_tree/adult.json | 1 + test/default/extra_tree/bng_breastTumor.json | 1 + test/default/extra_tree/bng_pbc.json | 1 + test/default/extra_tree/car.json | 1 + test/default/extra_tree/connect-4.json | 1 + test/default/extra_tree/default.json | 1 + test/default/extra_tree/dilbert.json | 1 + 
test/default/extra_tree/poker.json | 1 + test/default/extra_tree/results.csv | 142 ++ test/default/lgbm/2dplanes.json | 1 + test/default/lgbm/APSFailure.json | 1 + test/default/lgbm/Airlines.json | 1 + test/default/lgbm/Albert.json | 1 + test/default/lgbm/Amazon_employee_access.json | 1 + test/default/lgbm/Dionis.json | 1 + test/default/lgbm/adult.json | 1 + test/default/lgbm/bng_breastTumor.json | 1 + test/default/lgbm/bng_pbc.json | 1 + test/default/lgbm/car.json | 1 + test/default/lgbm/connect-4.json | 1 + test/default/lgbm/default.json | 1 + test/default/lgbm/dilbert.json | 1 + test/default/lgbm/poker.json | 1 + test/default/lgbm/results.csv | 167 +++ test/default/rf/2dplanes.json | 1 + test/default/rf/Airlines.json | 1 + test/default/rf/Albert.json | 1 + test/default/rf/Amazon_employee_access.json | 1 + test/default/rf/Dionis.json | 1 + test/default/rf/adult.json | 1 + test/default/rf/bng_breastTumor.json | 1 + test/default/rf/bng_pbc.json | 1 + test/default/rf/car.json | 1 + test/default/rf/connect-4.json | 1 + test/default/rf/default.json | 1 + test/default/rf/dilbert.json | 1 + test/default/rf/poker.json | 1 + test/default/rf/results.csv | 145 ++ test/default/test_defaults.py | 222 +++ test/default/xgb_limitdepth/2dplanes.json | 1 + test/default/xgb_limitdepth/Airlines.json | 1 + .../Amazon_employee_access.json | 1 + test/default/xgb_limitdepth/adult.json | 1 + .../xgb_limitdepth/bng_breastTumor.json | 1 + test/default/xgb_limitdepth/bng_pbc.json | 1 + test/default/xgb_limitdepth/car.json | 1 + test/default/xgb_limitdepth/connect-4.json | 1 + test/default/xgb_limitdepth/default.json | 1 + test/default/xgb_limitdepth/dilbert.json | 1 + test/default/xgb_limitdepth/poker.json | 1 + test/default/xgb_limitdepth/results.csv | 116 ++ test/default/xgboost/2dplanes.json | 1 + test/default/xgboost/Airlines.json | 1 + test/default/xgboost/Albert.json | 1 + .../xgboost/Amazon_employee_access.json | 1 + test/default/xgboost/adult.json | 1 + 
test/default/xgboost/bng_breastTumor.json | 1 + test/default/xgboost/bng_pbc.json | 1 + test/default/xgboost/car.json | 1 + test/default/xgboost/connect-4.json | 1 + test/default/xgboost/default.json | 1 + test/default/xgboost/dilbert.json | 1 + test/default/xgboost/poker.json | 1 + test/default/xgboost/results.csv | 222 +++ test/default_lgbm.py | 16 + test/default_xgb.py | 13 + website/docs/Examples/Default-Flamlized.md | 97 ++ website/docs/Getting-Started.md | 12 +- website/docs/Research.md | 1 + website/docs/Use-Cases/Zero-Shot-AutoML.md | 247 +++ 102 files changed, 10873 insertions(+), 9 deletions(-) create mode 100644 flaml/default/README.md create mode 100644 flaml/default/__init__.py create mode 100644 flaml/default/all/binary.json create mode 100644 flaml/default/all/multiclass.json create mode 100644 flaml/default/all/regression.json create mode 100644 flaml/default/estimator.py create mode 100644 flaml/default/extra_tree/binary.json create mode 100644 flaml/default/extra_tree/multiclass.json create mode 100644 flaml/default/extra_tree/regression.json create mode 100644 flaml/default/greedy.py create mode 100644 flaml/default/lgbm/binary.json create mode 100644 flaml/default/lgbm/multiclass.json create mode 100644 flaml/default/lgbm/regression.json create mode 100644 flaml/default/portfolio.py create mode 100644 flaml/default/regret.py create mode 100644 flaml/default/rf/binary.json create mode 100644 flaml/default/rf/multiclass.json create mode 100644 flaml/default/rf/regression.json create mode 100644 flaml/default/suggest.py create mode 100644 flaml/default/xgb_limitdepth/binary.json create mode 100644 flaml/default/xgb_limitdepth/multiclass.json create mode 100644 flaml/default/xgb_limitdepth/regression.json create mode 100644 flaml/default/xgboost/binary.json create mode 100644 flaml/default/xgboost/multiclass.json create mode 100644 flaml/default/xgboost/regression.json create mode 100644 test/default/all/metafeatures.csv create mode 100644 
test/default/extra_tree/2dplanes.json create mode 100644 test/default/extra_tree/Airlines.json create mode 100644 test/default/extra_tree/Albert.json create mode 100644 test/default/extra_tree/Amazon_employee_access.json create mode 100644 test/default/extra_tree/adult.json create mode 100644 test/default/extra_tree/bng_breastTumor.json create mode 100644 test/default/extra_tree/bng_pbc.json create mode 100644 test/default/extra_tree/car.json create mode 100644 test/default/extra_tree/connect-4.json create mode 100644 test/default/extra_tree/default.json create mode 100644 test/default/extra_tree/dilbert.json create mode 100644 test/default/extra_tree/poker.json create mode 100644 test/default/extra_tree/results.csv create mode 100644 test/default/lgbm/2dplanes.json create mode 100644 test/default/lgbm/APSFailure.json create mode 100644 test/default/lgbm/Airlines.json create mode 100644 test/default/lgbm/Albert.json create mode 100644 test/default/lgbm/Amazon_employee_access.json create mode 100644 test/default/lgbm/Dionis.json create mode 100644 test/default/lgbm/adult.json create mode 100644 test/default/lgbm/bng_breastTumor.json create mode 100644 test/default/lgbm/bng_pbc.json create mode 100644 test/default/lgbm/car.json create mode 100644 test/default/lgbm/connect-4.json create mode 100644 test/default/lgbm/default.json create mode 100644 test/default/lgbm/dilbert.json create mode 100644 test/default/lgbm/poker.json create mode 100644 test/default/lgbm/results.csv create mode 100644 test/default/rf/2dplanes.json create mode 100644 test/default/rf/Airlines.json create mode 100644 test/default/rf/Albert.json create mode 100644 test/default/rf/Amazon_employee_access.json create mode 100644 test/default/rf/Dionis.json create mode 100644 test/default/rf/adult.json create mode 100644 test/default/rf/bng_breastTumor.json create mode 100644 test/default/rf/bng_pbc.json create mode 100644 test/default/rf/car.json create mode 100644 test/default/rf/connect-4.json 
create mode 100644 test/default/rf/default.json create mode 100644 test/default/rf/dilbert.json create mode 100644 test/default/rf/poker.json create mode 100644 test/default/rf/results.csv create mode 100644 test/default/test_defaults.py create mode 100644 test/default/xgb_limitdepth/2dplanes.json create mode 100644 test/default/xgb_limitdepth/Airlines.json create mode 100644 test/default/xgb_limitdepth/Amazon_employee_access.json create mode 100644 test/default/xgb_limitdepth/adult.json create mode 100644 test/default/xgb_limitdepth/bng_breastTumor.json create mode 100644 test/default/xgb_limitdepth/bng_pbc.json create mode 100644 test/default/xgb_limitdepth/car.json create mode 100644 test/default/xgb_limitdepth/connect-4.json create mode 100644 test/default/xgb_limitdepth/default.json create mode 100644 test/default/xgb_limitdepth/dilbert.json create mode 100644 test/default/xgb_limitdepth/poker.json create mode 100644 test/default/xgb_limitdepth/results.csv create mode 100644 test/default/xgboost/2dplanes.json create mode 100644 test/default/xgboost/Airlines.json create mode 100644 test/default/xgboost/Albert.json create mode 100644 test/default/xgboost/Amazon_employee_access.json create mode 100644 test/default/xgboost/adult.json create mode 100644 test/default/xgboost/bng_breastTumor.json create mode 100644 test/default/xgboost/bng_pbc.json create mode 100644 test/default/xgboost/car.json create mode 100644 test/default/xgboost/connect-4.json create mode 100644 test/default/xgboost/default.json create mode 100644 test/default/xgboost/dilbert.json create mode 100644 test/default/xgboost/poker.json create mode 100644 test/default/xgboost/results.csv create mode 100644 test/default_lgbm.py create mode 100644 test/default_xgb.py create mode 100644 website/docs/Examples/Default-Flamlized.md create mode 100644 website/docs/Use-Cases/Zero-Shot-AutoML.md diff --git a/flaml/automl.py b/flaml/automl.py index d7b4aeefe..a8cfd94de 100644 --- a/flaml/automl.py +++ 
b/flaml/automl.py @@ -49,6 +49,7 @@ from .data import ( ) from . import tune from .training_log import training_log_reader, training_log_writer +from flaml.default.suggest import suggest_learner logger = logging.getLogger(__name__) logger_formatter = logging.Formatter( @@ -540,9 +541,13 @@ class AutoML(BaseEstimator): is used. BlendSearch can be tried when the search space is complex, for example, containing multiple disjoint, discontinuous subspaces. When set to 'random', random search is used. - starting_points: A dictionary to specify the starting hyperparameter - config for the estimators. - Keys are the name of the estimators, and values are the starting + starting_points: A dictionary or a str to specify the starting hyperparameter + config for the estimators | default="static". + If str: + - if "data", use data-dependent defaults; + - if "data:path" use data-dependent defaults which are stored at path; + - if "static", use data-independent defaults. + If dict, keys are the name of the estimators, and values are the starting hyperparamter configurations for the corresponding estimators. The value can be a single hyperparamter configuration dict or a list of hyperparamter configuration dicts. @@ -611,7 +616,7 @@ class AutoML(BaseEstimator): settings["split_type"] = settings.get("split_type", "auto") settings["hpo_method"] = settings.get("hpo_method", "auto") settings["learner_selector"] = settings.get("learner_selector", "sample") - settings["starting_points"] = settings.get("starting_points", {}) + settings["starting_points"] = settings.get("starting_points", "static") settings["n_concurrent_trials"] = settings.get("n_concurrent_trials", 1) settings["keep_search_state"] = settings.get("keep_search_state", False) settings["early_stop"] = settings.get("early_stop", False) @@ -1900,9 +1905,13 @@ class AutoML(BaseEstimator): is used. BlendSearch can be tried when the search space is complex, for example, containing multiple disjoint, discontinuous subspaces. 
When set to 'random', random search is used. - starting_points: A dictionary to specify the starting hyperparameter - config for the estimators. - Keys are the name of the estimators, and values are the starting + starting_points: A dictionary or a str to specify the starting hyperparameter + config for the estimators | default="data". + If str: + - if "data", use data-dependent defaults; + - if "data:path" use data-dependent defaults which are stored at path; + - if "static", use data-independent defaults. + If dict, keys are the name of the estimators, and values are the starting hyperparamter configurations for the corresponding estimators. The value can be a single hyperparamter configuration dict or a list of hyperparamter configuration dicts. @@ -2191,6 +2200,41 @@ class AutoML(BaseEstimator): get_estimator_class(self._state.task, estimator_name), ) # set up learner search space + if isinstance(starting_points, str) and starting_points.startswith("data"): + from flaml.default import suggest_config + + location = starting_points[5:] + starting_points = {} + for estimator_name in estimator_list: + try: + configs = suggest_config( + self._state.task, + self._X_train_all, + self._y_train_all, + estimator_name, + location, + k=1, + ) + starting_points[estimator_name] = [ + x["hyperparameters"] for x in configs + ] + except FileNotFoundError: + pass + try: + learner = suggest_learner( + self._state.task, + self._X_train_all, + self._y_train_all, + estimator_list=estimator_list, + location=location, + ) + if learner != estimator_list[0]: + estimator_list.remove(learner) + estimator_list.insert(0, learner) + except FileNotFoundError: + pass + + starting_points = {} if starting_points == "static" else starting_points for estimator_name in estimator_list: estimator_class = self._state.learner_classes[estimator_name] estimator_class.init() diff --git a/flaml/default/README.md b/flaml/default/README.md new file mode 100644 index 000000000..cc4ad4c8a --- /dev/null +++ 
b/flaml/default/README.md @@ -0,0 +1,184 @@ +# FLAML-Zero: Zero-shot AutoML + +## Zero-shot AutoML + +There are several ways to use zero-shot AutoML, i.e., train a model with the data-dependent default configuration. + +0. Use estimators in `flaml.default.estimator`. + +```python +from flaml.default import LGBMRegressor + +estimator = LGBMRegressor() +estimator.fit(X_train, y_train) +estimator.predict(X_test) +``` + + +1. Use AutoML.fit(). Set `starting_points="data"` and `max_iter=0`. + +```python +X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame) +automl = AutoML() +automl_settings = { + "time_budget": 2, + "task": "classification", + "log_file_name": "test/iris.log", + "starting_points": "data", + "max_iter": 0, +} +automl.fit(X_train, y_train, **automl_settings) +``` + +2. Use `flaml.default.preprocess_and_suggest_hyperparams`. + +```python +from flaml.default import preprocess_and_suggest_hyperparams + +X, y = load_iris(return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) +hyperparams, estimator_class, X_transformed, y_transformed, feature_transformer, label_transformer = preprocess_and_suggest_hyperparams( + "classification", X_train, y_train, "lgbm" +) +model = estimator_class(**hyperparams) # estimator_class is LGBMClassifier +model.fit(X_transformed, y_train) # LGBMClassifier can handle raw labels +X_test = feature_transformer.transform(X_test) # preprocess test data +y_pred = model.predict(X_test) +``` + +If you want to use your own meta-learned defaults, specify the path containing the meta-learned defaults.
For example, + +```python +X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame) +automl = AutoML() +automl_settings = { + "time_budget": 2, + "task": "classification", + "log_file_name": "test/iris.log", + "starting_points": "data:test/default", + "estimator_list": ["lgbm", "xgb_limitdepth", "rf"], + "max_iter": 0, +} +automl.fit(X_train, y_train, **automl_settings) +``` + +Since this is a multiclass task, it will look for the following files under `test/default/`: + +- `all/multiclass.json`. +- `{learner_name}/multiclass.json` for every learner_name in the estimator_list. + +Read the next subsection to understand how to generate these files if you would like to meta-learn the defaults yourself. + +To perform hyperparameter search starting with the data-dependent defaults, remove `max_iter=0`. + +## Perform Meta Learning + +FLAML provides a package `flaml.default` to learn defaults customized for your own tasks/learners/metrics. + +### Prepare a collection of training tasks + +Collect a diverse set of training tasks. For each task, extract its meta features and save them in a .csv file. For example, test/default/all/metafeatures.csv: + +``` +Dataset,NumberOfInstances,NumberOfFeatures,NumberOfClasses,PercentageOfNumericFeatures +2dplanes,36691,10,0,1.0 +adult,43957,14,2,0.42857142857142855 +Airlines,485444,7,2,0.42857142857142855 +Albert,382716,78,2,0.3333333333333333 +Amazon_employee_access,29492,9,2,0.0 +bng_breastTumor,104976,9,0,0.1111111111111111 +bng_pbc,900000,18,0,0.5555555555555556 +car,1555,6,4,0.0 +connect-4,60801,42,3,0.0 +dilbert,9000,2000,5,1.0 +Dionis,374569,60,355,1.0 +poker,922509,10,0,1.0 +``` + +The first column is the dataset name, and the latter four are meta features. + +### Prepare the candidate configurations + +You can extract the best configurations for each task in your collection of training tasks by running flaml on each of them with a long enough budget.
Save the best configuration in a .json file under `{location_for_defaults}/{learner_name}/{task_name}.json`. For example, + +```python +X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame) +automl.fit(X_train, y_train, estimator_list=["lgbm"], **settings) +automl.save_best_config("test/default/lgbm/iris.json") +``` + +### Evaluate each candidate configuration on each task + +Save the evaluation results in a .csv file. For example, save the evaluation results for lgbm under `test/default/lgbm/results.csv`: + +``` +task,fold,type,result,params +2dplanes,0,regression,0.946366,{'_modeljson': 'lgbm/2dplanes.json'} +2dplanes,0,regression,0.907774,{'_modeljson': 'lgbm/adult.json'} +2dplanes,0,regression,0.901643,{'_modeljson': 'lgbm/Airlines.json'} +2dplanes,0,regression,0.915098,{'_modeljson': 'lgbm/Albert.json'} +2dplanes,0,regression,0.302328,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +2dplanes,0,regression,0.94523,{'_modeljson': 'lgbm/bng_breastTumor.json'} +2dplanes,0,regression,0.945698,{'_modeljson': 'lgbm/bng_pbc.json'} +2dplanes,0,regression,0.946194,{'_modeljson': 'lgbm/car.json'} +2dplanes,0,regression,0.945549,{'_modeljson': 'lgbm/connect-4.json'} +2dplanes,0,regression,0.946232,{'_modeljson': 'lgbm/default.json'} +2dplanes,0,regression,0.945594,{'_modeljson': 'lgbm/dilbert.json'} +2dplanes,0,regression,0.836996,{'_modeljson': 'lgbm/Dionis.json'} +2dplanes,0,regression,0.917152,{'_modeljson': 'lgbm/poker.json'} +adult,0,binary,0.927203,{'_modeljson': 'lgbm/2dplanes.json'} +adult,0,binary,0.932072,{'_modeljson': 'lgbm/adult.json'} +adult,0,binary,0.926563,{'_modeljson': 'lgbm/Airlines.json'} +adult,0,binary,0.928604,{'_modeljson': 'lgbm/Albert.json'} +adult,0,binary,0.911171,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +adult,0,binary,0.930645,{'_modeljson': 'lgbm/bng_breastTumor.json'} +adult,0,binary,0.928603,{'_modeljson': 'lgbm/bng_pbc.json'} +adult,0,binary,0.915825,{'_modeljson': 'lgbm/car.json'} 
+adult,0,binary,0.919499,{'_modeljson': 'lgbm/connect-4.json'} +adult,0,binary,0.930109,{'_modeljson': 'lgbm/default.json'} +adult,0,binary,0.932453,{'_modeljson': 'lgbm/dilbert.json'} +adult,0,binary,0.921959,{'_modeljson': 'lgbm/Dionis.json'} +adult,0,binary,0.910763,{'_modeljson': 'lgbm/poker.json'} +... +``` + +The `type` column indicates the type of the task, such as regression, binary or multiclass. +The `result` column stores the evaluation result, assuming the larger the better. The `params` column indicates which json config is used. For example, 'lgbm/2dplanes.json' indicates that the best lgbm configuration extracted from 2dplanes is used. + +### Learn data-dependent defaults + +To recap, the inputs required for meta-learning are: + +1. Metafeatures: e.g., `{location}/all/metafeatures.csv`. +1. Configurations: `{location}/{learner_name}/{task_name}.json`. +1. Evaluation results: `{location}/{learner_name}/results.csv`. + +For example, if the input location is "test/default", learners are lgbm, xgb_limitdepth and rf, the following command learns data-dependent defaults for binary classification tasks. + +```bash +python portfolio.py --output test/default --input test/default --metafeatures test/default/all/metafeatures.csv --task binary --estimator lgbm xgb_limitdepth rf +``` + +It will produce the following files as output: + +- test/default/lgbm/binary.json: the learned defaults for lgbm. +- test/default/xgb_limitdepth/binary.json: the learned defaults for xgb_limitdepth. +- test/default/rf/binary.json: the learned defaults for rf. +- test/default/all/binary.json: the learned defaults for lgbm, xgb_limitdepth and rf together. + +Change "binary" into "multiclass" or "regression" for the other tasks. + +## Reference + +For more technical details, please check our research paper. + +* [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. arXiv preprint arXiv:2202.09927 (2022).
+ +```bibtex +@article{Kayali2022default, + title={Mining Robust Default Configurations for Resource-constrained AutoML}, + author={Moe Kayali and Chi Wang}, + year={2022}, + journal={arXiv preprint arXiv:2202.09927}, +} +``` \ No newline at end of file diff --git a/flaml/default/__init__.py b/flaml/default/__init__.py new file mode 100644 index 000000000..a52051e13 --- /dev/null +++ b/flaml/default/__init__.py @@ -0,0 +1,18 @@ +from .suggest import ( + suggest_config, + suggest_learner, + suggest_hyperparams, + preprocess_and_suggest_hyperparams, + meta_feature, +) +from .estimator import ( + flamlize_estimator, + LGBMClassifier, + LGBMRegressor, + XGBClassifier, + XGBRegressor, + RandomForestClassifier, + RandomForestRegressor, + ExtraTreesClassifier, + ExtraTreesRegressor, +) diff --git a/flaml/default/all/binary.json b/flaml/default/all/binary.json new file mode 100644 index 000000000..2d98013c4 --- /dev/null +++ b/flaml/default/all/binary.json @@ -0,0 +1,943 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 2541, + "num_leaves": 1667, + "min_child_samples": 29, + "learning_rate": 0.0016660662914022302, + "log_max_bin": 8, + "colsample_bytree": 0.5157078343718623, + "reg_alpha": 0.045792841240713165, + "reg_lambda": 0.0012362651138125363, + "FLAML_sample_size": 436899 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 141, + "num_leaves": 139, + "min_child_samples": 8, + "learning_rate": 0.04824748268727149, + "log_max_bin": 9, + "colsample_bytree": 0.5261441571042451, + "reg_alpha": 0.002896920833899335, + "reg_lambda": 0.024463247502165594 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 31204, + "num_leaves": 4, + "min_child_samples": 3, + "learning_rate": 0.009033979476164342, + "log_max_bin": 10, + "colsample_bytree": 0.5393339924944204, + "reg_alpha": 15.800090067239827, + "reg_lambda": 34.82471227276953 + } + }, + { + "class": "lgbm", + 
"hyperparameters": { + "n_estimators": 362, + "num_leaves": 1208, + "min_child_samples": 8, + "learning_rate": 0.02070742242160566, + "log_max_bin": 4, + "colsample_bytree": 0.37915528071680865, + "reg_alpha": 0.002982599447751338, + "reg_lambda": 1.136605174453919, + "FLAML_sample_size": 337147 + } + }, + { + "class": "lgbm", + "hyperparameters": {} + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 319, + "max_leaves": 1312, + "min_child_weight": 0.001, + "learning_rate": 0.01872379806270421, + "subsample": 0.6890079660561895, + "colsample_bylevel": 0.7551225121854014, + "colsample_bytree": 0.7860755604500558, + "reg_alpha": 0.17028752704343114, + "reg_lambda": 1.4375743264564231 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 7902, + "max_leaves": 49, + "min_child_weight": 0.038063497848955595, + "learning_rate": 0.0009765625, + "subsample": 0.9357800695141445, + "colsample_bylevel": 0.47031312177249246, + "colsample_bytree": 0.9053386579586192, + "reg_alpha": 1.5286102593845932, + "reg_lambda": 18.96811296717419 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 13499, + "max_leaves": 60, + "min_child_weight": 0.008494221584011285, + "learning_rate": 0.006955765856675575, + "subsample": 0.5965241023754743, + "colsample_bylevel": 0.590641168068946, + "colsample_bytree": 1.0, + "reg_alpha": 0.2522240954379289, + "reg_lambda": 5.351809144038808 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 591, + "max_leaves": 16651, + "min_child_weight": 0.03356567864689129, + "learning_rate": 0.002595066436678338, + "subsample": 0.9114132805513452, + "colsample_bylevel": 0.9503441844594458, + "colsample_bytree": 0.5703338448066768, + "reg_alpha": 0.010405212349127894, + "reg_lambda": 0.05352660657433639 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 877, + "max_depth": 11, + "min_child_weight": 0.6205465771093738, + "learning_rate": 
0.013622118381700795, + "subsample": 0.566692814245426, + "colsample_bylevel": 0.8865741642101924, + "colsample_bytree": 1.0, + "reg_alpha": 0.01386336444764391, + "reg_lambda": 3.113947886074155 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 5457, + "max_depth": 6, + "min_child_weight": 0.19978269031877885, + "learning_rate": 0.003906732665632749, + "subsample": 0.8207785234496902, + "colsample_bylevel": 0.8438751931476698, + "colsample_bytree": 0.42202862997585794, + "reg_alpha": 0.017372558844968737, + "reg_lambda": 0.03977802121721031 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 3526, + "max_depth": 13, + "min_child_weight": 0.0994486725676356, + "learning_rate": 0.0009765625, + "subsample": 0.46123759274652554, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.4498813776397717, + "reg_alpha": 0.002599398546499414, + "reg_lambda": 0.028336396854402753 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": {} + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 501, + "max_features": 0.24484242524861066, + "max_leaves": 1156, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 356, + "max_features": 0.1, + "max_leaves": 102, + "criterion": "gini" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 1000, + "max_features": 0.1779692423238241, + "max_leaves": 7499, + "criterion": "gini" + } + }, + { + "class": "rf", + "hyperparameters": {} + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 1080, + "max_features": 1.0, + "max_leaves": 590, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.46132798093546956, + "max_leaves": 12856, + "criterion": "gini" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 408, + "max_features": 0.3629795757973625, + "max_leaves": 81, + "criterion": 
"entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 553, + "max_features": 0.9592132391435095, + "max_leaves": 1127, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 18000.0, + 28.0, + 2.0, + 0.7565217391304347 + ], + "scale": [ + 42124.0, + 130.0, + 1.0, + 0.5714285714285715 + ] + }, + "neighbors": [ + { + "features": [ + 1.196467571930491, + 1.0923076923076922, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 5, + 18, + 19, + 4, + 8, + 3, + 9, + 7, + 10, + 6, + 21, + 2, + 20, + 17, + 13, + 16, + 15, + 1, + 14, + 12, + 0, + 11 + ] + }, + { + "features": [ + 11.096856898680088, + -0.16153846153846155, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 0, + 5, + 7, + 9, + 11, + 8, + 1, + 18, + 15, + 12, + 3, + 2, + 10, + 20, + 4, + 6, + 13, + 17, + 14, + 19, + 16, + 21 + ] + }, + { + "features": [ + 8.658152122305575, + 0.38461538461538464, + 0.0, + -0.7405797101449274 + ], + "choice": [ + 7, + 9, + 2, + 5, + 10, + 1, + 0, + 3, + 12, + 4, + 6, + 11, + 8, + 18, + 15, + 13, + 20, + 16, + 17, + 21, + 14, + 19 + ] + }, + { + "features": [ + 0.27281359794891274, + -0.14615384615384616, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 8, + 11, + 0, + 5, + 1, + 15, + 13, + 16, + 10, + 9, + 20, + 7, + 17, + 12, + 4, + 3, + 21, + 18, + 6, + 14, + 19, + 2 + ] + }, + { + "features": [ + -0.4125676573924604, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 19, + 15, + 11, + 17, + 8, + 14, + 13, + 16, + 3, + 18, + 21, + 6, + 9, + 10, + 20, + 5, + 7, + 1, + 0, + 12, + 2, + 4 + ] + }, + { + "features": [ + 0.6409647706770487, + 1.5538461538461539, + 0.0, + 0.0 + ], + "choice": [ + 2, + 14, + 10, + 19, + 6, + 0, + 1, + 4, + 11, + 3, + 5, + 17, + 9, + 13, + 12, + 20, + 7, + 15, + 18, + 8, + 16, + 21 + ] + }, + { + "features": [ + 2.3515573069983855, + 0.16923076923076924, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 7, + 9, + 10, + 5, + 
2, + 0, + 3, + 1, + 12, + 4, + 6, + 11, + 18, + 8, + 15, + 13, + 16, + 21, + 20, + 17, + 14, + 19 + ] + }, + { + "features": [ + 0.6162045389801538, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 10, + 12, + 1, + 4, + 11, + 6, + 9, + 0, + 2, + 5, + 3, + 7, + 8, + 13, + 20, + 17, + 15, + 14, + 16, + 19, + 18, + 21 + ] + }, + { + "features": [ + 0.5386240622922799, + -0.09230769230769231, + 0.0, + -0.5582880434782608 + ], + "choice": [ + 1, + 0, + 5, + 11, + 10, + 9, + 6, + 4, + 3, + 20, + 17, + 18, + 13, + 15, + 16, + 8, + 7, + 2, + 12, + 21, + 19, + 14 + ] + }, + { + "features": [ + -0.41133320672300827, + -0.18461538461538463, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 14, + 9, + 7, + 10, + 15, + 13, + 3, + 6, + 16, + 5, + 19, + 2, + 12, + 18, + 4, + 21, + 20, + 0, + 11, + 17, + 1, + 8 + ] + }, + { + "features": [ + -0.31155635742094767, + 12.36923076923077, + 0.0, + 0.3865087169129372 + ], + "choice": [ + 7, + 2, + 6, + 10, + 3, + 0, + 9, + 20, + 5, + 1, + 18, + 11, + 8, + 17, + 4, + 13, + 15, + 12, + 14, + 16, + 19, + 21 + ] + }, + { + "features": [ + -0.40594435476213087, + -0.06153846153846154, + 0.0, + -0.7114130434782607 + ], + "choice": [ + 9, + 5, + 6, + 1, + 0, + 13, + 15, + 7, + 19, + 4, + 16, + 3, + 10, + 12, + 11, + 18, + 14, + 8, + 17, + 20, + 21, + 2 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 20, + 17, + 0, + 1, + 18, + 3, + 13, + 9, + 10, + 5, + 11, + 15, + 2, + 4, + 12, + 16, + 14, + 19, + 21 + ] + }, + { + "features": [ + 1.6675766783781218, + 0.0, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 7, + 9, + 5, + 0, + 1, + 10, + 6, + 11, + 4, + 2, + 12, + 3, + 8, + 15, + 13, + 18, + 16, + 20, + 17, + 21, + 14, + 19 + ] + }, + { + "features": [ + -0.36356946158959264, + 0.8923076923076924, + 0.0, + -1.2266908212560386 + ], + "choice": [ + 8, + 15, + 3, + 13, + 16, + 11, + 4, + 0, + 20, + 6, + 14, + 5, + 1, + 21, + 17, + 9, + 10, + 18, + 19, + 7, + 12, + 2 + 
] + }, + { + "features": [ + -0.38225239768303104, + -0.05384615384615385, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 16, + 13, + 15, + 18, + 17, + 14, + 20, + 8, + 10, + 9, + 3, + 7, + 19, + 21, + 11, + 1, + 5, + 0, + 6, + 4, + 2, + 12 + ] + }, + { + "features": [ + -0.3590352293229513, + 0.06153846153846154, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 7, + 9, + 10, + 4, + 5, + 17, + 19, + 20, + 12, + 18, + 6, + 13, + 16, + 0, + 1, + 3, + 15, + 21, + 14, + 11, + 8, + 2 + ] + }, + { + "features": [ + 0.3090399772101415, + 0.6923076923076923, + 0.0, + -0.003997789240972687 + ], + "choice": [ + 7, + 9, + 10, + 1, + 12, + 5, + 3, + 4, + 0, + 11, + 20, + 8, + 17, + 13, + 6, + 15, + 16, + 21, + 18, + 2, + 14, + 19 + ] + }, + { + "features": [ + -0.3118649700883107, + -0.17692307692307693, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 20, + 18, + 21, + 17, + 7, + 9, + 15, + 13, + 1, + 16, + 4, + 12, + 5, + 0, + 10, + 14, + 6, + 11, + 8, + 3, + 2, + 19 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 9, + 10, + 0, + 5, + 1, + 12, + 3, + 4, + 2, + 21, + 11, + 16, + 18, + 20, + 15, + 8, + 17, + 13, + 14, + 19 + ] + }, + { + "features": [ + -0.3178473079479632, + -0.06153846153846154, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 18, + 17, + 20, + 1, + 5, + 21, + 0, + 8, + 4, + 3, + 10, + 12, + 9, + 13, + 11, + 6, + 16, + 15, + 7, + 19, + 14, + 2 + ] + } + ], + "configsource": [ + "lgbm/Airlines", + "lgbm/riccardo", + "lgbm/fried", + "lgbm/Dionis", + "lgbm/default", + "xgboost/fabert", + "xgboost/bng_lowbwt", + "xgboost/pol", + "xgboost/Amazon_employee_access", + "xgb_limitdepth/Jannis", + "xgb_limitdepth/adult", + "xgb_limitdepth/Amazon_employee_access", + "xgb_limitdepth/default", + "rf/Amazon_employee_access", + "rf/kc1", + "rf/Helena", + "rf/default", + "extra_tree/segment", + "extra_tree/Helena", + "extra_tree/kr-vs-kp", + "extra_tree/bank-marketing", + "extra_tree/default" + ] +} \ No newline at 
end of file diff --git a/flaml/default/all/multiclass.json b/flaml/default/all/multiclass.json new file mode 100644 index 000000000..78b7f577a --- /dev/null +++ b/flaml/default/all/multiclass.json @@ -0,0 +1,1325 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 134, + "num_leaves": 225, + "min_child_samples": 21, + "learning_rate": 0.10182098014295998, + "log_max_bin": 5, + "colsample_bytree": 0.6103565306428956, + "reg_alpha": 0.0009765625, + "reg_lambda": 40.413729576022625 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 3726, + "num_leaves": 155, + "min_child_samples": 4, + "learning_rate": 0.040941607728296484, + "log_max_bin": 5, + "colsample_bytree": 0.5326256194627191, + "reg_alpha": 0.7408711930398492, + "reg_lambda": 0.5467731065349226 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 573, + "num_leaves": 16, + "min_child_samples": 52, + "learning_rate": 0.2422782244991656, + "log_max_bin": 7, + "colsample_bytree": 1.0, + "reg_alpha": 0.03433194930183514, + "reg_lambda": 0.03870494540146326 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 2931, + "num_leaves": 106, + "min_child_samples": 49, + "learning_rate": 0.007146230961642236, + "log_max_bin": 7, + "colsample_bytree": 0.46947896116006055, + "reg_alpha": 0.37428758811879526, + "reg_lambda": 23.639977131692564 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 241, + "num_leaves": 58, + "min_child_samples": 2, + "learning_rate": 0.022730855281657265, + "log_max_bin": 5, + "colsample_bytree": 0.5620897082415793, + "reg_alpha": 0.0031614554887399314, + "reg_lambda": 0.02175056245188971 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 8353, + "num_leaves": 371, + "min_child_samples": 71, + "learning_rate": 0.017965875630873252, + "log_max_bin": 10, + "colsample_bytree": 0.9002082433803926, + "reg_alpha": 0.4864366003694002, + 
"reg_lambda": 0.024138585745106363, + "FLAML_sample_size": 470619 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 320, + "num_leaves": 24, + "min_child_samples": 53, + "learning_rate": 0.019316895546068795, + "log_max_bin": 6, + "colsample_bytree": 0.3955693254372702, + "reg_alpha": 0.0013785083170001627, + "reg_lambda": 0.04644365636517757 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 733, + "num_leaves": 11, + "min_child_samples": 94, + "learning_rate": 0.06276798296942972, + "log_max_bin": 6, + "colsample_bytree": 0.6341928918435795, + "reg_alpha": 0.5811038918218691, + "reg_lambda": 43.304997517523944 + } + }, + { + "class": "lgbm", + "hyperparameters": {} + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 392, + "max_leaves": 46, + "min_child_weight": 0.20655273911443411, + "learning_rate": 0.08039123467849849, + "subsample": 0.6482821473906787, + "colsample_bylevel": 0.5448604029329934, + "colsample_bytree": 0.4211786481671673, + "reg_alpha": 0.029040644754759502, + "reg_lambda": 4.60220206538413 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 6357, + "max_leaves": 206, + "min_child_weight": 1.9495322566288034, + "learning_rate": 0.0068766724195393905, + "subsample": 0.9451618245005704, + "colsample_bylevel": 0.9030482524943064, + "colsample_bytree": 0.9278972006416252, + "reg_alpha": 0.01857648400903689, + "reg_lambda": 6.021166480604588, + "FLAML_sample_size": 344444 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 1067, + "max_leaves": 55, + "min_child_weight": 1.578700876556201, + "learning_rate": 0.01882776721912098, + "subsample": 0.6486829588043383, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.6470978147570122, + "reg_alpha": 0.2623396481373557, + "reg_lambda": 12.320026567378322 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 765, + "max_leaves": 6, + "min_child_weight": 0.001, + 
"learning_rate": 1.0, + "subsample": 0.9833803894285497, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.0012553728257619922, + "reg_lambda": 0.03280542610559108 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 2866, + "max_leaves": 2954, + "min_child_weight": 0.003652484923138387, + "learning_rate": 0.006320484540131336, + "subsample": 0.45886345839532916, + "colsample_bylevel": 0.4143419565729296, + "colsample_bytree": 0.9117641224108227, + "reg_alpha": 0.2873746517375349, + "reg_lambda": 17.04964039639045 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 512, + "max_leaves": 3194, + "min_child_weight": 0.004561511536080627, + "learning_rate": 0.05288849444758447, + "subsample": 0.8653058105000044, + "colsample_bylevel": 0.8833689901424637, + "colsample_bytree": 0.9505209943737727, + "reg_alpha": 0.0037017878164852017, + "reg_lambda": 2.1872397928745113, + "FLAML_sample_size": 470620 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 335, + "max_leaves": 37, + "min_child_weight": 0.0013851539632487603, + "learning_rate": 0.2593737370075479, + "subsample": 0.9810091528571387, + "colsample_bylevel": 0.9484250613084422, + "colsample_bytree": 0.192606132199437, + "reg_alpha": 0.10585986776049093, + "reg_lambda": 0.017684465384509407 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 8315, + "max_leaves": 4, + "min_child_weight": 0.7673654415794792, + "learning_rate": 0.002432260930606481, + "subsample": 0.8476000618302348, + "colsample_bylevel": 0.8815698870579244, + "colsample_bytree": 0.7057137578225323, + "reg_alpha": 0.0016838090603716895, + "reg_lambda": 0.28815989841009226 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 319, + "max_leaves": 1312, + "min_child_weight": 0.001, + "learning_rate": 0.01872379806270421, + "subsample": 0.6890079660561895, + "colsample_bylevel": 0.7551225121854014, + 
"colsample_bytree": 0.7860755604500558, + "reg_alpha": 0.17028752704343114, + "reg_lambda": 1.4375743264564231 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 5739, + "max_leaves": 5, + "min_child_weight": 0.1359602026207002, + "learning_rate": 0.14496176867613397, + "subsample": 0.864897070662231, + "colsample_bylevel": 0.01, + "colsample_bytree": 0.9394057513384305, + "reg_alpha": 0.001103317921178771, + "reg_lambda": 0.1655504349283218 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 3369, + "max_leaves": 23, + "min_child_weight": 0.006136645605168392, + "learning_rate": 0.05726537983358939, + "subsample": 1.0, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.40981311572427176, + "reg_lambda": 4.343877111132155 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1191, + "max_depth": 13, + "min_child_weight": 6.4007885677724605, + "learning_rate": 0.037622775650237326, + "subsample": 1.0, + "colsample_bylevel": 0.3697773165627811, + "colsample_bytree": 0.813871237069598, + "reg_alpha": 0.0009765625, + "reg_lambda": 1.075702708240612 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1499, + "max_depth": 11, + "min_child_weight": 0.07563529776156448, + "learning_rate": 0.039042609221240955, + "subsample": 0.7832981935783824, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.0009765625, + "reg_lambda": 23.513066752844153 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 313, + "max_depth": 7, + "min_child_weight": 30.424259012001368, + "learning_rate": 0.08466828646360688, + "subsample": 0.9897083979469301, + "colsample_bylevel": 0.6769490906308069, + "colsample_bytree": 1.0, + "reg_alpha": 0.0014544085935366477, + "reg_lambda": 34.09911172306857 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 566, + "max_depth": 13, + "min_child_weight": 
0.013176186839973599, + "learning_rate": 0.09285619488896565, + "subsample": 0.5897287493640815, + "colsample_bylevel": 0.923664288991597, + "colsample_bytree": 0.8244714790646485, + "reg_alpha": 0.023484974838756726, + "reg_lambda": 0.5690298249126402, + "FLAML_sample_size": 470620 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": {} + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 971, + "max_depth": 8, + "min_child_weight": 0.0044052948947322645, + "learning_rate": 0.15171239415469703, + "subsample": 0.8340342805529243, + "colsample_bylevel": 0.9489310919814007, + "colsample_bytree": 0.022724724669028674, + "reg_alpha": 0.0009765625, + "reg_lambda": 0.0025897714798936954 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 464, + "max_depth": 2, + "min_child_weight": 0.0068282719220722, + "learning_rate": 0.07962498837600937, + "subsample": 0.47139986510869014, + "colsample_bylevel": 0.4814471959023239, + "colsample_bytree": 0.6050207253592859, + "reg_alpha": 0.0010290828959872173, + "reg_lambda": 0.0103104214002687 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1799, + "max_depth": 3, + "min_child_weight": 0.0010034151843327725, + "learning_rate": 0.03453775119035777, + "subsample": 0.31322065037892344, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.2219038021462818, + "reg_alpha": 0.03885163786709896, + "reg_lambda": 1.1077175359756786 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 1000, + "max_features": 0.1779692423238241, + "max_leaves": 7499, + "criterion": "gini" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 400, + "max_features": 0.8961466398827462, + "max_leaves": 25095, + "criterion": "entropy", + "FLAML_sample_size": 470620 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 470, + "max_features": 0.12698484669953783, + "max_leaves": 31499, + "criterion": "entropy" + } + 
}, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 632, + "max_features": 1.0, + "max_leaves": 1360, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 1713, + "max_features": 0.40966311008832224, + "max_leaves": 10210, + "criterion": "entropy", + "FLAML_sample_size": 105352 + } + }, + { + "class": "rf", + "hyperparameters": {} + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 1074, + "max_features": 0.6008299059364026, + "max_leaves": 9287 + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 833, + "max_features": 0.055027081530106846, + "max_leaves": 1361, + "criterion": "gini" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.9560062760906606, + "max_leaves": 32767, + "criterion": "entropy", + "FLAML_sample_size": 470620 + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 812, + "max_features": 1.0, + "max_leaves": 1474, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 1.0, + "max_leaves": 18344 + } + }, + { + "class": "extra_tree", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 40337.0, + 54.0, + 7.0, + 1.0 + ], + "scale": [ + 58722.0, + 766.0, + 6.0, + 1.0 + ] + }, + "neighbors": [ + { + "features": [ + 8.217925138789552, + 0.0, + 0.0, + -0.8148148148148149 + ], + "choice": [ + 5, + 23, + 29, + 36, + 1, + 32, + 33, + 19, + 14, + 13, + 20, + 10, + 38, + 17, + 39, + 30, + 0, + 3, + 34, + 2, + 28, + 11, + 31, + 24, + 9, + 15, + 22, + 7, + 37, + 4, + 8, + 6, + 18, + 27, + 26, + 16, + 25, + 35, + 12 + ] + }, + { + "features": [ + 5.691767991553421, + 0.007832898172323759, + 58.0, + 0.0 + ], + "choice": [ + 0, + 28, + 33, + 34, + 35, + 30, + 37, + 7, + 9, + 8, + 6, + 4, + 15, + 2, + 39 + ] + }, + { + "features": [ + 0.385937127482034, + 0.9530026109660574, + 0.5, + 0.0 + ], + 
"choice": [ + 3, + 22, + 0, + 4, + 1, + 20, + 7, + 9, + 11, + 19, + 27, + 8, + 24, + 5, + 6, + 15, + 17, + 23, + 36, + 18, + 2, + 32, + 34, + 26, + 38, + 30, + 29, + 31, + 37, + 28, + 39, + 25, + 33, + 35, + 12 + ] + }, + { + "features": [ + 0.3123020333094922, + -0.03524804177545692, + 15.5, + 0.0 + ], + "choice": [ + 11, + 3, + 0, + 22, + 7, + 6, + 27, + 9, + 16, + 17, + 20, + 26, + 38, + 1, + 34, + 14, + 4, + 28, + 5, + 37, + 23, + 36, + 31, + 19, + 25, + 18, + 30, + 29, + 15, + 33, + 12, + 24, + 35, + 39, + 2, + 8 + ] + }, + { + "features": [ + 0.5964033922550321, + 0.0, + -0.5, + 0.0 + ], + "choice": [ + 3, + 0, + 11, + 22, + 17, + 9, + 24, + 13, + 7, + 4, + 8, + 19, + 10, + 20, + 6, + 36, + 21, + 38, + 32, + 27, + 29, + 18, + 31, + 28, + 34, + 15, + 2, + 30, + 37, + 26, + 16, + 33, + 39, + 1, + 14, + 5, + 25, + 23, + 35, + 12 + ] + }, + { + "features": [ + -0.5336500800381458, + 9.328981723237598, + 0.5, + 0.0 + ], + "choice": [ + 22, + 0, + 27, + 26, + 24, + 25, + 28, + 34, + 35, + 33, + 20, + 37, + 30, + 3, + 9, + 7, + 6, + 8, + 15, + 4, + 1, + 2, + 39, + 12 + ] + }, + { + "features": [ + 0.20201968597799802, + -0.0587467362924282, + 0.0, + 0.0 + ], + "choice": [ + 4, + 6, + 1, + 7, + 37, + 36, + 38, + 34, + 19, + 21, + 29, + 39, + 32, + 12, + 33, + 24, + 9, + 26, + 14, + 10, + 16, + 23, + 15, + 27, + 11, + 20, + 5, + 13, + 3, + 18, + 31, + 35, + 0, + 25, + 28, + 30, + 22, + 17, + 2, + 8 + ] + }, + { + "features": [ + 0.20677088655018563, + 0.16449086161879894, + 0.5, + 0.0 + ], + "choice": [ + 3, + 20, + 21, + 0, + 10, + 9, + 11, + 19, + 1, + 5, + 24, + 7, + 22, + 4, + 17, + 8, + 15, + 36, + 38, + 14, + 18, + 32, + 29, + 27, + 34, + 6, + 30, + 2, + 28, + 23, + 31, + 37, + 33, + 39, + 26, + 16, + 35, + 25, + 12 + ] + }, + { + "features": [ + -0.6604339089268076, + -0.06266318537859007, + -0.5, + -1.0 + ], + "choice": [ + 12, + 18, + 8, + 2, + 24, + 23, + 14, + 19, + 21, + 10, + 5, + 31, + 38, + 36, + 29, + 1, + 34, + 20, + 33, + 32, + 16, + 39, + 7, + 25, + 
3, + 11, + 26, + 17, + 13, + 4, + 0, + 30, + 28, + 15, + 9, + 35, + 22, + 27, + 37, + 6 + ] + }, + { + "features": [ + -0.6703620448894793, + 1.0469973890339426, + 0.3333333333333333, + 0.0 + ], + "choice": [ + 15, + 25, + 4, + 18, + 9, + 23, + 14, + 21, + 27, + 26, + 19, + 13, + 17, + 1, + 16, + 24, + 10, + 35, + 39, + 11, + 28, + 33, + 30, + 8, + 20, + 0, + 12, + 2, + 32, + 34, + 29, + 3, + 31, + 6, + 36, + 38, + 37, + 5, + 7, + 22 + ] + }, + { + "features": [ + 0.34848949286468445, + -0.015665796344647518, + -0.6666666666666666, + -1.0 + ], + "choice": [ + 1, + 10, + 21, + 20, + 5, + 19, + 2, + 13, + 14, + 23, + 17, + 3, + 0, + 15, + 11, + 24, + 9, + 22, + 12, + 34, + 32, + 8, + 7, + 18, + 30, + 4, + 28, + 33, + 36, + 37, + 27, + 39, + 29, + 38, + 31, + 6, + 25, + 26, + 16, + 35 + ] + }, + { + "features": [ + -0.5336500800381458, + 2.5404699738903394, + -0.3333333333333333, + 0.0 + ], + "choice": [ + 9, + 2, + 18, + 25, + 8, + 23, + 27, + 15, + 19, + 20, + 4, + 5, + 24, + 7, + 14, + 3, + 1, + 11, + 6, + 0, + 22, + 17, + 26, + 12, + 37, + 36, + 16, + 38, + 34, + 35, + 32, + 31, + 29, + 30, + 28, + 39, + 33 + ] + }, + { + "features": [ + -0.5606757263036, + 0.9738903394255874, + 0.0, + 0.0 + ], + "choice": [ + 17, + 4, + 13, + 35, + 1, + 9, + 11, + 30, + 21, + 8, + 0, + 23, + 32, + 24, + 20, + 28, + 19, + 10, + 31, + 14, + 39, + 27, + 3, + 15, + 29, + 25, + 6, + 33, + 37, + 26, + 36, + 34, + 18, + 16, + 38, + 5, + 7, + 2, + 22, + 12 + ] + }, + { + "features": [ + 0.0, + -0.06266318537859007, + -0.6666666666666666, + 0.0 + ], + "choice": [ + 19, + 2, + 12, + 1, + 24, + 5, + 18, + 10, + 22, + 31, + 37, + 20, + 8, + 13, + 0, + 21, + 7, + 23, + 3, + 11, + 17, + 14, + 29, + 4, + 9, + 34, + 32, + 28, + 33, + 30, + 16, + 35, + 26, + 6, + 27, + 15, + 25, + 39, + 36, + 38 + ] + }, + { + "features": [ + -0.6562617077075031, + 0.21148825065274152, + 0.5, + 0.0 + ], + "choice": [ + 27, + 9, + 24, + 18, + 10, + 2, + 16, + 26, + 20, + 6, + 7, + 25, + 14, + 5, + 23, + 3, + 21, + 
19, + 15, + 1, + 11, + 4, + 8, + 13, + 17, + 0, + 38, + 36, + 37, + 34, + 32, + 28, + 22, + 31, + 29, + 30, + 33, + 39, + 35, + 12 + ] + }, + { + "features": [ + -0.6515105071353156, + -0.04960835509138381, + 0.0, + 0.0 + ], + "choice": [ + 37, + 36, + 38, + 34, + 9, + 29, + 26, + 39, + 32, + 31, + 6, + 11, + 1, + 3, + 20, + 33, + 10, + 16, + 27, + 17, + 7, + 13, + 30, + 5, + 28, + 21, + 14, + 4, + 15, + 35, + 0, + 19, + 23, + 2, + 24, + 22, + 8, + 18, + 12, + 25 + ] + }, + { + "features": [ + -0.6739552467559007, + -0.04699738903394256, + -0.5, + 0.0 + ], + "choice": [ + 31, + 29, + 16, + 26, + 6, + 32, + 38, + 37, + 36, + 21, + 13, + 7, + 34, + 3, + 17, + 11, + 33, + 1, + 28, + 27, + 0, + 9, + 39, + 30, + 22, + 14, + 19, + 20, + 10, + 4, + 23, + 35, + 15, + 24, + 5, + 8, + 2, + 18, + 12, + 25 + ] + } + ], + "configsource": [ + "lgbm/Helena", + "lgbm/connect-4", + "lgbm/jungle_chess_2pcs_raw_endgame_complete", + "lgbm/Jannis", + "lgbm/fabert", + "lgbm/Covertype", + "lgbm/segment", + "lgbm/APSFailure", + "lgbm/default", + "xgboost/segment", + "xgboost/Albert", + "xgboost/Helena", + "xgboost/car", + "xgboost/house_8L", + "xgboost/Covertype", + "xgboost/cnae-9", + "xgboost/KDDCup09_appetency", + "xgboost/fabert", + "xgboost/dilbert", + "xgboost/jungle_chess_2pcs_raw_endgame_complete", + "xgb_limitdepth/guillermo", + "xgb_limitdepth/connect-4", + "xgb_limitdepth/Helena", + "xgb_limitdepth/Covertype", + "xgb_limitdepth/default", + "xgb_limitdepth/cnae-9", + "xgb_limitdepth/vehicle", + "xgb_limitdepth/mfeat-factors", + "rf/Helena", + "rf/Covertype", + "rf/Fashion-MNIST", + "rf/jungle_chess_2pcs_raw_endgame_complete", + "rf/MiniBooNE", + "rf/default", + "extra_tree/houses", + "extra_tree/fabert", + "extra_tree/Covertype", + "extra_tree/Amazon_employee_access", + "extra_tree/fried", + "extra_tree/default" + ] +} \ No newline at end of file diff --git a/flaml/default/all/regression.json b/flaml/default/all/regression.json new file mode 100644 index 000000000..a62f44175 --- 
/dev/null +++ b/flaml/default/all/regression.json @@ -0,0 +1,882 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 4797, + "num_leaves": 122, + "min_child_samples": 2, + "learning_rate": 0.022635758411078528, + "log_max_bin": 9, + "colsample_bytree": 0.7019911744574896, + "reg_alpha": 0.004252223402511765, + "reg_lambda": 0.11288241427227624 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 1009, + "num_leaves": 42, + "min_child_samples": 12, + "learning_rate": 0.02167229637171611, + "log_max_bin": 7, + "colsample_bytree": 0.7385038460573171, + "reg_alpha": 0.003607184551842614, + "reg_lambda": 12.08340803550741 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 32767, + "num_leaves": 372, + "min_child_samples": 4, + "learning_rate": 0.03517259015200922, + "log_max_bin": 5, + "colsample_bytree": 1.0, + "reg_alpha": 0.02271142170225636, + "reg_lambda": 0.001963791798843179, + "FLAML_sample_size": 830258 + } + }, + { + "class": "lgbm", + "hyperparameters": {} + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 6357, + "max_leaves": 206, + "min_child_weight": 1.9495322566288034, + "learning_rate": 0.0068766724195393905, + "subsample": 0.9451618245005704, + "colsample_bylevel": 0.9030482524943064, + "colsample_bytree": 0.9278972006416252, + "reg_alpha": 0.01857648400903689, + "reg_lambda": 6.021166480604588, + "FLAML_sample_size": 344444 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 23045, + "max_leaves": 247, + "min_child_weight": 0.004319397499079841, + "learning_rate": 0.0032914413473281215, + "subsample": 0.7334190564433234, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.03514226467919635, + "reg_lambda": 1.2679661021665851 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 1899, + "max_leaves": 59, + "min_child_weight": 0.013389019900720164, + "learning_rate": 
0.0028943401472847964, + "subsample": 0.7808944208233943, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.9999355357362375, + "reg_alpha": 0.7905117773932884, + "reg_lambda": 2.916897119216104 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 5611, + "max_leaves": 61, + "min_child_weight": 0.01070518287797225, + "learning_rate": 0.005485127037677848, + "subsample": 0.4713518256961299, + "colsample_bylevel": 0.9777437906530106, + "colsample_bytree": 0.9519335125615331, + "reg_alpha": 0.03621564207188963, + "reg_lambda": 1.8045765669466283 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 4923, + "max_depth": 12, + "min_child_weight": 0.7625732991776795, + "learning_rate": 0.009239549681857523, + "subsample": 0.8193164619615052, + "colsample_bylevel": 0.7785754297307862, + "colsample_bytree": 0.788491073979525, + "reg_alpha": 0.002282749364196872, + "reg_lambda": 131.2194560716441 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 2111, + "max_depth": 9, + "min_child_weight": 3.405822241186395, + "learning_rate": 0.005804247705198151, + "subsample": 0.37848422782052427, + "colsample_bylevel": 0.8228350674288559, + "colsample_bytree": 0.8813475713109656, + "reg_alpha": 0.009761356063132219, + "reg_lambda": 13.187783936727843, + "FLAML_sample_size": 810000 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1499, + "max_depth": 11, + "min_child_weight": 0.07563529776156448, + "learning_rate": 0.039042609221240955, + "subsample": 0.7832981935783824, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.0009765625, + "reg_lambda": 23.513066752844153 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 19722, + "max_depth": 11, + "min_child_weight": 6.46800727978204, + "learning_rate": 0.0010837437950202355, + "subsample": 0.49509562408032115, + "colsample_bylevel": 1.0, + "colsample_bytree": 
0.8826299329274134, + "reg_alpha": 0.23887161121959208, + "reg_lambda": 15.163773888208217 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 544, + "max_depth": 12, + "min_child_weight": 79.32555867011995, + "learning_rate": 0.010128107120014433, + "subsample": 0.9799974977817297, + "colsample_bylevel": 0.881815418056542, + "colsample_bytree": 0.9718556912196423, + "reg_alpha": 72.63148950428749, + "reg_lambda": 1.4601415712058006 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": {} + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 960, + "max_features": 0.694616932858775, + "max_leaves": 8937 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 1.0, + "max_leaves": 32767, + "FLAML_sample_size": 830258 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.6683903035731483, + "max_leaves": 591, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": {} + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 1233, + "max_features": 1.0, + "max_leaves": 6452 + } + }, + { + "class": "extra_tree", + "hyperparameters": {} + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 346, + "max_features": 1.0, + "max_leaves": 1007, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.5106397565689275, + "max_leaves": 32767, + "FLAML_sample_size": 319382 + } + } + ], + "preprocessing": { + "center": [ + 36691.0, + 10.0, + 0.0, + 0.85 + ], + "scale": [ + 463680.0, + 8.5, + 1.0, + 0.48611111111111116 + ] + }, + "neighbors": [ + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 3, + 6, + 12, + 1, + 16, + 20, + 7, + 13, + 9, + 8, + 4, + 11, + 0, + 14, + 18, + 15, + 5, + 17, + 10, + 21, + 2, + 19 + ] + }, + { + "features": [ + 0.6972675120772946, + 10.588235294117647, + 0.0, 
+ 0.3085714285714286 + ], + "choice": [ + 19, + 18, + 21, + 20 + ] + }, + { + "features": [ + -0.05244133885438233, + 3.5294117647058822, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 1, + 0, + 3, + 14, + 17, + 15, + 16, + 10, + 8, + 18, + 2, + 19, + 20, + 4, + 21, + 13, + 9, + 5, + 7, + 11, + 6, + 12 + ] + }, + { + "features": [ + 1.8618637853692201, + -0.11764705882352941, + 0.0, + -0.3771428571428571 + ], + "choice": [ + 12, + 7, + 4, + 9, + 13, + 8, + 1, + 6, + 3, + 5, + 16, + 10, + 0, + 18, + 14, + 20, + 15, + 17, + 19, + 2, + 21 + ] + }, + { + "features": [ + 0.1472675120772947, + -0.11764705882352941, + 0.0, + -1.52 + ], + "choice": [ + 1, + 12, + 9, + 3, + 7, + 6, + 11, + 13, + 16, + 20, + 8, + 4, + 18, + 0, + 10, + 14, + 21, + 5, + 15, + 17, + 2, + 19 + ] + }, + { + "features": [ + -0.045171238785369223, + -0.11764705882352941, + 0.0, + -0.3771428571428571 + ], + "choice": [ + 12, + 6, + 1, + 3, + 16, + 9, + 20, + 15, + 14, + 11, + 7, + 21, + 18, + 17, + 4, + 8, + 19, + 5, + 13, + 0, + 10, + 2 + ] + }, + { + "features": [ + 1.8618637853692201, + 9.411764705882353, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 19, + 18, + 21, + 20 + ] + }, + { + "features": [ + -0.018758626639061422, + -0.11764705882352941, + 0.0, + -1.2914285714285714 + ], + "choice": [ + 6, + 3, + 12, + 9, + 1, + 16, + 20, + 13, + 7, + 11, + 8, + 18, + 4, + 14, + 10, + 15, + 0, + 17, + 21, + 5, + 19, + 2 + ] + }, + { + "features": [ + 1.8618637853692201, + 0.9411764705882353, + 0.0, + -0.6057142857142855 + ], + "choice": [ + 0, + 5, + 4, + 8, + 10, + 12, + 7, + 9, + 1, + 2, + 13, + 3, + 6, + 14, + 19, + 17, + 21, + 18, + 16, + 20 + ] + }, + { + "features": [ + 1.8618637853692201, + 0.0, + 0.0, + -1.5428571428571427 + ], + "choice": [ + 9, + 7, + 1, + 4, + 6, + 3, + 12, + 13, + 0, + 8, + 10, + 5, + 14, + 16, + 20, + 18, + 21, + 15, + 2, + 17, + 19 + ] + }, + { + "features": [ + 0.2647105762594893, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 12, + 6, + 1, + 3, + 13, + 7, + 
16, + 9, + 20, + 0, + 8, + 4, + 11, + 14, + 18, + 5, + 10, + 15, + 17, + 21, + 2, + 19 + ] + }, + { + "features": [ + -0.058378623188405795, + 0.23529411764705882, + 0.0, + -0.3771428571428571 + ], + "choice": [ + 0, + 3, + 1, + 2 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 7, + 9, + 1, + 11, + 8, + 0, + 4, + 5, + 6, + 3, + 10, + 2, + 13, + 12, + 19, + 18, + 21, + 15, + 14, + 17, + 20, + 16 + ] + }, + { + "features": [ + -0.03490769496204279, + 0.7058823529411765, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 7, + 11, + 5, + 4, + 9, + 1, + 8, + 3, + 6, + 0, + 10, + 2, + 17, + 12, + 15, + 14, + 16, + 13, + 19, + 18, + 21, + 20 + ] + }, + { + "features": [ + -0.03490769496204279, + -0.23529411764705882, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 6, + 4, + 8, + 5, + 7, + 9, + 11, + 10, + 3, + 1, + 18, + 12, + 21, + 19, + 0, + 14, + 16, + 20, + 15, + 13, + 17, + 2 + ] + }, + { + "features": [ + -0.03906789164941339, + -0.23529411764705882, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 0, + 4, + 7, + 5, + 11, + 1, + 8, + 10, + 9, + 6, + 12, + 3, + 13, + 14, + 15, + 17, + 16, + 2, + 21, + 18, + 19, + 20 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + -0.3085714285714286 + ], + "choice": [ + 18, + 19, + 20, + 10, + 15, + 17, + 5, + 11, + 14, + 4, + 7, + 9, + 21, + 8, + 3, + 6, + 13, + 1, + 16, + 12, + 0, + 2 + ] + }, + { + "features": [ + 1.050207039337474, + 0.9411764705882353, + 0.0, + -0.7199999999999999 + ], + "choice": [ + 17, + 15, + 14, + 16 + ] + }, + { + "features": [ + 0.686201690821256, + -0.11764705882352941, + 0.0, + -1.0628571428571427 + ], + "choice": [ + 15, + 17, + 14, + 19, + 16, + 18, + 21, + 20 + ] + }, + { + "features": [ + 1.9104080400276053, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 10, + 2, + 5, + 8, + 0, + 4, + 19, + 7, + 9, + 13, + 17, + 15, + 18, + 21, + 1, + 14, + 12, + 20, + 6, + 3, + 16 + ] + }, + { + "features": [ + -0.050015096618357485, + 4.470588235294118, + 
0.0, + 0.3085714285714286 + ], + "choice": [ + 8, + 10, + 4, + 7, + 5, + 11, + 18, + 6, + 20, + 19, + 9, + 14, + 16, + 21, + 0, + 3, + 15, + 17, + 1, + 2, + 13, + 12 + ] + }, + { + "features": [ + -0.04660973084886128, + -0.8235294117647058, + 0.0, + -1.0628571428571427 + ], + "choice": [ + 11, + 13, + 10, + 8, + 9, + 20, + 12, + 18, + 19, + 21 + ] + } + ], + "configsource": [ + "lgbm/houses", + "lgbm/house_8L", + "lgbm/poker", + "lgbm/default", + "xgboost/Albert", + "xgboost/mv", + "xgboost/bng_echomonths", + "xgboost/house_16H", + "xgb_limitdepth/higgs", + "xgb_limitdepth/bng_pharynx", + "xgb_limitdepth/connect-4", + "xgb_limitdepth/house_16H", + "xgb_limitdepth/bng_echomonths", + "xgb_limitdepth/default", + "rf/houses", + "rf/poker", + "rf/bank-marketing", + "rf/default", + "extra_tree/house_16H", + "extra_tree/default", + "extra_tree/dilbert", + "extra_tree/particulate-matter" + ] +} \ No newline at end of file diff --git a/flaml/default/estimator.py b/flaml/default/estimator.py new file mode 100644 index 000000000..717e3e195 --- /dev/null +++ b/flaml/default/estimator.py @@ -0,0 +1,192 @@ +import sklearn.ensemble as ensemble +from functools import wraps +from flaml.data import CLASSIFICATION +from .suggest import preprocess_and_suggest_hyperparams + +DEFAULT_LOCATION = "default_location" + + +def flamlize_estimator(super_class, name: str, task: str, alternatives=None): + """Enhance an estimator class with flaml's data-dependent default hyperparameter settings. + + Example: + + ```python + import sklearn.ensemble as ensemble + RandomForestRegressor = flamlize_estimator( + ensemble.RandomForestRegressor, "rf", "regression" + ) + ``` + + Args: + super_class: an scikit-learn compatible estimator class. + name: a str of the estimator's name. + task: a str of the task type. + alternatives: (Optional) a list for alternative estimator names. 
For example, + ```[("max_depth", 0, "xgboost")]``` means if the "max_depth" is set to 0 + in the constructor, then look for the learned defaults for estimator "xgboost". + """ + + class EstimatorClass(super_class): + """**Enhanced with flaml's data-dependent default hyperparameter settings.**""" + + @wraps(super_class.__init__) + def __init__(self, **params): + if DEFAULT_LOCATION in params: + self._default_location = params.pop(DEFAULT_LOCATION) + else: + self._default_location = None + self._params = params + super().__init__(**params) + + @wraps(super_class._get_param_names) + @classmethod + def _get_param_names(cls): + return super_class._get_param_names() + + def suggest_hyperparams(self, X, y): + """Suggest hyperparameters. + + Example: + + ```python + from flaml.default import LGBMRegressor + + estimator = LGBMRegressor() + hyperparams, estimator_name, X_transformed, y_transformed = estimator.fit(X_train, y_train) + print(hyperparams) + ``` + + Args: + X: A dataframe of training data in shape n*m. + y: A series of labels in shape n*1. + + Returns: + hyperparams: A dict of the hyperparameter configurations. + estimator_name: A str of the underlying estimator name, e.g., 'xgb_limitdepth'. + X_transformed: the preprocessed X. + y_transformed: the preprocessed y. 
+ """ + estimator_name = name + if alternatives: + for alternative in alternatives: + if self._params.get(alternative[0]) == alternative[1]: + estimator_name = alternative[2] + break + estimator_name = ( + "choose_xgb" + if ( + estimator_name == "xgb_limitdepth" + and "max_depth" not in self._params + ) + else estimator_name + ) + ( + hyperparams, + estimator_class, + X_transformed, + y_transformed, + self._feature_transformer, + self._label_transformer, + ) = preprocess_and_suggest_hyperparams( + task, X, y, estimator_name, self._default_location + ) + assert estimator_class == super_class + hyperparams.update(self._params) + return hyperparams, estimator_name, X_transformed, y_transformed + + @wraps(super_class.fit) + def fit(self, X, y, *args, **params): + hyperparams, estimator_name, X, y_transformed = self.suggest_hyperparams( + X, y + ) + self.set_params(**hyperparams) + if self._label_transformer and estimator_name in [ + "rf", + "extra_tree", + "xgboost", + "xgb_limitdepth", + "choose_xgb", + ]: + # rf and et have trouble in handling boolean labels; xgboost requires integer labels + fitted = super().fit(X, y_transformed, *args, **params) + # if hasattr(self, "_classes"): + # self._classes = self._label_transformer.classes_ + # else: + self.classes_ = self._label_transformer.classes_ + if "xgb" not in estimator_name: + # rf and et would do inverse transform automatically; xgb doesn't + self._label_transformer = None + else: + # lgbm doesn't need label transformation except for non-str/num labels + try: + fitted = super().fit(X, y, *args, **params) + self._label_transformer = None + except ValueError: + # Unknown label type: 'unknown' + fitted = super().fit(X, y_transformed, *args, **params) + self._classes = self._label_transformer.classes_ + return fitted + + @wraps(super_class.predict) + def predict(self, X, *args, **params): + if name != "lgbm" or task not in CLASSIFICATION: + X = self._feature_transformer.transform(X) + y_pred = super().predict(X, *args, 
@wraps(super_class.predict_proba)
def predict_proba(self, X, *args, **params):
    # Run the stored feature transformer on X, then delegate to the wrapped
    # estimator's predict_proba and return its result unchanged.
    transformed = self._feature_transformer.transform(X)
    return super().predict_proba(transformed, *args, **params)
+ "max_features": 0.46132798093546956, + "max_leaves": 12856, + "criterion": "gini" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 408, + "max_features": 0.3629795757973625, + "max_leaves": 81, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 553, + "max_features": 0.9592132391435095, + "max_leaves": 1127, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 18000.0, + 28.0, + 2.0, + 0.7565217391304347 + ], + "scale": [ + 42124.0, + 130.0, + 1.0, + 0.5714285714285715 + ] + }, + "neighbors": [ + { + "features": [ + 1.196467571930491, + 1.0923076923076922, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 2, + 4 + ] + }, + { + "features": [ + 11.096856898680088, + -0.16153846153846155, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 1, + 3, + 0, + 2, + 4 + ] + }, + { + "features": [ + 8.658152122305575, + 0.38461538461538464, + 0.0, + -0.7405797101449274 + ], + "choice": [ + 1, + 3, + 0, + 4 + ] + }, + { + "features": [ + 0.27281359794891274, + -0.14615384615384616, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 3, + 0, + 4 + ] + }, + { + "features": [ + -0.4125676573924604, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 2, + 0, + 1, + 4 + ] + }, + { + "features": [ + 0.6409647706770487, + 1.5538461538461539, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 3, + 1, + 4 + ] + }, + { + "features": [ + 2.3515573069983855, + 0.16923076923076924, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 4 + ] + }, + { + "features": [ + 0.6162045389801538, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 3, + 0, + 2, + 1, + 4 + ] + }, + { + "features": [ + 0.5386240622922799, + -0.09230769230769231, + 0.0, + -0.5582880434782608 + ], + "choice": [ + 3, + 0, + 1, + 4 + ] + }, + { + "features": [ + -0.41133320672300827, + -0.18461538461538463, + 0.0, + 
0.4260869565217391 + ], + "choice": [ + 2, + 1, + 4 + ] + }, + { + "features": [ + -0.31155635742094767, + 12.36923076923077, + 0.0, + 0.3865087169129372 + ], + "choice": [ + 3, + 1, + 0, + 2, + 4 + ] + }, + { + "features": [ + -0.40594435476213087, + -0.06153846153846154, + 0.0, + -0.7114130434782607 + ], + "choice": [ + 2, + 1, + 0, + 3, + 4 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 3, + 0, + 1, + 2, + 4 + ] + }, + { + "features": [ + 1.6675766783781218, + 0.0, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 3, + 0, + 4 + ] + }, + { + "features": [ + -0.36356946158959264, + 0.8923076923076924, + 0.0, + -1.2266908212560386 + ], + "choice": [ + 3, + 4 + ] + }, + { + "features": [ + -0.38225239768303104, + -0.05384615384615385, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 0, + 3, + 2, + 4 + ] + }, + { + "features": [ + -0.3590352293229513, + 0.06153846153846154, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 0, + 2, + 3, + 1, + 4 + ] + }, + { + "features": [ + 0.3090399772101415, + 0.6923076923076923, + 0.0, + -0.003997789240972687 + ], + "choice": [ + 3, + 0, + 4 + ] + }, + { + "features": [ + -0.3118649700883107, + -0.17692307692307693, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 3, + 1, + 4 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 4 + ] + }, + { + "features": [ + -0.3178473079479632, + -0.06153846153846154, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 0, + 3, + 4 + ] + } + ], + "configsource": [ + "segment", + "Helena", + "kr-vs-kp", + "bank-marketing", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/extra_tree/multiclass.json b/flaml/default/extra_tree/multiclass.json new file mode 100644 index 000000000..402d3491a --- /dev/null +++ b/flaml/default/extra_tree/multiclass.json @@ -0,0 +1,307 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "extra_tree", + 
"hyperparameters": { + "n_estimators": 1074, + "max_features": 0.6008299059364026, + "max_leaves": 9287 + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 833, + "max_features": 0.055027081530106846, + "max_leaves": 1361, + "criterion": "gini" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.9560062760906606, + "max_leaves": 32767, + "criterion": "entropy", + "FLAML_sample_size": 470620 + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 812, + "max_features": 1.0, + "max_leaves": 1474, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 1.0, + "max_leaves": 18344 + } + }, + { + "class": "extra_tree", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 24668.5, + 54.0, + 7.0, + 1.0 + ], + "scale": [ + 57198.0, + 770.5, + 6.0, + 1.0 + ] + }, + "neighbors": [ + { + "features": [ + 8.710820308402392, + 0.0, + 0.0, + -0.8148148148148149 + ], + "choice": [ + 2, + 4, + 5 + ] + }, + { + "features": [ + 0.6701545508584216, + 0.9474367293964958, + 0.5, + 0.0 + ], + "choice": [ + 2, + 0, + 4, + 3, + 5 + ] + }, + { + "features": [ + 0.5945575020105598, + -0.03504218040233614, + 15.5, + 0.0 + ], + "choice": [ + 4, + 0, + 3, + 2, + 1, + 5 + ] + }, + { + "features": [ + 0.8862285394594217, + 0.0, + -0.5, + 0.0 + ], + "choice": [ + 2, + 4, + 0, + 3, + 5 + ] + }, + { + "features": [ + -0.2739344033008147, + 9.2744970798183, + 0.5, + 0.0 + ], + "choice": [ + 0, + 1, + 3, + 5 + ] + }, + { + "features": [ + 0.48133676002657433, + -0.058403634003893576, + 0.0, + 0.0 + ], + "choice": [ + 3, + 2, + 4, + 0, + 5 + ] + }, + { + "features": [ + 0.4862145529563971, + 0.16353017521090202, + 0.5, + 0.0 + ], + "choice": [ + 2, + 4, + 0, + 3, + 5 + ] + }, + { + "features": [ + -0.40409629707332423, + -0.06229720960415315, + -0.5, + -1.0 + ], + "choice": [ + 4, + 2, + 0, + 5 + ] + }, + { + 
"features": [ + -0.41428896115248787, + 1.0408825438027256, + 0.3333333333333333, + 0.0 + ], + "choice": [ + 1, + 5 + ] + }, + { + "features": [ + 0.6317091506696039, + -0.015574302401038288, + -0.6666666666666666, + -1.0 + ], + "choice": [ + 0, + 2, + 3, + 5 + ] + }, + { + "features": [ + -0.2739344033008147, + 2.5256327060350423, + -0.3333333333333333, + 0.0 + ], + "choice": [ + 3, + 2, + 4, + 0, + 1, + 5 + ] + }, + { + "features": [ + -0.30168012867582783, + 0.9682024659312135, + 0.0, + 0.0 + ], + "choice": [ + 1, + 5 + ] + }, + { + "features": [ + 0.2739344033008147, + -0.06229720960415315, + -0.6666666666666666, + 0.0 + ], + "choice": [ + 3, + 0, + 1, + 5 + ] + }, + { + "features": [ + -0.39981293052204625, + 0.21025308241401688, + 0.5, + 0.0 + ], + "choice": [ + 4, + 2, + 3, + 0, + 5 + ] + }, + { + "features": [ + -0.3949351375922235, + -0.04931862426995458, + 0.0, + 0.0 + ], + "choice": [ + 3, + 2, + 4, + 0, + 5 + ] + }, + { + "features": [ + -0.41797790132522117, + -0.04672290720311486, + -0.5, + 0.0 + ], + "choice": [ + 4, + 3, + 2, + 0, + 5 + ] + } + ], + "configsource": [ + "houses", + "fabert", + "Covertype", + "Amazon_employee_access", + "fried", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/extra_tree/regression.json b/flaml/default/extra_tree/regression.json new file mode 100644 index 000000000..fc256ec7e --- /dev/null +++ b/flaml/default/extra_tree/regression.json @@ -0,0 +1,309 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 1233, + "max_features": 1.0, + "max_leaves": 6452 + } + }, + { + "class": "extra_tree", + "hyperparameters": {} + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 346, + "max_features": 1.0, + "max_leaves": 1007, + "criterion": "entropy" + } + }, + { + "class": "extra_tree", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.5106397565689275, + "max_leaves": 32767, + "FLAML_sample_size": 
319382 + } + } + ], + "preprocessing": { + "center": [ + 36691.0, + 10.0, + 0.0, + 1.0 + ], + "scale": [ + 474977.25, + 7.5, + 1.0, + 0.5 + ] + }, + "neighbors": [ + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 3, + 1 + ] + }, + { + "features": [ + 0.6806831274550518, + 12.0, + 0.0, + 0.0 + ], + "choice": [ + 1 + ] + }, + { + "features": [ + -0.05119403087200492, + 4.0, + 0.0, + 0.0 + ], + "choice": [ + 0, + 1 + ] + }, + { + "features": [ + 1.817579684079606, + -0.13333333333333333, + 0.0, + -0.6666666666666667 + ], + "choice": [ + 0, + 3, + 2, + 1 + ] + }, + { + "features": [ + 0.14376478031316237, + -0.13333333333333333, + 0.0, + -1.7777777777777777 + ], + "choice": [ + 2, + 0, + 3, + 1 + ] + }, + { + "features": [ + -0.044096848849076456, + -0.13333333333333333, + 0.0, + -0.6666666666666667 + ], + "choice": [ + 2, + 3, + 0, + 1 + ] + }, + { + "features": [ + 1.817579684079606, + 10.666666666666666, + 0.0, + 0.0 + ], + "choice": [ + 1 + ] + }, + { + "features": [ + -0.01831245601763032, + -0.13333333333333333, + 0.0, + -1.5555555555555556 + ], + "choice": [ + 2, + 0, + 3, + 1 + ] + }, + { + "features": [ + 1.817579684079606, + 1.0666666666666667, + 0.0, + -0.8888888888888888 + ], + "choice": [ + 1 + ] + }, + { + "features": [ + 1.817579684079606, + 0.0, + 0.0, + -1.8 + ], + "choice": [ + 2, + 0, + 3, + 1 + ] + }, + { + "features": [ + 0.2584144819567674, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 3, + 1 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 1 + ] + }, + { + "features": [ + -0.034077421602824134, + 0.8, + 0.0, + 0.0 + ], + "choice": [ + 1 + ] + }, + { + "features": [ + -0.034077421602824134, + -0.26666666666666666, + 0.0, + 0.0 + ], + "choice": [ + 0, + 3, + 1 + ] + }, + { + "features": [ + -0.038138668746766295, + -0.26666666666666666, + 0.0, + 0.0 + ], + "choice": [ + 3, + 0, + 1 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + -0.6000000000000001 + ], + "choice": [ + 0, + 1 + ] + 
def _augment(row):
    """Turn every regret in a config's row into a comparable tuple.

    Each value ``x`` becomes ``(x, row_max, row_mean, config_label)`` so that
    taking ``min`` over configs breaks ties by the config's worst and average
    regret, and so that the winning config can be read back from the last
    element of the tuple.

    Args:
        row: A Series holding one configuration's regret on every task
            (one row of the regret matrix, as passed by ``apply(axis=1)``).

    Returns:
        A Series of tuples with the same index as ``row``.
    """
    # ``row.name`` is the row (config) label; ``row.index[0]`` would be the
    # first *task* label, which is the same for every row.
    worst, mean, config = row.max(), row.mean(), row.name
    return row.apply(lambda x: (x, worst, mean, config))


def construct_portfolio(regret_matrix, meta_features, regret_bound):
    """The portfolio construction algorithm.

    [Reference](https://arxiv.org/abs/2202.09927).

    Configurations are added greedily, one per round, until the excessive
    regret reaches (approximately) zero, a tie-broken round stops improving,
    or there is no configuration left to add.

    Args:
        regret_matrix: A dataframe of regret matrix
            (rows are configurations, columns are tasks).
        meta_features: None or a dataframe of metafeatures matrix.
            When set to None, the algorithm uses greedy strategy.
            Otherwise, the algorithm uses greedy strategy with feedback
            from the nearest neighbor predictor.
        regret_bound: A float of the regret bound.

    Returns:
        A list of configuration names.
    """
    configs = []
    # Keep the row order of the regret matrix (instead of a set) so that ties
    # are broken the same way on every run, independent of the hash seed.
    all_configs = list(dict.fromkeys(regret_matrix.index.tolist()))
    tasks = regret_matrix.columns
    # pre-processing
    if meta_features is not None:
        scaler = RobustScaler()
        meta_features = meta_features.loc[tasks]
        # Build a new float frame rather than assigning the scaled floats
        # into the (possibly integer-typed) columns in place.
        meta_features = pd.DataFrame(
            scaler.fit_transform(meta_features),
            index=meta_features.index,
            columns=meta_features.columns,
        )
        nearest_task = {}
        for t in tasks:
            other_meta_features = meta_features.drop(t)
            dist = pd.DataFrame(
                pairwise_distances(
                    meta_features.loc[t].to_numpy().reshape(1, -1),
                    other_meta_features,
                    metric="l2",
                ),
                columns=other_meta_features.index,
            )
            nearest_task[t] = dist.idxmin(axis=1)
        regret_matrix = regret_matrix.apply(_augment, axis=1)
        print(regret_matrix)

    def loss(configs):
        """Loss of config set `configs`, according to nearest neighbor config predictor.

        Returns a pair (regret in excess of `regret_bound` summed over tasks,
        average regret over tasks).
        """
        if meta_features is not None:
            r = []
            best_config_per_task = regret_matrix.loc[configs, :].min()
            for t in tasks:
                # The config the nearest task would pick, evaluated on task t.
                config = best_config_per_task[nearest_task[t]].iloc[0][-1]
                r.append(regret_matrix[t][config][0])
        else:
            r = regret_matrix.loc[configs].min()
        excessive_regret = (np.array(r) - regret_bound).clip(min=0).sum()
        avg_regret = np.array(r).mean()
        return excessive_regret, avg_regret

    prev = np.inf
    i = 0
    eps = 1e-5
    while True:
        candidates = [configs + [d] for d in all_configs if d not in configs]
        if not candidates:
            # Every configuration is already selected; nothing left to add.
            break
        losses, avg_regret = zip(*(loss(x) for x in candidates))
        losses = np.asarray(losses)
        sorted_losses = np.sort(losses)
        # A tie needs at least two candidates to compare.
        if len(sorted_losses) > 1 and sorted_losses[1] - sorted_losses[0] < eps:
            minloss = np.nanmin(losses)
            print(
                f"tie detected at loss = {sorted_losses[0]}, using alternative metric."
            )
            tied = np.flatnonzero(losses - minloss < eps)
            # Among tied candidates prefer the lowest average regret, then
            # the earliest candidate.
            ranked = [(avg_regret[j], j) for j in tied]
            minloss, ind = min(ranked)
            if minloss > prev - eps:
                print(
                    f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, "
                    f"prev = {prev:.5f}. Stopping."
                )
                break
            configs = candidates[ind]
            prev = minloss
        else:
            configs = candidates[np.nanargmin(losses)]
        i += 1
        if sorted_losses[0] <= eps:
            print(
                f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!"
            )
            break

    return configs
"neighbors": [ + { + "features": [ + 1.196467571930491, + 1.0923076923076922, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 4 + ] + }, + { + "features": [ + 11.096856898680088, + -0.16153846153846155, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 0, + 1, + 3, + 2, + 4 + ] + }, + { + "features": [ + 8.658152122305575, + 0.38461538461538464, + 0.0, + -0.7405797101449274 + ], + "choice": [ + 2, + 1, + 0, + 3, + 4 + ] + }, + { + "features": [ + 0.27281359794891274, + -0.14615384615384616, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 0, + 1, + 4 + ] + }, + { + "features": [ + -0.4125676573924604, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 3, + 1, + 0, + 2, + 4 + ] + }, + { + "features": [ + 0.6409647706770487, + 1.5538461538461539, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 1, + 4 + ] + }, + { + "features": [ + 2.3515573069983855, + 0.16923076923076924, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 3, + 1, + 4 + ] + }, + { + "features": [ + 0.6162045389801538, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 1, + 4 + ] + }, + { + "features": [ + 0.5386240622922799, + -0.09230769230769231, + 0.0, + -0.5582880434782608 + ], + "choice": [ + 1, + 0, + 4 + ] + }, + { + "features": [ + -0.41133320672300827, + -0.18461538461538463, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 3, + 2, + 4 + ] + }, + { + "features": [ + -0.31155635742094767, + 12.36923076923077, + 0.0, + 0.3865087169129372 + ], + "choice": [ + 2, + 3, + 0, + 1, + 4 + ] + }, + { + "features": [ + -0.40594435476213087, + -0.06153846153846154, + 0.0, + -0.7114130434782607 + ], + "choice": [ + 1, + 0, + 4 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 3, + 2, + 4 + ] + }, + { + "features": [ + 1.6675766783781218, + 0.0, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 4 + ] + }, + { + "features": [ + -0.36356946158959264, + 0.8923076923076924, + 0.0, + 
-1.2266908212560386 + ], + "choice": [ + 3, + 4 + ] + }, + { + "features": [ + -0.38225239768303104, + -0.05384615384615385, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 3, + 1, + 0, + 4 + ] + }, + { + "features": [ + -0.3590352293229513, + 0.06153846153846154, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 4 + ] + }, + { + "features": [ + 0.3090399772101415, + 0.6923076923076923, + 0.0, + -0.003997789240972687 + ], + "choice": [ + 1, + 3, + 4 + ] + }, + { + "features": [ + -0.3118649700883107, + -0.17692307692307693, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 4 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 3, + 4 + ] + }, + { + "features": [ + -0.3178473079479632, + -0.06153846153846154, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 0, + 4 + ] + } + ], + "configsource": [ + "Airlines", + "riccardo", + "fried", + "Dionis", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/lgbm/multiclass.json b/flaml/default/lgbm/multiclass.json new file mode 100644 index 000000000..8e72ac86d --- /dev/null +++ b/flaml/default/lgbm/multiclass.json @@ -0,0 +1,413 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 134, + "num_leaves": 225, + "min_child_samples": 21, + "learning_rate": 0.10182098014295998, + "log_max_bin": 5, + "colsample_bytree": 0.6103565306428956, + "reg_alpha": 0.0009765625, + "reg_lambda": 40.413729576022625 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 3726, + "num_leaves": 155, + "min_child_samples": 4, + "learning_rate": 0.040941607728296484, + "log_max_bin": 5, + "colsample_bytree": 0.5326256194627191, + "reg_alpha": 0.7408711930398492, + "reg_lambda": 0.5467731065349226 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 573, + "num_leaves": 16, + "min_child_samples": 52, + "learning_rate": 0.2422782244991656, + "log_max_bin": 7, + 
"colsample_bytree": 1.0, + "reg_alpha": 0.03433194930183514, + "reg_lambda": 0.03870494540146326 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 2931, + "num_leaves": 106, + "min_child_samples": 49, + "learning_rate": 0.007146230961642236, + "log_max_bin": 7, + "colsample_bytree": 0.46947896116006055, + "reg_alpha": 0.37428758811879526, + "reg_lambda": 23.639977131692564 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 241, + "num_leaves": 58, + "min_child_samples": 2, + "learning_rate": 0.022730855281657265, + "log_max_bin": 5, + "colsample_bytree": 0.5620897082415793, + "reg_alpha": 0.0031614554887399314, + "reg_lambda": 0.02175056245188971 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 8353, + "num_leaves": 371, + "min_child_samples": 71, + "learning_rate": 0.017965875630873252, + "log_max_bin": 10, + "colsample_bytree": 0.9002082433803926, + "reg_alpha": 0.4864366003694002, + "reg_lambda": 0.024138585745106363, + "FLAML_sample_size": 470619 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 320, + "num_leaves": 24, + "min_child_samples": 53, + "learning_rate": 0.019316895546068795, + "log_max_bin": 6, + "colsample_bytree": 0.3955693254372702, + "reg_alpha": 0.0013785083170001627, + "reg_lambda": 0.04644365636517757 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 733, + "num_leaves": 11, + "min_child_samples": 94, + "learning_rate": 0.06276798296942972, + "log_max_bin": 6, + "colsample_bytree": 0.6341928918435795, + "reg_alpha": 0.5811038918218691, + "reg_lambda": 43.304997517523944 + } + }, + { + "class": "lgbm", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 40337.0, + 54.0, + 7.0, + 1.0 + ], + "scale": [ + 58722.0, + 766.0, + 6.0, + 1.0 + ] + }, + "neighbors": [ + { + "features": [ + 8.217925138789552, + 0.0, + 0.0, + -0.8148148148148149 + ], + "choice": [ + 5, + 1, + 0, + 3, + 2, + 7, + 4, + 8 + ] + }, + { + 
"features": [ + 5.691767991553421, + 0.007832898172323759, + 58.0, + 0.0 + ], + "choice": [ + 0, + 2, + 4, + 7, + 6, + 8 + ] + }, + { + "features": [ + 0.385937127482034, + 0.9530026109660574, + 0.5, + 0.0 + ], + "choice": [ + 3, + 7, + 0, + 4, + 1, + 8 + ] + }, + { + "features": [ + 0.3123020333094922, + -0.03524804177545692, + 15.5, + 0.0 + ], + "choice": [ + 3, + 0, + 7, + 6, + 1, + 4, + 5, + 2, + 8 + ] + }, + { + "features": [ + 0.5964033922550321, + 0.0, + -0.5, + 0.0 + ], + "choice": [ + 3, + 0, + 7, + 4, + 8 + ] + }, + { + "features": [ + -0.5336500800381458, + 9.328981723237598, + 0.5, + 0.0 + ], + "choice": [ + 3, + 0, + 4, + 1, + 2, + 7, + 6, + 8 + ] + }, + { + "features": [ + 0.20201968597799802, + -0.0587467362924282, + 0.0, + 0.0 + ], + "choice": [ + 4, + 6, + 1, + 7, + 5, + 3, + 0, + 2, + 8 + ] + }, + { + "features": [ + 0.20677088655018563, + 0.16449086161879894, + 0.5, + 0.0 + ], + "choice": [ + 3, + 0, + 1, + 5, + 7, + 4, + 8 + ] + }, + { + "features": [ + -0.6604339089268076, + -0.06266318537859007, + -0.5, + -1.0 + ], + "choice": [ + 8 + ] + }, + { + "features": [ + -0.6703620448894793, + 1.0469973890339426, + 0.3333333333333333, + 0.0 + ], + "choice": [ + 4, + 1, + 8 + ] + }, + { + "features": [ + 0.34848949286468445, + -0.015665796344647518, + -0.6666666666666666, + -1.0 + ], + "choice": [ + 1, + 5, + 2, + 3, + 0, + 8 + ] + }, + { + "features": [ + -0.5336500800381458, + 2.5404699738903394, + -0.3333333333333333, + 0.0 + ], + "choice": [ + 2, + 8 + ] + }, + { + "features": [ + -0.5606757263036, + 0.9738903394255874, + 0.0, + 0.0 + ], + "choice": [ + 4, + 1, + 8 + ] + }, + { + "features": [ + 0.0, + -0.06266318537859007, + -0.6666666666666666, + 0.0 + ], + "choice": [ + 2, + 1, + 5, + 8 + ] + }, + { + "features": [ + -0.6562617077075031, + 0.21148825065274152, + 0.5, + 0.0 + ], + "choice": [ + 2, + 6, + 7, + 5, + 3, + 1, + 4, + 8 + ] + }, + { + "features": [ + -0.6515105071353156, + -0.04960835509138381, + 0.0, + 0.0 + ], + "choice": [ + 6, + 1, 
+ 3, + 7, + 5, + 4, + 0, + 2, + 8 + ] + }, + { + "features": [ + -0.6739552467559007, + -0.04699738903394256, + -0.5, + 0.0 + ], + "choice": [ + 6, + 7, + 3, + 1, + 0, + 4, + 5, + 8 + ] + } + ], + "configsource": [ + "Helena", + "connect-4", + "jungle_chess_2pcs_raw_endgame_complete", + "Jannis", + "fabert", + "Covertype", + "segment", + "APSFailure", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/lgbm/regression.json b/flaml/default/lgbm/regression.json new file mode 100644 index 000000000..97f6285ea --- /dev/null +++ b/flaml/default/lgbm/regression.json @@ -0,0 +1,278 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 4797, + "num_leaves": 122, + "min_child_samples": 2, + "learning_rate": 0.022635758411078528, + "log_max_bin": 9, + "colsample_bytree": 0.7019911744574896, + "reg_alpha": 0.004252223402511765, + "reg_lambda": 0.11288241427227624 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 1009, + "num_leaves": 42, + "min_child_samples": 12, + "learning_rate": 0.02167229637171611, + "log_max_bin": 7, + "colsample_bytree": 0.7385038460573171, + "reg_alpha": 0.003607184551842614, + "reg_lambda": 12.08340803550741 + } + }, + { + "class": "lgbm", + "hyperparameters": { + "n_estimators": 32767, + "num_leaves": 372, + "min_child_samples": 4, + "learning_rate": 0.03517259015200922, + "log_max_bin": 5, + "colsample_bytree": 1.0, + "reg_alpha": 0.02271142170225636, + "reg_lambda": 0.001963791798843179, + "FLAML_sample_size": 830258 + } + }, + { + "class": "lgbm", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 36691.0, + 10.0, + 0.0, + 1.0 + ], + "scale": [ + 140856.0, + 3.0, + 1.0, + 0.33333333333333337 + ] + }, + "neighbors": [ + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + -0.17263020389617767, + 10.0, + 0.0, + 0.0 + ], + "choice": [ + 1, + 0, + 3 + ] + }, + { + "features": [ + 
6.129018288180837, + -0.3333333333333333, + 0.0, + -1.0 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + 0.48478588061566424, + -0.3333333333333333, + 0.0, + -2.666666666666666 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + -0.14869796103822344, + -0.3333333333333333, + 0.0, + -1.0 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + -0.06175100812176975, + -0.3333333333333333, + 0.0, + -2.333333333333333 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + 6.129018288180837, + 2.6666666666666665, + 0.0, + -1.333333333333333 + ], + "choice": [ + 0, + 1, + 2, + 3 + ] + }, + { + "features": [ + 6.129018288180837, + 0.0, + 0.0, + -2.6999999999999997 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + 0.8713934798659624, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + -0.19217498722099166, + 0.6666666666666666, + 0.0, + -1.0 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.11491168285341058, + 2.0, + 0.0, + 0.0 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + -0.11491168285341058, + -0.6666666666666666, + 0.0, + 0.0 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + -0.1286065201340376, + -0.6666666666666666, + 0.0, + 0.0 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + -0.9 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + 6.288819787584483, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.16464332367808257, + 12.666666666666666, + 0.0, + 0.0 + ], + "choice": [ + 0, + 3 + ] + } + ], + "configsource": [ + "houses", + "house_8L", + "poker", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/portfolio.py b/flaml/default/portfolio.py new file mode 100644 index 000000000..2856b34e0 --- /dev/null +++ b/flaml/default/portfolio.py @@ -0,0 +1,234 @@ +import pandas as pd +import numpy as np 
def config_predictor_tuple(tasks, configs, meta_features, regret_matrix):
    """Config predictor represented in tuple.

    The returned tuple consists of (meta_features_norm, preferences, proc).

    Args:
        tasks: Task labels; used to select rows of `meta_features` and
            columns of `regret_matrix`.
        configs: Configuration labels; used to select rows of `regret_matrix`.
        meta_features: A dataframe of metafeatures, one row per task.
        regret_matrix: A dataframe of the configuration(row)-task(column)
            regret matrix.

    Returns:
        meta_features_norm: A dataframe of normalized meta features, each row for a task.
        preferences: A dataframe of sorted configuration indices (positions in
            `configs`) by their performance per task (column).
        proc: A dict with the fitted scaler's "center" and "scale" lists,
            i.e., the preprocessing needed to normalize new meta features.
    """
    # pre-processing
    scaler = RobustScaler()
    task_features = meta_features.loc[tasks]
    # Build a new float frame rather than assigning the scaled floats into
    # the (possibly integer-typed) columns in place.
    meta_features_norm = pd.DataFrame(
        scaler.fit_transform(task_features),
        index=task_features.index,
        columns=task_features.columns,
    )

    proc = {
        "center": scaler.center_.tolist(),
        "scale": scaler.scale_.tolist(),
    }

    # break ties using the order in configs: each regret is paired with the
    # config's position so equal regrets sort by that position
    regret = (
        regret_matrix[tasks]
        .loc[configs]
        .reset_index(drop=True)
        .apply(lambda row: row.apply(lambda x: (x, row.name)), axis=1)
    )
    print(regret)
    preferences = np.argsort(regret, axis=0)
    print(preferences)
    return (meta_features_norm, preferences, proc)
def _filter(preference, regret):
    """Remove choices after default or have NaN regret.

    Args:
        preference: A Series of configuration positions, best first.
        regret: A Series of regrets indexed by configuration name.

    Returns:
        A list of the kept configuration positions, in preference order.
    """
    try:
        default_pos = regret.index.get_loc("default")
    except KeyError:  # no "default"
        kept = preference
    else:
        # Cut the ranking right after the position of "default".
        cutoff = preference[preference == default_pos].index[0] + 1
        kept = preference[:cutoff]
    # Look regrets up by position and drop choices whose regret is NaN.
    positional_regret = regret.reset_index(drop=True)
    has_regret = positional_regret[kept].notna().to_numpy()
    return kept[has_regret].tolist()
+ + configs: names of model configs + regret: regret matrix + meta_features: task metafeatures + output_file: filename + config_path: path containing config json files + """ + output_file = Path(output_file) + # delete if exists + try: + output_file.unlink() + except FileNotFoundError: + pass + + meta_features_norm, preferences, proc = config_predictor_tuple( + regret.columns, configs, meta_features, regret + ) + portfolio = [load_json(config_path.joinpath(m + ".json")) for m in configs] + regret = regret.loc[configs] + meta_predictor = { + "version": "default", + "portfolio": portfolio, + "preprocessing": proc, + "neighbors": [ + {"features": tuple(x), "choice": _filter(preferences[y], regret[y])} + for x, y in zip( + meta_features_norm.to_records(index=False), preferences.columns + ) + ], + "configsource": list(configs), + } + + with open(output_file, "w+") as f: + json.dump(meta_predictor, f, indent=4) + return meta_predictor + + +# def analyze(regret_matrix, meta_predictor): +# tasks = regret_matrix.columns +# neighbors = meta_predictor["neighbors"] +# from sklearn.neighbors import NearestNeighbors + +# nn = NearestNeighbors(n_neighbors=1) +# for i, task in enumerate(neighbors): +# other_tasks = [j for j in range(len(neighbors)) if j != i] +# # find the nn and the regret +# nn.fit([neighbors[j]["features"] for j in other_tasks]) +# dist, ind = nn.kneighbors( +# np.array(task["features"]).reshape(1, -1), return_distance=True +# ) +# ind = other_tasks[int(ind.item())] +# choice = int(neighbors[ind]["choice"][0]) +# r = regret_matrix.iloc[choice, i] +# if r > regret_bound: +# label = "outlier" +# else: +# label = "normal" +# print(tasks[i], label, tasks[ind], "dist", dist, "regret", r) +# # find the best model and the regret +# regrets = regret_matrix.iloc[other_tasks, i] +# best = regrets.min() +# if best > regret_bound: +# print(tasks[i], "best_regret", best, "task", regrets.idxmin()) + + +def main(): + parser = argparse.ArgumentParser(description="Build a 
portfolio.") + parser.add_argument( + "--strategy", help="One of {greedy, greedy-feedback}", default="greedy" + ) + parser.add_argument("--input", help="Input path") + parser.add_argument("--metafeatures", help="CSV of task metafeatures") + parser.add_argument("--exclude", help="One task name to exclude (for LOO purposes)") + parser.add_argument("--output", help="Location to write portfolio JSON") + parser.add_argument("--task", help="Task to merge portfolios", default="binary") + parser.add_argument( + "--estimator", + help="Estimators to merge portfolios", + default=["lgbm", "xgboost"], + nargs="+", + ) + args = parser.parse_args() + + meta_features = pd.read_csv(args.metafeatures, index_col=0).groupby(level=0).first() + if args.exclude: + meta_features.drop(args.exclude, inplace=True) + + baseline_best = None + all_results = None + for estimator in args.estimator: + # produce regret + all, baseline = load_result( + f"{args.input}/{estimator}/results.csv", args.task, "result" + ) + regret = build_regret(all, baseline) + regret = regret.replace(np.inf, np.nan).dropna(axis=1, how="all") + + if args.exclude: + regret = regret.loc[[i for i in regret.index if args.exclude not in i]] + regret = regret[[c for c in regret.columns if args.exclude not in c]] + + print( + f"Regret matrix complete: {100 * regret.count().sum() / regret.shape[0] / regret.shape[1]}%" + ) + print(f"Num models considered: {regret.shape[0]}") + + configs = build_portfolio(meta_features, regret, args.strategy) + meta_predictor = serialize( + configs, + regret, + meta_features, + f"{args.output}/{estimator}/{args.task}.json", + Path(f"{args.input}/{estimator}"), + ) + configsource = meta_predictor["configsource"] + all = all.loc[configsource] + all.rename({x: f"{estimator}/{x}" for x in regret.index.values}, inplace=True) + baseline_best = ( + baseline + if baseline_best is None + else pd.DataFrame({0: baseline_best, 1: baseline}).max(1) + ) + all_results = all if all_results is None else 
import argparse
import ast
from os import path
import pandas as pd


def build_regret(all, baseline):
    """Compute the regret of every configuration on every task.

    Args:
        all: A dataframe of scores, one row per configuration and one column
            per task.
        baseline: A series of baseline scores indexed by task.

    Returns:
        A dataframe of `baseline - score`, restricted to the task columns of
        `all` that also appear in the index of `baseline`.
    """
    shared = all[all.columns.intersection(baseline.index)]
    return baseline - shared


def write_regret(regret, filename):
    """Write the regret matrix to `filename` as CSV."""
    regret.to_csv(filename)


def load_result(filename, task_type, metric):
    """Load an experiment result CSV.

    Args:
        filename: Path of the result CSV. The columns "task", "fold",
            "params", "type" and `metric` are read.
        task_type: Only rows whose "type" column equals this value are kept.
        metric: Name of the column holding the score.

    Returns:
        df: A dataframe of the mean `metric`, one row per configuration name
            and one column per task.
        baseline: A series indexed by task with the mean `metric` of the rows
            whose configuration name equals the task name.
    """
    df = pd.read_csv(filename)
    # Explicit copy: a column of the filtered frame is overwritten below.
    df = df.loc[
        (df[metric].notnull()) & (df["type"] == task_type),
        ["task", "fold", "params", metric],
    ].copy()
    # SECURITY: the "params" cell is text read from a file. It used to be
    # passed to eval(), which executes arbitrary code found in the CSV.
    # ast.literal_eval only accepts Python literals and raises on anything
    # else.
    # The configuration name is the basename of "_modeljson" without its
    # extension.
    df["params"] = df["params"].apply(
        lambda x: path.splitext(
            path.basename(ast.literal_eval(x)["_modeljson"])
        )[0]
    )
    baseline = (
        df.loc[df["task"] == df["params"], ["task", metric]]
        .groupby("task")
        .mean()[metric]
    )
    df = df.pivot_table(index="params", columns="task", values=metric)
    return df, baseline


def main():
    """Build a regret matrix from a result CSV and write it out as CSV."""
    parser = argparse.ArgumentParser(description="Build a regret matrix.")
    parser.add_argument("--result_csv", help="File of experiment results")
    parser.add_argument("--task_type", help="Type of task")
    parser.add_argument(
        "--metric", help="Metric for calculating regret", default="result"
    )
    parser.add_argument("--output", help="Location to write regret CSV to")
    args = parser.parse_args()

    results, baseline = load_result(args.result_csv, args.task_type, args.metric)
    regret = build_regret(results, baseline)
    write_regret(regret, args.output)


if __name__ == "__main__":
    # execute only if run as a script
    main()
b/flaml/default/rf/binary.json new file mode 100644 index 000000000..5a6eeff5f --- /dev/null +++ b/flaml/default/rf/binary.json @@ -0,0 +1,330 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "rf", + "hyperparameters": { + "n_estimators": 501, + "max_features": 0.24484242524861066, + "max_leaves": 1156, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 356, + "max_features": 0.1, + "max_leaves": 102, + "criterion": "gini" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 1000, + "max_features": 0.1779692423238241, + "max_leaves": 7499, + "criterion": "gini" + } + }, + { + "class": "rf", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 18000.0, + 28.0, + 2.0, + 0.7565217391304347 + ], + "scale": [ + 42124.0, + 130.0, + 1.0, + 0.5714285714285715 + ] + }, + "neighbors": [ + { + "features": [ + 1.196467571930491, + 1.0923076923076922, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + 11.096856898680088, + -0.16153846153846155, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 8.658152122305575, + 0.38461538461538464, + 0.0, + -0.7405797101449274 + ], + "choice": [ + 2, + 0, + 3 + ] + }, + { + "features": [ + 0.27281359794891274, + -0.14615384615384616, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 2, + 0, + 3 + ] + }, + { + "features": [ + -0.4125676573924604, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 2, + 1, + 0, + 3 + ] + }, + { + "features": [ + 0.6409647706770487, + 1.5538461538461539, + 0.0, + 0.0 + ], + "choice": [ + 1, + 0, + 2, + 3 + ] + }, + { + "features": [ + 2.3515573069983855, + 0.16923076923076924, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 3 + ] + }, + { + "features": [ + 0.6162045389801538, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 0, + 2, + 1, + 3 + ] + }, + { + "features": [ + 
0.5386240622922799, + -0.09230769230769231, + 0.0, + -0.5582880434782608 + ], + "choice": [ + 0, + 2, + 3 + ] + }, + { + "features": [ + -0.41133320672300827, + -0.18461538461538463, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 2, + 0, + 3 + ] + }, + { + "features": [ + -0.31155635742094767, + 12.36923076923077, + 0.0, + 0.3865087169129372 + ], + "choice": [ + 0, + 2, + 1, + 3 + ] + }, + { + "features": [ + -0.40594435476213087, + -0.06153846153846154, + 0.0, + -0.7114130434782607 + ], + "choice": [ + 0, + 2, + 3 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 2, + 3 + ] + }, + { + "features": [ + 1.6675766783781218, + 0.0, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 3 + ] + }, + { + "features": [ + -0.36356946158959264, + 0.8923076923076924, + 0.0, + -1.2266908212560386 + ], + "choice": [ + 2, + 0, + 3 + ] + }, + { + "features": [ + -0.38225239768303104, + -0.05384615384615385, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + -0.3590352293229513, + 0.06153846153846154, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + 0.3090399772101415, + 0.6923076923076923, + 0.0, + -0.003997789240972687 + ], + "choice": [ + 0, + 2, + 3 + ] + }, + { + "features": [ + -0.3118649700883107, + -0.17692307692307693, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 3 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + -0.3178473079479632, + -0.06153846153846154, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 3 + ] + } + ], + "configsource": [ + "Amazon_employee_access", + "kc1", + "Helena", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/rf/multiclass.json b/flaml/default/rf/multiclass.json new file mode 100644 index 000000000..7c16804fe --- /dev/null +++ b/flaml/default/rf/multiclass.json @@ -0,0 
+1,325 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "rf", + "hyperparameters": { + "n_estimators": 1000, + "max_features": 0.1779692423238241, + "max_leaves": 7499, + "criterion": "gini" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 400, + "max_features": 0.8961466398827462, + "max_leaves": 25095, + "criterion": "entropy", + "FLAML_sample_size": 470620 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 470, + "max_features": 0.12698484669953783, + "max_leaves": 31499, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 632, + "max_features": 1.0, + "max_leaves": 1360, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 1713, + "max_features": 0.40966311008832224, + "max_leaves": 10210, + "criterion": "entropy", + "FLAML_sample_size": 105352 + } + }, + { + "class": "rf", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 40337.0, + 54.0, + 7.0, + 1.0 + ], + "scale": [ + 58722.0, + 766.0, + 6.0, + 1.0 + ] + }, + "neighbors": [ + { + "features": [ + 8.217925138789552, + 0.0, + 0.0, + -0.8148148148148149 + ], + "choice": [ + 1, + 4, + 5 + ] + }, + { + "features": [ + 5.691767991553421, + 0.007832898172323759, + 58.0, + 0.0 + ], + "choice": [ + 0, + 2, + 5 + ] + }, + { + "features": [ + 0.385937127482034, + 0.9530026109660574, + 0.5, + 0.0 + ], + "choice": [ + 4, + 2, + 1, + 3, + 0, + 5 + ] + }, + { + "features": [ + 0.3123020333094922, + -0.03524804177545692, + 15.5, + 0.0 + ], + "choice": [ + 0, + 3, + 2, + 1, + 5 + ] + }, + { + "features": [ + 0.5964033922550321, + 0.0, + -0.5, + 0.0 + ], + "choice": [ + 4, + 1, + 3, + 0, + 2, + 5 + ] + }, + { + "features": [ + -0.5336500800381458, + 9.328981723237598, + 0.5, + 0.0 + ], + "choice": [ + 0, + 2, + 5 + ] + }, + { + "features": [ + 0.20201968597799802, + -0.0587467362924282, + 0.0, + 0.0 + ], + "choice": [ + 1, + 4, + 5 + ] + }, + { + "features": [ + 
0.20677088655018563, + 0.16449086161879894, + 0.5, + 0.0 + ], + "choice": [ + 4, + 1, + 2, + 0, + 3, + 5 + ] + }, + { + "features": [ + -0.6604339089268076, + -0.06266318537859007, + -0.5, + -1.0 + ], + "choice": [ + 3, + 1, + 5 + ] + }, + { + "features": [ + -0.6703620448894793, + 1.0469973890339426, + 0.3333333333333333, + 0.0 + ], + "choice": [ + 0, + 5 + ] + }, + { + "features": [ + 0.34848949286468445, + -0.015665796344647518, + -0.6666666666666666, + -1.0 + ], + "choice": [ + 4, + 2, + 0, + 5 + ] + }, + { + "features": [ + -0.5336500800381458, + 2.5404699738903394, + -0.3333333333333333, + 0.0 + ], + "choice": [ + 4, + 3, + 1, + 2, + 0, + 5 + ] + }, + { + "features": [ + -0.5606757263036, + 0.9738903394255874, + 0.0, + 0.0 + ], + "choice": [ + 2, + 4, + 0, + 3, + 1, + 5 + ] + }, + { + "features": [ + 0.0, + -0.06266318537859007, + -0.6666666666666666, + 0.0 + ], + "choice": [ + 3, + 1, + 4, + 0, + 5 + ] + }, + { + "features": [ + -0.6562617077075031, + 0.21148825065274152, + 0.5, + 0.0 + ], + "choice": [ + 4, + 0, + 3, + 1, + 2, + 5 + ] + }, + { + "features": [ + -0.6515105071353156, + -0.04960835509138381, + 0.0, + 0.0 + ], + "choice": [ + 1, + 4, + 3, + 5 + ] + }, + { + "features": [ + -0.6739552467559007, + -0.04699738903394256, + -0.5, + 0.0 + ], + "choice": [ + 3, + 1, + 4, + 5 + ] + } + ], + "configsource": [ + "Helena", + "Covertype", + "Fashion-MNIST", + "jungle_chess_2pcs_raw_endgame_complete", + "MiniBooNE", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/rf/regression.json b/flaml/default/rf/regression.json new file mode 100644 index 000000000..9368f67be --- /dev/null +++ b/flaml/default/rf/regression.json @@ -0,0 +1,290 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "rf", + "hyperparameters": { + "n_estimators": 960, + "max_features": 0.694616932858775, + "max_leaves": 8937 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 1.0, + "max_leaves": 32767, + 
"FLAML_sample_size": 830258 + } + }, + { + "class": "rf", + "hyperparameters": { + "n_estimators": 2047, + "max_features": 0.6683903035731483, + "max_leaves": 591, + "criterion": "entropy" + } + }, + { + "class": "rf", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 36691.0, + 10.0, + 0.0, + 0.85 + ], + "scale": [ + 460950.5, + 5.5, + 1.0, + 0.48611111111111116 + ] + }, + "neighbors": [ + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.052751868150701646, + 5.454545454545454, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + 1.8728887375108607, + -0.18181818181818182, + 0.0, + -0.3771428571428571 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.14813955077605948, + -0.18181818181818182, + 0.0, + -1.52 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.04543871847410948, + -0.18181818181818182, + 0.0, + -0.3771428571428571 + ], + "choice": [ + 2, + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.018869705098486712, + -0.18181818181818182, + 0.0, + -1.2914285714285714 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 1.8728887375108607, + 1.4545454545454546, + 0.0, + -0.6057142857142855 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + 1.8728887375108607, + 0.0, + 0.0, + -1.5428571428571427 + ], + "choice": [ + 0, + 2, + 1, + 3 + ] + }, + { + "features": [ + 0.266278049378404, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.035114399485411125, + 1.0909090909090908, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 3 + ] + }, + { + "features": [ + -0.035114399485411125, + -0.36363636363636365, + 0.0, + 0.3085714285714286 + ], + "choice": [ + 0, + 2, + 1, + 3 + ] + }, + { + "features": [ + 
import json
import logging
import pathlib

import numpy as np

# scikit-learn and the flaml helpers (CLASSIFICATION, DataTransformer,
# get_estimator_class, get_classification_objective) are imported inside the
# functions that use them, so loading a portfolio file does not require them.

LOCATION = pathlib.Path(__file__).parent.resolve()
logger = logging.getLogger(__name__)
# Cache of loaded portfolio files, filled by load_config_predictor.
CONFIG_PREDICTORS = {}


def meta_feature(task, X_train, y_train):
    """Compute the meta features of a training set.

    Args:
        task: A string of the task type.
        X_train: A dataframe of training data (`select_dtypes` is called on it).
        y_train: The labels.

    Returns:
        A tuple (n_row, n_feat, n_class, percent_num); n_class is 0 unless
        `task` is in CLASSIFICATION, and percent_num is the fraction of
        numeric columns.
    """
    from flaml.data import CLASSIFICATION

    is_classification = task in CLASSIFICATION
    n_row = X_train.shape[0]
    n_feat = X_train.shape[1]
    n_class = len(np.unique(y_train)) if is_classification else 0
    percent_num = X_train.select_dtypes(include=np.number).shape[1] / n_feat
    return (n_row, n_feat, n_class, percent_num)


def load_config_predictor(estimator_name, task, location=None):
    """Load (and cache) the mined portfolio of an estimator for a task.

    Args:
        estimator_name: A str of the learner name (sub-directory name).
        task: A str of the task; "multi" is read from "multiclass.json".
        location: (Optional) The directory containing the portfolio files,
            by default the directory of this module.

    Returns:
        The dict parsed from `{location}/{estimator_name}/{task}.json`.

    Raises:
        FileNotFoundError: If that portfolio file does not exist.
    """
    key = f"{estimator_name}_{task}"
    if location:
        # The cache must distinguish locations; otherwise a portfolio loaded
        # from one directory is returned for every other directory as well.
        key = f"{location}/{key}"
    predictor = CONFIG_PREDICTORS.get(key)
    if predictor:
        return predictor
    task = "multiclass" if task == "multi" else task
    location = location or LOCATION
    try:
        with open(
            f"{location}/{estimator_name}/{task}.json", "r", encoding="utf-8"
        ) as f:
            CONFIG_PREDICTORS[key] = predictor = json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Portfolio has not been built for {estimator_name} on {task} task."
        ) from err
    return predictor


def _without_sample_size(config):
    """Return a copy of a portfolio entry without "FLAML_sample_size"."""
    hyperparams = config["hyperparameters"]
    if hyperparams is not None:
        hyperparams = {
            name: value
            for name, value in hyperparams.items()
            if name != "FLAML_sample_size"
        }
    return {**config, "hyperparameters": hyperparams}


def suggest_config(task, X, y, estimator_or_predictor, location=None, k=None):
    """Suggest a list of configs for the given task and training data.

    The returned configs can be used as starting points for AutoML.fit().
    `FLAML_sample_size` is removed from the configs. The returned dicts are
    copies: neither the cached portfolio nor a predictor dict passed in by
    the caller is modified.

    Args:
        task: A string of the task type.
        X: A dataframe of training data.
        y: The labels.
        estimator_or_predictor: A str of the learner name or a dict of the
            learned config predictor.
        location: (Optional) The directory containing the portfolio files.
        k: (Optional) Keep only the first k choices of the nearest neighbor.

    Returns:
        A list of configs, each a dict with "class" and "hyperparameters".
    """
    if task == "classification":
        from flaml.ml import get_classification_objective

        task = get_classification_objective(len(np.unique(y)))
    predictor = (
        load_config_predictor(estimator_or_predictor, task, location)
        if isinstance(estimator_or_predictor, str)
        else estimator_or_predictor
    )
    assert predictor["version"] == "default"
    prep = predictor["preprocessing"]
    feature = meta_feature(task, X, y)
    feature = (np.array(feature) - np.array(prep["center"])) / np.array(prep["scale"])
    neighbors = predictor["neighbors"]

    from sklearn.neighbors import NearestNeighbors

    nn = NearestNeighbors(n_neighbors=1)
    nn.fit([x["features"] for x in neighbors])
    dist, ind = nn.kneighbors(feature.reshape(1, -1), return_distance=True)
    logger.info("metafeature distance: %s", dist.item())
    ind = int(ind.item())
    choice = neighbors[ind]["choice"] if k is None else neighbors[ind]["choice"][:k]
    return [_without_sample_size(predictor["portfolio"][x]) for x in choice]


def suggest_learner(
    task, X, y, estimator_or_predictor="all", estimator_list=None, location=None
):
    """Suggest best learner within estimator_list.

    Returns the "class" of the first suggested config that is in
    `estimator_list` (or of the first suggested config when no list is
    given), and `estimator_list[0]` when none of the suggestions match.
    """
    configs = suggest_config(task, X, y, estimator_or_predictor, location)
    if not estimator_list:
        return configs[0]["class"]
    for c in configs:
        if c["class"] in estimator_list:
            return c["class"]
    return estimator_list[0]


def _suggested_model(task, X, y, estimator_or_predictor, location):
    """Instantiate the model of the top suggested config.

    Returns the model together with the suggested hyperparameter dict.
    """
    from flaml.ml import get_estimator_class

    config = suggest_config(
        task, X, y, estimator_or_predictor, location=location, k=1
    )[0]
    model_class = get_estimator_class(task, config["class"])
    hyperparams = config["hyperparameters"]
    return model_class(task=task, **hyperparams), hyperparams


def suggest_hyperparams(task, X, y, estimator_or_predictor, location=None):
    """Suggest hyperparameter configurations and an estimator class.

    The configurations can be used to initialize the estimator class like lightgbm.LGBMRegressor.

    Example:

    ```python
    hyperparams, estimator_class = suggest_hyperparams("regression", X_train, y_train, "lgbm")
    model = estimator_class(**hyperparams)  # estimator_class is LGBMRegressor
    model.fit(X_train, y_train)
    ```

    Args:
        task: A string of the task type, e.g.,
            'classification', 'regression', 'ts_forecast', 'rank',
            'seq-classification', 'seq-regression'.
        X: A dataframe of training data in shape n*m.
            For 'ts_forecast' task, the first column of X_train
            must be the timestamp column (datetime type). Other
            columns in the dataframe are assumed to be exogenous
            variables (categorical or numeric).
        y: A series of labels in shape n*1.
        estimator_or_predictor: A str of the learner name or a dict of the learned config predictor.
            If a dict, it contains:
            - "version": a str of the version number.
            - "preprocessing": a dictionary containing:
                * "center": a list of meta feature value offsets for normalization.
                * "scale": a list of meta feature scales to normalize each dimension.
            - "neighbors": a list of dictionaries. Each dictionary contains:
                * "features": a list of the normalized meta features for a neighbor.
                * "choice": a list of configuration ids (indices into "portfolio"),
                    the first one being the preferred configuration.
            - "portfolio": a list of dictionaries, each corresponding to a configuration:
                * "class": a str of the learner name.
                * "hyperparameters": a dict of the config. The key "FLAML_sample_size" will be ignored.
        location: (Optional) A str of the location containing mined portfolio file.
            Only valid when the portfolio is a str, by default the location is flaml/default.

    Returns:
        hyperparams: A dict of the hyperparameter configurations.
        estimator_class: A class of the underlying estimator, e.g., lightgbm.LGBMClassifier.
    """
    model, hyperparams = _suggested_model(task, X, y, estimator_or_predictor, location)
    estimator_class = model.estimator_class
    # an empty suggestion stays empty; otherwise use the params held by the model
    hyperparams = hyperparams and model.params
    return hyperparams, estimator_class


def preprocess_and_suggest_hyperparams(
    task,
    X,
    y,
    estimator_or_predictor,
    location=None,
):
    """Preprocess the data and suggest hyperparameters.

    Example:

    ```python
    (
        hyperparams,
        estimator_class,
        X,
        y,
        feature_transformer,
        label_transformer,
    ) = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth")
    model = estimator_class(**hyperparams)  # estimator_class is XGBClassifier
    model.fit(X, y)
    X_test = feature_transformer.transform(X_test)
    y_pred = label_transformer.inverse_transform(pd.Series(model.predict(X_test).astype(int)))
    ```

    Args:
        task: A string of the task type, e.g.,
            'classification', 'regression', 'ts_forecast', 'rank',
            'seq-classification', 'seq-regression'.
        X: A dataframe of training data in shape n*m.
            For 'ts_forecast' task, the first column of X_train
            must be the timestamp column (datetime type). Other
            columns in the dataframe are assumed to be exogenous
            variables (categorical or numeric).
        y: A series of labels in shape n*1.
        estimator_or_predictor: A str of the learner name or a dict of the learned config predictor.
            "choose_xgb" means choosing between xgb_limitdepth and xgboost.
            If a dict, it contains:
            - "version": a str of the version number.
            - "preprocessing": a dictionary containing:
                * "center": a list of meta feature value offsets for normalization.
                * "scale": a list of meta feature scales to normalize each dimension.
            - "neighbors": a list of dictionaries. Each dictionary contains:
                * "features": a list of the normalized meta features for a neighbor.
                * "choice": a list of configuration ids (indices into "portfolio"),
                    the first one being the preferred configuration.
            - "portfolio": a list of dictionaries, each corresponding to a configuration:
                * "class": a str of the learner name.
                * "hyperparameters": a dict of the config. The key "FLAML_sample_size" will be ignored.
        location: (Optional) A str of the location containing mined portfolio file.
            Only valid when the portfolio is a str, by default the location is flaml/default.

    Returns:
        hyperparams: A dict of the hyperparameter configurations.
        estimator_class: A class of the underlying estimator, e.g., lightgbm.LGBMClassifier.
        X: the preprocessed X.
        y: the preprocessed y.
        feature_transformer: a data transformer that can be applied to X_test.
        label_transformer: a label transformer that can be applied to y_test.
    """
    from flaml.data import DataTransformer

    dt = DataTransformer()
    X, y = dt.fit_transform(X, y, task)
    if estimator_or_predictor == "choose_xgb":
        # choose between xgb_limitdepth and xgboost
        estimator_or_predictor = suggest_learner(
            task,
            X,
            y,
            estimator_list=["xgb_limitdepth", "xgboost"],
            location=location,
        )
    model, hyperparams = _suggested_model(task, X, y, estimator_or_predictor, location)
    estimator_class = model.estimator_class
    X = model._preprocess(X)
    hyperparams = hyperparams and model.params

    class AutoMLTransformer:
        """Apply the fitted data transformer, then the model's own preprocessing."""

        def transform(self, X):
            return model._preprocess(dt.transform(X))

    transformer = AutoMLTransformer()
    return hyperparams, estimator_class, X, y, transformer, dt.label_transformer
+1,326 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 877, + "max_depth": 11, + "min_child_weight": 0.6205465771093738, + "learning_rate": 0.013622118381700795, + "subsample": 0.566692814245426, + "colsample_bylevel": 0.8865741642101924, + "colsample_bytree": 1.0, + "reg_alpha": 0.01386336444764391, + "reg_lambda": 3.113947886074155 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 5457, + "max_depth": 6, + "min_child_weight": 0.19978269031877885, + "learning_rate": 0.003906732665632749, + "subsample": 0.8207785234496902, + "colsample_bylevel": 0.8438751931476698, + "colsample_bytree": 0.42202862997585794, + "reg_alpha": 0.017372558844968737, + "reg_lambda": 0.03977802121721031 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 3526, + "max_depth": 13, + "min_child_weight": 0.0994486725676356, + "learning_rate": 0.0009765625, + "subsample": 0.46123759274652554, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.4498813776397717, + "reg_alpha": 0.002599398546499414, + "reg_lambda": 0.028336396854402753 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 18000.0, + 21.0, + 2.0, + 0.7565217391304347 + ], + "scale": [ + 39542.5, + 143.0, + 1.0, + 0.5714285714285715 + ] + }, + "neighbors": [ + { + "features": [ + 1.2745779857115762, + 1.0419580419580419, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + 11.821306189542897, + -0.0979020979020979, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 0, + 2, + 3 + ] + }, + { + "features": [ + 0.290624012138838, + -0.08391608391608392, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 2, + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.4395018018587596, + -0.04895104895104895, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.68280963520263, 
+ 1.4615384615384615, + 0.0, + 0.0 + ], + "choice": [ + 1, + 2, + 0, + 3 + ] + }, + { + "features": [ + 0.65643295188721, + -0.04895104895104895, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 1, + 3 + ] + }, + { + "features": [ + 0.5737876967819435, + -0.03496503496503497, + 0.0, + -0.5582880434782608 + ], + "choice": [ + 2, + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.4381867610798508, + -0.11888111888111888, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.3318960611999747, + 11.293706293706293, + 0.0, + 0.3865087169129372 + ], + "choice": [ + 1, + 0, + 2, + 3 + ] + }, + { + "features": [ + -0.432446102294999, + -0.006993006993006993, + 0.0, + -0.7114130434782607 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.0, + 29.895104895104897, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 2, + 3 + ] + }, + { + "features": [ + 1.7764430675855092, + 0.04895104895104895, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 2, + 3 + ] + }, + { + "features": [ + -0.3873047986343807, + 0.8601398601398601, + 0.0, + -1.2266908212560386 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.40720743503824997, + 0.0, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 0, + 2, + 3 + ] + }, + { + "features": [ + -0.38247455269646585, + 0.1048951048951049, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.32921540115066067, + 0.6783216783216783, + 0.0, + -0.003997789240972687 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.3322248213947019, + -0.11188811188811189, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + 0.0, + 29.895104895104897, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.3385977113232598, + -0.006993006993006993, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 1, + 3 + ] + } + ], + "configsource": [ + "Jannis", + 
"adult", + "Amazon_employee_access", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/xgb_limitdepth/multiclass.json b/flaml/default/xgb_limitdepth/multiclass.json new file mode 100644 index 000000000..73b177b6b --- /dev/null +++ b/flaml/default/xgb_limitdepth/multiclass.json @@ -0,0 +1,354 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1191, + "max_depth": 13, + "min_child_weight": 6.4007885677724605, + "learning_rate": 0.037622775650237326, + "subsample": 1.0, + "colsample_bylevel": 0.3697773165627811, + "colsample_bytree": 0.813871237069598, + "reg_alpha": 0.0009765625, + "reg_lambda": 1.075702708240612 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1499, + "max_depth": 11, + "min_child_weight": 0.07563529776156448, + "learning_rate": 0.039042609221240955, + "subsample": 0.7832981935783824, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.0009765625, + "reg_lambda": 23.513066752844153 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 313, + "max_depth": 7, + "min_child_weight": 30.424259012001368, + "learning_rate": 0.08466828646360688, + "subsample": 0.9897083979469301, + "colsample_bylevel": 0.6769490906308069, + "colsample_bytree": 1.0, + "reg_alpha": 0.0014544085935366477, + "reg_lambda": 34.09911172306857 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 566, + "max_depth": 13, + "min_child_weight": 0.013176186839973599, + "learning_rate": 0.09285619488896565, + "subsample": 0.5897287493640815, + "colsample_bylevel": 0.923664288991597, + "colsample_bytree": 0.8244714790646485, + "reg_alpha": 0.023484974838756726, + "reg_lambda": 0.5690298249126402, + "FLAML_sample_size": 470620 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": {} + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 971, + 
"max_depth": 8, + "min_child_weight": 0.0044052948947322645, + "learning_rate": 0.15171239415469703, + "subsample": 0.8340342805529243, + "colsample_bylevel": 0.9489310919814007, + "colsample_bytree": 0.022724724669028674, + "reg_alpha": 0.0009765625, + "reg_lambda": 0.0025897714798936954 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 464, + "max_depth": 2, + "min_child_weight": 0.0068282719220722, + "learning_rate": 0.07962498837600937, + "subsample": 0.47139986510869014, + "colsample_bylevel": 0.4814471959023239, + "colsample_bytree": 0.6050207253592859, + "reg_alpha": 0.0010290828959872173, + "reg_lambda": 0.0103104214002687 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1799, + "max_depth": 3, + "min_child_weight": 0.0010034151843327725, + "learning_rate": 0.03453775119035777, + "subsample": 0.31322065037892344, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.2219038021462818, + "reg_alpha": 0.03885163786709896, + "reg_lambda": 1.1077175359756786 + } + } + ], + "preprocessing": { + "center": [ + 24668.5, + 54.0, + 7.0, + 1.0 + ], + "scale": [ + 57198.0, + 770.5, + 6.0, + 1.0 + ] + }, + "neighbors": [ + { + "features": [ + 8.710820308402392, + 0.0, + 0.0, + -0.8148148148148149 + ], + "choice": [ + 0, + 3, + 4 + ] + }, + { + "features": [ + 0.6701545508584216, + 0.9474367293964958, + 0.5, + 0.0 + ], + "choice": [ + 0, + 2, + 7, + 4 + ] + }, + { + "features": [ + 0.5945575020105598, + -0.03504218040233614, + 15.5, + 0.0 + ], + "choice": [ + 0, + 2, + 7, + 6, + 3, + 4 + ] + }, + { + "features": [ + 0.8862285394594217, + 0.0, + -0.5, + 0.0 + ], + "choice": [ + 2, + 4 + ] + }, + { + "features": [ + -0.2739344033008147, + 9.2744970798183, + 0.5, + 0.0 + ], + "choice": [ + 0, + 2, + 7, + 6, + 4 + ] + }, + { + "features": [ + 0.48133676002657433, + -0.058403634003893576, + 0.0, + 0.0 + ], + "choice": [ + 1, + 4 + ] + }, + { + "features": [ + 0.4862145529563971, + 0.16353017521090202, + 0.5, + 
0.0 + ], + "choice": [ + 0, + 1, + 4 + ] + }, + { + "features": [ + -0.40409629707332423, + -0.06229720960415315, + -0.5, + -1.0 + ], + "choice": [ + 4 + ] + }, + { + "features": [ + -0.41428896115248787, + 1.0408825438027256, + 0.3333333333333333, + 0.0 + ], + "choice": [ + 5, + 3, + 1, + 7, + 6, + 4 + ] + }, + { + "features": [ + 0.6317091506696039, + -0.015574302401038288, + -0.6666666666666666, + -1.0 + ], + "choice": [ + 1, + 0, + 3, + 4 + ] + }, + { + "features": [ + -0.2739344033008147, + 2.5256327060350423, + -0.3333333333333333, + 0.0 + ], + "choice": [ + 0, + 5, + 3, + 7, + 4 + ] + }, + { + "features": [ + -0.30168012867582783, + 0.9682024659312135, + 0.0, + 0.0 + ], + "choice": [ + 1, + 3, + 4 + ] + }, + { + "features": [ + 0.2739344033008147, + -0.06229720960415315, + -0.6666666666666666, + 0.0 + ], + "choice": [ + 4 + ] + }, + { + "features": [ + -0.39981293052204625, + 0.21025308241401688, + 0.5, + 0.0 + ], + "choice": [ + 7, + 4 + ] + }, + { + "features": [ + -0.3949351375922235, + -0.04931862426995458, + 0.0, + 0.0 + ], + "choice": [ + 6, + 0, + 7, + 1, + 3, + 4 + ] + }, + { + "features": [ + -0.41797790132522117, + -0.04672290720311486, + -0.5, + 0.0 + ], + "choice": [ + 6, + 1, + 7, + 2, + 0, + 3, + 4 + ] + } + ], + "configsource": [ + "guillermo", + "connect-4", + "Helena", + "Covertype", + "default", + "cnae-9", + "vehicle", + "mfeat-factors" + ] +} \ No newline at end of file diff --git a/flaml/default/xgb_limitdepth/regression.json b/flaml/default/xgb_limitdepth/regression.json new file mode 100644 index 000000000..95e1a8d62 --- /dev/null +++ b/flaml/default/xgb_limitdepth/regression.json @@ -0,0 +1,347 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 4923, + "max_depth": 12, + "min_child_weight": 0.7625732991776795, + "learning_rate": 0.009239549681857523, + "subsample": 0.8193164619615052, + "colsample_bylevel": 0.7785754297307862, + "colsample_bytree": 
0.788491073979525, + "reg_alpha": 0.002282749364196872, + "reg_lambda": 131.2194560716441 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 2111, + "max_depth": 9, + "min_child_weight": 3.405822241186395, + "learning_rate": 0.005804247705198151, + "subsample": 0.37848422782052427, + "colsample_bylevel": 0.8228350674288559, + "colsample_bytree": 0.8813475713109656, + "reg_alpha": 0.009761356063132219, + "reg_lambda": 13.187783936727843, + "FLAML_sample_size": 810000 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 1499, + "max_depth": 11, + "min_child_weight": 0.07563529776156448, + "learning_rate": 0.039042609221240955, + "subsample": 0.7832981935783824, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.0009765625, + "reg_lambda": 23.513066752844153 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 19722, + "max_depth": 11, + "min_child_weight": 6.46800727978204, + "learning_rate": 0.0010837437950202355, + "subsample": 0.49509562408032115, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.8826299329274134, + "reg_alpha": 0.23887161121959208, + "reg_lambda": 15.163773888208217 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": { + "n_estimators": 544, + "max_depth": 12, + "min_child_weight": 79.32555867011995, + "learning_rate": 0.010128107120014433, + "subsample": 0.9799974977817297, + "colsample_bylevel": 0.881815418056542, + "colsample_bytree": 0.9718556912196423, + "reg_alpha": 72.63148950428749, + "reg_lambda": 1.4601415712058006 + } + }, + { + "class": "xgb_limitdepth", + "hyperparameters": {} + } + ], + "preprocessing": { + "center": [ + 36691.0, + 10.0, + 0.0, + 1.0 + ], + "scale": [ + 140856.0, + 1.0, + 1.0, + 0.4444444444444444 + ] + }, + "neighbors": [ + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 4, + 5 + ] + }, + { + "features": [ + -0.17263020389617767, + 30.0, + 0.0, + 0.0 + ], + "choice": [ + 2, 
+ 0, + 5 + ] + }, + { + "features": [ + 6.129018288180837, + -1.0, + 0.0, + -0.7500000000000001 + ], + "choice": [ + 1, + 0, + 2, + 4, + 5 + ] + }, + { + "features": [ + 0.48478588061566424, + -1.0, + 0.0, + -2.0 + ], + "choice": [ + 4, + 1, + 3, + 5 + ] + }, + { + "features": [ + -0.14869796103822344, + -1.0, + 0.0, + -0.7500000000000001 + ], + "choice": [ + 4, + 1, + 3, + 0, + 5 + ] + }, + { + "features": [ + -0.06175100812176975, + -1.0, + 0.0, + -1.7500000000000002 + ], + "choice": [ + 4, + 1, + 5 + ] + }, + { + "features": [ + 6.129018288180837, + 8.0, + 0.0, + -1.0 + ], + "choice": [ + 0, + 2, + 1, + 4, + 5 + ] + }, + { + "features": [ + 6.129018288180837, + 0.0, + 0.0, + -2.0250000000000004 + ], + "choice": [ + 1, + 0, + 2, + 4, + 5 + ] + }, + { + "features": [ + 0.8713934798659624, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 4, + 5 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 1, + 3, + 0, + 2, + 5 + ] + }, + { + "features": [ + -0.11491168285341058, + 6.0, + 0.0, + 0.0 + ], + "choice": [ + 3, + 1, + 0, + 2, + 4, + 5 + ] + }, + { + "features": [ + -0.11491168285341058, + -2.0, + 0.0, + 0.0 + ], + "choice": [ + 0, + 1, + 3, + 2, + 4, + 5 + ] + }, + { + "features": [ + -0.1286065201340376, + -2.0, + 0.0, + 0.0 + ], + "choice": [ + 3, + 0, + 2, + 1, + 4, + 5 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + -0.6750000000000002 + ], + "choice": [ + 2, + 3, + 1, + 0, + 5 + ] + }, + { + "features": [ + 6.288819787584483, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 1, + 5 + ] + }, + { + "features": [ + -0.16464332367808257, + 38.0, + 0.0, + 0.0 + ], + "choice": [ + 0, + 2, + 3, + 1, + 5 + ] + }, + { + "features": [ + -0.15343329357641847, + -7.0, + 0.0, + -1.5000000000000002 + ], + "choice": [ + 3, + 5 + ] + } + ], + "configsource": [ + "higgs", + "bng_pharynx", + "connect-4", + "house_16H", + "bng_echomonths", + "default" + ] +} \ No newline at end of file diff --git a/flaml/default/xgboost/binary.json 
b/flaml/default/xgboost/binary.json new file mode 100644 index 000000000..6445043c0 --- /dev/null +++ b/flaml/default/xgboost/binary.json @@ -0,0 +1,372 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 319, + "max_leaves": 1312, + "min_child_weight": 0.001, + "learning_rate": 0.01872379806270421, + "subsample": 0.6890079660561895, + "colsample_bylevel": 0.7551225121854014, + "colsample_bytree": 0.7860755604500558, + "reg_alpha": 0.17028752704343114, + "reg_lambda": 1.4375743264564231 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 7902, + "max_leaves": 49, + "min_child_weight": 0.038063497848955595, + "learning_rate": 0.0009765625, + "subsample": 0.9357800695141445, + "colsample_bylevel": 0.47031312177249246, + "colsample_bytree": 0.9053386579586192, + "reg_alpha": 1.5286102593845932, + "reg_lambda": 18.96811296717419 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 13499, + "max_leaves": 60, + "min_child_weight": 0.008494221584011285, + "learning_rate": 0.006955765856675575, + "subsample": 0.5965241023754743, + "colsample_bylevel": 0.590641168068946, + "colsample_bytree": 1.0, + "reg_alpha": 0.2522240954379289, + "reg_lambda": 5.351809144038808 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 591, + "max_leaves": 16651, + "min_child_weight": 0.03356567864689129, + "learning_rate": 0.002595066436678338, + "subsample": 0.9114132805513452, + "colsample_bylevel": 0.9503441844594458, + "colsample_bytree": 0.5703338448066768, + "reg_alpha": 0.010405212349127894, + "reg_lambda": 0.05352660657433639 + } + } + ], + "preprocessing": { + "center": [ + 18000.0, + 28.0, + 2.0, + 0.7565217391304347 + ], + "scale": [ + 42124.0, + 130.0, + 1.0, + 0.5714285714285715 + ] + }, + "neighbors": [ + { + "features": [ + 1.196467571930491, + 1.0923076923076922, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 3, + 2, + 1 + ] + }, + { + 
"features": [ + 11.096856898680088, + -0.16153846153846155, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 0, + 2, + 3, + 1 + ] + }, + { + "features": [ + 8.658152122305575, + 0.38461538461538464, + 0.0, + -0.7405797101449274 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.27281359794891274, + -0.14615384615384616, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 3, + 0, + 2, + 1 + ] + }, + { + "features": [ + -0.4125676573924604, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 3, + 1, + 0, + 2 + ] + }, + { + "features": [ + 0.6409647706770487, + 1.5538461538461539, + 0.0, + 0.0 + ], + "choice": [ + 1, + 0, + 2, + 3 + ] + }, + { + "features": [ + 2.3515573069983855, + 0.16923076923076924, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.6162045389801538, + -0.1076923076923077, + 0.0, + -0.5739130434782609 + ], + "choice": [ + 1, + 0, + 2, + 3 + ] + }, + { + "features": [ + 0.5386240622922799, + -0.09230769230769231, + 0.0, + -0.5582880434782608 + ], + "choice": [ + 0, + 1, + 3, + 2 + ] + }, + { + "features": [ + -0.41133320672300827, + -0.18461538461538463, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.31155635742094767, + 12.36923076923077, + 0.0, + 0.3865087169129372 + ], + "choice": [ + 2, + 1, + 0, + 3 + ] + }, + { + "features": [ + -0.40594435476213087, + -0.06153846153846154, + 0.0, + -0.7114130434782607 + ], + "choice": [ + 0, + 1, + 2, + 3 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0 + ] + }, + { + "features": [ + 1.6675766783781218, + 0.0, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.36356946158959264, + 0.8923076923076924, + 0.0, + -1.2266908212560386 + ], + "choice": [ + 3, + 1, + 0, + 2 + ] + }, + { + "features": [ + -0.38225239768303104, + -0.05384615384615385, + 0.0, + 
0.4260869565217391 + ], + "choice": [ + 3, + 2, + 0, + 1 + ] + }, + { + "features": [ + -0.3590352293229513, + 0.06153846153846154, + 0.0, + -1.3239130434782607 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.3090399772101415, + 0.6923076923076923, + 0.0, + -0.003997789240972687 + ], + "choice": [ + 2, + 0, + 3, + 1 + ] + }, + { + "features": [ + -0.3118649700883107, + -0.17692307692307693, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + 0.0, + 32.83076923076923, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 3 + ] + }, + { + "features": [ + -0.3178473079479632, + -0.06153846153846154, + 0.0, + 0.4260869565217391 + ], + "choice": [ + 0, + 3, + 1, + 2 + ] + } + ], + "configsource": [ + "fabert", + "bng_lowbwt", + "pol", + "Amazon_employee_access" + ] +} \ No newline at end of file diff --git a/flaml/default/xgboost/multiclass.json b/flaml/default/xgboost/multiclass.json new file mode 100644 index 000000000..d293cd6de --- /dev/null +++ b/flaml/default/xgboost/multiclass.json @@ -0,0 +1,509 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 392, + "max_leaves": 46, + "min_child_weight": 0.20655273911443411, + "learning_rate": 0.08039123467849849, + "subsample": 0.6482821473906787, + "colsample_bylevel": 0.5448604029329934, + "colsample_bytree": 0.4211786481671673, + "reg_alpha": 0.029040644754759502, + "reg_lambda": 4.60220206538413 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 6357, + "max_leaves": 206, + "min_child_weight": 1.9495322566288034, + "learning_rate": 0.0068766724195393905, + "subsample": 0.9451618245005704, + "colsample_bylevel": 0.9030482524943064, + "colsample_bytree": 0.9278972006416252, + "reg_alpha": 0.01857648400903689, + "reg_lambda": 6.021166480604588, + "FLAML_sample_size": 344444 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 1067, + "max_leaves": 
55, + "min_child_weight": 1.578700876556201, + "learning_rate": 0.01882776721912098, + "subsample": 0.6486829588043383, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.6470978147570122, + "reg_alpha": 0.2623396481373557, + "reg_lambda": 12.320026567378322 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 765, + "max_leaves": 6, + "min_child_weight": 0.001, + "learning_rate": 1.0, + "subsample": 0.9833803894285497, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.0012553728257619922, + "reg_lambda": 0.03280542610559108 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 2866, + "max_leaves": 2954, + "min_child_weight": 0.003652484923138387, + "learning_rate": 0.006320484540131336, + "subsample": 0.45886345839532916, + "colsample_bylevel": 0.4143419565729296, + "colsample_bytree": 0.9117641224108227, + "reg_alpha": 0.2873746517375349, + "reg_lambda": 17.04964039639045 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 512, + "max_leaves": 3194, + "min_child_weight": 0.004561511536080627, + "learning_rate": 0.05288849444758447, + "subsample": 0.8653058105000044, + "colsample_bylevel": 0.8833689901424637, + "colsample_bytree": 0.9505209943737727, + "reg_alpha": 0.0037017878164852017, + "reg_lambda": 2.1872397928745113, + "FLAML_sample_size": 470620 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 335, + "max_leaves": 37, + "min_child_weight": 0.0013851539632487603, + "learning_rate": 0.2593737370075479, + "subsample": 0.9810091528571387, + "colsample_bylevel": 0.9484250613084422, + "colsample_bytree": 0.192606132199437, + "reg_alpha": 0.10585986776049093, + "reg_lambda": 0.017684465384509407 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 8315, + "max_leaves": 4, + "min_child_weight": 0.7673654415794792, + "learning_rate": 0.002432260930606481, + "subsample": 0.8476000618302348, + "colsample_bylevel": 
0.8815698870579244, + "colsample_bytree": 0.7057137578225323, + "reg_alpha": 0.0016838090603716895, + "reg_lambda": 0.28815989841009226 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 319, + "max_leaves": 1312, + "min_child_weight": 0.001, + "learning_rate": 0.01872379806270421, + "subsample": 0.6890079660561895, + "colsample_bylevel": 0.7551225121854014, + "colsample_bytree": 0.7860755604500558, + "reg_alpha": 0.17028752704343114, + "reg_lambda": 1.4375743264564231 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 5739, + "max_leaves": 5, + "min_child_weight": 0.1359602026207002, + "learning_rate": 0.14496176867613397, + "subsample": 0.864897070662231, + "colsample_bylevel": 0.01, + "colsample_bytree": 0.9394057513384305, + "reg_alpha": 0.001103317921178771, + "reg_lambda": 0.1655504349283218 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 3369, + "max_leaves": 23, + "min_child_weight": 0.006136645605168392, + "learning_rate": 0.05726537983358939, + "subsample": 1.0, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.40981311572427176, + "reg_lambda": 4.343877111132155 + } + } + ], + "preprocessing": { + "center": [ + 24668.5, + 54.0, + 7.0, + 1.0 + ], + "scale": [ + 57198.0, + 770.5, + 6.0, + 1.0 + ] + }, + "neighbors": [ + { + "features": [ + 8.710820308402392, + 0.0, + 0.0, + -0.8148148148148149 + ], + "choice": [ + 5, + 4, + 1, + 8, + 10, + 2, + 0, + 6, + 9, + 7, + 3 + ] + }, + { + "features": [ + 0.6701545508584216, + 0.9474367293964958, + 0.5, + 0.0 + ], + "choice": [ + 0, + 2, + 3, + 6, + 10, + 8, + 9 + ] + }, + { + "features": [ + 0.5945575020105598, + -0.03504218040233614, + 15.5, + 0.0 + ], + "choice": [ + 0, + 2, + 3, + 7, + 8, + 5, + 10, + 9, + 6 + ] + }, + { + "features": [ + 0.8862285394594217, + 0.0, + -0.5, + 0.0 + ], + "choice": [ + 2, + 8, + 0, + 4, + 10, + 1, + 9, + 6, + 7, + 5, + 3 + ] + }, + { + "features": [ + -0.2739344033008147, + 
9.2744970798183, + 0.5, + 0.0 + ], + "choice": [ + 0, + 3, + 6 + ] + }, + { + "features": [ + 0.48133676002657433, + -0.058403634003893576, + 0.0, + 0.0 + ], + "choice": [ + 10, + 3, + 0, + 5, + 1, + 7, + 6, + 2, + 4, + 9, + 8 + ] + }, + { + "features": [ + 0.4862145529563971, + 0.16353017521090202, + 0.5, + 0.0 + ], + "choice": [ + 1, + 0, + 2, + 3, + 10, + 8, + 6, + 5, + 9, + 7 + ] + }, + { + "features": [ + -0.40409629707332423, + -0.06229720960415315, + -0.5, + -1.0 + ], + "choice": [ + 3, + 9, + 5, + 10, + 1, + 7, + 2, + 8, + 4, + 6, + 0 + ] + }, + { + "features": [ + -0.41428896115248787, + 1.0408825438027256, + 0.3333333333333333, + 0.0 + ], + "choice": [ + 6, + 9, + 0, + 5, + 10, + 4, + 8, + 7, + 1, + 2, + 3 + ] + }, + { + "features": [ + 0.6317091506696039, + -0.015574302401038288, + -0.6666666666666666, + -1.0 + ], + "choice": [ + 1, + 10, + 4, + 5, + 8, + 6, + 2, + 0, + 3, + 9, + 7 + ] + }, + { + "features": [ + -0.2739344033008147, + 2.5256327060350423, + -0.3333333333333333, + 0.0 + ], + "choice": [ + 0, + 2, + 3, + 9, + 6, + 10, + 5, + 8, + 7 + ] + }, + { + "features": [ + -0.30168012867582783, + 0.9682024659312135, + 0.0, + 0.0 + ], + "choice": [ + 8, + 4, + 0, + 2, + 10, + 1, + 5, + 6, + 9, + 7, + 3 + ] + }, + { + "features": [ + 0.2739344033008147, + -0.06229720960415315, + -0.6666666666666666, + 0.0 + ], + "choice": [ + 10, + 3, + 9, + 1, + 4, + 2, + 8, + 5, + 0, + 7, + 6 + ] + }, + { + "features": [ + -0.39981293052204625, + 0.21025308241401688, + 0.5, + 0.0 + ], + "choice": [ + 0, + 9, + 1, + 7, + 5, + 10, + 6, + 2, + 4, + 8, + 3 + ] + }, + { + "features": [ + -0.3949351375922235, + -0.04931862426995458, + 0.0, + 0.0 + ], + "choice": [ + 0, + 2, + 1, + 7, + 8, + 4, + 5, + 6, + 10, + 9, + 3 + ] + }, + { + "features": [ + -0.41797790132522117, + -0.04672290720311486, + -0.5, + 0.0 + ], + "choice": [ + 7, + 4, + 8, + 2, + 0, + 5, + 10, + 1, + 6, + 9, + 3 + ] + } + ], + "configsource": [ + "segment", + "Albert", + "Helena", + "car", + "house_8L", + 
"Covertype", + "cnae-9", + "KDDCup09_appetency", + "fabert", + "dilbert", + "jungle_chess_2pcs_raw_endgame_complete" + ] +} \ No newline at end of file diff --git a/flaml/default/xgboost/regression.json b/flaml/default/xgboost/regression.json new file mode 100644 index 000000000..1ff43d61d --- /dev/null +++ b/flaml/default/xgboost/regression.json @@ -0,0 +1,308 @@ +{ + "version": "default", + "portfolio": [ + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 6357, + "max_leaves": 206, + "min_child_weight": 1.9495322566288034, + "learning_rate": 0.0068766724195393905, + "subsample": 0.9451618245005704, + "colsample_bylevel": 0.9030482524943064, + "colsample_bytree": 0.9278972006416252, + "reg_alpha": 0.01857648400903689, + "reg_lambda": 6.021166480604588, + "FLAML_sample_size": 344444 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 23045, + "max_leaves": 247, + "min_child_weight": 0.004319397499079841, + "learning_rate": 0.0032914413473281215, + "subsample": 0.7334190564433234, + "colsample_bylevel": 1.0, + "colsample_bytree": 1.0, + "reg_alpha": 0.03514226467919635, + "reg_lambda": 1.2679661021665851 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 1899, + "max_leaves": 59, + "min_child_weight": 0.013389019900720164, + "learning_rate": 0.0028943401472847964, + "subsample": 0.7808944208233943, + "colsample_bylevel": 1.0, + "colsample_bytree": 0.9999355357362375, + "reg_alpha": 0.7905117773932884, + "reg_lambda": 2.916897119216104 + } + }, + { + "class": "xgboost", + "hyperparameters": { + "n_estimators": 5611, + "max_leaves": 61, + "min_child_weight": 0.01070518287797225, + "learning_rate": 0.005485127037677848, + "subsample": 0.4713518256961299, + "colsample_bylevel": 0.9777437906530106, + "colsample_bytree": 0.9519335125615331, + "reg_alpha": 0.03621564207188963, + "reg_lambda": 1.8045765669466283 + } + } + ], + "preprocessing": { + "center": [ + 36691.0, + 10.0, + 0.0, + 1.0 + ], + "scale": [ + 
324551.25, + 2.5, + 1.0, + 0.36111111111111116 + ] + }, + "neighbors": [ + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 2, + 3, + 0, + 1 + ] + }, + { + "features": [ + -0.07492191140844474, + 12.0, + 0.0, + 0.0 + ], + "choice": [ + 0, + 1, + 3, + 2 + ] + }, + { + "features": [ + 2.6600082421497375, + -0.4, + 0.0, + -0.923076923076923 + ], + "choice": [ + 3, + 0, + 2, + 1 + ] + }, + { + "features": [ + 0.21039820367353385, + -0.4, + 0.0, + -2.4615384615384612 + ], + "choice": [ + 3, + 2, + 0, + 1 + ] + }, + { + "features": [ + -0.06453526215043079, + -0.4, + 0.0, + -0.923076923076923 + ], + "choice": [ + 2, + 3, + 0, + 1 + ] + }, + { + "features": [ + -0.026800081651203008, + -0.4, + 0.0, + -2.1538461538461537 + ], + "choice": [ + 2, + 3, + 0, + 1 + ] + }, + { + "features": [ + 2.6600082421497375, + 3.2, + 0.0, + -1.2307692307692306 + ], + "choice": [ + 1, + 0, + 3, + 2 + ] + }, + { + "features": [ + 2.6600082421497375, + 0.0, + 0.0, + -2.492307692307692 + ], + "choice": [ + 3, + 0, + 2, + 1 + ] + }, + { + "features": [ + 0.3781868040871819, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 2, + 3, + 0, + 1 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 3, + 0, + 1, + 2 + ] + }, + { + "features": [ + -0.04987193856132121, + 2.4, + 0.0, + 0.0 + ], + "choice": [ + 3, + 1, + 0, + 2 + ] + }, + { + "features": [ + -0.04987193856132121, + -0.8, + 0.0, + 0.0 + ], + "choice": [ + 2, + 0, + 1, + 3 + ] + }, + { + "features": [ + -0.0558155299047531, + -0.8, + 0.0, + 0.0 + ], + "choice": [ + 0, + 3, + 1, + 2 + ] + }, + { + "features": [ + 0.0, + 0.0, + 0.0, + -0.8307692307692308 + ], + "choice": [ + 1, + 0, + 3, + 2 + ] + }, + { + "features": [ + 2.729362465866331, + 0.0, + 0.0, + 0.0 + ], + "choice": [ + 1, + 0, + 3, + 2 + ] + }, + { + "features": [ + -0.07145558675247746, + 15.2, + 0.0, + 0.0 + ], + "choice": [ + 0, + 3, + 1, + 2 + ] + } + ], + "configsource": [ + "Albert", + "mv", + "bng_echomonths", + "house_16H" + ] +} \ No 
newline at end of file diff --git a/flaml/model.py b/flaml/model.py index 27e9c3184..506a6c774 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -784,6 +784,7 @@ class LGBMEstimator(BaseEstimator): ITER_HP = "n_estimators" HAS_CALLBACK = True + DEFAULT_ITER = 100 @classmethod def search_space(cls, data_size, **params): @@ -887,7 +888,7 @@ class LGBMEstimator(BaseEstimator): def fit(self, X_train, y_train, budget=None, **kwargs): start_time = time.time() deadline = start_time + budget if budget else np.inf - n_iter = self.params[self.ITER_HP] + n_iter = self.params.get(self.ITER_HP, self.DEFAULT_ITER) trained = False if not self.HAS_CALLBACK: mem0 = psutil.virtual_memory().available if psutil is not None else 1 @@ -996,6 +997,8 @@ class LGBMEstimator(BaseEstimator): class XGBoostEstimator(SKLearnEstimator): """The class for tuning XGBoost regressor, not using sklearn API.""" + DEFAULT_ITER = 10 + @classmethod def search_space(cls, data_size, **params): upper = min(32768, int(data_size[0])) @@ -1146,6 +1149,8 @@ class XGBoostEstimator(SKLearnEstimator): class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator): """The class for tuning XGBoost with unlimited depth, using sklearn API.""" + DEFAULT_ITER = 10 + @classmethod def search_space(cls, data_size, **params): space = XGBoostEstimator.search_space(data_size) @@ -1352,6 +1357,7 @@ class CatBoostEstimator(BaseEstimator): """The class for tuning CatBoost.""" ITER_HP = "n_estimators" + DEFAULT_ITER = 1000 @classmethod def search_space(cls, data_size, **params): diff --git a/test/default/all/metafeatures.csv b/test/default/all/metafeatures.csv new file mode 100644 index 000000000..5693b5781 --- /dev/null +++ b/test/default/all/metafeatures.csv @@ -0,0 +1,13 @@ +Dataset,NumberOfInstances,NumberOfFeatures,NumberOfClasses,PercentageOfNumericFeatures +2dplanes,36691,10,0,1.0 +adult,43957,14,2,0.42857142857142855 +Airlines,485444,7,2,0.42857142857142855 +Albert,382716,78,2,0.3333333333333333 
+Amazon_employee_access,29492,9,2,0.0 +bng_breastTumor,104976,9,0,0.1111111111111111 +bng_pbc,900000,18,0,0.5555555555555556 +car,1555,6,4,0.0 +connect-4,60801,42,3,0.0 +dilbert,9000,2000,5,1.0 +Dionis,374569,60,355,1.0 +poker,922509,10,0,1.0 diff --git a/test/default/extra_tree/2dplanes.json b/test/default/extra_tree/2dplanes.json new file mode 100644 index 000000000..7d497f0e0 --- /dev/null +++ b/test/default/extra_tree/2dplanes.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 16, "max_features": 1.0, "max_leaves": 54}} \ No newline at end of file diff --git a/test/default/extra_tree/Airlines.json b/test/default/extra_tree/Airlines.json new file mode 100644 index 000000000..41a1743e9 --- /dev/null +++ b/test/default/extra_tree/Airlines.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 2047, "max_features": 1.0, "max_leaves": 8194, "criterion": "gini", "FLAML_sample_size": 436899}} \ No newline at end of file diff --git a/test/default/extra_tree/Albert.json b/test/default/extra_tree/Albert.json new file mode 100644 index 000000000..018f4bd62 --- /dev/null +++ b/test/default/extra_tree/Albert.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 1733, "max_features": 0.3841826938360253, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 344444}} \ No newline at end of file diff --git a/test/default/extra_tree/Amazon_employee_access.json b/test/default/extra_tree/Amazon_employee_access.json new file mode 100644 index 000000000..3a041b265 --- /dev/null +++ b/test/default/extra_tree/Amazon_employee_access.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 812, "max_features": 1.0, "max_leaves": 1474, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/extra_tree/adult.json b/test/default/extra_tree/adult.json new file mode 100644 index 000000000..4522b4992 --- /dev/null +++ b/test/default/extra_tree/adult.json @@ 
-0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 859, "max_features": 1.0, "max_leaves": 967, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/extra_tree/bng_breastTumor.json b/test/default/extra_tree/bng_breastTumor.json new file mode 100644 index 000000000..38145c38c --- /dev/null +++ b/test/default/extra_tree/bng_breastTumor.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 90, "max_features": 1.0, "max_leaves": 1301, "FLAML_sample_size": 94478}} \ No newline at end of file diff --git a/test/default/extra_tree/bng_pbc.json b/test/default/extra_tree/bng_pbc.json new file mode 100644 index 000000000..6eb8ced29 --- /dev/null +++ b/test/default/extra_tree/bng_pbc.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 1211, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 810000}} \ No newline at end of file diff --git a/test/default/extra_tree/car.json b/test/default/extra_tree/car.json new file mode 100644 index 000000000..06904813d --- /dev/null +++ b/test/default/extra_tree/car.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 333, "max_features": 1.0, "max_leaves": 201, "criterion": "gini"}} \ No newline at end of file diff --git a/test/default/extra_tree/connect-4.json b/test/default/extra_tree/connect-4.json new file mode 100644 index 000000000..5d2e67ff9 --- /dev/null +++ b/test/default/extra_tree/connect-4.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 229, "max_features": 0.5372053700721111, "max_leaves": 11150, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/extra_tree/default.json b/test/default/extra_tree/default.json new file mode 100644 index 000000000..88dfc8650 --- /dev/null +++ b/test/default/extra_tree/default.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {}} \ No newline at end of file diff --git 
a/test/default/extra_tree/dilbert.json b/test/default/extra_tree/dilbert.json new file mode 100644 index 000000000..e7f4d89ff --- /dev/null +++ b/test/default/extra_tree/dilbert.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 346, "max_features": 1.0, "max_leaves": 1007, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/extra_tree/poker.json b/test/default/extra_tree/poker.json new file mode 100644 index 000000000..5d7f1ee48 --- /dev/null +++ b/test/default/extra_tree/poker.json @@ -0,0 +1 @@ +{"class": "extra_tree", "hyperparameters": {"n_estimators": 1416, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 830258}} \ No newline at end of file diff --git a/test/default/extra_tree/results.csv b/test/default/extra_tree/results.csv new file mode 100644 index 000000000..ebcc68628 --- /dev/null +++ b/test/default/extra_tree/results.csv @@ -0,0 +1,142 @@ +task,fold,type,result,params +2dplanes,0,regression,0.946503,{'_modeljson': 'et/2dplanes.json'} +2dplanes,0,regression,0.945047,{'_modeljson': 'et/adult.json'} +2dplanes,0,regression,0.933571,{'_modeljson': 'et/Airlines.json'} +2dplanes,0,regression,0.919021,{'_modeljson': 'et/Albert.json'} +2dplanes,0,regression,0.944532,{'_modeljson': 'et/Amazon_employee_access.json'} +2dplanes,0,regression,0.94471,{'_modeljson': 'et/bng_breastTumor.json'} +2dplanes,0,regression,0.914912,{'_modeljson': 'et/bng_pbc.json'} +2dplanes,0,regression,0.946045,{'_modeljson': 'et/car.json'} +2dplanes,0,regression,0.935777,{'_modeljson': 'et/connect-4.json'} +2dplanes,0,regression,0.91501,{'_modeljson': 'et/default.json'} +2dplanes,0,regression,0.94497,{'_modeljson': 'et/dilbert.json'} +2dplanes,0,regression,0.914907,{'_modeljson': 'et/poker.json'} +adult,0,binary,0.902771,{'_modeljson': 'et/2dplanes.json'} +adult,0,binary,0.919086,{'_modeljson': 'et/adult.json'} +adult,0,binary,0.906742,{'_modeljson': 'et/Airlines.json'} +adult,0,binary,0.897039,{'_modeljson': 
'et/Albert.json'} +adult,0,binary,0.919317,{'_modeljson': 'et/Amazon_employee_access.json'} +adult,0,binary,0.918404,{'_modeljson': 'et/bng_breastTumor.json'} +adult,0,binary,0.895193,{'_modeljson': 'et/bng_pbc.json'} +adult,0,binary,0.912965,{'_modeljson': 'et/car.json'} +adult,0,binary,0.904228,{'_modeljson': 'et/connect-4.json'} +adult,0,binary,0.893933,{'_modeljson': 'et/default.json'} +adult,0,binary,0.918539,{'_modeljson': 'et/dilbert.json'} +adult,0,binary,0.895813,{'_modeljson': 'et/poker.json'} +Airlines,0,binary,0.683928,{'_modeljson': 'et/2dplanes.json'} +Airlines,0,binary,0.709673,{'_modeljson': 'et/adult.json'} +Airlines,0,binary,0.724391,{'_modeljson': 'et/Airlines.json'} +Airlines,0,binary,0.707411,{'_modeljson': 'et/Albert.json'} +Airlines,0,binary,0.713548,{'_modeljson': 'et/Amazon_employee_access.json'} +Airlines,0,binary,0.712774,{'_modeljson': 'et/bng_breastTumor.json'} +Airlines,0,binary,0.708477,{'_modeljson': 'et/bng_pbc.json'} +Airlines,0,binary,0.695604,{'_modeljson': 'et/car.json'} +Airlines,0,binary,0.719631,{'_modeljson': 'et/connect-4.json'} +Airlines,0,binary,0.619025,{'_modeljson': 'et/default.json'} +Airlines,0,binary,0.710038,{'_modeljson': 'et/dilbert.json'} +Airlines,0,binary,0.708628,{'_modeljson': 'et/poker.json'} +Albert,0,binary,0.707126,{'_modeljson': 'et/2dplanes.json'} +Albert,0,binary,0.727819,{'_modeljson': 'et/adult.json'} +Albert,0,binary,0.733953,{'_modeljson': 'et/Airlines.json'} +Albert,0,binary,0.739138,{'_modeljson': 'et/Albert.json'} +Albert,0,binary,0.729251,{'_modeljson': 'et/Amazon_employee_access.json'} +Albert,0,binary,0.728612,{'_modeljson': 'et/bng_breastTumor.json'} +Albert,0,binary,0.736396,{'_modeljson': 'et/bng_pbc.json'} +Albert,0,binary,0.719311,{'_modeljson': 'et/car.json'} +Albert,0,binary,0.735032,{'_modeljson': 'et/connect-4.json'} +Albert,0,binary,0.725017,{'_modeljson': 'et/default.json'} +Albert,0,binary,0.728108,{'_modeljson': 'et/dilbert.json'} +Albert,0,binary,0.736668,{'_modeljson': 
'et/poker.json'} +Amazon_employee_access,0,binary,0.708259,{'_modeljson': 'et/2dplanes.json'} +Amazon_employee_access,0,binary,0.872603,{'_modeljson': 'et/adult.json'} +Amazon_employee_access,0,binary,0.839293,{'_modeljson': 'et/Airlines.json'} +Amazon_employee_access,0,binary,0.834606,{'_modeljson': 'et/Albert.json'} +Amazon_employee_access,0,binary,0.873141,{'_modeljson': 'et/Amazon_employee_access.json'} +Amazon_employee_access,0,binary,0.860569,{'_modeljson': 'et/bng_breastTumor.json'} +Amazon_employee_access,0,binary,0.834654,{'_modeljson': 'et/bng_pbc.json'} +Amazon_employee_access,0,binary,0.81679,{'_modeljson': 'et/car.json'} +Amazon_employee_access,0,binary,0.831975,{'_modeljson': 'et/connect-4.json'} +Amazon_employee_access,0,binary,0.839651,{'_modeljson': 'et/default.json'} +Amazon_employee_access,0,binary,0.868815,{'_modeljson': 'et/dilbert.json'} +Amazon_employee_access,0,binary,0.841461,{'_modeljson': 'et/poker.json'} +bng_breastTumor,0,regression,0.137191,{'_modeljson': 'et/2dplanes.json'} +bng_breastTumor,0,regression,0.181002,{'_modeljson': 'et/adult.json'} +bng_breastTumor,0,regression,0.163121,{'_modeljson': 'et/Airlines.json'} +bng_breastTumor,0,regression,0.116596,{'_modeljson': 'et/Albert.json'} +bng_breastTumor,0,regression,0.181745,{'_modeljson': 'et/Amazon_employee_access.json'} +bng_breastTumor,0,regression,0.180948,{'_modeljson': 'et/bng_breastTumor.json'} +bng_breastTumor,0,regression,0.0784668,{'_modeljson': 'et/bng_pbc.json'} +bng_breastTumor,0,regression,0.168552,{'_modeljson': 'et/car.json'} +bng_breastTumor,0,regression,0.165576,{'_modeljson': 'et/connect-4.json'} +bng_breastTumor,0,regression,-0.28734,{'_modeljson': 'et/default.json'} +bng_breastTumor,0,regression,0.1822,{'_modeljson': 'et/dilbert.json'} +bng_breastTumor,0,regression,0.0780929,{'_modeljson': 'et/poker.json'} +bng_pbc,0,regression,0.332032,{'_modeljson': 'et/2dplanes.json'} +bng_pbc,0,regression,0.3879,{'_modeljson': 'et/adult.json'} 
+bng_pbc,0,regression,0.411442,{'_modeljson': 'et/Airlines.json'} +bng_pbc,0,regression,0.400094,{'_modeljson': 'et/Albert.json'} +bng_pbc,0,regression,0.394067,{'_modeljson': 'et/Amazon_employee_access.json'} +bng_pbc,0,regression,0.391695,{'_modeljson': 'et/bng_breastTumor.json'} +bng_pbc,0,regression,0.421267,{'_modeljson': 'et/bng_pbc.json'} +bng_pbc,0,regression,0.361909,{'_modeljson': 'et/car.json'} +bng_pbc,0,regression,0.402332,{'_modeljson': 'et/connect-4.json'} +bng_pbc,0,regression,0.418622,{'_modeljson': 'et/default.json'} +bng_pbc,0,regression,0.388768,{'_modeljson': 'et/dilbert.json'} +bng_pbc,0,regression,0.421152,{'_modeljson': 'et/poker.json'} +car,0,multiclass,-0.0815482,{'_modeljson': 'et/2dplanes.json'} +car,0,multiclass,-0.218552,{'_modeljson': 'et/adult.json'} +car,0,multiclass,-0.0474428,{'_modeljson': 'et/Airlines.json'} +car,0,multiclass,-0.108586,{'_modeljson': 'et/Albert.json'} +car,0,multiclass,-0.218073,{'_modeljson': 'et/Amazon_employee_access.json'} +car,0,multiclass,-0.0397411,{'_modeljson': 'et/bng_breastTumor.json'} +car,0,multiclass,-0.0485655,{'_modeljson': 'et/bng_pbc.json'} +car,0,multiclass,-0.0524496,{'_modeljson': 'et/car.json'} +car,0,multiclass,-0.0690461,{'_modeljson': 'et/connect-4.json'} +car,0,multiclass,-0.111939,{'_modeljson': 'et/default.json'} +car,0,multiclass,-0.218153,{'_modeljson': 'et/dilbert.json'} +car,0,multiclass,-0.0502018,{'_modeljson': 'et/poker.json'} +connect-4,0,multiclass,-0.706448,{'_modeljson': 'et/2dplanes.json'} +connect-4,0,multiclass,-0.54998,{'_modeljson': 'et/adult.json'} +connect-4,0,multiclass,-0.495074,{'_modeljson': 'et/Airlines.json'} +connect-4,0,multiclass,-0.468797,{'_modeljson': 'et/Albert.json'} +connect-4,0,multiclass,-0.528177,{'_modeljson': 'et/Amazon_employee_access.json'} +connect-4,0,multiclass,-0.545043,{'_modeljson': 'et/bng_breastTumor.json'} +connect-4,0,multiclass,-0.57415,{'_modeljson': 'et/bng_pbc.json'} +connect-4,0,multiclass,-0.639965,{'_modeljson': 'et/car.json'} 
+connect-4,0,multiclass,-0.459906,{'_modeljson': 'et/connect-4.json'} +connect-4,0,multiclass,-0.540561,{'_modeljson': 'et/default.json'} +connect-4,0,multiclass,-0.547218,{'_modeljson': 'et/dilbert.json'} +connect-4,0,multiclass,-0.573145,{'_modeljson': 'et/poker.json'} +dilbert,0,multiclass,-0.626964,{'_modeljson': 'et/2dplanes.json'} +dilbert,0,multiclass,-0.230603,{'_modeljson': 'et/adult.json'} +dilbert,0,multiclass,-0.246071,{'_modeljson': 'et/Airlines.json'} +dilbert,0,multiclass,-0.237068,{'_modeljson': 'et/Albert.json'} +dilbert,0,multiclass,-0.230785,{'_modeljson': 'et/Amazon_employee_access.json'} +dilbert,0,multiclass,-0.253409,{'_modeljson': 'et/bng_breastTumor.json'} +dilbert,0,multiclass,-0.247331,{'_modeljson': 'et/bng_pbc.json'} +dilbert,0,multiclass,-0.383859,{'_modeljson': 'et/car.json'} +dilbert,0,multiclass,-0.234819,{'_modeljson': 'et/connect-4.json'} +dilbert,0,multiclass,-0.308227,{'_modeljson': 'et/default.json'} +dilbert,0,multiclass,-0.231163,{'_modeljson': 'et/dilbert.json'} +dilbert,0,multiclass,-0.245383,{'_modeljson': 'et/poker.json'} +Dionis,0,multiclass,-3.354,{'_modeljson': 'et/2dplanes.json'} +Dionis,0,multiclass,-1.56815,{'_modeljson': 'et/adult.json'} +Dionis,0,multiclass,-0.758098,{'_modeljson': 'et/Airlines.json'} +Dionis,0,multiclass,-1.36204,{'_modeljson': 'et/Amazon_employee_access.json'} +Dionis,0,multiclass,-1.40398,{'_modeljson': 'et/bng_breastTumor.json'} +Dionis,0,multiclass,-2.44773,{'_modeljson': 'et/car.json'} +Dionis,0,multiclass,-0.759589,{'_modeljson': 'et/connect-4.json'} +Dionis,0,multiclass,-0.789821,{'_modeljson': 'et/default.json'} +Dionis,0,multiclass,-1.54593,{'_modeljson': 'et/dilbert.json'} +poker,0,regression,0.103608,{'_modeljson': 'et/2dplanes.json'} +poker,0,regression,0.314258,{'_modeljson': 'et/adult.json'} +poker,0,regression,0.531285,{'_modeljson': 'et/Airlines.json'} +poker,0,regression,0.30208,{'_modeljson': 'et/Albert.json'} +poker,0,regression,0.358474,{'_modeljson': 
'et/Amazon_employee_access.json'} +poker,0,regression,0.344292,{'_modeljson': 'et/bng_breastTumor.json'} +poker,0,regression,0.663188,{'_modeljson': 'et/bng_pbc.json'} +poker,0,regression,0.180103,{'_modeljson': 'et/car.json'} +poker,0,regression,0.394291,{'_modeljson': 'et/connect-4.json'} +poker,0,regression,0.753355,{'_modeljson': 'et/default.json'} +poker,0,regression,0.317809,{'_modeljson': 'et/dilbert.json'} +poker,0,regression,0.663812,{'_modeljson': 'et/poker.json'} diff --git a/test/default/lgbm/2dplanes.json b/test/default/lgbm/2dplanes.json new file mode 100644 index 000000000..3d65810fc --- /dev/null +++ b/test/default/lgbm/2dplanes.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 103, "num_leaves": 33, "min_child_samples": 4, "learning_rate": 0.05800185361316003, "log_max_bin": 6, "colsample_bytree": 1.0, "reg_alpha": 1.5987124004961213, "reg_lambda": 10.56445079499673}} \ No newline at end of file diff --git a/test/default/lgbm/APSFailure.json b/test/default/lgbm/APSFailure.json new file mode 100644 index 000000000..f36a73098 --- /dev/null +++ b/test/default/lgbm/APSFailure.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 733, "num_leaves": 11, "min_child_samples": 94, "learning_rate": 0.06276798296942972, "log_max_bin": 6, "colsample_bytree": 0.6341928918435795, "reg_alpha": 0.5811038918218691, "reg_lambda": 43.304997517523944}} \ No newline at end of file diff --git a/test/default/lgbm/Airlines.json b/test/default/lgbm/Airlines.json new file mode 100644 index 000000000..15ec9a038 --- /dev/null +++ b/test/default/lgbm/Airlines.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 2541, "num_leaves": 1667, "min_child_samples": 29, "learning_rate": 0.0016660662914022302, "log_max_bin": 8, "colsample_bytree": 0.5157078343718623, "reg_alpha": 0.045792841240713165, "reg_lambda": 0.0012362651138125363, "FLAML_sample_size": 436899}} \ No newline at end of file diff --git 
a/test/default/lgbm/Albert.json b/test/default/lgbm/Albert.json new file mode 100644 index 000000000..1d8107696 --- /dev/null +++ b/test/default/lgbm/Albert.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 12659, "num_leaves": 566, "min_child_samples": 51, "learning_rate": 0.0017248557932071625, "log_max_bin": 10, "colsample_bytree": 0.35373661752616337, "reg_alpha": 0.004824272162679245, "reg_lambda": 8.51563063056529, "FLAML_sample_size": 344444}} \ No newline at end of file diff --git a/test/default/lgbm/Amazon_employee_access.json b/test/default/lgbm/Amazon_employee_access.json new file mode 100644 index 000000000..39d070454 --- /dev/null +++ b/test/default/lgbm/Amazon_employee_access.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 198, "num_leaves": 6241, "min_child_samples": 3, "learning_rate": 0.003807690748728824, "log_max_bin": 10, "colsample_bytree": 0.3192882305722113, "reg_alpha": 0.024630507311503163, "reg_lambda": 0.06738306675149014}} \ No newline at end of file diff --git a/test/default/lgbm/Dionis.json b/test/default/lgbm/Dionis.json new file mode 100644 index 000000000..b6531056b --- /dev/null +++ b/test/default/lgbm/Dionis.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 362, "num_leaves": 1208, "min_child_samples": 8, "learning_rate": 0.02070742242160566, "log_max_bin": 4, "colsample_bytree": 0.37915528071680865, "reg_alpha": 0.002982599447751338, "reg_lambda": 1.136605174453919, "FLAML_sample_size": 337147}} \ No newline at end of file diff --git a/test/default/lgbm/adult.json b/test/default/lgbm/adult.json new file mode 100644 index 000000000..c3a0d0240 --- /dev/null +++ b/test/default/lgbm/adult.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 11842, "num_leaves": 31, "min_child_samples": 3, "learning_rate": 0.0015861878568503534, "log_max_bin": 8, "colsample_bytree": 0.3814347840573729, "reg_alpha": 0.0009765625, "reg_lambda": 
0.011319689446351965}} \ No newline at end of file diff --git a/test/default/lgbm/bng_breastTumor.json b/test/default/lgbm/bng_breastTumor.json new file mode 100644 index 000000000..e4f48fdc0 --- /dev/null +++ b/test/default/lgbm/bng_breastTumor.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 644, "num_leaves": 40, "min_child_samples": 38, "learning_rate": 0.06007328261566753, "log_max_bin": 5, "colsample_bytree": 0.6950692048656423, "reg_alpha": 0.0009765625, "reg_lambda": 9.849318389111616, "FLAML_sample_size": 94478}} \ No newline at end of file diff --git a/test/default/lgbm/bng_pbc.json b/test/default/lgbm/bng_pbc.json new file mode 100644 index 000000000..d46d25115 --- /dev/null +++ b/test/default/lgbm/bng_pbc.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 27202, "num_leaves": 848, "min_child_samples": 2, "learning_rate": 0.0019296395751528979, "log_max_bin": 5, "colsample_bytree": 0.7328229531785452, "reg_alpha": 6.112225454676263, "reg_lambda": 0.08606162543586986, "FLAML_sample_size": 810000}} \ No newline at end of file diff --git a/test/default/lgbm/car.json b/test/default/lgbm/car.json new file mode 100644 index 000000000..93a463cd7 --- /dev/null +++ b/test/default/lgbm/car.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 311, "num_leaves": 4, "min_child_samples": 5, "learning_rate": 0.5547292134798673, "log_max_bin": 3, "colsample_bytree": 0.9917614238487915, "reg_alpha": 0.0009765625, "reg_lambda": 0.0019177370889840813}} \ No newline at end of file diff --git a/test/default/lgbm/connect-4.json b/test/default/lgbm/connect-4.json new file mode 100644 index 000000000..bd06c8138 --- /dev/null +++ b/test/default/lgbm/connect-4.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 3726, "num_leaves": 155, "min_child_samples": 4, "learning_rate": 0.040941607728296484, "log_max_bin": 5, "colsample_bytree": 0.5326256194627191, "reg_alpha": 0.7408711930398492, 
"reg_lambda": 0.5467731065349226}} \ No newline at end of file diff --git a/test/default/lgbm/default.json b/test/default/lgbm/default.json new file mode 100644 index 000000000..76a6ef5fa --- /dev/null +++ b/test/default/lgbm/default.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {}} \ No newline at end of file diff --git a/test/default/lgbm/dilbert.json b/test/default/lgbm/dilbert.json new file mode 100644 index 000000000..46891b958 --- /dev/null +++ b/test/default/lgbm/dilbert.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 7325, "num_leaves": 15, "min_child_samples": 6, "learning_rate": 0.009932524214971736, "log_max_bin": 6, "colsample_bytree": 0.8592091503131608, "reg_alpha": 0.0009997224940106115, "reg_lambda": 0.04069855891326503}} \ No newline at end of file diff --git a/test/default/lgbm/poker.json b/test/default/lgbm/poker.json new file mode 100644 index 000000000..c38415277 --- /dev/null +++ b/test/default/lgbm/poker.json @@ -0,0 +1 @@ +{"class": "lgbm", "hyperparameters": {"n_estimators": 32767, "num_leaves": 372, "min_child_samples": 4, "learning_rate": 0.03517259015200922, "log_max_bin": 5, "colsample_bytree": 1.0, "reg_alpha": 0.02271142170225636, "reg_lambda": 0.001963791798843179, "FLAML_sample_size": 830258}} \ No newline at end of file diff --git a/test/default/lgbm/results.csv b/test/default/lgbm/results.csv new file mode 100644 index 000000000..e292900b5 --- /dev/null +++ b/test/default/lgbm/results.csv @@ -0,0 +1,167 @@ +task,fold,type,result,params +2dplanes,0,regression,0.946366,{'_modeljson': 'lgbm/2dplanes.json'} +2dplanes,0,regression,0.907774,{'_modeljson': 'lgbm/adult.json'} +2dplanes,0,regression,0.901643,{'_modeljson': 'lgbm/Airlines.json'} +2dplanes,0,regression,0.915098,{'_modeljson': 'lgbm/Albert.json'} +2dplanes,0,regression,0.302328,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +2dplanes,0,regression,0.94523,{'_modeljson': 'lgbm/bng_breastTumor.json'} 
+2dplanes,0,regression,0.945698,{'_modeljson': 'lgbm/bng_pbc.json'} +2dplanes,0,regression,0.946194,{'_modeljson': 'lgbm/car.json'} +2dplanes,0,regression,0.945549,{'_modeljson': 'lgbm/connect-4.json'} +2dplanes,0,regression,0.946232,{'_modeljson': 'lgbm/default.json'} +2dplanes,0,regression,0.945594,{'_modeljson': 'lgbm/dilbert.json'} +2dplanes,0,regression,0.836996,{'_modeljson': 'lgbm/Dionis.json'} +2dplanes,0,regression,0.917152,{'_modeljson': 'lgbm/poker.json'} +adult,0,binary,0.927203,{'_modeljson': 'lgbm/2dplanes.json'} +adult,0,binary,0.932072,{'_modeljson': 'lgbm/adult.json'} +adult,0,binary,0.926563,{'_modeljson': 'lgbm/Airlines.json'} +adult,0,binary,0.928604,{'_modeljson': 'lgbm/Albert.json'} +adult,0,binary,0.911171,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +adult,0,binary,0.930645,{'_modeljson': 'lgbm/bng_breastTumor.json'} +adult,0,binary,0.928603,{'_modeljson': 'lgbm/bng_pbc.json'} +adult,0,binary,0.915825,{'_modeljson': 'lgbm/car.json'} +adult,0,binary,0.919499,{'_modeljson': 'lgbm/connect-4.json'} +adult,0,binary,0.930109,{'_modeljson': 'lgbm/default.json'} +adult,0,binary,0.932453,{'_modeljson': 'lgbm/dilbert.json'} +adult,0,binary,0.921959,{'_modeljson': 'lgbm/Dionis.json'} +adult,0,binary,0.910763,{'_modeljson': 'lgbm/poker.json'} +Airlines,0,binary,0.705404,{'_modeljson': 'lgbm/2dplanes.json'} +Airlines,0,binary,0.714521,{'_modeljson': 'lgbm/adult.json'} +Airlines,0,binary,0.732288,{'_modeljson': 'lgbm/Airlines.json'} +Airlines,0,binary,0.710273,{'_modeljson': 'lgbm/Albert.json'} +Airlines,0,binary,0.707107,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +Airlines,0,binary,0.718682,{'_modeljson': 'lgbm/bng_breastTumor.json'} +Airlines,0,binary,0.724703,{'_modeljson': 'lgbm/bng_pbc.json'} +Airlines,0,binary,0.690574,{'_modeljson': 'lgbm/car.json'} +Airlines,0,binary,0.725808,{'_modeljson': 'lgbm/connect-4.json'} +Airlines,0,binary,0.710419,{'_modeljson': 'lgbm/default.json'} +Airlines,0,binary,0.710419,{'_modeljson': 
'lgbm/default.json'} +Airlines,0,binary,0.718609,{'_modeljson': 'lgbm/dilbert.json'} +Airlines,0,binary,0.716213,{'_modeljson': 'lgbm/Dionis.json'} +Airlines,0,binary,0.654868,{'_modeljson': 'lgbm/poker.json'} +Albert,0,binary,0.744825,{'_modeljson': 'lgbm/2dplanes.json'} +Albert,0,binary,0.758979,{'_modeljson': 'lgbm/adult.json'} +Albert,0,binary,0.758364,{'_modeljson': 'lgbm/Airlines.json'} +Albert,0,binary,0.770923,{'_modeljson': 'lgbm/Albert.json'} +Albert,0,binary,0.745091,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +Albert,0,binary,0.754523,{'_modeljson': 'lgbm/APSFailure.json'} +Albert,0,binary,0.759939,{'_modeljson': 'lgbm/bng_breastTumor.json'} +Albert,0,binary,0.765119,{'_modeljson': 'lgbm/bng_pbc.json'} +Albert,0,binary,0.745067,{'_modeljson': 'lgbm/car.json'} +Albert,0,binary,0.762311,{'_modeljson': 'lgbm/connect-4.json'} +Albert,0,binary,0.753181,{'_modeljson': 'lgbm/default.json'} +Albert,0,binary,0.753181,{'_modeljson': 'lgbm/default.json'} +Albert,0,binary,0.760248,{'_modeljson': 'lgbm/dilbert.json'} +Albert,0,binary,0.758111,{'_modeljson': 'lgbm/Dionis.json'} +Albert,0,binary,0.761768,{'_modeljson': 'lgbm/poker.json'} +Amazon_employee_access,0,binary,0.811238,{'_modeljson': 'lgbm/2dplanes.json'} +Amazon_employee_access,0,binary,0.867285,{'_modeljson': 'lgbm/adult.json'} +Amazon_employee_access,0,binary,0.8888,{'_modeljson': 'lgbm/Airlines.json'} +Amazon_employee_access,0,binary,0.881302,{'_modeljson': 'lgbm/Albert.json'} +Amazon_employee_access,0,binary,0.891085,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +Amazon_employee_access,0,binary,0.816736,{'_modeljson': 'lgbm/APSFailure.json'} +Amazon_employee_access,0,binary,0.861187,{'_modeljson': 'lgbm/bng_breastTumor.json'} +Amazon_employee_access,0,binary,0.848348,{'_modeljson': 'lgbm/bng_pbc.json'} +Amazon_employee_access,0,binary,0.760891,{'_modeljson': 'lgbm/car.json'} +Amazon_employee_access,0,binary,0.872951,{'_modeljson': 'lgbm/connect-4.json'} 
+Amazon_employee_access,0,binary,0.851183,{'_modeljson': 'lgbm/default.json'} +Amazon_employee_access,0,binary,0.851183,{'_modeljson': 'lgbm/default.json'} +Amazon_employee_access,0,binary,0.851173,{'_modeljson': 'lgbm/dilbert.json'} +Amazon_employee_access,0,binary,0.843577,{'_modeljson': 'lgbm/Dionis.json'} +Amazon_employee_access,0,binary,0.866543,{'_modeljson': 'lgbm/poker.json'} +bng_breastTumor,0,regression,0.186246,{'_modeljson': 'lgbm/2dplanes.json'} +bng_breastTumor,0,regression,0.181787,{'_modeljson': 'lgbm/adult.json'} +bng_breastTumor,0,regression,0.177175,{'_modeljson': 'lgbm/Airlines.json'} +bng_breastTumor,0,regression,0.169053,{'_modeljson': 'lgbm/Albert.json'} +bng_breastTumor,0,regression,0.0734972,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +bng_breastTumor,0,regression,0.192189,{'_modeljson': 'lgbm/APSFailure.json'} +bng_breastTumor,0,regression,0.195887,{'_modeljson': 'lgbm/bng_breastTumor.json'} +bng_breastTumor,0,regression,0.144786,{'_modeljson': 'lgbm/bng_pbc.json'} +bng_breastTumor,0,regression,0.168074,{'_modeljson': 'lgbm/car.json'} +bng_breastTumor,0,regression,0.159819,{'_modeljson': 'lgbm/connect-4.json'} +bng_breastTumor,0,regression,0.192813,{'_modeljson': 'lgbm/default.json'} +bng_breastTumor,0,regression,0.192813,{'_modeljson': 'lgbm/default.json'} +bng_breastTumor,0,regression,0.193994,{'_modeljson': 'lgbm/dilbert.json'} +bng_breastTumor,0,regression,0.162977,{'_modeljson': 'lgbm/Dionis.json'} +bng_breastTumor,0,regression,-0.0283641,{'_modeljson': 'lgbm/poker.json'} +bng_pbc,0,regression,0.415569,{'_modeljson': 'lgbm/2dplanes.json'} +bng_pbc,0,regression,0.421659,{'_modeljson': 'lgbm/adult.json'} +bng_pbc,0,regression,0.433399,{'_modeljson': 'lgbm/Airlines.json'} +bng_pbc,0,regression,0.429397,{'_modeljson': 'lgbm/Albert.json'} +bng_pbc,0,regression,0.218693,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +bng_pbc,0,regression,0.426949,{'_modeljson': 'lgbm/APSFailure.json'} +bng_pbc,0,regression,0.444361,{'_modeljson': 
'lgbm/bng_breastTumor.json'} +bng_pbc,0,regression,0.459898,{'_modeljson': 'lgbm/bng_pbc.json'} +bng_pbc,0,regression,0.404274,{'_modeljson': 'lgbm/car.json'} +bng_pbc,0,regression,0.453742,{'_modeljson': 'lgbm/connect-4.json'} +bng_pbc,0,regression,0.425581,{'_modeljson': 'lgbm/default.json'} +bng_pbc,0,regression,0.425581,{'_modeljson': 'lgbm/default.json'} +bng_pbc,0,regression,0.440833,{'_modeljson': 'lgbm/dilbert.json'} +bng_pbc,0,regression,0.42319,{'_modeljson': 'lgbm/Dionis.json'} +bng_pbc,0,regression,0.440263,{'_modeljson': 'lgbm/poker.json'} +car,0,multiclass,-0.126115,{'_modeljson': 'lgbm/2dplanes.json'} +car,0,multiclass,-0.20528,{'_modeljson': 'lgbm/adult.json'} +car,0,multiclass,-0.189212,{'_modeljson': 'lgbm/Airlines.json'} +car,0,multiclass,-0.233147,{'_modeljson': 'lgbm/Albert.json'} +car,0,multiclass,-0.598807,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +car,0,multiclass,-0.119622,{'_modeljson': 'lgbm/APSFailure.json'} +car,0,multiclass,-0.0372956,{'_modeljson': 'lgbm/bng_breastTumor.json'} +car,0,multiclass,-0.179642,{'_modeljson': 'lgbm/bng_pbc.json'} +car,0,multiclass,-0.000121047,{'_modeljson': 'lgbm/car.json'} +car,0,multiclass,-0.050453,{'_modeljson': 'lgbm/connect-4.json'} +car,0,multiclass,-0.00234879,{'_modeljson': 'lgbm/default.json'} +car,0,multiclass,-0.00234879,{'_modeljson': 'lgbm/default.json'} +car,0,multiclass,-0.000295737,{'_modeljson': 'lgbm/dilbert.json'} +car,0,multiclass,-0.297016,{'_modeljson': 'lgbm/Dionis.json'} +car,0,multiclass,-0.00178529,{'_modeljson': 'lgbm/poker.json'} +connect-4,0,multiclass,-0.527657,{'_modeljson': 'lgbm/2dplanes.json'} +connect-4,0,multiclass,-0.462894,{'_modeljson': 'lgbm/adult.json'} +connect-4,0,multiclass,-0.449048,{'_modeljson': 'lgbm/Airlines.json'} +connect-4,0,multiclass,-0.393871,{'_modeljson': 'lgbm/Albert.json'} +connect-4,0,multiclass,-0.73746,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +connect-4,0,multiclass,-0.485399,{'_modeljson': 'lgbm/APSFailure.json'} 
+connect-4,0,multiclass,-0.393378,{'_modeljson': 'lgbm/bng_breastTumor.json'} +connect-4,0,multiclass,-0.388117,{'_modeljson': 'lgbm/bng_pbc.json'} +connect-4,0,multiclass,-0.484577,{'_modeljson': 'lgbm/car.json'} +connect-4,0,multiclass,-0.32741,{'_modeljson': 'lgbm/connect-4.json'} +connect-4,0,multiclass,-0.482328,{'_modeljson': 'lgbm/default.json'} +connect-4,0,multiclass,-0.482328,{'_modeljson': 'lgbm/default.json'} +connect-4,0,multiclass,-0.413426,{'_modeljson': 'lgbm/dilbert.json'} +connect-4,0,multiclass,-0.438676,{'_modeljson': 'lgbm/Dionis.json'} +connect-4,0,multiclass,-0.489035,{'_modeljson': 'lgbm/poker.json'} +dilbert,0,multiclass,-0.134669,{'_modeljson': 'lgbm/2dplanes.json'} +dilbert,0,multiclass,-0.0405039,{'_modeljson': 'lgbm/adult.json'} +dilbert,0,multiclass,-0.0888238,{'_modeljson': 'lgbm/Airlines.json'} +dilbert,0,multiclass,-0.0618876,{'_modeljson': 'lgbm/Albert.json'} +dilbert,0,multiclass,-0.0653412,{'_modeljson': 'lgbm/APSFailure.json'} +dilbert,0,multiclass,-0.0484292,{'_modeljson': 'lgbm/bng_breastTumor.json'} +dilbert,0,multiclass,-0.126248,{'_modeljson': 'lgbm/bng_pbc.json'} +dilbert,0,multiclass,-0.0473867,{'_modeljson': 'lgbm/car.json'} +dilbert,0,multiclass,-0.0759236,{'_modeljson': 'lgbm/connect-4.json'} +dilbert,0,multiclass,-0.0490604,{'_modeljson': 'lgbm/default.json'} +dilbert,0,multiclass,-0.0490604,{'_modeljson': 'lgbm/default.json'} +dilbert,0,multiclass,-0.034108,{'_modeljson': 'lgbm/dilbert.json'} +dilbert,0,multiclass,-0.0661046,{'_modeljson': 'lgbm/Dionis.json'} +dilbert,0,multiclass,-0.0744684,{'_modeljson': 'lgbm/poker.json'} +Dionis,0,multiclass,-0.395452,{'_modeljson': 'lgbm/2dplanes.json'} +Dionis,0,multiclass,-1.40235,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +Dionis,0,multiclass,-0.306241,{'_modeljson': 'lgbm/APSFailure.json'} +Dionis,0,multiclass,-33.7902,{'_modeljson': 'lgbm/car.json'} +Dionis,0,multiclass,-27.9456,{'_modeljson': 'lgbm/default.json'} +Dionis,0,multiclass,-28.095,{'_modeljson': 
'lgbm/default.json'} +Dionis,0,multiclass,-0.318142,{'_modeljson': 'lgbm/Dionis.json'} +poker,0,regression,0.203695,{'_modeljson': 'lgbm/2dplanes.json'} +poker,0,regression,0.424513,{'_modeljson': 'lgbm/adult.json'} +poker,0,regression,0.490528,{'_modeljson': 'lgbm/Airlines.json'} +poker,0,regression,0.767652,{'_modeljson': 'lgbm/Albert.json'} +poker,0,regression,0.0592655,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +poker,0,regression,0.393168,{'_modeljson': 'lgbm/APSFailure.json'} +poker,0,regression,0.614152,{'_modeljson': 'lgbm/bng_breastTumor.json'} +poker,0,regression,0.854134,{'_modeljson': 'lgbm/bng_pbc.json'} +poker,0,regression,0.197075,{'_modeljson': 'lgbm/car.json'} +poker,0,regression,0.879695,{'_modeljson': 'lgbm/connect-4.json'} +poker,0,regression,0.284102,{'_modeljson': 'lgbm/default.json'} +poker,0,regression,0.284102,{'_modeljson': 'lgbm/default.json'} +poker,0,regression,0.433648,{'_modeljson': 'lgbm/dilbert.json'} +poker,0,regression,0.657666,{'_modeljson': 'lgbm/Dionis.json'} +poker,0,regression,0.940835,{'_modeljson': 'lgbm/poker.json'} diff --git a/test/default/rf/2dplanes.json b/test/default/rf/2dplanes.json new file mode 100644 index 000000000..0ae89789b --- /dev/null +++ b/test/default/rf/2dplanes.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 38, "max_features": 1.0, "max_leaves": 58}} \ No newline at end of file diff --git a/test/default/rf/Airlines.json b/test/default/rf/Airlines.json new file mode 100644 index 000000000..d87f6a21a --- /dev/null +++ b/test/default/rf/Airlines.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 418, "max_features": 0.5303485415288045, "max_leaves": 6452, "criterion": "entropy", "FLAML_sample_size": 436899}} \ No newline at end of file diff --git a/test/default/rf/Albert.json b/test/default/rf/Albert.json new file mode 100644 index 000000000..74ee8c516 --- /dev/null +++ b/test/default/rf/Albert.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": 
{"n_estimators": 2047, "max_features": 0.10091610074262287, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 344444}} \ No newline at end of file diff --git a/test/default/rf/Amazon_employee_access.json b/test/default/rf/Amazon_employee_access.json new file mode 100644 index 000000000..7983e1e03 --- /dev/null +++ b/test/default/rf/Amazon_employee_access.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 501, "max_features": 0.24484242524861066, "max_leaves": 1156, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/rf/Dionis.json b/test/default/rf/Dionis.json new file mode 100644 index 000000000..2159b581b --- /dev/null +++ b/test/default/rf/Dionis.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 510, "max_features": 0.12094682590862652, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 337147}} \ No newline at end of file diff --git a/test/default/rf/adult.json b/test/default/rf/adult.json new file mode 100644 index 000000000..daba27981 --- /dev/null +++ b/test/default/rf/adult.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 1212, "max_features": 0.3129111648657632, "max_leaves": 779, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/rf/bng_breastTumor.json b/test/default/rf/bng_breastTumor.json new file mode 100644 index 000000000..5fa43ab96 --- /dev/null +++ b/test/default/rf/bng_breastTumor.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 288, "max_features": 0.6436380990499977, "max_leaves": 1823, "FLAML_sample_size": 94478}} \ No newline at end of file diff --git a/test/default/rf/bng_pbc.json b/test/default/rf/bng_pbc.json new file mode 100644 index 000000000..fd537b20e --- /dev/null +++ b/test/default/rf/bng_pbc.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 0.3158919059422144, "max_leaves": 32767, "FLAML_sample_size": 810000}} \ No 
newline at end of file diff --git a/test/default/rf/car.json b/test/default/rf/car.json new file mode 100644 index 000000000..99d6eb824 --- /dev/null +++ b/test/default/rf/car.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 792, "max_features": 1.0, "max_leaves": 67, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/rf/connect-4.json b/test/default/rf/connect-4.json new file mode 100644 index 000000000..f045a9984 --- /dev/null +++ b/test/default/rf/connect-4.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 1907, "max_features": 0.3728618389498168, "max_leaves": 11731, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/rf/default.json b/test/default/rf/default.json new file mode 100644 index 000000000..527a662a7 --- /dev/null +++ b/test/default/rf/default.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {}} \ No newline at end of file diff --git a/test/default/rf/dilbert.json b/test/default/rf/dilbert.json new file mode 100644 index 000000000..2c822d77f --- /dev/null +++ b/test/default/rf/dilbert.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 350, "max_features": 0.748250835121453, "max_leaves": 433, "criterion": "entropy"}} \ No newline at end of file diff --git a/test/default/rf/poker.json b/test/default/rf/poker.json new file mode 100644 index 000000000..ab6fd2d4a --- /dev/null +++ b/test/default/rf/poker.json @@ -0,0 +1 @@ +{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 830258}} \ No newline at end of file diff --git a/test/default/rf/results.csv b/test/default/rf/results.csv new file mode 100644 index 000000000..3737ec409 --- /dev/null +++ b/test/default/rf/results.csv @@ -0,0 +1,145 @@ +task,fold,type,result,metric,params,info +2dplanes,0,regression,0.946488,r2,{'_modeljson': 'rf/2dplanes.json'}, +2dplanes,0,regression,0.936392,r2,{'_modeljson': 
'rf/adult.json'}, +2dplanes,0,regression,0.940486,r2,{'_modeljson': 'rf/Airlines.json'}, +2dplanes,0,regression,0.924025,r2,{'_modeljson': 'rf/Albert.json'}, +2dplanes,0,regression,0.911362,r2,{'_modeljson': 'rf/Amazon_employee_access.json'}, +2dplanes,0,regression,0.944353,r2,{'_modeljson': 'rf/bng_breastTumor.json'}, +2dplanes,0,regression,0.932343,r2,{'_modeljson': 'rf/bng_pbc.json'}, +2dplanes,0,regression,0.946423,r2,{'_modeljson': 'rf/car.json'}, +2dplanes,0,regression,0.937309,r2,{'_modeljson': 'rf/connect-4.json'}, +2dplanes,0,regression,0.930126,r2,{'_modeljson': 'rf/default.json'}, +2dplanes,0,regression,0.945707,r2,{'_modeljson': 'rf/dilbert.json'}, +2dplanes,0,regression,0.923313,r2,{'_modeljson': 'rf/Dionis.json'}, +2dplanes,0,regression,0.930579,r2,{'_modeljson': 'rf/poker.json'}, +adult,0,binary,0.912946,auc,{'_modeljson': 'rf/2dplanes.json'}, +adult,0,binary,0.91978,auc,{'_modeljson': 'rf/adult.json'}, +adult,0,binary,0.910127,auc,{'_modeljson': 'rf/Airlines.json'}, +adult,0,binary,0.910553,auc,{'_modeljson': 'rf/Albert.json'}, +adult,0,binary,0.919662,auc,{'_modeljson': 'rf/Amazon_employee_access.json'}, +adult,0,binary,0.915769,auc,{'_modeljson': 'rf/bng_breastTumor.json'}, +adult,0,binary,0.91003,auc,{'_modeljson': 'rf/bng_pbc.json'}, +adult,0,binary,0.914697,auc,{'_modeljson': 'rf/car.json'}, +adult,0,binary,0.911118,auc,{'_modeljson': 'rf/connect-4.json'}, +adult,0,binary,0.907368,auc,{'_modeljson': 'rf/default.json'}, +adult,0,binary,0.919216,auc,{'_modeljson': 'rf/dilbert.json'}, +adult,0,binary,0.910528,auc,{'_modeljson': 'rf/Dionis.json'}, +adult,0,binary,0.904508,auc,{'_modeljson': 'rf/poker.json'}, +Airlines,0,binary,0.687817,auc,{'_modeljson': 'rf/2dplanes.json'}, +Airlines,0,binary,0.712804,auc,{'_modeljson': 'rf/adult.json'}, +Airlines,0,binary,0.727357,auc,{'_modeljson': 'rf/Airlines.json'}, +Airlines,0,binary,0.705541,auc,{'_modeljson': 'rf/Albert.json'}, +Airlines,0,binary,0.71012,auc,{'_modeljson': 
'rf/Amazon_employee_access.json'}, +Airlines,0,binary,0.722532,auc,{'_modeljson': 'rf/bng_breastTumor.json'}, +Airlines,0,binary,0.709287,auc,{'_modeljson': 'rf/bng_pbc.json'}, +Airlines,0,binary,0.688678,auc,{'_modeljson': 'rf/car.json'}, +Airlines,0,binary,0.725288,auc,{'_modeljson': 'rf/connect-4.json'}, +Airlines,0,binary,0.657276,auc,{'_modeljson': 'rf/default.json'}, +Airlines,0,binary,0.708515,auc,{'_modeljson': 'rf/dilbert.json'}, +Airlines,0,binary,0.705826,auc,{'_modeljson': 'rf/Dionis.json'}, +Airlines,0,binary,0.699484,auc,{'_modeljson': 'rf/poker.json'}, +Albert,0,binary,0.712348,auc,{'_modeljson': 'rf/2dplanes.json'}, +Albert,0,binary,0.72836,auc,{'_modeljson': 'rf/adult.json'}, +Albert,0,binary,0.734105,auc,{'_modeljson': 'rf/Airlines.json'}, +Albert,0,binary,0.737119,auc,{'_modeljson': 'rf/Albert.json'}, +Albert,0,binary,0.729216,auc,{'_modeljson': 'rf/Amazon_employee_access.json'}, +Albert,0,binary,0.731546,auc,{'_modeljson': 'rf/bng_breastTumor.json'}, +Albert,0,binary,0.734847,auc,{'_modeljson': 'rf/bng_pbc.json'}, +Albert,0,binary,0.713965,auc,{'_modeljson': 'rf/car.json'}, +Albert,0,binary,0.735372,auc,{'_modeljson': 'rf/connect-4.json'}, +Albert,0,binary,0.728232,auc,{'_modeljson': 'rf/default.json'}, +Albert,0,binary,0.726823,auc,{'_modeljson': 'rf/dilbert.json'}, +Albert,0,binary,0.735994,auc,{'_modeljson': 'rf/Dionis.json'}, +Amazon_employee_access,0,binary,0.728779,auc,{'_modeljson': 'rf/2dplanes.json'}, +Amazon_employee_access,0,binary,0.87801,auc,{'_modeljson': 'rf/adult.json'}, +Amazon_employee_access,0,binary,0.88085,auc,{'_modeljson': 'rf/Airlines.json'}, +Amazon_employee_access,0,binary,0.881869,auc,{'_modeljson': 'rf/Albert.json'}, +Amazon_employee_access,0,binary,0.881463,auc,{'_modeljson': 'rf/Amazon_employee_access.json'}, +Amazon_employee_access,0,binary,0.882723,auc,{'_modeljson': 'rf/bng_breastTumor.json'}, +Amazon_employee_access,0,binary,0.88299,auc,{'_modeljson': 'rf/bng_pbc.json'}, 
+Amazon_employee_access,0,binary,0.808575,auc,{'_modeljson': 'rf/car.json'}, +Amazon_employee_access,0,binary,0.881209,auc,{'_modeljson': 'rf/connect-4.json'}, +Amazon_employee_access,0,binary,0.877507,auc,{'_modeljson': 'rf/default.json'}, +Amazon_employee_access,0,binary,0.875146,auc,{'_modeljson': 'rf/dilbert.json'}, +Amazon_employee_access,0,binary,0.878121,auc,{'_modeljson': 'rf/Dionis.json'}, +Amazon_employee_access,0,binary,0.886312,auc,{'_modeljson': 'rf/poker.json'}, +bng_breastTumor,0,regression,0.153657,r2,{'_modeljson': 'rf/2dplanes.json'}, +bng_breastTumor,0,regression,0.156403,r2,{'_modeljson': 'rf/adult.json'}, +bng_breastTumor,0,regression,0.174569,r2,{'_modeljson': 'rf/Airlines.json'}, +bng_breastTumor,0,regression,0.0441869,r2,{'_modeljson': 'rf/Albert.json'}, +bng_breastTumor,0,regression,0.157992,r2,{'_modeljson': 'rf/Amazon_employee_access.json'}, +bng_breastTumor,0,regression,0.186635,r2,{'_modeljson': 'rf/bng_breastTumor.json'}, +bng_breastTumor,0,regression,0.0527547,r2,{'_modeljson': 'rf/bng_pbc.json'}, +bng_breastTumor,0,regression,0.158852,r2,{'_modeljson': 'rf/car.json'}, +bng_breastTumor,0,regression,0.150611,r2,{'_modeljson': 'rf/connect-4.json'}, +bng_breastTumor,0,regression,-0.02142,r2,{'_modeljson': 'rf/default.json'}, +bng_breastTumor,0,regression,0.183562,r2,{'_modeljson': 'rf/dilbert.json'}, +bng_breastTumor,0,regression,0.0414589,r2,{'_modeljson': 'rf/Dionis.json'}, +bng_breastTumor,0,regression,0.00390625,r2,{'_modeljson': 'rf/poker.json'}, +bng_pbc,0,regression,0.344043,r2,{'_modeljson': 'rf/2dplanes.json'}, +bng_pbc,0,regression,0.402376,r2,{'_modeljson': 'rf/adult.json'}, +bng_pbc,0,regression,0.423262,r2,{'_modeljson': 'rf/Airlines.json'}, +bng_pbc,0,regression,0.386142,r2,{'_modeljson': 'rf/Albert.json'}, +bng_pbc,0,regression,0.403857,r2,{'_modeljson': 'rf/Amazon_employee_access.json'}, +bng_pbc,0,regression,0.413944,r2,{'_modeljson': 'rf/bng_breastTumor.json'}, +bng_pbc,0,regression,0.43206,r2,{'_modeljson': 
'rf/bng_pbc.json'}, +bng_pbc,0,regression,0.348594,r2,{'_modeljson': 'rf/car.json'}, +bng_pbc,0,regression,0.427588,r2,{'_modeljson': 'rf/connect-4.json'}, +bng_pbc,0,regression,0.415337,r2,{'_modeljson': 'rf/default.json'}, +bng_pbc,0,regression,0.393936,r2,{'_modeljson': 'rf/dilbert.json'}, +bng_pbc,0,regression,0.415246,r2,{'_modeljson': 'rf/Dionis.json'}, +car,0,multiclass,-0.0575382,neg_logloss,{'_modeljson': 'rf/2dplanes.json'}, +car,0,multiclass,-0.155878,neg_logloss,{'_modeljson': 'rf/adult.json'}, +car,0,multiclass,-0.0691041,neg_logloss,{'_modeljson': 'rf/Airlines.json'}, +car,0,multiclass,-0.156607,neg_logloss,{'_modeljson': 'rf/Albert.json'}, +car,0,multiclass,-0.156968,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'}, +car,0,multiclass,-0.0692317,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'}, +car,0,multiclass,-0.159856,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'}, +car,0,multiclass,-0.046769,neg_logloss,{'_modeljson': 'rf/car.json'}, +car,0,multiclass,-0.0981933,neg_logloss,{'_modeljson': 'rf/connect-4.json'}, +car,0,multiclass,-0.0971712,neg_logloss,{'_modeljson': 'rf/default.json'}, +car,0,multiclass,-0.0564843,neg_logloss,{'_modeljson': 'rf/dilbert.json'}, +car,0,multiclass,-0.157771,neg_logloss,{'_modeljson': 'rf/Dionis.json'}, +car,0,multiclass,-0.0511764,neg_logloss,{'_modeljson': 'rf/poker.json'}, +connect-4,0,multiclass,-0.725888,neg_logloss,{'_modeljson': 'rf/2dplanes.json'}, +connect-4,0,multiclass,-0.576056,neg_logloss,{'_modeljson': 'rf/adult.json'}, +connect-4,0,multiclass,-0.48458,neg_logloss,{'_modeljson': 'rf/Airlines.json'}, +connect-4,0,multiclass,-0.505598,neg_logloss,{'_modeljson': 'rf/Albert.json'}, +connect-4,0,multiclass,-0.568184,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'}, +connect-4,0,multiclass,-0.537511,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'}, +connect-4,0,multiclass,-0.479022,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'}, 
+connect-4,0,multiclass,-0.713123,neg_logloss,{'_modeljson': 'rf/car.json'}, +connect-4,0,multiclass,-0.475306,neg_logloss,{'_modeljson': 'rf/connect-4.json'}, +connect-4,0,multiclass,-0.518061,neg_logloss,{'_modeljson': 'rf/default.json'}, +connect-4,0,multiclass,-0.599112,neg_logloss,{'_modeljson': 'rf/dilbert.json'}, +connect-4,0,multiclass,-0.503642,neg_logloss,{'_modeljson': 'rf/Dionis.json'}, +connect-4,0,multiclass,-0.57852,neg_logloss,{'_modeljson': 'rf/poker.json'}, +dilbert,0,multiclass,-0.557959,neg_logloss,{'_modeljson': 'rf/2dplanes.json'}, +dilbert,0,multiclass,-0.294462,neg_logloss,{'_modeljson': 'rf/adult.json'}, +dilbert,0,multiclass,-0.293928,neg_logloss,{'_modeljson': 'rf/Airlines.json'}, +dilbert,0,multiclass,-0.299661,neg_logloss,{'_modeljson': 'rf/Albert.json'}, +dilbert,0,multiclass,-0.294668,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'}, +dilbert,0,multiclass,-0.314706,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'}, +dilbert,0,multiclass,-0.313807,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'}, +dilbert,0,multiclass,-0.51482,neg_logloss,{'_modeljson': 'rf/car.json'}, +dilbert,0,multiclass,-0.293982,neg_logloss,{'_modeljson': 'rf/connect-4.json'}, +dilbert,0,multiclass,-0.343209,neg_logloss,{'_modeljson': 'rf/default.json'}, +dilbert,0,multiclass,-0.2945,neg_logloss,{'_modeljson': 'rf/dilbert.json'}, +dilbert,0,multiclass,-0.298305,neg_logloss,{'_modeljson': 'rf/Dionis.json'}, +Dionis,0,multiclass,-3.55264,neg_logloss,{'_modeljson': 'rf/2dplanes.json'}, +Dionis,0,multiclass,-1.07117,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'}, +Dionis,0,multiclass,-0.784388,neg_logloss,{'_modeljson': 'rf/default.json'}, +Dionis,0,multiclass,-0.580332,neg_logloss,{'_modeljson': 'rf/Dionis.json'}, +poker,0,regression,0.125176,r2,{'_modeljson': 'rf/2dplanes.json'}, +poker,0,regression,0.148019,r2,{'_modeljson': 'rf/adult.json'}, +poker,0,regression,0.322507,r2,{'_modeljson': 'rf/Airlines.json'}, 
+poker,0,regression,0.172264,r2,{'_modeljson': 'rf/Albert.json'}, +poker,0,regression,0.113673,r2,{'_modeljson': 'rf/Amazon_employee_access.json'}, +poker,0,regression,0.243427,r2,{'_modeljson': 'rf/bng_breastTumor.json'}, +poker,0,regression,0.379662,r2,{'_modeljson': 'rf/bng_pbc.json'}, +poker,0,regression,0.133342,r2,{'_modeljson': 'rf/car.json'}, +poker,0,regression,0.296597,r2,{'_modeljson': 'rf/connect-4.json'}, +poker,0,regression,0.608532,r2,{'_modeljson': 'rf/default.json'}, +poker,0,regression,0.192625,r2,{'_modeljson': 'rf/dilbert.json'}, +poker,0,regression,0.172139,r2,{'_modeljson': 'rf/Dionis.json'}, +poker,0,regression,0.528869,r2,{'_modeljson': 'rf/poker.json'}, diff --git a/test/default/test_defaults.py b/test/default/test_defaults.py new file mode 100644 index 000000000..57304d1c5 --- /dev/null +++ b/test/default/test_defaults.py @@ -0,0 +1,222 @@ +import sys +from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer +from sklearn.model_selection import train_test_split +import pandas as pd +from flaml import AutoML +from flaml.default import ( + portfolio, + regret, + preprocess_and_suggest_hyperparams, + suggest_hyperparams, + suggest_learner, +) + + +def test_build_portfolio(path="test/default", strategy="greedy"): + sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split() + portfolio.main() + sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split() + portfolio.main() + sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split() + portfolio.main() + + +def 
test_greedy_feedback(path="test/default", strategy="greedy-feedback"): + # sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split() + # portfolio.main() + # sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split() + # portfolio.main() + sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm --strategy {strategy}".split() + portfolio.main() + + +def test_iris(as_frame=True): + automl = AutoML() + automl_settings = { + "time_budget": 2, + "metric": "accuracy", + "task": "classification", + "log_file_name": "test/iris.log", + "n_jobs": 1, + "starting_points": "data", + } + X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame) + automl.fit(X_train, y_train, **automl_settings) + automl_settings["starting_points"] = "data:test/default" + automl.fit(X_train, y_train, **automl_settings) + + +def test_housing(as_frame=True): + automl = AutoML() + automl_settings = { + "time_budget": 2, + "task": "regression", + "estimator_list": ["xgboost", "lgbm"], + "log_file_name": "test/housing.log", + "n_jobs": 1, + "starting_points": "data", + "max_iter": 0, + } + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=as_frame) + automl.fit(X_train, y_train, **automl_settings) + + +def test_regret(): + sys.argv = "regret.py --result_csv test/default/lgbm/results.csv --task_type binary --output test/default/lgbm/binary_regret.csv".split() + regret.main() + + +def test_suggest_classification(): + location = "test/default" + X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True) + suggested = suggest_hyperparams( + "classification", X_train, y_train, "lgbm", location=location + ) + 
print(suggested) + suggested = preprocess_and_suggest_hyperparams( + "classification", X_train, y_train, "xgboost", location=location + ) + print(suggested) + suggested = suggest_hyperparams( + "classification", X_train, y_train, "xgb_limitdepth", location=location + ) + print(suggested) + + X, y = load_iris(return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.33, random_state=42 + ) + ( + hyperparams, + estimator_class, + X, + y, + feature_transformer, + label_transformer, + ) = preprocess_and_suggest_hyperparams( + "classification", X_train, y_train, "lgbm", location=location + ) + model = estimator_class(**hyperparams) # estimator_class is LGBMClassifier + model.fit(X, y) + X_test = feature_transformer.transform(X_test) + y_pred = label_transformer.inverse_transform( + pd.Series(model.predict(X_test).astype(int)) + ) + print(y_pred) + suggested = suggest_hyperparams( + "classification", X_train, y_train, "xgboost", location=location + ) + print(suggested) + suggested = preprocess_and_suggest_hyperparams( + "classification", X_train, y_train, "xgb_limitdepth", location=location + ) + print(suggested) + suggested = suggest_hyperparams( + "classification", X_train, y_train, "xgb_limitdepth", location=location + ) + suggested = suggest_learner( + "classification", + X_train, + y_train, + estimator_list=["xgboost", "xgb_limitdepth"], + location=location, + ) + print(suggested) + + +def test_suggest_regression(): + location = "test/default" + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + suggested = suggest_hyperparams( + "regression", X_train, y_train, "lgbm", location=location + ) + print(suggested) + suggested = preprocess_and_suggest_hyperparams( + "regression", X_train, y_train, "xgboost", location=location + ) + print(suggested) + suggested = suggest_hyperparams( + "regression", X_train, y_train, "xgb_limitdepth", location=location + ) + print(suggested) + suggested = 
suggest_learner("regression", X_train, y_train, location=location) + print(suggested) + + +def test_rf(): + from flaml.default.estimator import RandomForestRegressor, RandomForestClassifier + + X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True) + rf = RandomForestClassifier() + rf.fit(X_train[:100], y_train[:100]) + rf.predict(X_train) + rf.predict_proba(X_train) + print(rf) + + location = "test/default" + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + rf = RandomForestRegressor(default_location=location) + rf.fit(X_train[:100], y_train[:100]) + rf.predict(X_train) + print(rf) + + +def test_extratrees(): + from flaml.default.estimator import ExtraTreesRegressor, ExtraTreesClassifier + + X_train, y_train = load_iris(return_X_y=True, as_frame=True) + classifier = ExtraTreesClassifier() + classifier.fit(X_train[:100], y_train[:100]) + classifier.predict(X_train) + classifier.predict_proba(X_train) + print(classifier) + + location = "test/default" + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + regressor = ExtraTreesRegressor(default_location=location) + regressor.fit(X_train[:100], y_train[:100]) + regressor.predict(X_train) + print(regressor) + + +def test_lgbm(): + from flaml.default.estimator import LGBMRegressor, LGBMClassifier + + X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True) + classifier = LGBMClassifier(n_jobs=1) + classifier.fit(X_train, y_train) + classifier.predict(X_train, pred_contrib=True) + classifier.predict_proba(X_train) + print(classifier.get_params()) + print(classifier) + print(classifier.classes_) + + location = "test/default" + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + regressor = LGBMRegressor(default_location=location) + regressor.fit(X_train, y_train) + regressor.predict(X_train) + print(regressor) + + +def test_xgboost(): + from flaml.default.estimator import XGBRegressor, XGBClassifier + + X_train, 
y_train = load_breast_cancer(return_X_y=True, as_frame=True) + classifier = XGBClassifier(max_depth=0) + classifier.fit(X_train[:100], y_train[:100]) + classifier.predict(X_train) + classifier.predict_proba(X_train) + print(classifier) + print(classifier.classes_) + + location = "test/default" + X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True) + regressor = XGBRegressor(default_location=location) + regressor.fit(X_train[:100], y_train[:100]) + regressor.predict(X_train) + print(regressor) + + +if __name__ == "__main__": + test_build_portfolio("flaml/default") diff --git a/test/default/xgb_limitdepth/2dplanes.json b/test/default/xgb_limitdepth/2dplanes.json new file mode 100644 index 000000000..98fd90de0 --- /dev/null +++ b/test/default/xgb_limitdepth/2dplanes.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 2704, "max_depth": 2, "min_child_weight": 0.23751738294732322, "learning_rate": 0.019828117294812268, "subsample": 0.8798706041292946, "colsample_bylevel": 0.978891799553329, "colsample_bytree": 1.0, "reg_alpha": 0.3023181744217667, "reg_lambda": 101.10719177747677}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/Airlines.json b/test/default/xgb_limitdepth/Airlines.json new file mode 100644 index 000000000..037f27445 --- /dev/null +++ b/test/default/xgb_limitdepth/Airlines.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3573, "max_depth": 13, "min_child_weight": 2.921657581984971, "learning_rate": 0.00699976723859477, "subsample": 0.6110504706508572, "colsample_bylevel": 0.9998661537469163, "colsample_bytree": 0.5457693412489456, "reg_alpha": 0.05315763138176945, "reg_lambda": 23.067599600958623, "FLAML_sample_size": 436899}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/Amazon_employee_access.json b/test/default/xgb_limitdepth/Amazon_employee_access.json new file mode 100644 index 000000000..4c7798552 --- /dev/null +++ 
b/test/default/xgb_limitdepth/Amazon_employee_access.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3526, "max_depth": 13, "min_child_weight": 0.0994486725676356, "learning_rate": 0.0009765625, "subsample": 0.46123759274652554, "colsample_bylevel": 1.0, "colsample_bytree": 0.4498813776397717, "reg_alpha": 0.002599398546499414, "reg_lambda": 0.028336396854402753}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/adult.json b/test/default/xgb_limitdepth/adult.json new file mode 100644 index 000000000..ad1e7d9b7 --- /dev/null +++ b/test/default/xgb_limitdepth/adult.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 5457, "max_depth": 6, "min_child_weight": 0.19978269031877885, "learning_rate": 0.003906732665632749, "subsample": 0.8207785234496902, "colsample_bylevel": 0.8438751931476698, "colsample_bytree": 0.42202862997585794, "reg_alpha": 0.017372558844968737, "reg_lambda": 0.03977802121721031}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/bng_breastTumor.json b/test/default/xgb_limitdepth/bng_breastTumor.json new file mode 100644 index 000000000..328f82a5e --- /dev/null +++ b/test/default/xgb_limitdepth/bng_breastTumor.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 7782, "max_depth": 7, "min_child_weight": 0.3794874452608909, "learning_rate": 0.006733035771172325, "subsample": 1.0, "colsample_bylevel": 1.0, "colsample_bytree": 0.5611305922560855, "reg_alpha": 8.203853065625196, "reg_lambda": 56.48543538808782, "FLAML_sample_size": 94478}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/bng_pbc.json b/test/default/xgb_limitdepth/bng_pbc.json new file mode 100644 index 000000000..3495e1ad4 --- /dev/null +++ b/test/default/xgb_limitdepth/bng_pbc.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 1013, "max_depth": 15, "min_child_weight": 57.33124114425335, 
"learning_rate": 0.009706354607542536, "subsample": 1.0, "colsample_bylevel": 0.7925997002174675, "colsample_bytree": 0.874062117666267, "reg_alpha": 0.7965442116152655, "reg_lambda": 2.769937488341342, "FLAML_sample_size": 810000}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/car.json b/test/default/xgb_limitdepth/car.json new file mode 100644 index 000000000..af7a7becc --- /dev/null +++ b/test/default/xgb_limitdepth/car.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 624, "max_depth": 3, "min_child_weight": 0.0017043575728019624, "learning_rate": 0.8481863978692453, "subsample": 0.9897901748446495, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0009765625, "reg_lambda": 0.008686469265798288}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/connect-4.json b/test/default/xgb_limitdepth/connect-4.json new file mode 100644 index 000000000..bdcf5a28b --- /dev/null +++ b/test/default/xgb_limitdepth/connect-4.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 1499, "max_depth": 11, "min_child_weight": 0.07563529776156448, "learning_rate": 0.039042609221240955, "subsample": 0.7832981935783824, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0009765625, "reg_lambda": 23.513066752844153}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/default.json b/test/default/xgb_limitdepth/default.json new file mode 100644 index 000000000..31a35648d --- /dev/null +++ b/test/default/xgb_limitdepth/default.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/dilbert.json b/test/default/xgb_limitdepth/dilbert.json new file mode 100644 index 000000000..200ca8faa --- /dev/null +++ b/test/default/xgb_limitdepth/dilbert.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 405, "max_depth": 4, 
"min_child_weight": 0.2264977130755997, "learning_rate": 0.3390883186947167, "subsample": 0.8078627200173096, "colsample_bylevel": 0.8570282862730856, "colsample_bytree": 0.8280063772581445, "reg_alpha": 0.007634576038353066, "reg_lambda": 1.7101180066063097}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/poker.json b/test/default/xgb_limitdepth/poker.json new file mode 100644 index 000000000..85b183afc --- /dev/null +++ b/test/default/xgb_limitdepth/poker.json @@ -0,0 +1 @@ +{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3234, "max_depth": 13, "min_child_weight": 0.07784911437942721, "learning_rate": 0.0565426521738442, "subsample": 1.0, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.007928962402687697, "reg_lambda": 3.881249823648859, "FLAML_sample_size": 830258}} \ No newline at end of file diff --git a/test/default/xgb_limitdepth/results.csv b/test/default/xgb_limitdepth/results.csv new file mode 100644 index 000000000..a78278503 --- /dev/null +++ b/test/default/xgb_limitdepth/results.csv @@ -0,0 +1,116 @@ +task,fold,type,result,params +2dplanes,0,regression,0.946567,{'_modeljson': 'xgblimit/2dplanes.json'} +2dplanes,0,regression,0.94503,{'_modeljson': 'xgblimit/adult.json'} +2dplanes,0,regression,0.945074,{'_modeljson': 'xgblimit/Airlines.json'} +2dplanes,0,regression,0.806694,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +2dplanes,0,regression,0.945799,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +2dplanes,0,regression,0.944103,{'_modeljson': 'xgblimit/bng_pbc.json'} +2dplanes,0,regression,0.945327,{'_modeljson': 'xgblimit/car.json'} +2dplanes,0,regression,0.923926,{'_modeljson': 'xgblimit/connect-4.json'} +2dplanes,0,regression,0.944454,{'_modeljson': 'xgblimit/default.json'} +2dplanes,0,regression,0.945212,{'_modeljson': 'xgblimit/dilbert.json'} +2dplanes,0,regression,0.910852,{'_modeljson': 'xgblimit/poker.json'} +adult,0,binary,0.923082,{'_modeljson': 'xgblimit/2dplanes.json'} 
+adult,0,binary,0.932355,{'_modeljson': 'xgblimit/adult.json'} +adult,0,binary,0.928373,{'_modeljson': 'xgblimit/Airlines.json'} +adult,0,binary,0.927574,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +adult,0,binary,0.929427,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +adult,0,binary,0.92204,{'_modeljson': 'xgblimit/bng_pbc.json'} +adult,0,binary,0.721115,{'_modeljson': 'xgblimit/car.json'} +adult,0,binary,0.921465,{'_modeljson': 'xgblimit/connect-4.json'} +adult,0,binary,0.931234,{'_modeljson': 'xgblimit/default.json'} +adult,0,binary,0.927801,{'_modeljson': 'xgblimit/dilbert.json'} +adult,0,binary,0.916878,{'_modeljson': 'xgblimit/poker.json'} +Airlines,0,binary,0.699604,{'_modeljson': 'xgblimit/2dplanes.json'} +Airlines,0,binary,0.711053,{'_modeljson': 'xgblimit/adult.json'} +Airlines,0,binary,0.732443,{'_modeljson': 'xgblimit/Airlines.json'} +Airlines,0,binary,0.72875,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +Airlines,0,binary,0.725056,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +Airlines,0,binary,0.730476,{'_modeljson': 'xgblimit/bng_pbc.json'} +Airlines,0,binary,0.71788,{'_modeljson': 'xgblimit/car.json'} +Airlines,0,binary,0.72604,{'_modeljson': 'xgblimit/connect-4.json'} +Airlines,0,binary,0.719845,{'_modeljson': 'xgblimit/default.json'} +Airlines,0,binary,0.719302,{'_modeljson': 'xgblimit/dilbert.json'} +Airlines,0,binary,0.684382,{'_modeljson': 'xgblimit/poker.json'} +Albert,0,binary,0.743682,{'_modeljson': 'xgblimit/2dplanes.json'} +Albert,0,binary,0.759246,{'_modeljson': 'xgblimit/adult.json'} +Albert,0,binary,0.766177,{'_modeljson': 'xgblimit/Airlines.json'} +Albert,0,binary,0.74969,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +Albert,0,binary,0.766961,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +Albert,0,binary,0.764534,{'_modeljson': 'xgblimit/bng_pbc.json'} +Albert,0,binary,0.753311,{'_modeljson': 'xgblimit/car.json'} +Albert,0,binary,0.765229,{'_modeljson': 'xgblimit/connect-4.json'} 
+Albert,0,binary,0.757802,{'_modeljson': 'xgblimit/default.json'} +Albert,0,binary,0.7596,{'_modeljson': 'xgblimit/dilbert.json'} +Albert,0,binary,0.761456,{'_modeljson': 'xgblimit/poker.json'} +Amazon_employee_access,0,binary,0.759779,{'_modeljson': 'xgblimit/2dplanes.json'} +Amazon_employee_access,0,binary,0.876747,{'_modeljson': 'xgblimit/adult.json'} +Amazon_employee_access,0,binary,0.864954,{'_modeljson': 'xgblimit/Airlines.json'} +Amazon_employee_access,0,binary,0.894651,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +Amazon_employee_access,0,binary,0.845645,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +Amazon_employee_access,0,binary,0.789099,{'_modeljson': 'xgblimit/bng_pbc.json'} +Amazon_employee_access,0,binary,0.550859,{'_modeljson': 'xgblimit/car.json'} +Amazon_employee_access,0,binary,0.870599,{'_modeljson': 'xgblimit/connect-4.json'} +Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgblimit/default.json'} +Amazon_employee_access,0,binary,0.86385,{'_modeljson': 'xgblimit/dilbert.json'} +Amazon_employee_access,0,binary,0.864415,{'_modeljson': 'xgblimit/poker.json'} +bng_breastTumor,0,regression,0.163382,{'_modeljson': 'xgblimit/2dplanes.json'} +bng_breastTumor,0,regression,0.1789,{'_modeljson': 'xgblimit/adult.json'} +bng_breastTumor,0,regression,0.188483,{'_modeljson': 'xgblimit/Airlines.json'} +bng_breastTumor,0,regression,0.159704,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +bng_breastTumor,0,regression,0.1953,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +bng_breastTumor,0,regression,0.191805,{'_modeljson': 'xgblimit/bng_pbc.json'} +bng_breastTumor,0,regression,0.12139,{'_modeljson': 'xgblimit/car.json'} +bng_breastTumor,0,regression,0.163165,{'_modeljson': 'xgblimit/connect-4.json'} +bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgblimit/default.json'} +bng_breastTumor,0,regression,0.183899,{'_modeljson': 'xgblimit/dilbert.json'} +bng_breastTumor,0,regression,0.108646,{'_modeljson': 'xgblimit/poker.json'} 
+bng_pbc,0,regression,0.384556,{'_modeljson': 'xgblimit/2dplanes.json'} +bng_pbc,0,regression,0.42041,{'_modeljson': 'xgblimit/adult.json'} +bng_pbc,0,regression,0.449808,{'_modeljson': 'xgblimit/Airlines.json'} +bng_pbc,0,regression,0.409944,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +bng_pbc,0,regression,0.439854,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +bng_pbc,0,regression,0.457955,{'_modeljson': 'xgblimit/bng_pbc.json'} +bng_pbc,0,regression,0.418702,{'_modeljson': 'xgblimit/car.json'} +bng_pbc,0,regression,0.455731,{'_modeljson': 'xgblimit/connect-4.json'} +bng_pbc,0,regression,0.436902,{'_modeljson': 'xgblimit/default.json'} +bng_pbc,0,regression,0.423052,{'_modeljson': 'xgblimit/dilbert.json'} +bng_pbc,0,regression,0.447478,{'_modeljson': 'xgblimit/poker.json'} +car,0,multiclass,-0.18106,{'_modeljson': 'xgblimit/2dplanes.json'} +car,0,multiclass,-0.170386,{'_modeljson': 'xgblimit/adult.json'} +car,0,multiclass,-0.169973,{'_modeljson': 'xgblimit/Airlines.json'} +car,0,multiclass,-0.498314,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +car,0,multiclass,-0.230405,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +car,0,multiclass,-0.330863,{'_modeljson': 'xgblimit/bng_pbc.json'} +car,0,multiclass,-8.16E-05,{'_modeljson': 'xgblimit/car.json'} +car,0,multiclass,-0.0239037,{'_modeljson': 'xgblimit/connect-4.json'} +car,0,multiclass,-0.010029,{'_modeljson': 'xgblimit/default.json'} +car,0,multiclass,-0.00720156,{'_modeljson': 'xgblimit/dilbert.json'} +car,0,multiclass,-0.00360416,{'_modeljson': 'xgblimit/poker.json'} +connect-4,0,multiclass,-0.597091,{'_modeljson': 'xgblimit/2dplanes.json'} +connect-4,0,multiclass,-0.484427,{'_modeljson': 'xgblimit/adult.json'} +connect-4,0,multiclass,-0.387769,{'_modeljson': 'xgblimit/Airlines.json'} +connect-4,0,multiclass,-0.553347,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +connect-4,0,multiclass,-0.425107,{'_modeljson': 'xgblimit/bng_breastTumor.json'} 
+connect-4,0,multiclass,-0.441974,{'_modeljson': 'xgblimit/bng_pbc.json'} +connect-4,0,multiclass,-0.410519,{'_modeljson': 'xgblimit/car.json'} +connect-4,0,multiclass,-0.342773,{'_modeljson': 'xgblimit/connect-4.json'} +connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgblimit/default.json'} +connect-4,0,multiclass,-0.416631,{'_modeljson': 'xgblimit/dilbert.json'} +connect-4,0,multiclass,-0.466644,{'_modeljson': 'xgblimit/poker.json'} +dilbert,0,multiclass,-0.189149,{'_modeljson': 'xgblimit/2dplanes.json'} +dilbert,0,multiclass,-0.184569,{'_modeljson': 'xgblimit/bng_pbc.json'} +dilbert,0,multiclass,-0.0485906,{'_modeljson': 'xgblimit/car.json'} +dilbert,0,multiclass,-0.0643938,{'_modeljson': 'xgblimit/default.json'} +dilbert,0,multiclass,-0.0425865,{'_modeljson': 'xgblimit/dilbert.json'} +poker,0,regression,0.194424,{'_modeljson': 'xgblimit/2dplanes.json'} +poker,0,regression,0.443714,{'_modeljson': 'xgblimit/adult.json'} +poker,0,regression,0.837273,{'_modeljson': 'xgblimit/Airlines.json'} +poker,0,regression,0.354783,{'_modeljson': 'xgblimit/Amazon_employee_access.json'} +poker,0,regression,0.749681,{'_modeljson': 'xgblimit/bng_breastTumor.json'} +poker,0,regression,0.782336,{'_modeljson': 'xgblimit/bng_pbc.json'} +poker,0,regression,0.640848,{'_modeljson': 'xgblimit/car.json'} +poker,0,regression,0.924649,{'_modeljson': 'xgblimit/connect-4.json'} +poker,0,regression,0.635679,{'_modeljson': 'xgblimit/default.json'} +poker,0,regression,0.672338,{'_modeljson': 'xgblimit/dilbert.json'} +poker,0,regression,0.92563,{'_modeljson': 'xgblimit/poker.json'} diff --git a/test/default/xgboost/2dplanes.json b/test/default/xgboost/2dplanes.json new file mode 100644 index 000000000..451bef127 --- /dev/null +++ b/test/default/xgboost/2dplanes.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 6705, "max_leaves": 24, "min_child_weight": 58.562722088466444, "learning_rate": 0.0009765625, "subsample": 0.8993009465247683, "colsample_bylevel": 1.0, 
"colsample_bytree": 1.0, "reg_alpha": 0.2679275019160531, "reg_lambda": 91.95034898844547}} \ No newline at end of file diff --git a/test/default/xgboost/Airlines.json b/test/default/xgboost/Airlines.json new file mode 100644 index 000000000..8d4ec1661 --- /dev/null +++ b/test/default/xgboost/Airlines.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 17309, "max_leaves": 1146, "min_child_weight": 0.0193980002033358, "learning_rate": 0.0009765625, "subsample": 0.4169778612218198, "colsample_bylevel": 1.0, "colsample_bytree": 0.5504959296065052, "reg_alpha": 0.00505548829948545, "reg_lambda": 21.287234956122028, "FLAML_sample_size": 436899}} \ No newline at end of file diff --git a/test/default/xgboost/Albert.json b/test/default/xgboost/Albert.json new file mode 100644 index 000000000..cbcbe4d4d --- /dev/null +++ b/test/default/xgboost/Albert.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 6357, "max_leaves": 206, "min_child_weight": 1.9495322566288034, "learning_rate": 0.0068766724195393905, "subsample": 0.9451618245005704, "colsample_bylevel": 0.9030482524943064, "colsample_bytree": 0.9278972006416252, "reg_alpha": 0.01857648400903689, "reg_lambda": 6.021166480604588, "FLAML_sample_size": 344444}} \ No newline at end of file diff --git a/test/default/xgboost/Amazon_employee_access.json b/test/default/xgboost/Amazon_employee_access.json new file mode 100644 index 000000000..2b6439350 --- /dev/null +++ b/test/default/xgboost/Amazon_employee_access.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 591, "max_leaves": 16651, "min_child_weight": 0.03356567864689129, "learning_rate": 0.002595066436678338, "subsample": 0.9114132805513452, "colsample_bylevel": 0.9503441844594458, "colsample_bytree": 0.5703338448066768, "reg_alpha": 0.010405212349127894, "reg_lambda": 0.05352660657433639}} \ No newline at end of file diff --git a/test/default/xgboost/adult.json 
b/test/default/xgboost/adult.json new file mode 100644 index 000000000..d4b4d1def --- /dev/null +++ b/test/default/xgboost/adult.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 23282, "max_leaves": 19, "min_child_weight": 0.02198438885474473, "learning_rate": 0.001700636796132106, "subsample": 1.0, "colsample_bylevel": 0.8954745234489918, "colsample_bytree": 0.22331977285961732, "reg_alpha": 0.4115502489939291, "reg_lambda": 0.015523027968801352}} \ No newline at end of file diff --git a/test/default/xgboost/bng_breastTumor.json b/test/default/xgboost/bng_breastTumor.json new file mode 100644 index 000000000..9a723d6c7 --- /dev/null +++ b/test/default/xgboost/bng_breastTumor.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 4038, "max_leaves": 89, "min_child_weight": 0.23500921146599626, "learning_rate": 0.0039779941096963365, "subsample": 0.9421092355451888, "colsample_bylevel": 0.7772326835688742, "colsample_bytree": 0.6864341727912397, "reg_alpha": 4.8782018848557, "reg_lambda": 0.7531969031616396, "FLAML_sample_size": 94478}} \ No newline at end of file diff --git a/test/default/xgboost/bng_pbc.json b/test/default/xgboost/bng_pbc.json new file mode 100644 index 000000000..f09afbee5 --- /dev/null +++ b/test/default/xgboost/bng_pbc.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 32767, "max_leaves": 623, "min_child_weight": 0.03783048691639616, "learning_rate": 0.0021758863899615554, "subsample": 0.9086242379539484, "colsample_bylevel": 0.5880499360809446, "colsample_bytree": 1.0, "reg_alpha": 0.0037398450188259108, "reg_lambda": 16.894310259361305, "FLAML_sample_size": 810000}} \ No newline at end of file diff --git a/test/default/xgboost/car.json b/test/default/xgboost/car.json new file mode 100644 index 000000000..8a042e245 --- /dev/null +++ b/test/default/xgboost/car.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 765, "max_leaves": 6, 
"min_child_weight": 0.001, "learning_rate": 1.0, "subsample": 0.9833803894285497, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0012553728257619922, "reg_lambda": 0.03280542610559108}} \ No newline at end of file diff --git a/test/default/xgboost/connect-4.json b/test/default/xgboost/connect-4.json new file mode 100644 index 000000000..598aadd04 --- /dev/null +++ b/test/default/xgboost/connect-4.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 6458, "max_leaves": 196, "min_child_weight": 0.020541449256787844, "learning_rate": 0.0067240405208345, "subsample": 0.5764514509827234, "colsample_bylevel": 1.0, "colsample_bytree": 0.9478632468968712, "reg_alpha": 0.08196899811780128, "reg_lambda": 1.3914579996946315}} \ No newline at end of file diff --git a/test/default/xgboost/default.json b/test/default/xgboost/default.json new file mode 100644 index 000000000..5004b1432 --- /dev/null +++ b/test/default/xgboost/default.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {}} \ No newline at end of file diff --git a/test/default/xgboost/dilbert.json b/test/default/xgboost/dilbert.json new file mode 100644 index 000000000..f0f8e0c68 --- /dev/null +++ b/test/default/xgboost/dilbert.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 5739, "max_leaves": 5, "min_child_weight": 0.1359602026207002, "learning_rate": 0.14496176867613397, "subsample": 0.864897070662231, "colsample_bylevel": 0.01, "colsample_bytree": 0.9394057513384305, "reg_alpha": 0.001103317921178771, "reg_lambda": 0.1655504349283218}} \ No newline at end of file diff --git a/test/default/xgboost/poker.json b/test/default/xgboost/poker.json new file mode 100644 index 000000000..d4e2f4e6c --- /dev/null +++ b/test/default/xgboost/poker.json @@ -0,0 +1 @@ +{"class": "xgboost", "hyperparameters": {"n_estimators": 6866, "max_leaves": 238, "min_child_weight": 0.1000665069590469, "learning_rate": 0.05522440252112267, "subsample": 
0.9621433799637473, "colsample_bylevel": 0.8366787895853636, "colsample_bytree": 1.0, "reg_alpha": 0.002455941636379231, "reg_lambda": 0.02487031358204277, "FLAML_sample_size": 830258}} \ No newline at end of file diff --git a/test/default/xgboost/results.csv b/test/default/xgboost/results.csv new file mode 100644 index 000000000..d68f782f7 --- /dev/null +++ b/test/default/xgboost/results.csv @@ -0,0 +1,222 @@ +task,fold,type,result,params +2dplanes,0,regression,0.946474,{'_modeljson': 'xgb/2dplanes.json'} +2dplanes,0,regression,0.849793,{'_modeljson': 'xgb/adult.json'} +2dplanes,0,regression,0.940611,{'_modeljson': 'xgb/Albert.json'} +2dplanes,0,regression,0.68908,{'_modeljson': 'xgb/Amazon_employee_access.json'} +2dplanes,0,regression,0.945551,{'_modeljson': 'xgb/bng_breastTumor.json'} +2dplanes,0,regression,0.929904,{'_modeljson': 'xgb/bng_pbc.json'} +2dplanes,0,regression,0.944099,{'_modeljson': 'xgb/car.json'} +2dplanes,0,regression,0.938336,{'_modeljson': 'xgb/connect-4.json'} +2dplanes,0,regression,0.944454,{'_modeljson': 'xgb/default.json'} +2dplanes,0,regression,0.945477,{'_modeljson': 'xgb/dilbert.json'} +2dplanes,0,regression,0.91563,{'_modeljson': 'xgb/poker.json'} +dilbert,0,multiclass,-0.362419,{'_modeljson': 'xgb/2dplanes.json'} +dilbert,0,multiclass,-0.515024,{'_modeljson': 'xgb/Amazon_employee_access.json'} +dilbert,0,multiclass,-0.158604,{'_modeljson': 'xgb/car.json'} +dilbert,0,multiclass,-0.0643938,{'_modeljson': 'xgb/default.json'} +dilbert,0,multiclass,-0.0383872,{'_modeljson': 'xgb/dilbert.json'} +dilbert,0,multiclass,-0.0611286,{'_modeljson': 'xgb/poker.json'} +poker,0,regression,0.20821,{'_modeljson': 'xgb/2dplanes.json'} +poker,0,regression,0.206438,{'_modeljson': 'xgb/adult.json'} +poker,0,regression,0.815665,{'_modeljson': 'xgb/Airlines.json'} +poker,0,regression,0.857257,{'_modeljson': 'xgb/Albert.json'} +poker,0,regression,0.362568,{'_modeljson': 'xgb/Amazon_employee_access.json'} +poker,0,regression,0.559622,{'_modeljson': 
'xgb/bng_breastTumor.json'} +poker,0,regression,0.922282,{'_modeljson': 'xgb/bng_pbc.json'} +poker,0,regression,0.846139,{'_modeljson': 'xgb/car.json'} +poker,0,regression,0.891631,{'_modeljson': 'xgb/connect-4.json'} +poker,0,regression,0.635679,{'_modeljson': 'xgb/default.json'} +poker,0,regression,0.377996,{'_modeljson': 'xgb/dilbert.json'} +poker,0,regression,0.935986,{'_modeljson': 'xgb/poker.json'} +adult,0,binary,0.918094,{'_modeljson': 'xgb/2dplanes.json'} +adult,0,binary,0.932468,{'_modeljson': 'xgb/adult.json'} +adult,0,binary,0.92673,{'_modeljson': 'xgb/Airlines.json'} +adult,0,binary,0.922077,{'_modeljson': 'xgb/Albert.json'} +adult,0,binary,0.920837,{'_modeljson': 'xgb/Amazon_employee_access.json'} +adult,0,binary,0.92964,{'_modeljson': 'xgb/bng_breastTumor.json'} +adult,0,binary,0.916531,{'_modeljson': 'xgb/bng_pbc.json'} +adult,0,binary,0.884114,{'_modeljson': 'xgb/car.json'} +adult,0,binary,0.917887,{'_modeljson': 'xgb/connect-4.json'} +adult,0,binary,0.931234,{'_modeljson': 'xgb/default.json'} +adult,0,binary,0.928861,{'_modeljson': 'xgb/dilbert.json'} +adult,0,binary,0.909018,{'_modeljson': 'xgb/poker.json'} +Airlines,0,binary,0.703353,{'_modeljson': 'xgb/2dplanes.json'} +Airlines,0,binary,0.696962,{'_modeljson': 'xgb/adult.json'} +Airlines,0,binary,0.73153,{'_modeljson': 'xgb/Airlines.json'} +Airlines,0,binary,0.731577,{'_modeljson': 'xgb/Albert.json'} +Airlines,0,binary,0.725394,{'_modeljson': 'xgb/Amazon_employee_access.json'} +Airlines,0,binary,0.722896,{'_modeljson': 'xgb/bng_breastTumor.json'} +Airlines,0,binary,0.716839,{'_modeljson': 'xgb/bng_pbc.json'} +Airlines,0,binary,0.715654,{'_modeljson': 'xgb/car.json'} +Airlines,0,binary,0.73107,{'_modeljson': 'xgb/connect-4.json'} +Airlines,0,binary,0.719845,{'_modeljson': 'xgb/default.json'} +Airlines,0,binary,0.71873,{'_modeljson': 'xgb/dilbert.json'} +Airlines,0,binary,0.676427,{'_modeljson': 'xgb/poker.json'} +Albert,0,binary,0.742648,{'_modeljson': 'xgb/2dplanes.json'} 
+Albert,0,binary,0.758723,{'_modeljson': 'xgb/adult.json'} +Albert,0,binary,0.763066,{'_modeljson': 'xgb/Airlines.json'} +Albert,0,binary,0.768073,{'_modeljson': 'xgb/Albert.json'} +Albert,0,binary,0.74349,{'_modeljson': 'xgb/Amazon_employee_access.json'} +Albert,0,binary,0.764,{'_modeljson': 'xgb/bng_breastTumor.json'} +Albert,0,binary,0.767514,{'_modeljson': 'xgb/bng_pbc.json'} +Albert,0,binary,0.743392,{'_modeljson': 'xgb/car.json'} +Albert,0,binary,0.766006,{'_modeljson': 'xgb/connect-4.json'} +Albert,0,binary,0.757802,{'_modeljson': 'xgb/default.json'} +Albert,0,binary,0.746511,{'_modeljson': 'xgb/dilbert.json'} +Albert,0,binary,0.761985,{'_modeljson': 'xgb/poker.json'} +Amazon_employee_access,0,binary,0.727287,{'_modeljson': 'xgb/2dplanes.json'} +Amazon_employee_access,0,binary,0.855441,{'_modeljson': 'xgb/adult.json'} +Amazon_employee_access,0,binary,0.85984,{'_modeljson': 'xgb/Airlines.json'} +Amazon_employee_access,0,binary,0.873629,{'_modeljson': 'xgb/Albert.json'} +Amazon_employee_access,0,binary,0.897708,{'_modeljson': 'xgb/Amazon_employee_access.json'} +Amazon_employee_access,0,binary,0.862679,{'_modeljson': 'xgb/bng_breastTumor.json'} +Amazon_employee_access,0,binary,0.872059,{'_modeljson': 'xgb/bng_pbc.json'} +Amazon_employee_access,0,binary,0.657192,{'_modeljson': 'xgb/car.json'} +Amazon_employee_access,0,binary,0.877547,{'_modeljson': 'xgb/connect-4.json'} +Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgb/default.json'} +Amazon_employee_access,0,binary,0.853361,{'_modeljson': 'xgb/dilbert.json'} +Amazon_employee_access,0,binary,0.859734,{'_modeljson': 'xgb/poker.json'} +bng_breastTumor,0,regression,0.184421,{'_modeljson': 'xgb/2dplanes.json'} +bng_breastTumor,0,regression,0.163226,{'_modeljson': 'xgb/adult.json'} +bng_breastTumor,0,regression,0.18037,{'_modeljson': 'xgb/Airlines.json'} +bng_breastTumor,0,regression,0.177238,{'_modeljson': 'xgb/Albert.json'} +bng_breastTumor,0,regression,-0.118976,{'_modeljson': 
'xgb/Amazon_employee_access.json'} +bng_breastTumor,0,regression,0.195539,{'_modeljson': 'xgb/bng_breastTumor.json'} +bng_breastTumor,0,regression,0.106337,{'_modeljson': 'xgb/bng_pbc.json'} +bng_breastTumor,0,regression,0.149326,{'_modeljson': 'xgb/car.json'} +bng_breastTumor,0,regression,0.161193,{'_modeljson': 'xgb/connect-4.json'} +bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgb/default.json'} +bng_breastTumor,0,regression,0.186499,{'_modeljson': 'xgb/dilbert.json'} +bng_breastTumor,0,regression,-0.032219,{'_modeljson': 'xgb/poker.json'} +bng_pbc,0,regression,0.411719,{'_modeljson': 'xgb/2dplanes.json'} +bng_pbc,0,regression,0.409769,{'_modeljson': 'xgb/adult.json'} +bng_pbc,0,regression,0.450806,{'_modeljson': 'xgb/Airlines.json'} +bng_pbc,0,regression,0.458384,{'_modeljson': 'xgb/Albert.json'} +bng_pbc,0,regression,0.236669,{'_modeljson': 'xgb/Amazon_employee_access.json'} +bng_pbc,0,regression,0.441873,{'_modeljson': 'xgb/bng_breastTumor.json'} +bng_pbc,0,regression,0.462226,{'_modeljson': 'xgb/bng_pbc.json'} +bng_pbc,0,regression,0.431868,{'_modeljson': 'xgb/car.json'} +bng_pbc,0,regression,0.45678,{'_modeljson': 'xgb/connect-4.json'} +bng_pbc,0,regression,0.436902,{'_modeljson': 'xgb/default.json'} +bng_pbc,0,regression,0.418839,{'_modeljson': 'xgb/dilbert.json'} +bng_pbc,0,regression,0.448148,{'_modeljson': 'xgb/poker.json'} +car,0,multiclass,-0.38726,{'_modeljson': 'xgb/2dplanes.json'} +car,0,multiclass,-0.22547,{'_modeljson': 'xgb/adult.json'} +car,0,multiclass,-0.208402,{'_modeljson': 'xgb/Airlines.json'} +car,0,multiclass,-0.0256159,{'_modeljson': 'xgb/Albert.json'} +car,0,multiclass,-0.627705,{'_modeljson': 'xgb/Amazon_employee_access.json'} +car,0,multiclass,-0.166328,{'_modeljson': 'xgb/bng_breastTumor.json'} +car,0,multiclass,-0.0201057,{'_modeljson': 'xgb/bng_pbc.json'} +car,0,multiclass,-8.45E-05,{'_modeljson': 'xgb/car.json'} +car,0,multiclass,-0.0129025,{'_modeljson': 'xgb/connect-4.json'} +car,0,multiclass,-0.010029,{'_modeljson': 
'xgb/default.json'} +car,0,multiclass,-0.00218674,{'_modeljson': 'xgb/dilbert.json'} +car,0,multiclass,-0.00426392,{'_modeljson': 'xgb/poker.json'} +connect-4,0,multiclass,-0.578339,{'_modeljson': 'xgb/2dplanes.json'} +connect-4,0,multiclass,-0.489378,{'_modeljson': 'xgb/adult.json'} +connect-4,0,multiclass,-0.406886,{'_modeljson': 'xgb/Airlines.json'} +connect-4,0,multiclass,-0.332411,{'_modeljson': 'xgb/Albert.json'} +connect-4,0,multiclass,-0.636516,{'_modeljson': 'xgb/Amazon_employee_access.json'} +connect-4,0,multiclass,-0.425947,{'_modeljson': 'xgb/bng_breastTumor.json'} +connect-4,0,multiclass,-0.354612,{'_modeljson': 'xgb/bng_pbc.json'} +connect-4,0,multiclass,-0.452201,{'_modeljson': 'xgb/car.json'} +connect-4,0,multiclass,-0.338363,{'_modeljson': 'xgb/connect-4.json'} +connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgb/default.json'} +connect-4,0,multiclass,-0.497404,{'_modeljson': 'xgb/dilbert.json'} +connect-4,0,multiclass,-0.592309,{'_modeljson': 'xgb/poker.json'} +adult,0,binary,0.918094,{'_modeljson': 'xgb/2dplanes.json'} +adult,0,binary,0.932468,{'_modeljson': 'xgb/adult.json'} +adult,0,binary,0.92673,{'_modeljson': 'xgb/Airlines.json'} +adult,0,binary,0.922077,{'_modeljson': 'xgb/Albert.json'} +adult,0,binary,0.920837,{'_modeljson': 'xgb/Amazon_employee_access.json'} +adult,0,binary,0.92964,{'_modeljson': 'xgb/bng_breastTumor.json'} +adult,0,binary,0.916531,{'_modeljson': 'xgb/bng_pbc.json'} +adult,0,binary,0.884114,{'_modeljson': 'xgb/car.json'} +adult,0,binary,0.917887,{'_modeljson': 'xgb/connect-4.json'} +adult,0,binary,0.931234,{'_modeljson': 'xgb/default.json'} +adult,0,binary,0.928861,{'_modeljson': 'xgb/dilbert.json'} +adult,0,binary,0.909018,{'_modeljson': 'xgb/poker.json'} +Airlines,0,binary,0.703353,{'_modeljson': 'xgb/2dplanes.json'} +Airlines,0,binary,0.696962,{'_modeljson': 'xgb/adult.json'} +Airlines,0,binary,0.73153,{'_modeljson': 'xgb/Airlines.json'} +Airlines,0,binary,0.731577,{'_modeljson': 'xgb/Albert.json'} 
+Airlines,0,binary,0.725394,{'_modeljson': 'xgb/Amazon_employee_access.json'} +Airlines,0,binary,0.722896,{'_modeljson': 'xgb/bng_breastTumor.json'} +Airlines,0,binary,0.716839,{'_modeljson': 'xgb/bng_pbc.json'} +Airlines,0,binary,0.715654,{'_modeljson': 'xgb/car.json'} +Airlines,0,binary,0.73107,{'_modeljson': 'xgb/connect-4.json'} +Airlines,0,binary,0.719845,{'_modeljson': 'xgb/default.json'} +Airlines,0,binary,0.71873,{'_modeljson': 'xgb/dilbert.json'} +Airlines,0,binary,0.676427,{'_modeljson': 'xgb/poker.json'} +Albert,0,binary,0.742648,{'_modeljson': 'xgb/2dplanes.json'} +Albert,0,binary,0.758723,{'_modeljson': 'xgb/adult.json'} +Albert,0,binary,0.763066,{'_modeljson': 'xgb/Airlines.json'} +Albert,0,binary,0.768073,{'_modeljson': 'xgb/Albert.json'} +Albert,0,binary,0.74349,{'_modeljson': 'xgb/Amazon_employee_access.json'} +Albert,0,binary,0.764,{'_modeljson': 'xgb/bng_breastTumor.json'} +Albert,0,binary,0.767514,{'_modeljson': 'xgb/bng_pbc.json'} +Albert,0,binary,0.743392,{'_modeljson': 'xgb/car.json'} +Albert,0,binary,0.766006,{'_modeljson': 'xgb/connect-4.json'} +Albert,0,binary,0.757802,{'_modeljson': 'xgb/default.json'} +Albert,0,binary,0.746511,{'_modeljson': 'xgb/dilbert.json'} +Albert,0,binary,0.761985,{'_modeljson': 'xgb/poker.json'} +Amazon_employee_access,0,binary,0.727287,{'_modeljson': 'xgb/2dplanes.json'} +Amazon_employee_access,0,binary,0.855441,{'_modeljson': 'xgb/adult.json'} +Amazon_employee_access,0,binary,0.85984,{'_modeljson': 'xgb/Airlines.json'} +Amazon_employee_access,0,binary,0.873629,{'_modeljson': 'xgb/Albert.json'} +Amazon_employee_access,0,binary,0.897708,{'_modeljson': 'xgb/Amazon_employee_access.json'} +Amazon_employee_access,0,binary,0.862679,{'_modeljson': 'xgb/bng_breastTumor.json'} +Amazon_employee_access,0,binary,0.872059,{'_modeljson': 'xgb/bng_pbc.json'} +Amazon_employee_access,0,binary,0.657192,{'_modeljson': 'xgb/car.json'} +Amazon_employee_access,0,binary,0.877547,{'_modeljson': 'xgb/connect-4.json'} 
+Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgb/default.json'} +Amazon_employee_access,0,binary,0.853361,{'_modeljson': 'xgb/dilbert.json'} +Amazon_employee_access,0,binary,0.859734,{'_modeljson': 'xgb/poker.json'} +bng_breastTumor,0,regression,0.184421,{'_modeljson': 'xgb/2dplanes.json'} +bng_breastTumor,0,regression,0.163226,{'_modeljson': 'xgb/adult.json'} +bng_breastTumor,0,regression,0.18037,{'_modeljson': 'xgb/Airlines.json'} +bng_breastTumor,0,regression,0.177238,{'_modeljson': 'xgb/Albert.json'} +bng_breastTumor,0,regression,-0.118976,{'_modeljson': 'xgb/Amazon_employee_access.json'} +bng_breastTumor,0,regression,0.195539,{'_modeljson': 'xgb/bng_breastTumor.json'} +bng_breastTumor,0,regression,0.106337,{'_modeljson': 'xgb/bng_pbc.json'} +bng_breastTumor,0,regression,0.149326,{'_modeljson': 'xgb/car.json'} +bng_breastTumor,0,regression,0.161193,{'_modeljson': 'xgb/connect-4.json'} +bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgb/default.json'} +bng_breastTumor,0,regression,0.186499,{'_modeljson': 'xgb/dilbert.json'} +bng_breastTumor,0,regression,-0.032219,{'_modeljson': 'xgb/poker.json'} +bng_pbc,0,regression,0.411719,{'_modeljson': 'xgb/2dplanes.json'} +bng_pbc,0,regression,0.409769,{'_modeljson': 'xgb/adult.json'} +bng_pbc,0,regression,0.450806,{'_modeljson': 'xgb/Airlines.json'} +bng_pbc,0,regression,0.458384,{'_modeljson': 'xgb/Albert.json'} +bng_pbc,0,regression,0.236669,{'_modeljson': 'xgb/Amazon_employee_access.json'} +bng_pbc,0,regression,0.441873,{'_modeljson': 'xgb/bng_breastTumor.json'} +bng_pbc,0,regression,0.462226,{'_modeljson': 'xgb/bng_pbc.json'} +bng_pbc,0,regression,0.431868,{'_modeljson': 'xgb/car.json'} +bng_pbc,0,regression,0.45678,{'_modeljson': 'xgb/connect-4.json'} +bng_pbc,0,regression,0.436902,{'_modeljson': 'xgb/default.json'} +bng_pbc,0,regression,0.418839,{'_modeljson': 'xgb/dilbert.json'} +bng_pbc,0,regression,0.448148,{'_modeljson': 'xgb/poker.json'} +car,0,multiclass,-0.38726,{'_modeljson': 
'xgb/2dplanes.json'} +car,0,multiclass,-0.22547,{'_modeljson': 'xgb/adult.json'} +car,0,multiclass,-0.208402,{'_modeljson': 'xgb/Airlines.json'} +car,0,multiclass,-0.0256159,{'_modeljson': 'xgb/Albert.json'} +car,0,multiclass,-0.627705,{'_modeljson': 'xgb/Amazon_employee_access.json'} +car,0,multiclass,-0.166328,{'_modeljson': 'xgb/bng_breastTumor.json'} +car,0,multiclass,-0.0201057,{'_modeljson': 'xgb/bng_pbc.json'} +car,0,multiclass,-8.45E-05,{'_modeljson': 'xgb/car.json'} +car,0,multiclass,-0.0129025,{'_modeljson': 'xgb/connect-4.json'} +car,0,multiclass,-0.010029,{'_modeljson': 'xgb/default.json'} +car,0,multiclass,-0.00218674,{'_modeljson': 'xgb/dilbert.json'} +car,0,multiclass,-0.00426392,{'_modeljson': 'xgb/poker.json'} +connect-4,0,multiclass,-0.578339,{'_modeljson': 'xgb/2dplanes.json'} +connect-4,0,multiclass,-0.489378,{'_modeljson': 'xgb/adult.json'} +connect-4,0,multiclass,-0.406886,{'_modeljson': 'xgb/Airlines.json'} +connect-4,0,multiclass,-0.332411,{'_modeljson': 'xgb/Albert.json'} +connect-4,0,multiclass,-0.636516,{'_modeljson': 'xgb/Amazon_employee_access.json'} +connect-4,0,multiclass,-0.425947,{'_modeljson': 'xgb/bng_breastTumor.json'} +connect-4,0,multiclass,-0.354612,{'_modeljson': 'xgb/bng_pbc.json'} +connect-4,0,multiclass,-0.452201,{'_modeljson': 'xgb/car.json'} +connect-4,0,multiclass,-0.338363,{'_modeljson': 'xgb/connect-4.json'} +connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgb/default.json'} +connect-4,0,multiclass,-0.497404,{'_modeljson': 'xgb/dilbert.json'} +connect-4,0,multiclass,-0.592309,{'_modeljson': 'xgb/poker.json'} diff --git a/test/default_lgbm.py b/test/default_lgbm.py new file mode 100644 index 000000000..9085db489 --- /dev/null +++ b/test/default_lgbm.py @@ -0,0 +1,16 @@ +from flaml.data import load_openml_dataset +from flaml.default import LGBMRegressor +from flaml.ml import sklearn_metric_loss_score + +X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./") +lgbm = LGBMRegressor() + 
+hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams( + X_train, y_train +) +print(hyperparams) + +lgbm.fit(X_train, y_train) +y_pred = lgbm.predict(X_test) +print("flamlized lgbm r2 =", 1 - sklearn_metric_loss_score("r2", y_pred, y_test)) +print(lgbm) diff --git a/test/default_xgb.py b/test/default_xgb.py new file mode 100644 index 000000000..bda7307e5 --- /dev/null +++ b/test/default_xgb.py @@ -0,0 +1,13 @@ +from flaml.data import load_openml_dataset +from flaml.default import XGBClassifier +from flaml.ml import sklearn_metric_loss_score + +X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./") +xgb = XGBClassifier() +xgb.fit(X_train, y_train) +y_pred = xgb.predict(X_test) +print( + "flamlized xgb accuracy =", + 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test), +) +print(xgb) diff --git a/website/docs/Examples/Default-Flamlized.md b/website/docs/Examples/Default-Flamlized.md new file mode 100644 index 000000000..11f93d96a --- /dev/null +++ b/website/docs/Examples/Default-Flamlized.md @@ -0,0 +1,97 @@ +# Default - Flamlized Estimator + +Flamlized estimators automatically use data-dependent default hyperparameter configurations for each estimator, offering a unique zero-shot AutoML capability, or "no tuning" AutoML. + +This example requires openml==0.10.2. 
+ +## Flamlized LGBMRegressor + +### Zero-shot AutoML + +```python +from flaml.data import load_openml_dataset +from flaml.default import LGBMRegressor +from flaml.ml import sklearn_metric_loss_score + +X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./") +lgbm = LGBMRegressor() +lgbm.fit(X_train, y_train) +y_pred = lgbm.predict(X_test) +print("flamlized lgbm r2", "=", 1 - sklearn_metric_loss_score("r2", y_pred, y_test)) +print(lgbm) +``` + +#### Sample output + +``` +load dataset from ./openml_ds537.pkl +Dataset name: houses +X_train.shape: (15480, 8), y_train.shape: (15480,); +X_test.shape: (5160, 8), y_test.shape: (5160,) +flamlized lgbm r2 = 0.8537444671194614 +LGBMRegressor(colsample_bytree=0.7019911744574896, + learning_rate=0.022635758411078528, max_bin=511, + min_child_samples=2, n_estimators=4797, num_leaves=122, + reg_alpha=0.004252223402511765, reg_lambda=0.11288241427227624, + verbose=-1) +``` + +### Suggest hyperparameters without training + +``` +from flaml.data import load_openml_dataset +from flaml.default import LGBMRegressor +from flaml.ml import sklearn_metric_loss_score + +X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./") +lgbm = LGBMRegressor() +hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(X_train, y_train) +print(hyperparams) +``` + +#### Sample output +``` +load dataset from ./openml_ds537.pkl +Dataset name: houses +X_train.shape: (15480, 8), y_train.shape: (15480,); +X_test.shape: (5160, 8), y_test.shape: (5160,) +{'n_estimators': 4797, 'num_leaves': 122, 'min_child_samples': 2, 'learning_rate': 0.022635758411078528, 'colsample_bytree': 0.7019911744574896, 'reg_alpha': 0.004252223402511765, 'reg_lambda': 0.11288241427227624, 'max_bin': 511, 'verbose': -1} +``` + +## Flamlized XGBClassifier + +### Zero-shot AutoML + +```python +from flaml.data import load_openml_dataset +from flaml.default import XGBClassifier +from flaml.ml import 
sklearn_metric_loss_score + +X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./") +xgb = XGBClassifier() +xgb.fit(X_train, y_train) +y_pred = xgb.predict(X_test) +print("flamlized xgb accuracy", "=", 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)) +print(xgb) +``` + +#### Sample output + +``` +load dataset from ./openml_ds1169.pkl +Dataset name: airlines +X_train.shape: (404537, 7), y_train.shape: (404537,); +X_test.shape: (134846, 7), y_test.shape: (134846,) +flamlized xgb accuracy = 0.6729009388487608 +XGBClassifier(base_score=0.5, booster='gbtree', + colsample_bylevel=0.4601573737792679, colsample_bynode=1, + colsample_bytree=1.0, gamma=0, gpu_id=-1, grow_policy='lossguide', + importance_type='gain', interaction_constraints='', + learning_rate=0.04039771837785377, max_delta_step=0, max_depth=0, + max_leaves=159, min_child_weight=0.3396294979905001, missing=nan, + monotone_constraints='()', n_estimators=540, n_jobs=4, + num_parallel_tree=1, random_state=0, + reg_alpha=0.0012362430984376035, reg_lambda=3.093428791531145, + scale_pos_weight=1, subsample=1.0, tree_method='hist', + use_label_encoder=False, validate_parameters=1, verbosity=0) +``` diff --git a/website/docs/Getting-Started.md b/website/docs/Getting-Started.md index 5c20c2b43..781758eab 100644 --- a/website/docs/Getting-Started.md +++ b/website/docs/Getting-Started.md @@ -19,7 +19,7 @@ and learner selection method invented by Microsoft Research. Install FLAML from pip: `pip install flaml`. Find more options in [Installation](Installation). -There are two ways of using flaml: +There are several ways of using flaml: #### [Task-oriented AutoML](Use-Cases/task-oriented-automl) @@ -76,6 +76,16 @@ analysis = tune.run( ``` Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune_example.py) for the complete version of the above example. 
+#### [Zero-shot AutoML](Use-Cases/Zero-Shot-AutoML) + +FLAML offers a unique, seamless and effortless way to leverage AutoML for the commonly used classifiers and regressors such as LightGBM and XGBoost. For example, if you are using `lightgbm.LGBMClassifier` as your current learner, all you need to do is to replace `from lightgbm import LGBMClassifier` by: + +```python +from flaml.default import LGBMClassifier +``` + +Then, you can use it just like you use the original `LGBMClassifier`. Your other code can remain unchanged. When you call the `fit()` function from `flaml.default.LGBMClassifier`, it will automatically instantiate a good data-dependent hyperparameter configuration for your dataset, which is expected to work better than the default configuration. + ### Where to Go Next? * Understand the use cases for [Task-oriented AutoML](Use-Cases/task-oriented-automl) and [Tune user-defined function](Use-Cases/Tune-User-Defined-Function). diff --git a/website/docs/Research.md b/website/docs/Research.md index 80caa14dc..f91a10797 100644 --- a/website/docs/Research.md +++ b/website/docs/Research.md @@ -16,5 +16,6 @@ For technical details, please check our research publications. * [Frugal Optimization for Cost-related Hyperparameters](https://arxiv.org/abs/2005.01571). Qingyun Wu, Chi Wang, Silu Huang. AAAI 2021. * [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021. * [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. ICML 2021. +* [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. arXiv preprint arXiv:2202.09927 (2022). 
Many researchers and engineers have contributed to the technology development. In alphabetical order: Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Kevin Chen, Yi Wei Chen, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Moe Kayali, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Xueqing Liu, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Chi Wang, Yue Wang, Markus Weimer, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu. \ No newline at end of file diff --git a/website/docs/Use-Cases/Zero-Shot-AutoML.md b/website/docs/Use-Cases/Zero-Shot-AutoML.md new file mode 100644 index 000000000..489b2a9b8 --- /dev/null +++ b/website/docs/Use-Cases/Zero-Shot-AutoML.md @@ -0,0 +1,247 @@ +# Zero Shot AutoML + +`flaml.default` is a package for zero-shot AutoML, or "no-tuning" AutoML. It uses [`flaml.AutoML`](../reference/automl#automl-objects) and [`flaml.default.portfolio`](../reference/default/portfolio) to mine good hyperparameter configurations across different datasets offline, and recommend data-dependent default configurations at runtime without expensive tuning. + +Zero-shot AutoML has several benefits: +* The computation cost is just training one model. No tuning is involved. +* The decision of hyperparameter configuration is instant. No overhead to worry about. +* Your code remains the same. No breaking of the existing workflow. +* It requires less input from the user. No need to specify a tuning budget etc. +* All training data are used for, guess what, training. No need to worry about holding a subset of training data for validation (and overfitting the validation data). +* The offline preparation can be customized for a domain and leverage the historical tuning data. No experience is wasted. 
+ +## How to Use at Runtime + +The easiest way to leverage this technique is to import a "flamlized" learner of your favorite choice and use it just as you used the learner before. The automation is done behind the scenes and you are not required to change your code. For example, if you are currently using: + +```python +from lightgbm import LGBMRegressor + +estimator = LGBMRegressor() +estimator.fit(X_train, y_train) +estimator.predict(X_test) +``` + +Simply replace the first line with: + +```python +from flaml.default import LGBMRegressor +``` + +All the other code remains the same. And you are expected to get an equal or better model in most cases. + +The current list of "flamlized" learners is: +* LGBMClassifier, LGBMRegressor. +* XGBClassifier, XGBRegressor. +* RandomForestClassifier, RandomForestRegressor. +* ExtraTreesClassifier, ExtraTreesRegressor. + +### What's the magic behind the scene? + +`flaml.default.LGBMRegressor` inherits `lightgbm.LGBMRegressor`, so all the APIs in `lightgbm.LGBMRegressor` are still valid in `flaml.default.LGBMRegressor`. The difference is, `flaml.default.LGBMRegressor` decides the hyperparameter configurations based on the training data. It would use a different configuration if it is predicted to outperform the original data-independent default. If you inspect the params of the fitted estimator, you can find what configuration is used. If the original default configuration is used, then it is equivalent to the original estimator. + +The recommendation of which configuration should be used is based on offline AutoML run results. Information about the training dataset, such as the size of the dataset, will be used to recommend a data-dependent configuration. The recommendation is done instantly in negligible time. The training can be faster or slower than using the original default configuration depending on the recommended configuration. Note that there is no tuning involved. Only one model is trained. 
+ +### Can I check the configuration before training? + +Yes. You can use `suggest_hyperparams()` to find the suggested configuration. For example, + +```python +from flaml.default import LGBMRegressor +estimator = LGBMRegressor() +hyperparams, estimator_name, X_transformed, y_transformed = estimator.suggest_hyperparams(X_train, y_train) +print(hyperparams) +``` + +If you would like more control over the training, use an equivalent, open-box way for zero-shot AutoML. For example, + +```python +from flaml.default import preprocess_and_suggest_hyperparams +X, y = load_iris(return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) +hyperparams, estimator_class, X_transformed, y_transformed, feature_transformer, label_transformer = preprocess_and_suggest_hyperparams( + "classification", X_train, y_train, "lgbm" +) +model = estimator_class(**hyperparams) # estimator_class is lightgbm.LGBMClassifier +model.fit(X_transformed, y_train) # LGBMClassifier can handle raw labels +X_test = feature_transformer.transform(X_test) # preprocess test data +y_pred = model.predict(X_test) +``` + +Note that some classifiers like XGBClassifier require the labels to be integers, while others do not. So you can decide whether to use the transformed labels `y_transformed` and the label transformer `label_transformer`. +Also, each estimator may require specific preprocessing of the data. `X_transformed` is the preprocessed data, and `feature_transformer` is the preprocessor. It needs to be applied to the test data before prediction. These are automated when you use the "flamlized" learner. When you use the open-box way, pay attention to them. + +### Combine zero shot AutoML and hyperparameter tuning + +Zero Shot AutoML is fast. If tuning from the recommended data-dependent configuration is required, you can use `flaml.AutoML.fit()` and set `starting_points="data"`. 
For example, + +```python +from flaml import AutoML +automl = AutoML() +automl_settings = { + "task": "classification", + "starting_points": "data", + "estimator_list": ["lgbm"], + "time_budget": 600, + "max_iter": 50, +} +automl.fit(X_train, y_train, **automl_settings) +``` + +Note that if you set `max_iter=0` and `time_budget=None`, you are effectively using zero-shot AutoML. When `estimator_list` is omitted, the estimator together with its hyperparameter configuration will be decided in a zero-shot manner. + +### Use your own meta-learned defaults + +To use your own meta-learned defaults, specify the path containing the meta-learned defaults. For example, + +```python +estimator = flaml.default.LGBMRegressor(default_location="location_for_defaults") +``` + +Or, + +```python +preprocess_and_suggest_hyperparams( + "classification", X_train, y_train, "lgbm", location="location_for_defaults" +) +``` + +Or, + +```python +X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame) +automl = AutoML() +automl_settings = { + "task": "classification", + "log_file_name": "test/iris.log", + "starting_points": "data:location_for_defaults", + "estimator_list": ["lgbm", "xgb_limitdepth", "rf"], + "max_iter": 0, +} +automl.fit(X_train, y_train, **automl_settings) +``` + +Since this is a multiclass task, it will look for the following files under `{location_for_defaults}/`: + +- `all/multiclass.json`. +- `{learner_name}/multiclass.json` for every learner_name in the estimator_list. + +Read the next section to understand how to generate these files if you would like to meta-learn the defaults yourself. + +## How to Prepare Offline + +This section is intended for: +1. AutoML providers for a particular domain. +1. Data scientists or engineers who need to repeatedly train models for similar tasks with varying training data. + +Instead of running full hyperparameter tuning from scratch every time, one can leverage the tuning experiences in similar tasks before. 
While we have offered the meta-learned defaults from tuning experiences of several popular learners on benchmark datasets for classification and regression, you can customize the defaults for your own tasks/learners/metrics based on your own tuning experiences. + +### Prepare a collection of training tasks + +Collect a diverse set of training tasks. For each task, extract its meta feature and save in a .csv file. For example, test/default/all/metafeatures.csv: + +``` +Dataset,NumberOfInstances,NumberOfFeatures,NumberOfClasses,PercentageOfNumericFeatures +2dplanes,36691,10,0,1.0 +adult,43957,14,2,0.42857142857142855 +Airlines,485444,7,2,0.42857142857142855 +Albert,382716,78,2,0.3333333333333333 +Amazon_employee_access,29492,9,2,0.0 +bng_breastTumor,104976,9,0,0.1111111111111111 +bng_pbc,900000,18,0,0.5555555555555556 +car,1555,6,4,0.0 +connect-4,60801,42,3,0.0 +dilbert,9000,2000,5,1.0 +Dionis,374569,60,355,1.0 +poker,922509,10,0,1.0 +``` + +The first column is the dataset name, and the latter four are meta features. + +### Prepare the candidate configurations + +You can extract the best configurations for each task in your collection of training tasks by running flaml on each of them with a long enough budget. Save the best configuration in a .json file under `{location_for_defaults}/{learner_name}/{task_name}.json`. For example, + +```python +X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame) +automl.fit(X_train, y_train, estimator_list=["lgbm"], **settings) +automl.save_best_config("test/default/lgbm/iris.json") +``` + +### Evaluate each candidate configuration on each task + +Save the evaluation results in a .csv file. 
For example, save the evaluation results for lgbm under `test/default/lgbm/results.csv`: + +``` +task,fold,type,result,params +2dplanes,0,regression,0.946366,{'_modeljson': 'lgbm/2dplanes.json'} +2dplanes,0,regression,0.907774,{'_modeljson': 'lgbm/adult.json'} +2dplanes,0,regression,0.901643,{'_modeljson': 'lgbm/Airlines.json'} +2dplanes,0,regression,0.915098,{'_modeljson': 'lgbm/Albert.json'} +2dplanes,0,regression,0.302328,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +2dplanes,0,regression,0.94523,{'_modeljson': 'lgbm/bng_breastTumor.json'} +2dplanes,0,regression,0.945698,{'_modeljson': 'lgbm/bng_pbc.json'} +2dplanes,0,regression,0.946194,{'_modeljson': 'lgbm/car.json'} +2dplanes,0,regression,0.945549,{'_modeljson': 'lgbm/connect-4.json'} +2dplanes,0,regression,0.946232,{'_modeljson': 'lgbm/default.json'} +2dplanes,0,regression,0.945594,{'_modeljson': 'lgbm/dilbert.json'} +2dplanes,0,regression,0.836996,{'_modeljson': 'lgbm/Dionis.json'} +2dplanes,0,regression,0.917152,{'_modeljson': 'lgbm/poker.json'} +adult,0,binary,0.927203,{'_modeljson': 'lgbm/2dplanes.json'} +adult,0,binary,0.932072,{'_modeljson': 'lgbm/adult.json'} +adult,0,binary,0.926563,{'_modeljson': 'lgbm/Airlines.json'} +adult,0,binary,0.928604,{'_modeljson': 'lgbm/Albert.json'} +adult,0,binary,0.911171,{'_modeljson': 'lgbm/Amazon_employee_access.json'} +adult,0,binary,0.930645,{'_modeljson': 'lgbm/bng_breastTumor.json'} +adult,0,binary,0.928603,{'_modeljson': 'lgbm/bng_pbc.json'} +adult,0,binary,0.915825,{'_modeljson': 'lgbm/car.json'} +adult,0,binary,0.919499,{'_modeljson': 'lgbm/connect-4.json'} +adult,0,binary,0.930109,{'_modeljson': 'lgbm/default.json'} +adult,0,binary,0.932453,{'_modeljson': 'lgbm/dilbert.json'} +adult,0,binary,0.921959,{'_modeljson': 'lgbm/Dionis.json'} +adult,0,binary,0.910763,{'_modeljson': 'lgbm/poker.json'} +... +``` + +The `type` column indicates the type of the task, such as regression, binary or multiclass. 
+The `result` column stores the evaluation result, assuming the larger the better. The `params` column indicates which json config is used. For example, 'lgbm/2dplanes.json' indicates that the best lgbm configuration extracted from 2dplanes is used. +Different types of tasks can appear in the same file, as long as any json config file can be used in all the tasks. For example, 'lgbm/2dplanes.json' is extracted from a regression task, and it can be applied to binary and multiclass tasks as well. + +### Learn data-dependent defaults + +To recap, the inputs required for meta-learning are: + +1. Metafeatures: e.g., `{location}/all/metafeatures.csv`. +1. Configurations: `{location}/{learner_name}/{task_name}.json`. +1. Evaluation results: `{location}/{learner_name}/results.csv`. + +For example, if the input location is "test/default" and the learners are lgbm, xgb_limitdepth and rf, the following command learns data-dependent defaults for binary classification tasks. + +```bash +python portfolio.py --output test/default --input test/default --metafeatures test/default/all/metafeatures.csv --task binary --estimator lgbm xgb_limitdepth rf +``` + +In a few seconds, it will produce the following files as output: + +- test/default/lgbm/binary.json: the learned defaults for lgbm. +- test/default/xgb_limitdepth/binary.json: the learned defaults for xgb_limitdepth. +- test/default/rf/binary.json: the learned defaults for rf. +- test/default/all/binary.json: the learned defaults for lgbm, xgb_limitdepth and rf together. + +Change "binary" into "multiclass" or "regression", or your own types in your "results.csv" for the other types of tasks. To update the learned defaults when more experience is available, simply update your input files and rerun the learning command. + +### "Flamlize" a learner + +You have now effectively built your own zero-shot AutoML solution. Congratulations!
+ +Optionally, you can "flamlize" a learner using [`flaml.default.flamlize_estimator`](../reference/default/estimator#flamlize_estimator) for easy dissemination. For example, + +```python +import sklearn.ensemble as ensemble +from flaml.default import flamlize_estimator +ExtraTreesClassifier = flamlize_estimator( + ensemble.ExtraTreesClassifier, "extra_tree", "classification" +) +``` + +Then, you can share this "flamlized" `ExtraTreesClassifier` together with the location of your learned defaults with others (or your _future_ self). They will benefit from your past experience. Your group can also share experiences in a central place and update the learned defaults continuously. Over time, your organization gets better collectively. \ No newline at end of file