Fix issues related to zero-shot automl (#783)

* skip in-search-space check for small max iter

* resolve Pickle Transformer #730

* resolve default config unrecognized #784

* Change definition of init_config

* copy points_to_evaluate

* make test pass

* check learner selector
This commit is contained in:
Chi Wang
2022-11-13 12:47:59 -08:00
committed by GitHub
parent 9a7a9ef1c6
commit 30e200985c
6 changed files with 73 additions and 42 deletions

View File

@@ -1,4 +1,5 @@
import sys
import pickle
from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
@@ -12,15 +13,6 @@ from flaml.default import (
)
def test_build_portfolio(path="test/default", strategy="greedy"):
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
# portfolio.main()
@@ -30,6 +22,15 @@ def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
portfolio.main()
def test_build_portfolio(path="test/default", strategy="greedy"):
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
def test_iris(as_frame=True):
automl = AutoML()
automl_settings = {
@@ -96,6 +97,8 @@ def test_suggest_classification():
) = preprocess_and_suggest_hyperparams(
"classification", X_train, y_train, "lgbm", location=location
)
with open("test/default/feature_transformer", "wb") as f:
pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
model = estimator_class(**hyperparams) # estimator_class is LGBMClassifier
model.fit(X, y)
X_test = feature_transformer.transform(X_test)
@@ -218,5 +221,24 @@ def test_xgboost():
print(regressor)
def test_nobudget():
X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
automl = AutoML()
automl.fit(
X_train[:20],
y_train[:20],
estimator_list=["lgbm", "extra_tree", "rf"],
max_iter=12,
starting_points="data",
log_file_name="test/default/no_budget.txt",
log_type="all",
)
automl.fit(X_train[:20], y_train[:20], estimator_list=["lgbm", "extra_tree", "rf"])
    # make sure that zero-shot config out of the search space does not degenerate to low cost init config
assert automl.best_config_per_estimator["extra_tree"]["n_estimators"] > 4
# make sure that the zero-shot config {} is not modified
assert "criterion" not in automl.best_config_per_estimator["rf"]
if __name__ == "__main__":
test_build_portfolio("flaml/default")

View File

@@ -36,7 +36,8 @@ def test_starting_point_not_in_search_space():
automl.fit(X_train, y_train, **automl_settings)
assert (
automl._search_states[this_estimator_name].init_config["learning_rate"] != 2e-3
automl._search_states[this_estimator_name].init_config[0]["learning_rate"]
!= 2e-3
)
"""
@@ -67,7 +68,7 @@ def test_starting_point_not_in_search_space():
automl_settings["starting_points"] = "data:test/nlp/default/"
automl.fit(X_train, y_train, **automl_settings)
assert len(automl._search_states[this_estimator_name].init_config) == len(
assert len(automl._search_states[this_estimator_name].init_config[0]) == len(
automl._search_states[this_estimator_name]._search_space_domain
) - len(automl_settings["custom_hp"][this_estimator_name]), (
"The search space is updated with the custom_hp on {} hyperparameters of "