mirror of
https://github.com/microsoft/autogen.git
synced 2026-02-17 05:44:30 -05:00
Fix issues related to zero-shot automl (#783)
* skip in-search-space check for small max iter * resolve Pickle Transformer #730 * resolve default config unrecognized #784 * Change definition of init_config * copy points_to_evaluate * make test pass * check learner selector
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import sys
|
||||
import pickle
|
||||
from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer
|
||||
from sklearn.model_selection import train_test_split
|
||||
import pandas as pd
|
||||
@@ -12,15 +13,6 @@ from flaml.default import (
|
||||
)
|
||||
|
||||
|
||||
def test_build_portfolio(path="test/default", strategy="greedy"):
    """Run the portfolio builder once per task type.

    For each of the ``binary``, ``multiclass`` and ``regression`` tasks a
    command line is assigned to ``sys.argv`` and ``portfolio.main()`` is
    called (presumably it reads its options from ``sys.argv``).

    Args:
        path: Directory passed as both ``--output`` and ``--input``; the
            ``--metafeatures`` option points at ``{path}/all/metafeatures.csv``.
        strategy: Value passed to ``--strategy``.
    """
    saved_argv = sys.argv
    try:
        for task in ("binary", "multiclass", "regression"):
            sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task {task} --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
            portfolio.main()
    finally:
        # sys.argv is process-global: put the real command line back so the
        # fake one does not leak into whatever runs after this test.
        sys.argv = saved_argv
|
||||
|
||||
|
||||
def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
|
||||
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
# portfolio.main()
|
||||
@@ -30,6 +22,15 @@ def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
|
||||
portfolio.main()
|
||||
|
||||
|
||||
def test_build_portfolio(path="test/default", strategy="greedy"):
    """Run the portfolio builder once per task type.

    For each of the ``binary``, ``multiclass`` and ``regression`` tasks a
    command line is assigned to ``sys.argv`` and ``portfolio.main()`` is
    called (presumably it reads its options from ``sys.argv``).

    Args:
        path: Directory passed as both ``--output`` and ``--input``; the
            ``--metafeatures`` option points at ``{path}/all/metafeatures.csv``.
        strategy: Value passed to ``--strategy``.
    """
    saved_argv = sys.argv
    try:
        for task in ("binary", "multiclass", "regression"):
            sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task {task} --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
            portfolio.main()
    finally:
        # sys.argv is process-global: put the real command line back so the
        # fake one does not leak into whatever runs after this test.
        sys.argv = saved_argv
|
||||
|
||||
|
||||
def test_iris(as_frame=True):
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
@@ -96,6 +97,8 @@ def test_suggest_classification():
|
||||
) = preprocess_and_suggest_hyperparams(
|
||||
"classification", X_train, y_train, "lgbm", location=location
|
||||
)
|
||||
with open("test/default/feature_transformer", "wb") as f:
|
||||
pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
|
||||
model = estimator_class(**hyperparams) # estimator_class is LGBMClassifier
|
||||
model.fit(X, y)
|
||||
X_test = feature_transformer.transform(X_test)
|
||||
@@ -218,5 +221,24 @@ def test_xgboost():
|
||||
print(regressor)
|
||||
|
||||
|
||||
def test_nobudget():
    """Fit AutoML twice on 20 breast-cancer rows and check the best configs.

    The first fit is capped at 12 iterations, uses ``starting_points="data"``
    and logs every trial to ``test/default/no_budget.txt``; the second fit
    passes only the data and the estimator list.
    """
    features, labels = load_breast_cancer(return_X_y=True, as_frame=True)
    first_rows = slice(20)  # same rows as the ``[:20]`` slice
    learners = ("lgbm", "extra_tree", "rf")

    automl = AutoML()
    first_fit_settings = {
        "estimator_list": list(learners),
        "max_iter": 12,
        "starting_points": "data",
        "log_file_name": "test/default/no_budget.txt",
        "log_type": "all",
    }
    automl.fit(features[first_rows], labels[first_rows], **first_fit_settings)
    # A fresh list is handed to each call, exactly as two separate literals would be.
    automl.fit(
        features[first_rows],
        labels[first_rows],
        estimator_list=list(learners),
    )

    # make sure that a zero-shot config outside the search space does not
    # degenerate to the low-cost init config
    extra_tree_config = automl.best_config_per_estimator["extra_tree"]
    assert extra_tree_config["n_estimators"] > 4

    # make sure that the zero-shot config {} is not modified
    rf_config = automl.best_config_per_estimator["rf"]
    assert "criterion" not in rf_config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_build_portfolio("flaml/default")
|
||||
|
||||
@@ -36,7 +36,8 @@ def test_starting_point_not_in_search_space():
|
||||
|
||||
automl.fit(X_train, y_train, **automl_settings)
|
||||
assert (
|
||||
automl._search_states[this_estimator_name].init_config["learning_rate"] != 2e-3
|
||||
automl._search_states[this_estimator_name].init_config[0]["learning_rate"]
|
||||
!= 2e-3
|
||||
)
|
||||
|
||||
"""
|
||||
@@ -67,7 +68,7 @@ def test_starting_point_not_in_search_space():
|
||||
automl_settings["starting_points"] = "data:test/nlp/default/"
|
||||
|
||||
automl.fit(X_train, y_train, **automl_settings)
|
||||
assert len(automl._search_states[this_estimator_name].init_config) == len(
|
||||
assert len(automl._search_states[this_estimator_name].init_config[0]) == len(
|
||||
automl._search_states[this_estimator_name]._search_space_domain
|
||||
) - len(automl_settings["custom_hp"][this_estimator_name]), (
|
||||
"The search space is updated with the custom_hp on {} hyperparameters of "
|
||||
|
||||
Reference in New Issue
Block a user