Fix issues related to zero-shot automl (#783)

* skip in-search-space check for small max iter

* resolve Pickle Transformer #730

* resolve default config unrecognized #784

* Change definition of init_config

* copy points_to_evaluate

* make test pass

* check learner selector
This commit is contained in:
Chi Wang
2022-11-13 12:47:59 -08:00
committed by GitHub
parent 9a7a9ef1c6
commit 30e200985c
6 changed files with 73 additions and 42 deletions

View File

@@ -1,4 +1,5 @@
import sys
import pickle
from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
@@ -12,15 +13,6 @@ from flaml.default import (
)
def test_build_portfolio(path="test/default", strategy="greedy"):
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
# portfolio.main()
@@ -30,6 +22,15 @@ def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
portfolio.main()
def test_build_portfolio(path="test/default", strategy="greedy"):
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
portfolio.main()
def test_iris(as_frame=True):
automl = AutoML()
automl_settings = {
@@ -96,6 +97,8 @@ def test_suggest_classification():
) = preprocess_and_suggest_hyperparams(
"classification", X_train, y_train, "lgbm", location=location
)
with open("test/default/feature_transformer", "wb") as f:
pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
model = estimator_class(**hyperparams) # estimator_class is LGBMClassifier
model.fit(X, y)
X_test = feature_transformer.transform(X_test)
@@ -218,5 +221,24 @@ def test_xgboost():
print(regressor)
def test_nobudget():
X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
automl = AutoML()
automl.fit(
X_train[:20],
y_train[:20],
estimator_list=["lgbm", "extra_tree", "rf"],
max_iter=12,
starting_points="data",
log_file_name="test/default/no_budget.txt",
log_type="all",
)
automl.fit(X_train[:20], y_train[:20], estimator_list=["lgbm", "extra_tree", "rf"])
    # make sure that zero-shot config out of the search space does not degenerate to low cost init config
assert automl.best_config_per_estimator["extra_tree"]["n_estimators"] > 4
# make sure that the zero-shot config {} is not modified
assert "criterion" not in automl.best_config_per_estimator["rf"]
if __name__ == "__main__":
test_build_portfolio("flaml/default")

View File

@@ -36,7 +36,8 @@ def test_starting_point_not_in_search_space():
automl.fit(X_train, y_train, **automl_settings)
assert (
automl._search_states[this_estimator_name].init_config["learning_rate"] != 2e-3
automl._search_states[this_estimator_name].init_config[0]["learning_rate"]
!= 2e-3
)
"""
@@ -67,7 +68,7 @@ def test_starting_point_not_in_search_space():
automl_settings["starting_points"] = "data:test/nlp/default/"
automl.fit(X_train, y_train, **automl_settings)
assert len(automl._search_states[this_estimator_name].init_config) == len(
assert len(automl._search_states[this_estimator_name].init_config[0]) == len(
automl._search_states[this_estimator_name]._search_space_domain
) - len(automl_settings["custom_hp"][this_estimator_name]), (
"The search space is updated with the custom_hp on {} hyperparameters of "