mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
cleanup
This commit is contained in:
@@ -1,14 +0,0 @@
|
||||
FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04

# Python dependencies for running FLAML tuning on Ray inside AzureML.
# One layer instead of one RUN per package keeps the image smaller; the
# extra quoting protects the bracket expression from shell globbing.
RUN pip install azureml-core "flaml[blendsearch,ray]" ray-on-aml

# Ports published by the container — presumably the Ray dashboard (8265)
# and the Ray head/GCS port (6379); confirm against the Ray cluster config.
EXPOSE 8265
EXPOSE 6379

USER root

# `update` and `install` must share a single layer: a cached `update`
# layer can go stale and make later `install` steps fail on missing
# packages. Removing the apt lists afterwards keeps the layer small.
RUN apt-get update \
    && apt-get install -y jq rsync \
    && rm -rf /var/lib/apt/lists/*
|
||||
@@ -1,402 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
from sklearn.model_selection import train_test_split
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from flaml import AutoML
|
||||
from flaml.automl.model import LGBMEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
    """LGBM estimator whose search space starts at deliberately huge values.

    Both ``n_estimators`` and ``num_leaves`` are initialized at 32768 so the
    first sampled configurations are very expensive — this is used by the
    OOM-skip test in this file — while ``low_cost_init_value`` keeps a cheap
    fallback configuration (4) available to the searcher.
    """

    @classmethod
    def search_space(cls, **params):
        # Log-scale integer domains; init values sit at the expensive end on purpose.
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
        }
|
||||
|
||||
|
||||
class TestClassification(unittest.TestCase):
    """End-to-end AutoML classification smoke tests.

    Covers preprocessing of mixed-type DataFrames, datetime features, sparse
    inputs, parallel (ray-backed) runs, ensembling, and custom learners.
    """

    def test_preprocess(self):
        """Run AutoML on a small mixed-type frame across several estimator lists.

        ``f3`` mixes strings with floats and ``f4`` is boolean, exercising the
        internal feature preprocessing; later fits reuse the internally stored
        training data (``keep_search_state``) and the ``skip_transform`` flag.
        """
        automl = AutoML()
        X = pd.DataFrame(
            {
                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
                "f2": [3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0, 12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0],
                # deliberately mixed str/float column to exercise preprocessing
                "f3": ["a", "b", "a", "c", "c", "b", "b", "b", "b", "a", "b", 1.0, 1.0, "a"],
                "f4": [True, True, False, True, True, False, False, False, True, True, False, False, True, True],
            }
        )
        y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["xgboost", "catboost", "kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
        }
        automl.fit(X, y, **automl_settings)
        del automl

        automl = AutoML()
        automl_settings = {
            "time_budget": 6,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["catboost", "lrl2"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
        }
        automl.fit(X, y, **automl_settings)
        print(automl.feature_names_in_)
        print(automl.feature_importances_)
        del automl

        automl = AutoML()
        # Run two concurrent trials only when ray is importable.
        try:
            import ray

            n_concurrent_trials = 2
        except ImportError:
            n_concurrent_trials = 1
        automl_settings = {
            "time_budget": 2,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["lrl2", "kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            "verbose": 4,
            "ensemble": True,
            "n_concurrent_trials": n_concurrent_trials,
        }
        automl.fit(X, y, **automl_settings)
        del automl

        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["lgbm", "catboost", "kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
        }
        # keep_search_state retains the (already transformed) training data
        # on the AutoML object so it can be reused below.
        automl_settings["keep_search_state"] = True
        automl.fit(X, y, **automl_settings)
        X, y = automl._X_train_all, automl._y_train_all
        del automl

        # X, y are now preprocessed, so skip_transform=True is valid here.
        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
            "skip_transform": True,
        }
        automl.fit(X, y, **automl_settings)
        del automl

        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "roc_auc_weighted",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
            "skip_transform": True,
        }
        automl.fit(X, y, **automl_settings)
        del automl

    def test_binary(self):
        """Fit the 'binary' task on breast_cancer and run predict once."""
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "task": "binary",
            "log_file_name": "test/breast_cancer.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = load_breast_cancer(return_X_y=True)
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        _ = automl_experiment.predict(X_train)

    def test_datetime_columns(self):
        """Fit on a frame whose features are all datetime columns.

        Column "B" is constant and "year_A" shadows a derived-feature naming
        pattern; both should survive preprocessing without error.
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "log_file_name": "test/datetime_columns.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        fake_df = pd.DataFrame(
            {
                "A": [
                    datetime(1900, 2, 3),
                    datetime(1900, 3, 4),
                    datetime(1900, 3, 4),
                    datetime(1900, 3, 4),
                    datetime(1900, 7, 2),
                    datetime(1900, 8, 9),
                ],
                "B": [
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                ],
                "year_A": [
                    datetime(1900, 1, 2),
                    datetime(1900, 8, 1),
                    datetime(1900, 1, 4),
                    datetime(1900, 6, 1),
                    datetime(1900, 1, 5),
                    datetime(1900, 4, 1),
                ],
            }
        )
        y = np.array([0, 1, 0, 1, 0, 0])
        automl_experiment.fit(X_train=fake_df, y_train=y, **automl_settings)
        _ = automl_experiment.predict(fake_df)

    def test_sparse_matrix_xgboost(self):
        """Fit xgboost on a large sparse identity matrix with a callback.

        NOTE(review): the second half pip-installs xgboost==1.3.3 into the
        running environment and then upgrades it back — this mutates the
        environment and relies on network access.
        """
        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "metric": "ap",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        import xgboost as xgb

        # Verify user-supplied callbacks are forwarded to the estimator.
        callback = xgb.callback.TrainingCallback()
        automl.fit(X_train=X_train, y_train=y_train, callbacks=[callback], **automl_settings)
        print(automl.predict(X_train))
        print(automl.model)
        print(automl.config_history)
        print(automl.best_model_for_estimator("xgboost"))
        print(automl.best_iteration)
        print(automl.best_estimator)

        # test an old version of xgboost
        import subprocess
        import sys

        subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost==1.3.3", "--user"])
        automl = AutoML()
        automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl.feature_names_in_)
        print(automl.feature_importances_)
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "xgboost", "--user"])

    def test_ray_classification(self):
        """Fit with ray (use_ray / n_concurrent_trials); skip when ray is absent.

        NOTE(review): the except clause assumes the ImportError surfaces from
        inside automl.fit when ray is unavailable — confirm against flaml.
        """
        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

        automl = AutoML()
        try:
            automl.fit(
                X_train,
                y_train,
                X_val=X_test,
                y_val=y_test,
                time_budget=10,
                task="classification",
                use_ray=True,
            )
            automl.fit(
                X_train,
                y_train,
                X_val=X_test,
                y_val=y_test,
                time_budget=10,
                task="classification",
                n_concurrent_trials=2,
                ensemble=True,
            )
        except ImportError:
            return

    def test_parallel_xgboost(self, hpo_method=None):
        """Fit xgboost with two concurrent trials on a ray object-store input.

        ``hpo_method`` is forwarded into the settings so sibling tests can
        rerun this flow with a different search strategy.
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "metric": "ap",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 2,
            "hpo_method": hpo_method,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        try:
            import ray

            # Put the training matrix in the ray object store and fit on the ref.
            X_train_ref = ray.put(X_train)
            automl_experiment.fit(X_train=X_train_ref, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("xgboost"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_parallel_xgboost_others(self):
        # use random search as the hpo_method
        self.test_parallel_xgboost(hpo_method="random")

    def test_random_skip_oom(self):
        """Random search with the deliberately huge MyLargeLGBM learner.

        The oversized init config is expected to stress memory; the run should
        skip/survive rather than crash (see MyLargeLGBM above).
        """
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
        automl_settings = {
            "time_budget": 2,
            "task": "classification",
            "log_file_name": "test/sparse_classification_oom.log",
            "estimator_list": ["large_lgbm"],
            "log_type": "all",
            "n_jobs": 1,
            "hpo_method": "random",
            "n_concurrent_trials": 2,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)

        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("large_lgbm"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            print("skipping concurrency test as ray is not installed")
            return

    def test_sparse_matrix_lr(self):
        """Fit l1/l2 logistic regression on a random sparse matrix.

        The first fit passes train_time_limit=1; the second reruns with a
        larger time budget and inspects the results.
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 3,
            "metric": "f1",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["lrl1", "lrl2"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.random(3000, 3000, density=0.1)
        y_train = np.random.randint(2, size=3000)
        automl_experiment.fit(X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings)
        automl_settings["time_budget"] = 5
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("lrl2"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, exercise only the preprocessing test.
    test = TestClassification()
    test.test_preprocess()
|
||||
@@ -1,163 +0,0 @@
|
||||
from urllib.error import URLError
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.externals._arff import ArffException
|
||||
from functools import partial
|
||||
from flaml.automl import AutoML, size
|
||||
from flaml import tune
|
||||
|
||||
dataset = "credit-g"
|
||||
|
||||
|
||||
def test_metric_constraints():
    """Fit AutoML with a prediction-time constraint, then rerun via tune.run.

    Falls back from OpenML's credit-g to sklearn's wine dataset when the
    download fails (parse error, bad value, or network failure).
    """
    # impose metric constraints via "pred_time_limit"
    automl = AutoML()

    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
        "log_file_name": f"test/constraints_{dataset}.log",
        "n_jobs": 1,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 2,
        # very tight per-sample prediction-time limit (seconds)
        "pred_time_limit": 5.1e-05,
    }

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError, URLError):
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    # Re-evaluate the best config through the trainable directly.
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)
    print("metric constraints used in automl", automl.metric_constraints)

    # Rerun the same search through the low-level tune API, reusing the
    # search space, constraints, and resources recorded on the automl object.
    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
|
||||
|
||||
|
||||
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val,
    weight_train,
    *args,
):
    """Custom FLAML metric: penalized validation log-loss plus extra metrics.

    The objective is ``(1 + alpha) * val_loss - alpha * train_loss`` with
    ``alpha = 0.5``, so overfitting (large val/train gap) is penalized. The
    second return value is a metrics_to_log dict reporting the raw validation
    loss, the val-train gap, and the per-sample prediction time.
    """
    import time

    from sklearn.metrics import log_loss

    # Time the validation-set prediction to report per-sample latency.
    tic = time.time()
    proba_val = estimator.predict_proba(X_val)
    elapsed_per_row = (time.time() - tic) / len(X_val)

    loss_on_val = log_loss(y_val, proba_val, labels=labels, sample_weight=weight_val)
    loss_on_train = log_loss(
        y_train,
        estimator.predict_proba(X_train),
        labels=labels,
        sample_weight=weight_train,
    )

    alpha = 0.5
    objective = loss_on_val * (1 + alpha) - alpha * loss_on_train
    metrics_to_log = {
        "val_loss": loss_on_val,
        "val_train_loss_gap": loss_on_val - loss_on_train,
        "pred_time": elapsed_per_row,
    }
    return objective, metrics_to_log
|
||||
|
||||
|
||||
def test_metric_constraints_custom():
    """Fit AutoML with constraints on metrics reported by ``custom_metric``.

    Constrains the ``pred_time`` and ``val_train_loss_gap`` entries of the
    metrics_to_log dict returned by the custom metric, then reruns the best
    search through the low-level tune API.
    """
    automl = AutoML()
    # When you are providing a custom metric function, you can also specify constraints
    # on one or more of the metrics reported via the second object, i.e., a metrics_to_log dictionary,
    # returned by the custom metric function.
    # For example, in the following code, we add a constraint on the `pred_time` metrics and `val_train_loss_gap` metric
    # reported in `custom_metric` defined above, respectively.
    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
        "log_file_name": f"test/constraints_custom_{dataset}.log",
        "n_jobs": 1,
        "metric": custom_metric,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 1,
        "metric_constraints": [
            ("pred_time", "<=", 5.1e-05),
            ("val_train_loss_gap", "<=", 0.05),
        ],
    }

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    # Fix: also catch URLError (network failure), matching test_metric_constraints
    # above, so offline runs fall back to load_wine instead of erroring out.
    except (ArffException, ValueError, URLError):
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    print("Best minimization objective on validation data: {0:.4g}".format(automl.best_loss))
    print(
        "pred_time of the best config on validation data: {0:.4g}".format(
            automl.metrics_for_best_config[1]["pred_time"]
        )
    )
    print(
        "val_train_loss_gap of the best config on validation data: {0:.4g}".format(
            automl.metrics_for_best_config[1]["val_train_loss_gap"]
        )
    )

    # Re-evaluate the best config through the trainable directly.
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)
    print("metric constraints in automl", automl.metric_constraints)

    # Rerun the same search through the low-level tune API, reusing the
    # search space, constraints, and resources recorded on the automl object.
    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run both constraint tests when executed as a script.
    test_metric_constraints()
    test_metric_constraints_custom()
|
||||
@@ -1,65 +0,0 @@
|
||||
import sys
|
||||
import pytest
|
||||
from flaml import AutoML, tune
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_custom_hp_nlp():
    """Fit a seq-classification transformer with pinned hyperparameters.

    Overrides the transformer search space so the model path and epoch count
    are fixed instead of searched, and disables fp16 for portability.
    """
    from test.nlp.utils import get_toy_data_seqclassification, get_automl_settings

    X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()

    automl = AutoML()

    automl_settings = get_automl_settings()
    # Fix: the original code first assigned custom_hp = None and immediately
    # overwrote it — the dead assignment is removed.
    automl_settings["custom_hp"] = {
        "transformer": {
            "model_path": {
                "domain": tune.choice(["google/electra-small-discriminator"]),
            },
            "num_train_epochs": {"domain": 3},
        }
    }
    automl_settings["fit_kwargs_by_estimator"] = {
        "transformer": {
            "output_dir": "test/data/output/",
            "fp16": False,
        }
    }
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
|
||||
def test_custom_hp():
    """Fit AutoML on iris with per-estimator hyperparameter overrides.

    Shrinks xgboost's n_estimators range, freezes rf's max_leaves
    (domain=None disables the search for that hyperparameter), and enables
    lgbm row subsampling.
    """
    from sklearn.datasets import load_iris

    features, labels = load_iris(return_X_y=True)

    xgboost_overrides = {
        "n_estimators": {
            "domain": tune.lograndint(lower=1, upper=100),
            "low_cost_init_value": 1,
        },
    }
    rf_overrides = {
        "max_leaves": {
            "domain": None,  # disable search
        },
    }
    lgbm_overrides = {
        "subsample": {
            "domain": tune.uniform(lower=0.1, upper=1.0),
            "init_value": 1.0,
        },
        "subsample_freq": {
            "domain": 1,  # subsample_freq must > 0 to enable subsample
        },
    }
    custom_hp = {
        "xgboost": xgboost_overrides,
        "rf": rf_overrides,
        "lgbm": lgbm_overrides,
    }

    automl = AutoML()
    automl.fit(features, labels, custom_hp=custom_hp, time_budget=2)
    print(automl.best_config_per_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Only the non-NLP test runs as a script (the NLP one needs pytest markers).
    test_custom_hp()
|
||||
@@ -1,672 +0,0 @@
|
||||
import datetime
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from flaml import AutoML
|
||||
|
||||
from flaml.automl.task.time_series_task import TimeSeriesTask
|
||||
|
||||
|
||||
def test_forecast_automl(budget=10, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
    """End-to-end ts_forecast run on the statsmodels CO2 dataset.

    Fits via the dataframe API, checks mape <= 0.005 on the held-out horizon,
    then refits via the X_train/y_train API. Falls back to the given
    estimator list when prophet is not installed.

    NOTE(review): the mutable list default argument is shared across calls;
    harmless here because it is never mutated, but worth knowing.
    """
    # using dataframe
    import statsmodels.api as sm

    data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
    data = data.bfill().ffill().to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
    num_samples = data.shape[0]
    time_horizon = 12
    # Last 12 months are held out as the forecast target.
    split_idx = num_samples - time_horizon
    df = data[:split_idx]
    X_test = data[split_idx:]["ds"]
    y_test = data[split_idx:]["y"]
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast",  # task type
        "log_file_name": "test/CO2_forecast.log",  # flaml log file
        "eval_method": "holdout",
        "label": "y",
    }
    """The main flaml automl API"""
    try:
        import prophet

        automl.fit(dataframe=df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=df,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
    """ retrieve best config and best learner"""
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    """ compute different metric values on testing dataset"""
    from flaml.automl.ml import sklearn_metric_loss_score

    mape = sklearn_metric_loss_score("mape", y_pred, y_test)
    print("mape", "=", mape)
    assert mape <= 0.005, "the mape of flaml should be less than 0.005"
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # Same fit via the X_train/y_train API instead of a single dataframe.
    X_train = df[["ds"]]
    y_train = df["y"]
    automl = AutoML()
    try:
        # NOTE(review): assumes automl.fit raises ImportError internally when
        # prophet is missing — confirm against flaml.
        automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            X_train=X_train,
            y_train=y_train,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
|
||||
|
||||
|
||||
def test_models(budget=3):
    """Smoke-test each registered ts_forecast estimator on synthetic data.

    TFT is skipped here because it is covered by its own test.
    """
    num_points = 100
    frame = pd.DataFrame(
        {
            "A": pd.date_range(start="1900-01-01", periods=num_points, freq="D"),
        }
    )
    target = np.exp(np.random.randn(num_points))

    task = TimeSeriesTask("ts_forecast")

    for estimator_name in task.estimators.keys():
        if estimator_name == "tft":
            # TFT is covered by its own test
            continue
        automl = AutoML()
        automl.fit(
            X_train=frame[:72],  # a single column of timestamps
            y_train=target[:72],  # one value per timestamp
            estimator_list=[estimator_name],
            period=12,  # forecast horizon, e.g. 12 months
            task="ts_forecast",
            time_budget=budget,  # seconds
        )
        automl.predict(frame[72:])
|
||||
|
||||
|
||||
def test_numpy():
    """ts_forecast with plain numpy datetime64 input instead of a DataFrame.

    Second fit restricts to arima/sarimax and also checks predicting by step
    count (predict(12)) instead of by timestamps.
    """
    X_train = np.arange("2014-01", "2021-01", dtype="datetime64[M]")
    y_train = np.random.random(size=len(X_train))
    automl = AutoML()
    automl.fit(
        X_train=X_train[:72],  # a single column of timestamp
        y_train=y_train[:72],  # value for each timestamp
        period=12,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        time_budget=3,  # time budget in seconds
        log_file_name="test/ts_forecast.log",
        n_splits=3,  # number of splits
    )
    print(automl.predict(X_train[72:]))

    automl = AutoML()
    automl.fit(
        X_train=X_train[:72],  # a single column of timestamp
        y_train=y_train[:72],  # value for each timestamp
        period=12,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        time_budget=1,  # time budget in seconds
        estimator_list=["arima", "sarimax"],
        log_file_name="test/ts_forecast.log",
    )
    print(automl.predict(X_train[72:]))
    # an alternative way to specify predict steps for arima/sarimax
    print(automl.predict(12))
|
||||
|
||||
|
||||
def test_numpy_large():
    """ts_forecast on a large (70k rows) minute-frequency series.

    Verifies that raw .values arrays (timestamps and a single target column)
    are accepted for a large input.
    """
    # np, pd, and AutoML are already imported at module level — the original
    # function-local re-imports were redundant and have been removed.
    X_train = pd.date_range("2017-01-01", periods=70000, freq="T")
    y_train = pd.DataFrame(np.random.randint(6500, 7500, 70000))
    automl = AutoML()
    automl.fit(
        X_train=X_train[:-10].values,  # a single column of timestamp
        y_train=y_train[:-10].values,  # value for each timestamp
        period=10,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        time_budget=10,  # time budget in seconds
    )
|
||||
|
||||
|
||||
def load_multi_dataset():
    """Download and preprocess the NYC energy-consumption multivariate dataset.

    Returns a daily-resampled DataFrame (reset index, `timeStamp` column)
    with `temp`/`precip` gaps forward-filled and the last two rows — which
    have NaN `demand` — dropped. Requires network access.
    """
    import pandas as pd

    # pd.set_option("display.max_rows", None, "display.max_columns", None)
    df = pd.read_csv(
        "https://raw.githubusercontent.com/srivatsan88/YouTubeLI/master/dataset/nyc_energy_consumption.csv"
    )
    # preprocessing data
    df["timeStamp"] = pd.to_datetime(df["timeStamp"])
    df = df.set_index("timeStamp")
    df = df.resample("D").mean()
    # Fix: .ffill() replaces the deprecated fillna(method="ffill") and matches
    # the .bfill().ffill() style already used in test_forecast_automl above.
    df["temp"] = df["temp"].ffill()
    df["precip"] = df["precip"].ffill()
    df = df[:-2]  # last two rows are NaN for 'demand' column so remove them
    df = df.reset_index()

    return df
|
||||
|
||||
|
||||
def test_multivariate_forecast_num(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
    """Multivariate ts_forecast on the NYC energy dataset (numeric regressors).

    Holds out the last 180 days; the test frame must carry the regressor
    columns (`temp`, `precip`) alongside the timestamps. Falls back to the
    given estimator list when prophet is missing.

    NOTE(review): the mutable list default argument is shared across calls;
    harmless here because it is never mutated.
    """
    df = load_multi_dataset()
    # split data into train and test
    time_horizon = 180
    num_samples = df.shape[0]
    split_idx = num_samples - time_horizon
    train_df = df[:split_idx]
    test_df = df[split_idx:]
    # test dataframe must contain values for the regressors / multivariate variables
    X_test = test_df[["timeStamp", "temp", "precip"]]
    y_test = test_df["demand"]
    # return
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast",  # task type
        "log_file_name": "test/energy_forecast_numerical.log",  # flaml log file
        "eval_method": "holdout",
        "log_type": "all",
        "label": "demand",
    }
    """The main flaml automl API"""
    try:
        import prophet

        automl.fit(dataframe=train_df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=train_df,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
    """ retrieve best config and best learner"""
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    """ compute different metric values on testing dataset"""
    from flaml.automl.ml import sklearn_metric_loss_score

    print("mape", "=", sklearn_metric_loss_score("mape", y_pred, y_test))
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # import matplotlib.pyplot as plt
    #
    # plt.figure()
    # plt.plot(X_test["timeStamp"], y_test, label="Actual Demand")
    # plt.plot(X_test["timeStamp"], y_pred, label="FLAML Forecast")
    # plt.xlabel("Date")
    # plt.ylabel("Energy Demand")
    # plt.legend()
    # plt.show()
|
||||
|
||||
|
||||
def load_multi_dataset_cat(time_horizon):
    """Variant of load_multi_dataset with engineered categorical regressors.

    Adds a `season` string column and a binary `above_monthly_avg` column,
    then splits the last `time_horizon` rows off as the test frame. The
    train frame keeps only timeStamp/demand plus the engineered features;
    the test frame additionally retains `temp` and `month`.
    """
    df = load_multi_dataset()

    df = df[["timeStamp", "demand", "temp"]]

    # feature engineering - use discrete values to denote different categories
    def season(date):
        # Map a timestamp to its (approximate) northern-hemisphere season.
        date = (date.month, date.day)
        spring = (3, 20)
        summer = (6, 21)
        fall = (9, 22)
        winter = (12, 21)
        if date < spring or date >= winter:
            return "winter"  # winter 0
        elif spring <= date < summer:
            return "spring"  # spring 1
        elif summer <= date < fall:
            return "summer"  # summer 2
        elif fall <= date < winter:
            return "fall"  # fall 3

    def get_monthly_avg(data):
        # NOTE(review): this mutates its input — it adds a "month" column to
        # the caller's df, which the later `del train_df["month"]` relies on.
        data["month"] = data["timeStamp"].dt.month
        data = data[["month", "temp"]].groupby("month")
        data = data.agg({"temp": "mean"})
        return data

    monthly_avg = get_monthly_avg(df).to_dict().get("temp")

    def above_monthly_avg(date, temp):
        # 1 if this day's temperature exceeds the month's average, else 0.
        month = date.month
        if temp > monthly_avg.get(month):
            return 1
        else:
            return 0

    df["season"] = df["timeStamp"].apply(season)
    df["above_monthly_avg"] = df.apply(lambda x: above_monthly_avg(x["timeStamp"], x["temp"]), axis=1)

    # split data into train and test
    num_samples = df.shape[0]
    split_idx = num_samples - time_horizon
    train_df = df[:split_idx]
    test_df = df[split_idx:]

    # NOTE(review): deleting columns on a slice of df triggers pandas'
    # chained-assignment warning; works today but is fragile.
    del train_df["temp"], train_df["month"]

    return train_df, test_df
|
||||
|
||||
|
||||
def test_multivariate_forecast_cat(budget=5, estimators_when_no_prophet=None):
    """Multivariate ts_forecast on energy-demand data with categorical regressors.

    Fits an AutoML forecaster (prophet when importable, otherwise the classical
    estimators), then reports holdout metrics and log history.

    Args:
        budget: Total AutoML running time in seconds.
        estimators_when_no_prophet: Estimator list used when prophet is not
            installed. Defaults to ["arima", "sarimax", "holt-winters"].
    """
    # Fix: avoid a mutable default argument; materialize the fallback here.
    if estimators_when_no_prophet is None:
        estimators_when_no_prophet = ["arima", "sarimax", "holt-winters"]
    time_horizon = 180
    train_df, test_df = load_multi_dataset_cat(time_horizon)
    # The test dataframe must contain values for the regressors / multivariate variables.
    X_test = test_df[["timeStamp", "season", "above_monthly_avg"]]
    y_test = test_df["demand"]
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast",  # task type
        "log_file_name": "test/energy_forecast_categorical.log",  # flaml log file
        "eval_method": "holdout",
        "log_type": "all",
        "label": "demand",
    }
    # The main flaml automl API: prefer prophet when it is importable.
    try:
        import prophet  # noqa: F401

        automl.fit(dataframe=train_df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=train_df,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
    # Retrieve best config and best learner.
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    # Pickle and save the automl object.
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    # Compute predictions of the testing dataset.
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    # Compute different metric values on the testing dataset.
    from flaml.automl.ml import sklearn_metric_loss_score

    for metric in ("mape", "rmse", "mse", "mae"):
        print(metric, "=", sklearn_metric_loss_score(metric, y_pred, y_test))
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # import matplotlib.pyplot as plt
    #
    # plt.figure()
    # plt.plot(X_test["timeStamp"], y_test, label="Actual Demand")
    # plt.plot(X_test["timeStamp"], y_pred, label="FLAML Forecast")
    # plt.xlabel("Date")
    # plt.ylabel("Energy Demand")
    # plt.legend()
    # plt.show()
|
||||
|
||||
|
||||
def test_forecast_classification(budget=5):
    """Time-series classification forecasting on the hcrystalball sales data.

    Args:
        budget: Total AutoML running time in seconds.
    """
    from hcrystalball.utils import get_sales_data

    time_horizon = 30
    df = get_sales_data(n_dates=180, n_assortments=1, n_states=1, n_stores=1)
    df = df[["Sales", "Open", "Promo", "Promo2"]]
    # feature engineering: binary label marking above-average sales
    import numpy as np

    df["above_mean_sales"] = np.where(df["Sales"] > df["Sales"].mean(), 1, 0)
    df.reset_index(inplace=True)
    train_df, test_df = df[:-time_horizon], df[-time_horizon:]
    feature_cols = ["Date", "Open", "Promo", "Promo2"]
    X_train, X_test = train_df[feature_cols], test_df[feature_cols]
    y_train, y_test = train_df["above_mean_sales"], test_df["above_mean_sales"]
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "accuracy",  # primary metric
        "task": "ts_forecast_classification",  # task type
        "log_file_name": "test/sales_classification_forecast.log",  # flaml log file
        "eval_method": "holdout",
    }
    # The main flaml automl API.
    automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)
    # Retrieve best config and best learner.
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    # Pickle and save the automl object.
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    # Compute predictions and metrics on the testing dataset.
    y_pred = automl.predict(X_test)
    from flaml.automl.ml import sklearn_metric_loss_score

    print(y_test)
    print(y_pred)
    print("accuracy", "=", 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test))
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
    # import matplotlib.pyplot as plt
    #
    # plt.title("Learning Curve")
    # plt.xlabel("Wall Clock Time (s)")
    # plt.ylabel("Validation Accuracy")
    # plt.scatter(time_history, 1 - np.array(valid_loss_history))
    # plt.step(time_history, 1 - np.array(best_valid_loss_history), where="post")
    # plt.show()
|
||||
|
||||
|
||||
def get_stalliion_data():
    """Load and feature-engineer the stallion demand dataset for panel forecasting.

    Returns:
        (data, special_days): the prepared dataframe and the list of one-hot
        holiday columns that were re-encoded into categoricals.
    """
    from pytorch_forecasting.data.examples import get_stallion_data

    data = get_stallion_data()
    # add time index - for datasets with no missing values, FLAML automates this
    data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
    data["time_idx"] -= data["time_idx"].min()
    # additional features; categories have to be strings
    data["month"] = data.date.dt.month.astype(str).astype("category")
    data["log_volume"] = np.log(data.volume + 1e-8)
    data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean")
    data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean")
    # encode the one-hot special-day columns back into a single categorical each
    special_days = [
        "easter_day",
        "good_friday",
        "new_year",
        "christmas",
        "labor_day",
        "independence_day",
        "revolution_day_memorial",
        "regional_games",
        "beer_capital",
        "music_fest",
    ]
    data[special_days] = data[special_days].apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
    return data, special_days
|
||||
|
||||
|
||||
def test_forecast_panel(budget=5):
    """Panel (grouped) time-series forecasting with the TFT estimator.

    Args:
        budget: Total AutoML running time in seconds.
    """
    data, special_days = get_stalliion_data()
    time_horizon = 6  # predict six months
    training_cutoff = data["time_idx"].max() - time_horizon
    data["time_idx"] = data["time_idx"].astype("int")
    # move the timestamp column to the front
    ts_col = data.pop("date")
    data.insert(0, "date", ts_col)
    # FLAML assumes input is not sorted, but we sort here for comparison purposes with y_test
    data = data.sort_values(["agency", "sku", "date"])
    X_train = data[lambda x: x.time_idx <= training_cutoff]
    X_test = data[lambda x: x.time_idx > training_cutoff]
    y_train = X_train.pop("volume")
    y_test = X_test.pop("volume")
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast_panel",  # task type
        "log_file_name": "test/stallion_forecast.log",  # flaml log file
        "eval_method": "holdout",
    }
    fit_kwargs_by_estimator = {
        "tft": {
            "max_encoder_length": 24,
            "static_categoricals": ["agency", "sku"],
            "static_reals": ["avg_population_2017", "avg_yearly_household_income_2017"],
            "time_varying_known_categoricals": ["special_days", "month"],
            # a group of categorical variables can be treated as one variable
            "variable_groups": {"special_days": special_days},
            "time_varying_known_reals": [
                "time_idx",
                "price_regular",
                "discount_in_percent",
            ],
            "time_varying_unknown_categoricals": [],
            "time_varying_unknown_reals": [
                "volume",  # target column
                "log_volume",
                "industry_volume",
                "soda_volume",
                "avg_max_temp",
                "avg_volume_by_agency",
                "avg_volume_by_sku",
            ],
            "batch_size": 256,
            "max_epochs": 1,
            "gpu_per_trial": -1,
        }
    }
    # The main flaml automl API.
    automl.fit(
        X_train=X_train,
        y_train=y_train,
        **settings,
        period=time_horizon,
        group_ids=["agency", "sku"],
        fit_kwargs_by_estimator=fit_kwargs_by_estimator,
    )
    # Retrieve best config and best learner.
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    # Pickle and save the automl object.
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    # Compute predictions of the testing dataset.
    y_pred = automl.predict(X_test)
    from flaml.automl.ml import sklearn_metric_loss_score

    print(y_test)
    print(y_pred)
    print("mape", "=", sklearn_metric_loss_score("mape", y_pred, y_test))

    def smape(y_pred, y_test):
        """Symmetric MAPE in percent, rounded to two decimals."""
        import numpy as np

        y_test, y_pred = np.array(y_test), np.array(y_pred)
        return round(
            np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2)) * 100,
            2,
        )

    print("smape", "=", smape(y_pred, y_test))
    # TODO: compute prediction for a specific time series
    # """compute prediction for a specific time series"""
    # a01_sku01_preds = automl.predict(X_test[(X_test["agency"] == "Agency_01") & (X_test["sku"] == "SKU_01")])
    # print("Agency01 SKU_01 predictions: ", a01_sku01_preds)
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
|
||||
|
||||
|
||||
def test_cv_step():
    """Cross-validated ts_forecast with a custom cv_step_size on a synthetic sine series.

    Asserts the prediction over the validation horizon contains no NaNs.
    """
    n = 300
    time_col = "date"
    df = pd.DataFrame(
        {
            time_col: pd.date_range(start="1/1/2001", periods=n, freq="D"),
            "y": np.sin(np.linspace(start=0, stop=200, num=n)),
        }
    )

    def split_by_date(df: pd.DataFrame, dt: datetime.date):
        # normalize to midnight so the comparison includes the cutoff day
        dt = datetime.datetime(dt.year, dt.month, dt.day)
        return df[df[time_col] <= dt], df[df[time_col] > dt]

    horizon = 60
    data_end = df.date.max()
    train_end = data_end - datetime.timedelta(days=horizon)

    train_df, val_df = split_by_date(df, train_end)
    # Note: removed a redundant local `from flaml import AutoML` (the module
    # already imports AutoML) and an unused `import matplotlib.pyplot` that was
    # only needed by the commented-out plotting below.

    tgts = ["y"]
    # tgt = "SERIES_SANCTIONS"

    preds = {}
    for tgt in tgts:
        features = []  # [c for c in train_df.columns if "SERIES" not in c and c != time_col]

        automl = AutoML(time_budget=5, metric="mae", task="ts_forecast", eval_method="cv")

        automl.fit(
            dataframe=train_df[[time_col] + features + [tgt]],
            label=tgt,
            period=horizon,
            time_col=time_col,
            verbose=4,
            n_splits=5,
            cv_step_size=5,
        )

        pred = automl.predict(val_df)

        if isinstance(pred, pd.DataFrame):
            pred = pred[tgt]
        assert not np.isnan(pred.sum())

        preds[tgt] = pred
        # plt.figure(figsize=(16, 8), dpi=80)
        # plt.plot(df[time_col], df[tgt])
        # plt.plot(val_df[time_col], pred)
        # plt.legend(["actual", "predicted"])
        # plt.show()

    print("yahoo!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run a single scenario by hand; uncomment others as needed.
    # test_forecast_automl(60)
    # test_multivariate_forecast_num(5)
    # test_multivariate_forecast_cat(5)
    # test_numpy()
    # test_forecast_classification(5)
    test_forecast_panel(5)
    # test_cv_step()
|
||||
@@ -1,64 +0,0 @@
|
||||
import pytest
|
||||
from pandas import DataFrame
|
||||
from sklearn.datasets import load_iris
|
||||
import mlflow
|
||||
import mlflow.entities
|
||||
from flaml import AutoML
|
||||
|
||||
|
||||
class TestMLFlowLoggingParam:
    """Checks for the mlflow_logging flag on AutoML's constructor and fit()."""

    def test_should_start_new_run_by_default(self, automl_settings):
        """Without any flag, fit() should log child runs under the active run."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(), automl_settings)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) >= 1, "Expected at least 1 child run, got {}".format(len(child_runs))

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_init(self, automl_settings):
        """mlflow_logging=False on the constructor disables child runs."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(mlflow_logging=False), automl_settings)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) == 0, "Expected 0 child runs, got {}".format(len(child_runs))

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_fit(self, automl_settings):
        """mlflow_logging=False passed to fit() disables child runs."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(), automl_settings, mlflow_logging=False)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) == 0, "Expected 0 child runs, got {}".format(len(child_runs))

    def test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(self, automl_settings):
        """mlflow_logging=True on fit() overrides a False set in the constructor."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(mlflow_logging=False), automl_settings, mlflow_logging=True)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) >= 1, "Expected at least 1 child run, got {}".format(len(child_runs))

    @staticmethod
    def _fit_iris(automl, automl_settings, **fit_kwargs):
        # Train on iris inside the currently active mlflow run.
        X_train, y_train = load_iris(return_X_y=True)
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs, **automl_settings)

    @staticmethod
    def _get_child_runs(parent_run: mlflow.entities.Run) -> DataFrame:
        # All runs whose mlflow parent tag points at parent_run.
        experiment_id = parent_run.info.experiment_id
        return mlflow.search_runs(
            [experiment_id], filter_string="tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
        )

    @pytest.fixture(scope="class")
    def automl_settings(self):
        """Shared, deliberately tiny AutoML configuration."""
        return {
            "time_budget": 2,  # in seconds
            "metric": "accuracy",
            "task": "classification",
            "log_file_name": "iris.log",
        }
|
||||
@@ -1,534 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import load_iris, load_wine
|
||||
from flaml import AutoML
|
||||
from flaml.automl.data import get_output_from_log
|
||||
from flaml.automl.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
|
||||
from flaml import tune
|
||||
from flaml.automl.training_log import training_log_reader
|
||||
|
||||
|
||||
class MyRegularizedGreedyForest(SKLearnEstimator):
    """Custom FLAML estimator wrapping rgf-python's Regularized Greedy Forest."""

    def __init__(self, task="binary", **config):
        super().__init__(task, **config)

        if isinstance(task, str):
            from flaml.automl.task.factory import task_factory

            task = task_factory(task)

        # Import the backend lazily so rgf is only required when actually used.
        if task.is_classification():
            from rgf.sklearn import RGFClassifier

            self.estimator_class = RGFClassifier
        else:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor

    @classmethod
    def search_space(cls, data_size, task):
        """Hyperparameter search space, with tree sizes scaled to the data size."""
        n_rows = data_size[0]
        return {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=n_rows),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=n_rows),
                "init_value": 1,
            },
            "n_tree_search": {
                "domain": tune.lograndint(lower=1, upper=32768),
                "init_value": 1,
            },
            "opt_interval": {
                "domain": tune.lograndint(lower=1, upper=10000),
                "init_value": 100,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }

    @classmethod
    def size(cls, config):
        """Approximate model size in bytes for the given config."""
        leaves = int(round(config.get("max_leaf", 1)))
        trees = int(round(config.get("n_iter", 1)))
        return (leaves * 3 + (leaves - 1) * 4 + 1.0) * trees * 8

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative training cost compared with LightGBM."""
        return 1.0
|
||||
|
||||
|
||||
class MyLargeXGB(XGBoostSklearnEstimator):
    """XGBoost estimator with an intentionally huge search space (stress test)."""

    @classmethod
    def search_space(cls, **params):
        def dim(upper):
            # Log-uniform integers in [4, upper]; init at the expensive end,
            # low-cost starting point at the cheap end.
            return {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": upper,
                "low_cost_init_value": 4,
            }

        return {
            "n_estimators": dim(32768),
            "max_leaves": dim(3276),
        }
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
    """LightGBM estimator with an intentionally huge search space (stress test)."""

    @classmethod
    def search_space(cls, **params):
        def dim(upper):
            # Log-uniform integers in [4, upper]; init at the expensive end,
            # low-cost starting point at the cheap end.
            return {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": upper,
                "low_cost_init_value": 4,
            }

        return {
            "n_estimators": dim(32768),
            "num_leaves": dim(3276),
        }
|
||||
|
||||
|
||||
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    """Custom FLAML metric: validation log-loss regularized by the train/val gap.

    Returns:
        (loss, info): the objective ``val_loss * (1 + alpha) - alpha * train_loss``
        and a dict with the raw losses plus per-sample prediction time.
    """
    import time

    from sklearn.metrics import log_loss

    tic = time.time()
    val_proba = estimator.predict_proba(X_val)
    pred_time = (time.time() - tic) / len(X_val)
    val_loss = log_loss(y_val, val_proba, labels=labels, sample_weight=weight_val)
    train_proba = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, train_proba, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    objective = val_loss * (1 + alpha) - alpha * train_loss
    return objective, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }
|
||||
|
||||
|
||||
class TestMultiClass(unittest.TestCase):
|
||||
def test_custom_learner(self):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
|
||||
X_train, y_train = load_wine(return_X_y=True)
|
||||
settings = {
|
||||
"time_budget": 8, # total running time in seconds
|
||||
"estimator_list": ["RGF", "lgbm", "rf", "xgboost"],
|
||||
"task": "classification", # task type
|
||||
"sample": True, # whether to subsample training data
|
||||
"log_file_name": "test/wine.log",
|
||||
"log_training_metric": True, # whether to log training metric
|
||||
"n_jobs": 1,
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
# print the best model found for RGF
|
||||
print(automl.best_model_for_estimator("RGF"))
|
||||
|
||||
MyRegularizedGreedyForest.search_space = lambda data_size, task: {}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
try:
|
||||
import ray
|
||||
|
||||
del settings["time_budget"]
|
||||
settings["max_iter"] = 5
|
||||
# test the "_choice_" issue when using ray
|
||||
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=2, **settings)
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
def test_ensemble(self):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
|
||||
X_train, y_train = load_wine(return_X_y=True)
|
||||
settings = {
|
||||
"time_budget": 5, # total running time in seconds
|
||||
"estimator_list": ["rf", "xgboost", "catboost"],
|
||||
"task": "classification", # task type
|
||||
"sample": True, # whether to subsample training data
|
||||
"log_file_name": "test/wine.log",
|
||||
"log_training_metric": True, # whether to log training metric
|
||||
"ensemble": {
|
||||
"final_estimator": MyRegularizedGreedyForest(),
|
||||
"passthrough": False,
|
||||
},
|
||||
"n_jobs": 1,
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
def test_dataframe(self):
|
||||
self.test_classification(True)
|
||||
|
||||
def test_custom_metric(self):
|
||||
df, y = load_iris(return_X_y=True, as_frame=True)
|
||||
df["label"] = y
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"dataframe": df,
|
||||
"label": "label",
|
||||
"time_budget": 5,
|
||||
"eval_method": "cv",
|
||||
"metric": custom_metric,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris_custom.log",
|
||||
"log_training_metric": True,
|
||||
"log_type": "all",
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"sample_weight": np.ones(len(y)),
|
||||
"pred_time_limit": 1e-5,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(**settings)
|
||||
print(automl.classes_)
|
||||
print(automl.model)
|
||||
print(automl.config_history)
|
||||
print(automl.best_model_for_estimator("rf"))
|
||||
print(automl.best_iteration)
|
||||
print(automl.best_estimator)
|
||||
automl = AutoML()
|
||||
estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=0, task="multiclass")
|
||||
print(estimator)
|
||||
(
|
||||
time_history,
|
||||
best_valid_loss_history,
|
||||
valid_loss_history,
|
||||
config_history,
|
||||
metric_history,
|
||||
) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
|
||||
print(metric_history)
|
||||
try:
|
||||
import ray
|
||||
|
||||
df = ray.put(df)
|
||||
settings["dataframe"] = df
|
||||
settings["use_ray"] = True
|
||||
del settings["time_budget"]
|
||||
settings["max_iter"] = 2
|
||||
automl.fit(**settings)
|
||||
estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=1, task="multiclass")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
def test_classification(self, as_frame=False):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 4,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
|
||||
if as_frame:
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.classes_)
|
||||
print(automl_experiment.predict(X_train)[:5])
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.best_model_for_estimator("catboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
del automl_settings["metric"]
|
||||
del automl_settings["model_history"]
|
||||
del automl_settings["log_training_metric"]
|
||||
automl_experiment = AutoML(task="classification")
|
||||
duration = automl_experiment.retrain_from_log(
|
||||
log_file_name=automl_settings["log_file_name"],
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
train_full=True,
|
||||
record_id=0,
|
||||
)
|
||||
print(duration)
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.predict_proba(X_train)[:5])
|
||||
|
||||
def test_micro_macro_f1(self):
|
||||
automl_experiment_micro = AutoML()
|
||||
automl_experiment_macro = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/micro_macro_f1.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment_micro.fit(X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings)
|
||||
automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings)
|
||||
estimator = automl_experiment_macro.model
|
||||
y_pred = estimator.predict(X_train)
|
||||
y_pred_proba = estimator.predict_proba(X_train)
|
||||
from flaml.automl.ml import norm_confusion_matrix, multi_class_curves
|
||||
|
||||
print(norm_confusion_matrix(y_train, y_pred))
|
||||
from sklearn.metrics import roc_curve, precision_recall_curve
|
||||
|
||||
print(multi_class_curves(y_train, y_pred_proba, roc_curve))
|
||||
print(multi_class_curves(y_train, y_pred_proba, precision_recall_curve))
|
||||
|
||||
def test_roc_auc_ovr(self):
|
||||
automl_experiment = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovr",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_ovr.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"sample_weight": np.ones(len(y_train)),
|
||||
"eval_method": "holdout",
|
||||
"model_history": True,
|
||||
}
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_roc_auc_ovo(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovo",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_ovo.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_roc_auc_ovr_weighted(self):
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovr_weighted",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_weighted.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
def test_roc_auc_ovo_weighted(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovo_weighted",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_weighted.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_sparse_matrix_classification(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"metric": "auto",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
"split_type": "uniform",
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train = scipy.sparse.random(1554, 21, dtype=int)
|
||||
y_train = np.random.randint(3, size=1554)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.classes_)
|
||||
print(automl_experiment.predict_proba(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.best_model_for_estimator("extra_tree"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
def _test_memory_limit(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
|
||||
automl_settings = {
|
||||
"time_budget": -1,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/classification_oom.log",
|
||||
"estimator_list": ["large_lgbm"],
|
||||
"log_type": "all",
|
||||
"hpo_method": "random",
|
||||
"free_mem_ratio": 0.2,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=True)
|
||||
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **automl_settings)
|
||||
print(automl_experiment.model)
|
||||
|
||||
def test_time_limit(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
|
||||
automl_experiment.add_learner(learner_name="large_xgb", learner_class=MyLargeXGB)
|
||||
automl_settings = {
|
||||
"time_budget": 0.5,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/classification_timeout.log",
|
||||
"estimator_list": ["catboost"],
|
||||
"log_type": "all",
|
||||
"hpo_method": "random",
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.model.params)
|
||||
automl_settings["estimator_list"] = ["large_xgb"]
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.model)
|
||||
automl_settings["estimator_list"] = ["large_lgbm"]
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.model)
|
||||
|
||||
def test_fit_w_starting_point(self, as_frame=True, n_concurrent_trials=1):
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"max_iter": 3,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
|
||||
if as_frame:
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
|
||||
automl_val_accuracy = 1.0 - automl.best_loss
|
||||
print("Best ML leaner:", automl.best_estimator)
|
||||
print("Best hyperparmeter config:", automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
|
||||
|
||||
starting_points = automl.best_config_per_estimator
|
||||
print("starting_points", starting_points)
|
||||
print("loss of the starting_points", automl.best_loss_per_estimator)
|
||||
settings_resume = {
|
||||
"time_budget": 2,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris_resume.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"log_type": "all",
|
||||
"starting_points": starting_points,
|
||||
}
|
||||
new_automl = AutoML()
|
||||
new_automl.fit(X_train=X_train, y_train=y_train, **settings_resume)
|
||||
|
||||
new_automl_val_accuracy = 1.0 - new_automl.best_loss
|
||||
print("Best ML leaner:", new_automl.best_estimator)
|
||||
print("Best hyperparmeter config:", new_automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
|
||||
|
||||
def test_fit_w_starting_point_2(self, as_frame=True):
    """Re-run both warm-start tests with 2 concurrent trials when ray is available.

    The whole body sits in one try so that an ImportError raised either by
    ``import ray`` or from inside the re-run tests skips silently.
    """
    try:
        import ray  # probe for availability; unused beyond the import itself

        self.test_fit_w_starting_points_list(as_frame, 2)
        self.test_fit_w_starting_point(as_frame, 2)
    except ImportError:
        pass
|
||||
|
||||
def test_fit_w_starting_points_list(self, as_frame=True, n_concurrent_trials=1):
    """Fit once, then resume with per-learner *lists* of starting points read
    back from the first run's training log (one config per logged record)."""
    automl = AutoML()
    settings = {
        "max_iter": 3,
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "test/iris.log",
        "log_training_metric": True,
        "n_jobs": 1,
        "model_history": True,
    }
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    if as_frame:
        # test drop column
        X_train.columns = range(X_train.shape[1])
        X_train[X_train.shape[1]] = np.zeros(len(y_train))
    automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
    automl_val_accuracy = 1.0 - automl.best_loss
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))

    # Build {learner: [config, ...]} from the log; each config is tagged with a
    # growing FLAML_sample_size (1000, 2000, ...) to exercise sample-size resume.
    starting_points = {}
    log_file_name = settings["log_file_name"]
    with training_log_reader(log_file_name) as reader:
        sample_size = 1000
        for record in reader.records():
            config = record.config
            config["FLAML_sample_size"] = sample_size
            sample_size += 1000
            learner = record.learner
            if learner not in starting_points:
                starting_points[learner] = []
            starting_points[learner].append(config)
    # one iteration per starting point so every seed config gets evaluated
    max_iter = sum([len(s) for k, s in starting_points.items()])
    settings_resume = {
        "time_budget": 2,
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "test/iris_resume_all.log",
        "log_training_metric": True,
        "n_jobs": 1,
        "max_iter": max_iter,
        "model_history": True,
        "log_type": "all",
        "starting_points": starting_points,
        "append_log": True,
    }
    new_automl = AutoML()
    new_automl.fit(X_train=X_train, y_train=y_train, **settings_resume)

    new_automl_val_accuracy = 1.0 - new_automl.best_loss
    # print('Best ML leaner:', new_automl.best_estimator)
    # print('Best hyperparmeter config:', new_automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
    # print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))
|
||||
|
||||
|
||||
# Run the unittest test cases in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -1,45 +0,0 @@
|
||||
import nbformat
|
||||
from nbconvert.preprocessors import ExecutePreprocessor
|
||||
from nbconvert.preprocessors import CellExecutionError
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
here = os.path.abspath(os.path.dirname(__file__))  # directory containing this test file
|
||||
|
||||
|
||||
def run_notebook(input_nb, output_nb="executed_notebook.ipynb", save=False):
    """Execute a notebook from the repo's ``notebook/`` directory in-process.

    Args:
        input_nb: Notebook file name relative to ``<repo>/notebook/``.
        output_nb: File name (under this test directory) for the executed copy.
        save: When True, write the (possibly partially) executed notebook out
            even if execution raised, so the failure state can be inspected.

    Raises:
        CellExecutionError (from nbconvert) when a cell fails; the error
        propagates after the optional save in ``finally``.
    """
    # Fix: initialize nb before the try so the finally-block save cannot hit a
    # NameError (masking the real exception) when opening/reading the notebook fails.
    nb = None
    try:
        file_path = os.path.join(here, os.pardir, os.pardir, "notebook", input_nb)
        # notebooks are JSON; read them as UTF-8 explicitly
        with open(file_path, encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=4)
        ep = ExecutePreprocessor(timeout=3600, kernel_name="python3")
        ep.preprocess(nb, {"metadata": {"path": here}})
    finally:
        # Save whatever was executed (if anything was read at all), even on failure.
        if save and nb is not None:
            with open(os.path.join(here, output_nb), "w", encoding="utf-8") as f:
                nbformat.write(nb, f)
|
||||
|
||||
|
||||
# Heavy end-to-end notebook run; restricted to one CI leg (macOS + Python 3.8).
@pytest.mark.skipif(
    sys.platform != "darwin" or "3.8" not in sys.version,
    reason="Only run on macOS with Python 3.8",
)
def test_automl_classification(save=False):
    """Execute the automl_classification example notebook end to end."""
    run_notebook("automl_classification.ipynb", save=save)
|
||||
|
||||
|
||||
# Heavy end-to-end notebook run; restricted to one CI leg (macOS + Python 3.7).
@pytest.mark.skipif(
    sys.platform != "darwin" or "3.7" not in sys.version,
    reason="Only run on macOS with Python 3.7",
)
def test_zeroshot_lightgbm(save=False):
    """Execute the zeroshot_lightgbm example notebook end to end."""
    run_notebook("zeroshot_lightgbm.ipynb", save=save)
|
||||
|
||||
|
||||
# Manual entry point: executes one notebook locally, bypassing the CI skip marks.
if __name__ == "__main__":
    # test_automl_classification(save=True)
    test_zeroshot_lightgbm(save=True)
|
||||
@@ -1,181 +0,0 @@
|
||||
import sys
|
||||
from openml.exceptions import OpenMLServerException
|
||||
from requests.exceptions import ChunkedEncodingError, SSLError
|
||||
from minio.error import ServerError
|
||||
|
||||
|
||||
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
    """End-to-end AutoML classification run on the OpenML airlines dataset (id 1169).

    Args:
        budget: Time budget in seconds; bumped to a performance-check run
            (no time budget, max_iter=60) on the macOS/3.9/dataframe CI leg.
        dataset_format: "dataframe" or "array" passed to load_openml_dataset.
        hpo_method: Search algorithm name forwarded to AutoML, or None.
    """
    from flaml.automl.data import load_openml_dataset
    import urllib3

    performance_check_budget = 600
    if (
        sys.platform == "darwin"
        and budget < performance_check_budget
        and dataset_format == "dataframe"
        and "3.9" in sys.version
    ):
        budget = performance_check_budget  # revise the budget on macos
    if budget == performance_check_budget:
        # performance-check mode: unlimited time, fixed iteration count
        budget = None
        max_iter = 60
    else:
        max_iter = None
    try:
        X_train, X_test, y_train, y_test = load_openml_dataset(
            dataset_id=1169, data_dir="test/", dataset_format=dataset_format
        )
    # NOTE(review): the trailing bare Exception makes the specific entries
    # redundant — this deliberately skips the test on ANY download failure.
    except (
        OpenMLServerException,
        ChunkedEncodingError,
        urllib3.exceptions.ReadTimeoutError,
        SSLError,
        ServerError,
        Exception,
    ) as e:
        print(e)
        return
    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "max_iter": max_iter,  # maximum number of iterations
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "task": "classification",  # task type
        "log_file_name": "airlines_experiment.log",  # flaml log file
        "seed": 7654321,  # random seed
        "hpo_method": hpo_method,
        "log_type": "all",
        "estimator_list": [
            "lgbm",
            "xgboost",
            "xgb_limitdepth",
            "rf",
            "extra_tree",
        ],  # list of ML learners
        "eval_method": "holdout",
    }
    """The main flaml automl API"""
    automl.fit(X_train=X_train, y_train=y_train, **settings)
    """ retrieve best config and best learner """
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
    print(automl.model.estimator)
    print(automl.best_config_per_estimator)
    print("time taken to find best model:", automl.time_to_find_best_model)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    y_pred_proba = automl.predict_proba(X_test)[:, 1]
    """ compute different metric values on testing dataset """
    from flaml.automl.ml import sklearn_metric_loss_score

    accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
    print("accuracy", "=", accuracy)
    print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
    print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
    if budget is None:
        # only enforce accuracy in the long performance-check run
        assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
    for config in config_history:
        print(config)
    # smoke-check a handful of public AutoML attributes
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
    print(automl.feature_names_in_)
    print(automl.feature_importances_)
    if budget is not None:
        # short-budget runs additionally exercise the ensemble code path
        automl.fit(X_train=X_train, y_train=y_train, ensemble=True, **settings)
|
||||
|
||||
|
||||
def test_automl_array():
    """Same end-to-end run as test_automl, but with array-format data and BlendSearch."""
    test_automl(budget=5, dataset_format="array", hpo_method="bs")
|
||||
|
||||
|
||||
def _test_nobudget():
    """Manual-only performance run (underscore prefix hides it from pytest collection)."""
    # needs large RAM to run this test; budget=-1 triggers the no-time-budget path
    test_automl(-1)
|
||||
|
||||
|
||||
def test_mlflow():
    """AutoML + mlflow integration: log the fitted object as an sklearn model,
    reload it via pyfunc/sklearn flavors, and repeat with use_ray=True."""
    # subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
    import mlflow
    from flaml.automl.data import load_openml_task

    try:
        X_train, X_test, y_train, y_test = load_openml_task(task_id=7592, data_dir="test/")
    except (OpenMLServerException, ChunkedEncodingError, SSLError, ServerError, Exception) as e:
        # best-effort: skip on any download failure (bare Exception is deliberate)
        print(e)
        return
    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": 5,  # total running time in seconds
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "estimator_list": ["lgbm", "rf", "xgboost"],  # list of ML learners
        "task": "classification",  # task type
        "sample": False,  # whether to subsample training data
        "log_file_name": "adult.log",  # flaml log file
        "learner_selector": "roundrobin",
    }
    mlflow.set_experiment("flaml")
    with mlflow.start_run() as run:
        automl.fit(X_train=X_train, y_train=y_train, **settings)
        mlflow.sklearn.log_model(automl, "automl")
    # round-trip through the generic pyfunc flavor
    loaded_model = mlflow.pyfunc.load_model(f"{run.info.artifact_uri}/automl")
    print(loaded_model.predict(X_test))
    # force the memory constraint to 0 and probe the trainable directly
    automl._mem_thres = 0
    print(automl.trainable(automl.points_to_evaluate[0]))

    settings["use_ray"] = True
    try:
        with mlflow.start_run() as run:
            automl.fit(X_train=X_train, y_train=y_train, **settings)
            mlflow.sklearn.log_model(automl, "automl")
        automl = mlflow.sklearn.load_model(f"{run.info.artifact_uri}/automl")
        print(automl.predict_proba(X_test))
    except ImportError:
        # ray not installed: skip the parallel half
        pass
|
||||
|
||||
|
||||
def test_mlflow_iris():
    """Smoke test: fit AutoML on iris inside an active mlflow run."""
    from sklearn.datasets import load_iris
    import mlflow
    from flaml import AutoML

    with mlflow.start_run():
        learner = AutoML()
        config = dict(
            time_budget=2,  # in seconds
            metric="accuracy",
            task="classification",
            log_file_name="iris.log",
        )
        X_train, y_train = load_iris(return_X_y=True)
        learner.fit(X_train=X_train, y_train=y_train, **config)

    # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
|
||||
|
||||
|
||||
# Manual entry point: run the long performance-check variant locally.
if __name__ == "__main__":
    test_automl(600)
|
||||
@@ -1,118 +0,0 @@
|
||||
from flaml.tune.space import unflatten_hierarchical
|
||||
from flaml import AutoML
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
import os
|
||||
import unittest
|
||||
import logging
|
||||
import tempfile
|
||||
import io
|
||||
|
||||
|
||||
class TestLogging(unittest.TestCase):
    """Checks that FLAML's logger emits output during a run, and exercises the
    lower-level tune/BlendSearch API with the state of a fitted AutoML object."""

    def test_logging_level(self):
        from flaml import logger, logger_formatter

        with tempfile.TemporaryDirectory() as d:
            training_log = os.path.join(d, "training.log")

            # Configure logging for the FLAML logger
            # and add a handler that outputs to a buffer.
            logger.setLevel(logging.INFO)
            buf = io.StringIO()
            ch = logging.StreamHandler(buf)
            ch.setFormatter(logger_formatter)
            logger.addHandler(ch)

            # Run a simple job.
            automl = AutoML()
            automl_settings = {
                "time_budget": 1,
                "metric": "rmse",
                "task": "regression",
                "log_file_name": training_log,
                "log_training_metric": True,
                "n_jobs": 1,
                "model_history": True,
                "keep_search_state": True,  # needed later for _state access
                "learner_selector": "roundrobin",
            }
            X_train, y_train = fetch_california_housing(return_X_y=True)
            n = len(y_train) >> 1  # half for training, half for validation
            # NOTE(review): called BEFORE fit — presumably exercises the
            # unfitted accessors; confirm these are expected to be no-ops/None.
            print(automl.model, automl.classes_, automl.predict(X_train))
            automl.fit(
                X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings
            )
            # log search-related state so the buffer gets populated
            logger.info(automl.search_space)
            logger.info(automl.low_cost_partial_config)
            logger.info(automl.points_to_evaluate)
            logger.info(automl.cat_hp_cost)
            import optuna as ot

            study = ot.create_study()
            from flaml.tune.space import define_by_run_func, add_cost_to_space

            # sample a config from the hierarchical space via optuna define-by-run
            sample = define_by_run_func(study.ask(), automl.search_space)
            logger.info(sample)
            logger.info(unflatten_hierarchical(sample, automl.search_space))
            add_cost_to_space(automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost)
            logger.info(automl.search_space["ml"].categories)
            if automl.best_config:
                # evaluate the best config once through the raw trainable
                config = automl.best_config.copy()
                config["learner"] = automl.best_estimator
                automl.trainable({"ml": config})
            from flaml import tune, BlendSearch
            from flaml.automl import size
            from functools import partial

            # rebuild a BlendSearch from the fitted AutoML's search state
            low_cost_partial_config = automl.low_cost_partial_config
            search_alg = BlendSearch(
                metric="val_loss",
                mode="min",
                space=automl.search_space,
                low_cost_partial_config=low_cost_partial_config,
                points_to_evaluate=automl.points_to_evaluate,
                cat_hp_cost=automl.cat_hp_cost,
                resource_attr=automl.resource_attr,
                min_resource=automl.min_resource,
                max_resource=automl.max_resource,
                config_constraints=[
                    (
                        partial(size, automl._state.learner_classes),
                        "<=",
                        automl._mem_thres,
                    )
                ],
                metric_constraints=automl.metric_constraints,
            )
            analysis = tune.run(
                automl.trainable,
                search_alg=search_alg,  # verbose=2,
                time_budget_s=1,
                num_samples=-1,
            )
            print(min(trial.last_result["val_loss"] for trial in analysis.trials))
            # train the last sampled config directly through the internal state
            config = analysis.trials[-1].last_result["config"]["ml"]
            automl._state._train_with_config(config.pop("learner"), config)
            for _ in range(3):
                print(
                    search_alg._ls.complete_config(
                        low_cost_partial_config,
                        search_alg._ls_bound_min,
                        search_alg._ls_bound_max,
                    )
                )
            # Check if the log buffer is populated.
            self.assertTrue(len(buf.getvalue()) > 0)

            import pickle

            # pickle round-trip must reproduce identical predictions
            with open("automl.pkl", "wb") as f:
                pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
            print(automl.__version__)
            pred1 = automl.predict(X_train)
            with open("automl.pkl", "rb") as f:
                automl = pickle.load(f)
            pred2 = automl.predict(X_train)
            delta = pred1 - pred2
            assert max(delta) == 0 and min(delta) == 0
            automl.save_best_config("test/housing.json")
|
||||
@@ -1,233 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import (
|
||||
fetch_california_housing,
|
||||
)
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.automl.data import get_output_from_log
|
||||
from flaml.automl.model import XGBoostEstimator
|
||||
|
||||
|
||||
def logregobj(preds, dtrain):
    """Custom XGBoost objective for logistic loss.

    Args:
        preds: Raw (pre-sigmoid) leaf scores.
        dtrain: Training matrix exposing ``get_label()``.

    Returns:
        Tuple ``(grad, hess)`` of the log-loss gradient and hessian
        with respect to the raw scores.
    """
    y = dtrain.get_label()
    p = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight via sigmoid
    return p - y, p * (1.0 - p)
|
||||
|
||||
|
||||
class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator with logregobj as the objective function."""

    def __init__(self, **config):
        # pin the custom callable objective; all other config flows through
        super().__init__(objective=logregobj, **config)
|
||||
|
||||
|
||||
class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator with 'reg:squarederror' as the objective function."""

    def __init__(self, **config):
        # pin the built-in string objective; all other config flows through
        super().__init__(objective="reg:squarederror", **config)
|
||||
|
||||
|
||||
class TestRegression(unittest.TestCase):
    """Regression-task coverage: holdout fitting, retrain-from-log, sparse
    inputs, parallel trials, and custom XGBoost learners."""

    def test_regression(self):
        """Fit on california housing with an explicit validation split, then
        exercise retrain_from_log in its three supported variants."""
        automl = AutoML()
        automl_settings = {
            "time_budget": 2,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        n = int(len(y_train) * 9 // 10)  # 90/10 train/validation split
        automl.fit(X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings)
        # providing X_val/y_val must force holdout evaluation
        assert automl._state.eval_method == "holdout"
        y_pred = automl.predict(X_train)
        print(y_pred)
        print(automl.model.estimator)
        # NOTE(review): sklearn's get_params takes a `deep` flag, so passing
        # "n_estimators" returns the FULL param dict — likely intended
        # get_params()["n_estimators"]; confirm before relying on n_iter below.
        n_iter = automl.model.estimator.get_params("n_estimators")
        print(automl.config_history)
        print(automl.best_model_for_estimator("xgboost"))
        print(automl.best_iteration)
        print(automl.best_estimator)
        print(get_output_from_log(automl_settings["log_file_name"], 1))
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=1,
        )
        # zero time budget: retrain path with no training time
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            time_budget=0,
        )
        # fresh object retrained purely from the log
        automl = AutoML()
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train[:n],
            y_train=y_train[:n],
            train_full=True,
        )
        print(automl.model.estimator)
        y_pred2 = automl.predict(X_train)
        # In some rare case, the last config is early stopped and it's the best config. But the logged config's n_estimator is not reduced.
        # NOTE(review): "n_estimator" (singular) looks like a typo for "n_estimators".
        assert n_iter != automl.model.estimator.get_params("n_estimator") or (y_pred == y_pred2).all()

    def test_sparse_matrix_regression(self):
        """Sparse CSR input with explicit validation data, then a second fit
        with custom_hp overrides restricted to catboost."""
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl = AutoML()
        settings = {
            "time_budget": 2,
            "metric": "mae",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "verbose": 0,
            "early_stop": True,
        }
        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
        # validation data must be kept intact in the search state
        assert automl._state.X_val.shape == X_val.shape
        print(automl.predict(X_train))
        print(automl.model)
        print(automl.config_history)
        print(automl.best_model_for_estimator("rf"))
        print(automl.best_iteration)
        print(automl.best_estimator)
        print(automl.best_config)
        print(automl.best_loss)
        print(automl.best_config_train_time)

        settings.update(
            {
                "estimator_list": ["catboost"],
                "keep_search_state": False,
                "model_history": False,
                "use_best_model": False,
                "time_budget": None,
                "max_iter": 2,
                "custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
            }
        )
        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)

    def test_parallel(self, hpo_method=None):
        """10 concurrent trials; silently skipped when ray is unavailable."""
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 10,
            "hpo_method": hpo_method,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("xgboost"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            # ray not installed
            return

    def test_sparse_matrix_regression_holdout(self):
        """Tiny sparse input (8 rows) with sample weights and forced holdout."""
        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "eval_method": "holdout",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("rf"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_regression_xgboost(self):
        """Register the two custom XGBoost learners (callable vs string
        objective) and restrict the search to them."""
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ["my_xgb1", "my_xgb2"],
            "task": "regression",
            "log_file_name": "test/regression_xgboost.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("my_xgb2"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)
|
||||
|
||||
|
||||
def test_multioutput():
    """AutoML as the base estimator inside sklearn's multi-output wrappers."""
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    from sklearn.multioutput import MultiOutputRegressor, RegressorChain

    # create three-target regression data and split into train and test
    X, y = make_regression(n_targets=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

    # train and predict with each wrapper in turn (one fresh AutoML per wrapper)
    for wrapper_cls in (MultiOutputRegressor, RegressorChain):
        wrapped = wrapper_cls(AutoML(task="regression", time_budget=1))
        wrapped.fit(X_train, y_train)
        print(wrapped.predict(X_test))
|
||||
|
||||
|
||||
# Run the unittest test cases in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -1,271 +0,0 @@
|
||||
from flaml import AutoML
|
||||
import pandas as pd
|
||||
from sklearn.datasets import fetch_california_housing, fetch_openml
|
||||
|
||||
|
||||
class TestScore:
    """Coverage for AutoML.score and AutoML.pickle across task types
    (forecast, classification, regression, rank, label-encoded classes)."""

    def test_forecast(self, budget=5):
        """Time-series forecast on the CO2 dataset; prefers prophet, falls
        back to arima/sarimax on ImportError or prophet-v1.1 RecursionError."""
        import pickle

        # using dataframe
        import statsmodels.api as sm

        data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
        data = data.fillna(data.bfill()).to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
        num_samples = data.shape[0]
        time_horizon = 12  # forecast the last 12 months
        split_idx = num_samples - time_horizon
        X_test = data[split_idx:]["ds"]
        y_test = data[split_idx:]["y"]

        df = data[:split_idx]
        automl = AutoML()
        settings = {
            "time_budget": budget,  # total running time in seconds
            "metric": "mape",  # primary metric
            "task": "ts_forecast",  # task type
            "log_file_name": "test/CO2_forecast.log",  # flaml log file
            "eval_method": "holdout",
            "label": "y",
        }
        """The main flaml automl API"""
        try:
            import prophet

            automl.fit(
                dataframe=df,
                estimator_list=["prophet", "arima", "sarimax"],
                **settings,
                period=time_horizon,
            )
            automl.score(X_test, y_test)
            automl.pickle("automl.pkl")
            with open("automl.pkl", "rb") as f:
                pickle.load(f)  # v1.1 of prophet raises RecursionError
        except (ImportError, RecursionError):
            # fall back to the statsmodels-based estimators only
            print("not using prophet due to ImportError or RecursionError (when unpickling in v1.1)")
            automl.fit(
                dataframe=df,
                **settings,
                estimator_list=["arima", "sarimax"],
                period=time_horizon,
            )
            automl.score(X_test, y_test)
            automl.pickle("automl.pkl")
            with open("automl.pkl", "rb") as f:
                pickle.load(f)

    def test_classification(self):
        """Score/fit/pickle cycle for every classification estimator on a tiny
        mixed-type frame (int, float, object-with-mixed-values, bool columns)."""
        X = pd.DataFrame(
            {
                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
                "f2": [3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0, 12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0],
                # deliberately mixes strings and floats in one column
                "f3": ["a", "b", "a", "c", "c", "b", "b", "b", "b", "a", "b", 1.0, 1.0, "a"],
                "f4": [
                    True,
                    True,
                    False,
                    True,
                    True,
                    False,
                    False,
                    False,
                    True,
                    True,
                    False,
                    False,
                    True,
                    True,
                ],
            }
        )
        y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

        automl = AutoML()
        for each_estimator in [
            "catboost",
            "lrl2",
            "lrl1",
            "rf",
            "lgbm",
            "extra_tree",
            "kneighbor",
            "xgboost",
        ]:
            automl_settings = {
                "time_budget": 6,
                "task": "classification",
                "n_jobs": 1,
                "estimator_list": [each_estimator],
                "metric": "accuracy",
                "log_training_metric": True,
            }
            automl.score(X, y)  # for covering the case no estimator is trained

            automl.fit(X, y, **automl_settings)
            automl.score(X, y)
            automl.score(X, y, **{"metric": "accuracy"})

            automl.pickle("automl.pkl")

    def test_regression(self):
        """Score/pickle cycle for every regression estimator on california housing."""
        automl_experiment = AutoML()

        X_train, y_train = fetch_california_housing(return_X_y=True)
        n = int(len(y_train) * 9 // 10)  # 90/10 train/validation split

        for each_estimator in [
            "lgbm",
            "xgboost",
            "rf",
            "extra_tree",
            "catboost",
            "kneighbor",
        ]:
            automl_settings = {
                "time_budget": 2,
                "task": "regression",
                "log_file_name": "test/california.log",
                "log_training_metric": True,
                "estimator_list": [each_estimator],
                "n_jobs": 1,
                "model_history": True,
            }
            automl_experiment.fit(
                X_train=X_train[:n],
                y_train=y_train[:n],
                X_val=X_train[n:],
                y_val=y_train[n:],
                **automl_settings,
            )

            automl_experiment.score(X_train[n:], y_train[n:], **{"metric": "mse"})
            automl_experiment.pickle("automl.pkl")

    def test_rank(self):
        """Ranking task with explicit group labels; score may legitimately
        raise NotImplementedError for rank tasks."""
        from sklearn.externals._arff import ArffException

        dataset = "credit-g"

        try:
            X, y = fetch_openml(name=dataset, return_X_y=True)
            y = y.cat.codes
        except (ArffException, ValueError):
            # offline fallback dataset
            from sklearn.datasets import load_wine

            X, y = load_wine(return_X_y=True)

        import numpy as np

        automl = AutoML()
        n = 500

        for each_estimator in ["lgbm", "xgboost"]:
            automl_settings = {
                "time_budget": 2,
                "task": "rank",
                "log_file_name": "test/{}.log".format(dataset),
                "model_history": True,
                "groups": np.array([0] * 200 + [1] * 200 + [2] * 100),  # group labels
                "learner_selector": "roundrobin",
                "estimator_list": [each_estimator],
            }
            automl.fit(X[:n], y[:n], **automl_settings)
            try:
                automl.score(X[n:], y[n:])
                automl.pickle("automl.pkl")
            except NotImplementedError:
                pass

    def test_class(self):
        # to test classification task with labels need encoding
        X = pd.DataFrame(
            {
                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
                "f2": [3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0, 12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0],
            }
        )
        y = pd.Series(
            [
                "a",
                "b",
                "c",
                "d",
                "a",
                "b",
                "c",
                "d",
                "a",
                "b",
                "c",
                "d",
                "a",
                "b",
            ]
        )

        automl = AutoML()

        automl_settings = {
            "time_budget": 6,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["xgboost"],
            "metric": "accuracy",
            "log_training_metric": True,
        }

        automl.fit(X, y, **automl_settings)
        # string labels must have been encoded internally
        assert automl._label_transformer is not None
        assert automl.score(X, y) > 0
        automl.pickle("automl.pkl")
|
||||
|
||||
|
||||
# Manual entry point: run only the forecast test locally.
if __name__ == "__main__":
    test = TestScore()
    test.test_forecast()
|
||||
@@ -1,205 +0,0 @@
|
||||
from sklearn.datasets import fetch_openml
|
||||
from flaml.automl import AutoML
|
||||
from sklearn.model_selection import GroupKFold, train_test_split, KFold
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
dataset = "credit-g"  # OpenML dataset fetched by name in the tests below
|
||||
|
||||
|
||||
def _test(split_type):
    """Fit a classifier with the given split_type and print test accuracy.

    Falls back to the bundled wine dataset when the OpenML fetch fails.
    """
    from sklearn.externals._arff import ArffException

    automl = AutoML()

    automl_settings = {
        "time_budget": 2,
        # "metric": 'accuracy',
        "task": "classification",
        "log_file_name": "test/{}.log".format(dataset),
        "model_history": True,
        "log_training_metric": True,
        "split_type": split_type,
    }

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # offline fallback dataset
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    if split_type != "time":
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    else:
        # time-ordered split: keep row order, no shuffling
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

    pred = automl.predict(X_test)
    acc = accuracy_score(y_test, pred)

    print(acc)
|
||||
|
||||
|
||||
def _test_uniform():
    """Manual-only variant: uniform random split."""
    _test("uniform")
|
||||
|
||||
|
||||
def test_time():
    """Time-ordered (non-shuffled) split."""
    _test("time")
|
||||
|
||||
|
||||
def test_groups():
    """Group-aware splitting: cv and holdout with random group labels, then a
    GroupKFold splitter object, which is only legal with eval_method 'cv'."""
    from sklearn.externals._arff import ArffException

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # offline fallback dataset
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)

    import numpy as np

    automl = AutoML()
    automl_settings = {
        "time_budget": 2,
        "task": "classification",
        "log_file_name": "test/{}.log".format(dataset),
        "model_history": True,
        "eval_method": "cv",
        "groups": np.random.randint(low=0, high=10, size=len(y)),  # random group per row
        "estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
        "learner_selector": "roundrobin",
    }
    automl.fit(X, y, **automl_settings)

    automl_settings["eval_method"] = "holdout"
    automl.fit(X, y, **automl_settings)

    # a custom splitter object must be rejected while eval_method is holdout
    automl_settings["split_type"] = GroupKFold(n_splits=3)
    try:
        automl.fit(X, y, **automl_settings)
        raise RuntimeError("GroupKFold object as split_type should fail when eval_method is holdout")
    except AssertionError:
        # eval_method must be 'auto' or 'cv' for custom data splitter.
        pass

    # with eval_method back to cv, the custom splitter is accepted
    automl_settings["eval_method"] = "cv"
    automl.fit(X, y, **automl_settings)
|
||||
|
||||
|
||||
def test_stratified_groupkfold():
|
||||
from sklearn.model_selection import StratifiedGroupKFold
|
||||
from minio.error import ServerError
|
||||
from flaml.data import load_openml_dataset
|
||||
|
||||
try:
|
||||
X_train, _, y_train, _ = load_openml_dataset(dataset_id=1169, data_dir="test/")
|
||||
except (ServerError, Exception):
|
||||
return
|
||||
splitter = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=0)
|
||||
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"time_budget": 6,
|
||||
"metric": "ap",
|
||||
"eval_method": "cv",
|
||||
"split_type": splitter,
|
||||
"groups": X_train["Airline"],
|
||||
"estimator_list": [
|
||||
"lgbm",
|
||||
"rf",
|
||||
"xgboost",
|
||||
"extra_tree",
|
||||
"xgb_limitdepth",
|
||||
"lrl1",
|
||||
],
|
||||
}
|
||||
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
|
||||
def test_rank():
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
y = y.cat.codes
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
import numpy as np
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "rank",
|
||||
"log_file_name": "test/{}.log".format(dataset),
|
||||
"model_history": True,
|
||||
"eval_method": "cv",
|
||||
"groups": np.array([0] * 200 + [1] * 200 + [2] * 200 + [3] * 200 + [4] * 100 + [5] * 100), # group labels
|
||||
"learner_selector": "roundrobin",
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "rank",
|
||||
"metric": "ndcg@5", # 5 can be replaced by any number
|
||||
"log_file_name": "test/{}.log".format(dataset),
|
||||
"model_history": True,
|
||||
"groups": [200] * 4 + [100] * 2, # alternative way: group counts
|
||||
# "estimator_list": ['lgbm', 'xgboost'], # list of ML learners
|
||||
"learner_selector": "roundrobin",
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
|
||||
def test_object():
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
|
||||
import numpy as np
|
||||
|
||||
class TestKFold(KFold):
|
||||
def __init__(self, n_splits):
|
||||
self.n_splits = int(n_splits)
|
||||
|
||||
def split(self, X):
|
||||
rng = np.random.default_rng()
|
||||
train_num = int(len(X) * 0.8)
|
||||
for _ in range(self.n_splits):
|
||||
permu_idx = rng.permutation(len(X))
|
||||
yield permu_idx[:train_num], permu_idx[train_num:]
|
||||
|
||||
def get_n_splits(self, X=None, y=None, groups=None):
|
||||
return self.n_splits
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/{}.log".format(dataset),
|
||||
"model_history": True,
|
||||
"log_training_metric": True,
|
||||
"split_type": TestKFold(5),
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
assert automl._state.eval_method == "cv", "eval_method must be 'cv' for custom data splitter"
|
||||
|
||||
kf = TestKFold(5)
|
||||
kf.shuffle = True
|
||||
automl_settings["split_type"] = kf
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_groups()
|
||||
@@ -1,115 +0,0 @@
|
||||
import os
|
||||
import unittest
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.automl.training_log import training_log_reader
|
||||
|
||||
|
||||
class TestTrainingLog(unittest.TestCase):
|
||||
def test_training_log(self, path="test_training_log.log", estimator_list="auto", use_ray=False):
|
||||
with TemporaryDirectory() as d:
|
||||
filename = os.path.join(d, path)
|
||||
|
||||
# Run a simple job.
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "mse",
|
||||
"task": "regression",
|
||||
"log_file_name": filename,
|
||||
"log_training_metric": True,
|
||||
"mem_thres": 1024 * 1024,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"train_time_limit": 0.1,
|
||||
"verbose": 3,
|
||||
# "ensemble": True,
|
||||
"keep_search_state": True,
|
||||
"estimator_list": estimator_list,
|
||||
}
|
||||
X_train, y_train = fetch_california_housing(return_X_y=True)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
# Check if the training log file is populated.
|
||||
self.assertTrue(os.path.exists(filename))
|
||||
if automl.best_estimator:
|
||||
estimator, config = automl.best_estimator, automl.best_config
|
||||
model0 = automl.best_model_for_estimator(estimator)
|
||||
print(model0.params)
|
||||
if "n_estimators" in config:
|
||||
assert model0.params["n_estimators"] == config["n_estimators"]
|
||||
|
||||
# train on full data with no time limit
|
||||
automl._state.time_budget = -1
|
||||
model, _ = automl._state._train_with_config(estimator, config)
|
||||
|
||||
# assuming estimator & config are saved and loaded as follows
|
||||
automl = AutoML()
|
||||
automl.fit(
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
max_iter=1,
|
||||
task="regression",
|
||||
estimator_list=[estimator],
|
||||
n_jobs=1,
|
||||
starting_points={estimator: config},
|
||||
use_ray=use_ray,
|
||||
)
|
||||
print(automl.best_config)
|
||||
# then the fitted model should be equivalent to model
|
||||
assert (
|
||||
str(model.estimator) == str(automl.model.estimator)
|
||||
or estimator == "xgboost"
|
||||
and str(model.estimator.get_dump()) == str(automl.model.estimator.get_dump())
|
||||
or estimator == "catboost"
|
||||
and str(model.estimator.get_all_params()) == str(automl.model.estimator.get_all_params())
|
||||
)
|
||||
automl.fit(
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
max_iter=1,
|
||||
task="regression",
|
||||
estimator_list=[estimator],
|
||||
n_jobs=1,
|
||||
starting_points={estimator: {}},
|
||||
)
|
||||
print(automl.best_config)
|
||||
|
||||
with training_log_reader(filename) as reader:
|
||||
count = 0
|
||||
for record in reader.records():
|
||||
print(record)
|
||||
count += 1
|
||||
self.assertGreater(count, 0)
|
||||
|
||||
automl_settings["log_file_name"] = ""
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
if automl._selected:
|
||||
automl._selected.update(None, 0)
|
||||
automl = AutoML()
|
||||
automl.fit(X_train=X_train, y_train=y_train, max_iter=0, task="regression")
|
||||
|
||||
def test_illfilename(self):
|
||||
try:
|
||||
self.test_training_log("/")
|
||||
except IsADirectoryError:
|
||||
print("IsADirectoryError happens as expected in linux.")
|
||||
except PermissionError:
|
||||
print("PermissionError happens as expected in windows.")
|
||||
|
||||
def test_each_estimator(self):
|
||||
try:
|
||||
import ray
|
||||
|
||||
ray.shutdown()
|
||||
ray.init()
|
||||
use_ray = True
|
||||
except ImportError:
|
||||
use_ray = False
|
||||
self.test_training_log(estimator_list=["xgboost"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["catboost"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["extra_tree"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["rf"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["lgbm"], use_ray=use_ray)
|
||||
@@ -1,212 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from sklearn.datasets import load_iris
|
||||
from flaml import AutoML
|
||||
from flaml.automl.model import LGBMEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
class TestWarmStart(unittest.TestCase):
|
||||
def test_fit_w_freezinghp_starting_point(self, as_frame=True):
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"estimator_list": ["lgbm"],
|
||||
"log_file_name": "test/iris.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
|
||||
if as_frame:
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
automl_val_accuracy = 1.0 - automl.best_loss
|
||||
print("Best ML leaner:", automl.best_estimator)
|
||||
print("Best hyperparmeter config:", automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
|
||||
# 1. Get starting points from previous experiments.
|
||||
starting_points = automl.best_config_per_estimator
|
||||
print("starting_points", starting_points)
|
||||
print("loss of the starting_points", automl.best_loss_per_estimator)
|
||||
starting_point = starting_points["lgbm"]
|
||||
hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"]
|
||||
|
||||
# 2. Constrct a new class:
|
||||
# a. write the hps you want to freeze as hps with constant 'domain';
|
||||
# b. specify the new search space of the other hps accrodingly.
|
||||
|
||||
class MyPartiallyFreezedLargeLGBM(LGBMEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, **params):
|
||||
# (1) Get the hps in the original search space
|
||||
space = LGBMEstimator.search_space(**params)
|
||||
# (2) Set up the fixed value from hps from the starting point
|
||||
for hp_name in hps_to_freeze:
|
||||
# if an hp is specifed to be freezed, use tine value provided in the starting_point
|
||||
# otherwise use the setting from the original search space
|
||||
if hp_name in starting_point:
|
||||
space[hp_name] = {"domain": starting_point[hp_name]}
|
||||
# (3.1) Configure the search space for hps that are in the original search space
|
||||
# but you want to change something, for example the range.
|
||||
revised_hps_to_search = {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=10, upper=32768),
|
||||
"init_value": starting_point.get("n_estimators") or space["n_estimators"].get("init_value", 10),
|
||||
"low_cost_init_value": space["n_estimators"].get("low_cost_init_value", 10),
|
||||
},
|
||||
"num_leaves": {
|
||||
"domain": tune.lograndint(lower=10, upper=3276),
|
||||
"init_value": starting_point.get("num_leaves") or space["num_leaves"].get("init_value", 10),
|
||||
"low_cost_init_value": space["num_leaves"].get("low_cost_init_value", 10),
|
||||
},
|
||||
# (3.2) Add a new hp which is not in the original search space
|
||||
"subsample": {
|
||||
"domain": tune.uniform(lower=0.1, upper=1.0),
|
||||
"init_value": 0.1,
|
||||
},
|
||||
}
|
||||
space.update(revised_hps_to_search)
|
||||
return space
|
||||
|
||||
new_estimator_name = "large_lgbm"
|
||||
new_automl = AutoML()
|
||||
new_automl.add_learner(learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM)
|
||||
|
||||
automl_settings_resume = {
|
||||
"time_budget": 3,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"estimator_list": [new_estimator_name],
|
||||
"log_file_name": "test/iris_resume.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"log_type": "all",
|
||||
"starting_points": {new_estimator_name: starting_point},
|
||||
}
|
||||
|
||||
new_automl.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
|
||||
|
||||
new_automl_val_accuracy = 1.0 - new_automl.best_loss
|
||||
print("Best ML leaner:", new_automl.best_estimator)
|
||||
print("Best hyperparmeter config:", new_automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
|
||||
|
||||
def test_nobudget(self):
|
||||
automl = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train, y_train)
|
||||
print(automl.best_config_per_estimator)
|
||||
|
||||
def test_FLAML_sample_size_in_starting_points(self):
|
||||
from openml.exceptions import OpenMLServerException
|
||||
from requests.exceptions import ChunkedEncodingError, SSLError
|
||||
from minio.error import ServerError
|
||||
from flaml.automl.data import load_openml_dataset
|
||||
from flaml import AutoML
|
||||
|
||||
try:
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
|
||||
except (OpenMLServerException, ChunkedEncodingError, SSLError, ServerError, Exception):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X_train, y_train = load_wine(return_X_y=True)
|
||||
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"task": "classification",
|
||||
}
|
||||
|
||||
automl1 = AutoML()
|
||||
print(len(y_train))
|
||||
automl1.fit(X_train, y_train, **automl_settings)
|
||||
print("automl1.best_config_per_estimator", automl1.best_config_per_estimator)
|
||||
|
||||
automl_settings["starting_points"] = automl1.best_config_per_estimator
|
||||
automl2 = AutoML()
|
||||
automl2.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
automl_settings["starting_points"] = {
|
||||
"xgboost": {
|
||||
"n_estimators": 4,
|
||||
"max_leaves": 4,
|
||||
"min_child_weight": 0.26208115308159446,
|
||||
"learning_rate": 0.25912534572860507,
|
||||
"subsample": 0.9266743941610592,
|
||||
"colsample_bylevel": 1.0,
|
||||
"colsample_bytree": 1.0,
|
||||
"reg_alpha": 0.0013933617380144255,
|
||||
"reg_lambda": 0.18096917948292954,
|
||||
"FLAML_sample_size": 20000,
|
||||
},
|
||||
"xgb_limitdepth": None,
|
||||
"lrl1": None,
|
||||
}
|
||||
from flaml import tune
|
||||
|
||||
automl_settings["custom_hp"] = {
|
||||
"xgboost": {
|
||||
"n_estimators": {
|
||||
"domain": tune.choice([10, 20]),
|
||||
},
|
||||
}
|
||||
}
|
||||
automl2 = AutoML()
|
||||
automl2.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
try:
|
||||
import ray
|
||||
|
||||
automl_settings["n_concurrent_trials"] = 2
|
||||
except ImportError:
|
||||
automl_settings["n_concurrent_trials"] = 1
|
||||
# setting different FLAML_sample_size
|
||||
automl_settings["starting_points"] = {
|
||||
"catboost": {
|
||||
"early_stopping_rounds": 10,
|
||||
"learning_rate": 0.09999999999999996,
|
||||
"n_estimators": 1,
|
||||
"FLAML_sample_size": 10000,
|
||||
},
|
||||
"xgboost": {
|
||||
"n_estimators": 4,
|
||||
"max_leaves": 4,
|
||||
"min_child_weight": 0.26208115308159446,
|
||||
"learning_rate": 0.25912534572860507,
|
||||
"subsample": 0.9266743941610592,
|
||||
"colsample_bylevel": 1.0,
|
||||
"colsample_bytree": 1.0,
|
||||
"reg_alpha": 0.0013933617380144255,
|
||||
"reg_lambda": 0.18096917948292954,
|
||||
"FLAML_sample_size": 20000,
|
||||
},
|
||||
"xgb_limitdepth": None,
|
||||
"lrl1": None,
|
||||
}
|
||||
automl3 = AutoML()
|
||||
automl3.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
automl_settings["sample"] = False
|
||||
automl4 = AutoML()
|
||||
try:
|
||||
automl4.fit(
|
||||
X_train,
|
||||
y_train,
|
||||
**automl_settings,
|
||||
)
|
||||
raise RuntimeError(
|
||||
"When sample=False and starting_points contain FLAML_sample_size, AssertionError is expected but not raised."
|
||||
)
|
||||
except AssertionError:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,98 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from flaml.automl import AutoML
|
||||
from flaml.automl.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
dataset = "credit-g"
|
||||
|
||||
|
||||
class XGBoost2D(XGBoostSklearnEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, data_size, task):
|
||||
upper = min(32768, int(data_size[0]))
|
||||
return {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"max_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_simple(method=None):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
|
||||
|
||||
automl_settings = {
|
||||
"estimator_list": ["XGBoost2D"],
|
||||
"task": "classification",
|
||||
"log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
|
||||
"n_jobs": 1,
|
||||
"hpo_method": method,
|
||||
"log_type": "all",
|
||||
"retrain_full": "budget",
|
||||
"keep_search_state": True,
|
||||
"time_budget": 1,
|
||||
}
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl.estimator_list)
|
||||
print(automl.search_space)
|
||||
print(automl.points_to_evaluate)
|
||||
if not automl.best_config:
|
||||
return
|
||||
config = automl.best_config.copy()
|
||||
config["learner"] = automl.best_estimator
|
||||
automl.trainable(config)
|
||||
from flaml import tune
|
||||
from flaml.automl import size
|
||||
from functools import partial
|
||||
|
||||
analysis = tune.run(
|
||||
automl.trainable,
|
||||
automl.search_space,
|
||||
metric="val_loss",
|
||||
mode="min",
|
||||
low_cost_partial_config=automl.low_cost_partial_config,
|
||||
points_to_evaluate=automl.points_to_evaluate,
|
||||
cat_hp_cost=automl.cat_hp_cost,
|
||||
resource_attr=automl.resource_attr,
|
||||
min_resource=automl.min_resource,
|
||||
max_resource=automl.max_resource,
|
||||
time_budget_s=automl._state.time_budget,
|
||||
config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
|
||||
metric_constraints=automl.metric_constraints,
|
||||
num_samples=5,
|
||||
)
|
||||
print(analysis.trials[-1])
|
||||
|
||||
|
||||
def test_optuna():
|
||||
test_simple(method="optuna")
|
||||
|
||||
|
||||
def test_random():
|
||||
test_simple(method="random")
|
||||
|
||||
|
||||
def test_grid():
|
||||
test_simple(method="grid")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,71 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from flaml.automl import AutoML
|
||||
from flaml.automl.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
dataset = "credit-g"
|
||||
|
||||
|
||||
class XGBoost2D(XGBoostSklearnEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, data_size, task):
|
||||
upper = min(32768, int(data_size))
|
||||
return {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"init_value": 4,
|
||||
},
|
||||
"max_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"init_value": 4,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _test_simple(method=None, size_ratio=1.0):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
|
||||
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
|
||||
|
||||
final_size = int(len(y_train) * size_ratio)
|
||||
X_train = X_train[:final_size]
|
||||
y_train = y_train[:final_size]
|
||||
automl_settings = {
|
||||
"estimator_list": ["XGBoost2D"],
|
||||
# "metric": 'accuracy',
|
||||
"task": "classification",
|
||||
"log_file_name": f"test/xgboost2d_{dataset}_{method}_{final_size}.log",
|
||||
# "log_training_metric": True,
|
||||
# "split_type": split_type,
|
||||
"n_jobs": 1,
|
||||
"hpo_method": method,
|
||||
"log_type": "all",
|
||||
"time_budget": 3600,
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
|
||||
def _test_grid_1():
|
||||
_test_simple(method="grid", size_ratio=1.0 / 3.0)
|
||||
|
||||
|
||||
def _test_grid_2():
|
||||
_test_simple(method="grid", size_ratio=2.0 / 3.0)
|
||||
|
||||
|
||||
def _test_grid_4():
|
||||
_test_simple(method="grid", size_ratio=0.5)
|
||||
|
||||
|
||||
def _test_grid_3():
|
||||
_test_simple(method="grid", size_ratio=1.0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,13 +0,0 @@
|
||||
Dataset,NumberOfInstances,NumberOfFeatures,NumberOfClasses,PercentageOfNumericFeatures
|
||||
2dplanes,36691,10,0,1.0
|
||||
adult,43957,14,2,0.42857142857142855
|
||||
Airlines,485444,7,2,0.42857142857142855
|
||||
Albert,382716,78,2,0.3333333333333333
|
||||
Amazon_employee_access,29492,9,2,0.0
|
||||
bng_breastTumor,104976,9,0,0.1111111111111111
|
||||
bng_pbc,900000,18,0,0.5555555555555556
|
||||
car,1555,6,4,0.0
|
||||
connect-4,60801,42,3,0.0
|
||||
dilbert,9000,2000,5,1.0
|
||||
Dionis,374569,60,355,1.0
|
||||
poker,922509,10,0,1.0
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 16, "max_features": 1.0, "max_leaves": 54}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 2047, "max_features": 1.0, "max_leaves": 8194, "criterion": "gini", "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 1733, "max_features": 0.3841826938360253, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 812, "max_features": 1.0, "max_leaves": 1474, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 859, "max_features": 1.0, "max_leaves": 967, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 90, "max_features": 1.0, "max_leaves": 1301, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 1211, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 333, "max_features": 1.0, "max_leaves": 201, "criterion": "gini"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 229, "max_features": 0.5372053700721111, "max_leaves": 11150, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 346, "max_features": 1.0, "max_leaves": 1007, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 1416, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 830258}}
|
||||
@@ -1,142 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946503,{'_modeljson': 'et/2dplanes.json'}
|
||||
2dplanes,0,regression,0.945047,{'_modeljson': 'et/adult.json'}
|
||||
2dplanes,0,regression,0.933571,{'_modeljson': 'et/Airlines.json'}
|
||||
2dplanes,0,regression,0.919021,{'_modeljson': 'et/Albert.json'}
|
||||
2dplanes,0,regression,0.944532,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.94471,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.914912,{'_modeljson': 'et/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.946045,{'_modeljson': 'et/car.json'}
|
||||
2dplanes,0,regression,0.935777,{'_modeljson': 'et/connect-4.json'}
|
||||
2dplanes,0,regression,0.91501,{'_modeljson': 'et/default.json'}
|
||||
2dplanes,0,regression,0.94497,{'_modeljson': 'et/dilbert.json'}
|
||||
2dplanes,0,regression,0.914907,{'_modeljson': 'et/poker.json'}
|
||||
adult,0,binary,0.902771,{'_modeljson': 'et/2dplanes.json'}
|
||||
adult,0,binary,0.919086,{'_modeljson': 'et/adult.json'}
|
||||
adult,0,binary,0.906742,{'_modeljson': 'et/Airlines.json'}
|
||||
adult,0,binary,0.897039,{'_modeljson': 'et/Albert.json'}
|
||||
adult,0,binary,0.919317,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.918404,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
adult,0,binary,0.895193,{'_modeljson': 'et/bng_pbc.json'}
|
||||
adult,0,binary,0.912965,{'_modeljson': 'et/car.json'}
|
||||
adult,0,binary,0.904228,{'_modeljson': 'et/connect-4.json'}
|
||||
adult,0,binary,0.893933,{'_modeljson': 'et/default.json'}
|
||||
adult,0,binary,0.918539,{'_modeljson': 'et/dilbert.json'}
|
||||
adult,0,binary,0.895813,{'_modeljson': 'et/poker.json'}
|
||||
Airlines,0,binary,0.683928,{'_modeljson': 'et/2dplanes.json'}
|
||||
Airlines,0,binary,0.709673,{'_modeljson': 'et/adult.json'}
|
||||
Airlines,0,binary,0.724391,{'_modeljson': 'et/Airlines.json'}
|
||||
Airlines,0,binary,0.707411,{'_modeljson': 'et/Albert.json'}
|
||||
Airlines,0,binary,0.713548,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.712774,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.708477,{'_modeljson': 'et/bng_pbc.json'}
|
||||
Airlines,0,binary,0.695604,{'_modeljson': 'et/car.json'}
|
||||
Airlines,0,binary,0.719631,{'_modeljson': 'et/connect-4.json'}
|
||||
Airlines,0,binary,0.619025,{'_modeljson': 'et/default.json'}
|
||||
Airlines,0,binary,0.710038,{'_modeljson': 'et/dilbert.json'}
|
||||
Airlines,0,binary,0.708628,{'_modeljson': 'et/poker.json'}
|
||||
Albert,0,binary,0.707126,{'_modeljson': 'et/2dplanes.json'}
|
||||
Albert,0,binary,0.727819,{'_modeljson': 'et/adult.json'}
|
||||
Albert,0,binary,0.733953,{'_modeljson': 'et/Airlines.json'}
|
||||
Albert,0,binary,0.739138,{'_modeljson': 'et/Albert.json'}
|
||||
Albert,0,binary,0.729251,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.728612,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.736396,{'_modeljson': 'et/bng_pbc.json'}
|
||||
Albert,0,binary,0.719311,{'_modeljson': 'et/car.json'}
|
||||
Albert,0,binary,0.735032,{'_modeljson': 'et/connect-4.json'}
|
||||
Albert,0,binary,0.725017,{'_modeljson': 'et/default.json'}
|
||||
Albert,0,binary,0.728108,{'_modeljson': 'et/dilbert.json'}
|
||||
Albert,0,binary,0.736668,{'_modeljson': 'et/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.708259,{'_modeljson': 'et/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.872603,{'_modeljson': 'et/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.839293,{'_modeljson': 'et/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.834606,{'_modeljson': 'et/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.873141,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.860569,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.834654,{'_modeljson': 'et/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.81679,{'_modeljson': 'et/car.json'}
|
||||
Amazon_employee_access,0,binary,0.831975,{'_modeljson': 'et/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.839651,{'_modeljson': 'et/default.json'}
|
||||
Amazon_employee_access,0,binary,0.868815,{'_modeljson': 'et/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.841461,{'_modeljson': 'et/poker.json'}
|
||||
bng_breastTumor,0,regression,0.137191,{'_modeljson': 'et/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.181002,{'_modeljson': 'et/adult.json'}
|
||||
bng_breastTumor,0,regression,0.163121,{'_modeljson': 'et/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.116596,{'_modeljson': 'et/Albert.json'}
|
||||
bng_breastTumor,0,regression,0.181745,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.180948,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.0784668,{'_modeljson': 'et/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.168552,{'_modeljson': 'et/car.json'}
|
||||
bng_breastTumor,0,regression,0.165576,{'_modeljson': 'et/connect-4.json'}
|
||||
bng_breastTumor,0,regression,-0.28734,{'_modeljson': 'et/default.json'}
|
||||
bng_breastTumor,0,regression,0.1822,{'_modeljson': 'et/dilbert.json'}
|
||||
bng_breastTumor,0,regression,0.0780929,{'_modeljson': 'et/poker.json'}
|
||||
bng_pbc,0,regression,0.332032,{'_modeljson': 'et/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.3879,{'_modeljson': 'et/adult.json'}
|
||||
bng_pbc,0,regression,0.411442,{'_modeljson': 'et/Airlines.json'}
|
||||
bng_pbc,0,regression,0.400094,{'_modeljson': 'et/Albert.json'}
|
||||
bng_pbc,0,regression,0.394067,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.391695,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.421267,{'_modeljson': 'et/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.361909,{'_modeljson': 'et/car.json'}
|
||||
bng_pbc,0,regression,0.402332,{'_modeljson': 'et/connect-4.json'}
|
||||
bng_pbc,0,regression,0.418622,{'_modeljson': 'et/default.json'}
|
||||
bng_pbc,0,regression,0.388768,{'_modeljson': 'et/dilbert.json'}
|
||||
bng_pbc,0,regression,0.421152,{'_modeljson': 'et/poker.json'}
|
||||
car,0,multiclass,-0.0815482,{'_modeljson': 'et/2dplanes.json'}
|
||||
car,0,multiclass,-0.218552,{'_modeljson': 'et/adult.json'}
|
||||
car,0,multiclass,-0.0474428,{'_modeljson': 'et/Airlines.json'}
|
||||
car,0,multiclass,-0.108586,{'_modeljson': 'et/Albert.json'}
|
||||
car,0,multiclass,-0.218073,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.0397411,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.0485655,{'_modeljson': 'et/bng_pbc.json'}
|
||||
car,0,multiclass,-0.0524496,{'_modeljson': 'et/car.json'}
|
||||
car,0,multiclass,-0.0690461,{'_modeljson': 'et/connect-4.json'}
|
||||
car,0,multiclass,-0.111939,{'_modeljson': 'et/default.json'}
|
||||
car,0,multiclass,-0.218153,{'_modeljson': 'et/dilbert.json'}
|
||||
car,0,multiclass,-0.0502018,{'_modeljson': 'et/poker.json'}
|
||||
connect-4,0,multiclass,-0.706448,{'_modeljson': 'et/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.54998,{'_modeljson': 'et/adult.json'}
|
||||
connect-4,0,multiclass,-0.495074,{'_modeljson': 'et/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.468797,{'_modeljson': 'et/Albert.json'}
|
||||
connect-4,0,multiclass,-0.528177,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.545043,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.57415,{'_modeljson': 'et/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.639965,{'_modeljson': 'et/car.json'}
|
||||
connect-4,0,multiclass,-0.459906,{'_modeljson': 'et/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.540561,{'_modeljson': 'et/default.json'}
|
||||
connect-4,0,multiclass,-0.547218,{'_modeljson': 'et/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.573145,{'_modeljson': 'et/poker.json'}
|
||||
dilbert,0,multiclass,-0.626964,{'_modeljson': 'et/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.230603,{'_modeljson': 'et/adult.json'}
|
||||
dilbert,0,multiclass,-0.246071,{'_modeljson': 'et/Airlines.json'}
|
||||
dilbert,0,multiclass,-0.237068,{'_modeljson': 'et/Albert.json'}
|
||||
dilbert,0,multiclass,-0.230785,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
dilbert,0,multiclass,-0.253409,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
dilbert,0,multiclass,-0.247331,{'_modeljson': 'et/bng_pbc.json'}
|
||||
dilbert,0,multiclass,-0.383859,{'_modeljson': 'et/car.json'}
|
||||
dilbert,0,multiclass,-0.234819,{'_modeljson': 'et/connect-4.json'}
|
||||
dilbert,0,multiclass,-0.308227,{'_modeljson': 'et/default.json'}
|
||||
dilbert,0,multiclass,-0.231163,{'_modeljson': 'et/dilbert.json'}
|
||||
dilbert,0,multiclass,-0.245383,{'_modeljson': 'et/poker.json'}
|
||||
Dionis,0,multiclass,-3.354,{'_modeljson': 'et/2dplanes.json'}
|
||||
Dionis,0,multiclass,-1.56815,{'_modeljson': 'et/adult.json'}
|
||||
Dionis,0,multiclass,-0.758098,{'_modeljson': 'et/Airlines.json'}
|
||||
Dionis,0,multiclass,-1.36204,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Dionis,0,multiclass,-1.40398,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Dionis,0,multiclass,-2.44773,{'_modeljson': 'et/car.json'}
|
||||
Dionis,0,multiclass,-0.759589,{'_modeljson': 'et/connect-4.json'}
|
||||
Dionis,0,multiclass,-0.789821,{'_modeljson': 'et/default.json'}
|
||||
Dionis,0,multiclass,-1.54593,{'_modeljson': 'et/dilbert.json'}
|
||||
poker,0,regression,0.103608,{'_modeljson': 'et/2dplanes.json'}
|
||||
poker,0,regression,0.314258,{'_modeljson': 'et/adult.json'}
|
||||
poker,0,regression,0.531285,{'_modeljson': 'et/Airlines.json'}
|
||||
poker,0,regression,0.30208,{'_modeljson': 'et/Albert.json'}
|
||||
poker,0,regression,0.358474,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.344292,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
poker,0,regression,0.663188,{'_modeljson': 'et/bng_pbc.json'}
|
||||
poker,0,regression,0.180103,{'_modeljson': 'et/car.json'}
|
||||
poker,0,regression,0.394291,{'_modeljson': 'et/connect-4.json'}
|
||||
poker,0,regression,0.753355,{'_modeljson': 'et/default.json'}
|
||||
poker,0,regression,0.317809,{'_modeljson': 'et/dilbert.json'}
|
||||
poker,0,regression,0.663812,{'_modeljson': 'et/poker.json'}
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 103, "num_leaves": 33, "min_child_samples": 4, "learning_rate": 0.05800185361316003, "log_max_bin": 6, "colsample_bytree": 1.0, "reg_alpha": 1.5987124004961213, "reg_lambda": 10.56445079499673}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 733, "num_leaves": 11, "min_child_samples": 94, "learning_rate": 0.06276798296942972, "log_max_bin": 6, "colsample_bytree": 0.6341928918435795, "reg_alpha": 0.5811038918218691, "reg_lambda": 43.304997517523944}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 2541, "num_leaves": 1667, "min_child_samples": 29, "learning_rate": 0.0016660662914022302, "log_max_bin": 8, "colsample_bytree": 0.5157078343718623, "reg_alpha": 0.045792841240713165, "reg_lambda": 0.0012362651138125363, "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 12659, "num_leaves": 566, "min_child_samples": 51, "learning_rate": 0.0017248557932071625, "log_max_bin": 10, "colsample_bytree": 0.35373661752616337, "reg_alpha": 0.004824272162679245, "reg_lambda": 8.51563063056529, "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 198, "num_leaves": 6241, "min_child_samples": 3, "learning_rate": 0.003807690748728824, "log_max_bin": 10, "colsample_bytree": 0.3192882305722113, "reg_alpha": 0.024630507311503163, "reg_lambda": 0.06738306675149014}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 362, "num_leaves": 1208, "min_child_samples": 8, "learning_rate": 0.02070742242160566, "log_max_bin": 4, "colsample_bytree": 0.37915528071680865, "reg_alpha": 0.002982599447751338, "reg_lambda": 1.136605174453919, "FLAML_sample_size": 337147}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 11842, "num_leaves": 31, "min_child_samples": 3, "learning_rate": 0.0015861878568503534, "log_max_bin": 8, "colsample_bytree": 0.3814347840573729, "reg_alpha": 0.0009765625, "reg_lambda": 0.011319689446351965}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 644, "num_leaves": 40, "min_child_samples": 38, "learning_rate": 0.06007328261566753, "log_max_bin": 5, "colsample_bytree": 0.6950692048656423, "reg_alpha": 0.0009765625, "reg_lambda": 9.849318389111616, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 27202, "num_leaves": 848, "min_child_samples": 2, "learning_rate": 0.0019296395751528979, "log_max_bin": 5, "colsample_bytree": 0.7328229531785452, "reg_alpha": 6.112225454676263, "reg_lambda": 0.08606162543586986, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 311, "num_leaves": 4, "min_child_samples": 5, "learning_rate": 0.5547292134798673, "log_max_bin": 3, "colsample_bytree": 0.9917614238487915, "reg_alpha": 0.0009765625, "reg_lambda": 0.0019177370889840813}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 3726, "num_leaves": 155, "min_child_samples": 4, "learning_rate": 0.040941607728296484, "log_max_bin": 5, "colsample_bytree": 0.5326256194627191, "reg_alpha": 0.7408711930398492, "reg_lambda": 0.5467731065349226}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 7325, "num_leaves": 15, "min_child_samples": 6, "learning_rate": 0.009932524214971736, "log_max_bin": 6, "colsample_bytree": 0.8592091503131608, "reg_alpha": 0.0009997224940106115, "reg_lambda": 0.04069855891326503}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 32767, "num_leaves": 372, "min_child_samples": 4, "learning_rate": 0.03517259015200922, "log_max_bin": 5, "colsample_bytree": 1.0, "reg_alpha": 0.02271142170225636, "reg_lambda": 0.001963791798843179, "FLAML_sample_size": 830258}}
|
||||
@@ -1,167 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946366,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
2dplanes,0,regression,0.907774,{'_modeljson': 'lgbm/adult.json'}
|
||||
2dplanes,0,regression,0.901643,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
2dplanes,0,regression,0.915098,{'_modeljson': 'lgbm/Albert.json'}
|
||||
2dplanes,0,regression,0.302328,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.94523,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.945698,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.946194,{'_modeljson': 'lgbm/car.json'}
|
||||
2dplanes,0,regression,0.945549,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
2dplanes,0,regression,0.946232,{'_modeljson': 'lgbm/default.json'}
|
||||
2dplanes,0,regression,0.945594,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
2dplanes,0,regression,0.836996,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
2dplanes,0,regression,0.917152,{'_modeljson': 'lgbm/poker.json'}
|
||||
adult,0,binary,0.927203,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
adult,0,binary,0.932072,{'_modeljson': 'lgbm/adult.json'}
|
||||
adult,0,binary,0.926563,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
adult,0,binary,0.928604,{'_modeljson': 'lgbm/Albert.json'}
|
||||
adult,0,binary,0.911171,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.930645,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
adult,0,binary,0.928603,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
adult,0,binary,0.915825,{'_modeljson': 'lgbm/car.json'}
|
||||
adult,0,binary,0.919499,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
adult,0,binary,0.930109,{'_modeljson': 'lgbm/default.json'}
|
||||
adult,0,binary,0.932453,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
adult,0,binary,0.921959,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
adult,0,binary,0.910763,{'_modeljson': 'lgbm/poker.json'}
|
||||
Airlines,0,binary,0.705404,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Airlines,0,binary,0.714521,{'_modeljson': 'lgbm/adult.json'}
|
||||
Airlines,0,binary,0.732288,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
Airlines,0,binary,0.710273,{'_modeljson': 'lgbm/Albert.json'}
|
||||
Airlines,0,binary,0.707107,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.718682,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.724703,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
Airlines,0,binary,0.690574,{'_modeljson': 'lgbm/car.json'}
|
||||
Airlines,0,binary,0.725808,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
Airlines,0,binary,0.710419,{'_modeljson': 'lgbm/default.json'}
|
||||
Airlines,0,binary,0.710419,{'_modeljson': 'lgbm/default.json'}
|
||||
Airlines,0,binary,0.718609,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
Airlines,0,binary,0.716213,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
Airlines,0,binary,0.654868,{'_modeljson': 'lgbm/poker.json'}
|
||||
Albert,0,binary,0.744825,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Albert,0,binary,0.758979,{'_modeljson': 'lgbm/adult.json'}
|
||||
Albert,0,binary,0.758364,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
Albert,0,binary,0.770923,{'_modeljson': 'lgbm/Albert.json'}
|
||||
Albert,0,binary,0.745091,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.754523,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
Albert,0,binary,0.759939,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.765119,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
Albert,0,binary,0.745067,{'_modeljson': 'lgbm/car.json'}
|
||||
Albert,0,binary,0.762311,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
Albert,0,binary,0.753181,{'_modeljson': 'lgbm/default.json'}
|
||||
Albert,0,binary,0.753181,{'_modeljson': 'lgbm/default.json'}
|
||||
Albert,0,binary,0.760248,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
Albert,0,binary,0.758111,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
Albert,0,binary,0.761768,{'_modeljson': 'lgbm/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.811238,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.867285,{'_modeljson': 'lgbm/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.8888,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.881302,{'_modeljson': 'lgbm/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.891085,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.816736,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
Amazon_employee_access,0,binary,0.861187,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.848348,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.760891,{'_modeljson': 'lgbm/car.json'}
|
||||
Amazon_employee_access,0,binary,0.872951,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851183,{'_modeljson': 'lgbm/default.json'}
|
||||
Amazon_employee_access,0,binary,0.851183,{'_modeljson': 'lgbm/default.json'}
|
||||
Amazon_employee_access,0,binary,0.851173,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.843577,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
Amazon_employee_access,0,binary,0.866543,{'_modeljson': 'lgbm/poker.json'}
|
||||
bng_breastTumor,0,regression,0.186246,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.181787,{'_modeljson': 'lgbm/adult.json'}
|
||||
bng_breastTumor,0,regression,0.177175,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.169053,{'_modeljson': 'lgbm/Albert.json'}
|
||||
bng_breastTumor,0,regression,0.0734972,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.192189,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
bng_breastTumor,0,regression,0.195887,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.144786,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.168074,{'_modeljson': 'lgbm/car.json'}
|
||||
bng_breastTumor,0,regression,0.159819,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.192813,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_breastTumor,0,regression,0.192813,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_breastTumor,0,regression,0.193994,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
bng_breastTumor,0,regression,0.162977,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
bng_breastTumor,0,regression,-0.0283641,{'_modeljson': 'lgbm/poker.json'}
|
||||
bng_pbc,0,regression,0.415569,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.421659,{'_modeljson': 'lgbm/adult.json'}
|
||||
bng_pbc,0,regression,0.433399,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
bng_pbc,0,regression,0.429397,{'_modeljson': 'lgbm/Albert.json'}
|
||||
bng_pbc,0,regression,0.218693,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.426949,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
bng_pbc,0,regression,0.444361,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.459898,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.404274,{'_modeljson': 'lgbm/car.json'}
|
||||
bng_pbc,0,regression,0.453742,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
bng_pbc,0,regression,0.425581,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_pbc,0,regression,0.425581,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_pbc,0,regression,0.440833,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
bng_pbc,0,regression,0.42319,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
bng_pbc,0,regression,0.440263,{'_modeljson': 'lgbm/poker.json'}
|
||||
car,0,multiclass,-0.126115,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
car,0,multiclass,-0.20528,{'_modeljson': 'lgbm/adult.json'}
|
||||
car,0,multiclass,-0.189212,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
car,0,multiclass,-0.233147,{'_modeljson': 'lgbm/Albert.json'}
|
||||
car,0,multiclass,-0.598807,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.119622,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
car,0,multiclass,-0.0372956,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.179642,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
car,0,multiclass,-0.000121047,{'_modeljson': 'lgbm/car.json'}
|
||||
car,0,multiclass,-0.050453,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
car,0,multiclass,-0.00234879,{'_modeljson': 'lgbm/default.json'}
|
||||
car,0,multiclass,-0.00234879,{'_modeljson': 'lgbm/default.json'}
|
||||
car,0,multiclass,-0.000295737,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
car,0,multiclass,-0.297016,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
car,0,multiclass,-0.00178529,{'_modeljson': 'lgbm/poker.json'}
|
||||
connect-4,0,multiclass,-0.527657,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.462894,{'_modeljson': 'lgbm/adult.json'}
|
||||
connect-4,0,multiclass,-0.449048,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.393871,{'_modeljson': 'lgbm/Albert.json'}
|
||||
connect-4,0,multiclass,-0.73746,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.485399,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
connect-4,0,multiclass,-0.393378,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.388117,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.484577,{'_modeljson': 'lgbm/car.json'}
|
||||
connect-4,0,multiclass,-0.32741,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.482328,{'_modeljson': 'lgbm/default.json'}
|
||||
connect-4,0,multiclass,-0.482328,{'_modeljson': 'lgbm/default.json'}
|
||||
connect-4,0,multiclass,-0.413426,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.438676,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
connect-4,0,multiclass,-0.489035,{'_modeljson': 'lgbm/poker.json'}
|
||||
dilbert,0,multiclass,-0.134669,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.0405039,{'_modeljson': 'lgbm/adult.json'}
|
||||
dilbert,0,multiclass,-0.0888238,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
dilbert,0,multiclass,-0.0618876,{'_modeljson': 'lgbm/Albert.json'}
|
||||
dilbert,0,multiclass,-0.0653412,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
dilbert,0,multiclass,-0.0484292,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
dilbert,0,multiclass,-0.126248,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
dilbert,0,multiclass,-0.0473867,{'_modeljson': 'lgbm/car.json'}
|
||||
dilbert,0,multiclass,-0.0759236,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
dilbert,0,multiclass,-0.0490604,{'_modeljson': 'lgbm/default.json'}
|
||||
dilbert,0,multiclass,-0.0490604,{'_modeljson': 'lgbm/default.json'}
|
||||
dilbert,0,multiclass,-0.034108,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
dilbert,0,multiclass,-0.0661046,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
dilbert,0,multiclass,-0.0744684,{'_modeljson': 'lgbm/poker.json'}
|
||||
Dionis,0,multiclass,-0.395452,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Dionis,0,multiclass,-1.40235,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Dionis,0,multiclass,-0.306241,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
Dionis,0,multiclass,-33.7902,{'_modeljson': 'lgbm/car.json'}
|
||||
Dionis,0,multiclass,-27.9456,{'_modeljson': 'lgbm/default.json'}
|
||||
Dionis,0,multiclass,-28.095,{'_modeljson': 'lgbm/default.json'}
|
||||
Dionis,0,multiclass,-0.318142,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
poker,0,regression,0.203695,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
poker,0,regression,0.424513,{'_modeljson': 'lgbm/adult.json'}
|
||||
poker,0,regression,0.490528,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
poker,0,regression,0.767652,{'_modeljson': 'lgbm/Albert.json'}
|
||||
poker,0,regression,0.0592655,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.393168,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
poker,0,regression,0.614152,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
poker,0,regression,0.854134,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
poker,0,regression,0.197075,{'_modeljson': 'lgbm/car.json'}
|
||||
poker,0,regression,0.879695,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
poker,0,regression,0.284102,{'_modeljson': 'lgbm/default.json'}
|
||||
poker,0,regression,0.284102,{'_modeljson': 'lgbm/default.json'}
|
||||
poker,0,regression,0.433648,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
poker,0,regression,0.657666,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
poker,0,regression,0.940835,{'_modeljson': 'lgbm/poker.json'}
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 38, "max_features": 1.0, "max_leaves": 58}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 418, "max_features": 0.5303485415288045, "max_leaves": 6452, "criterion": "entropy", "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 0.10091610074262287, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 501, "max_features": 0.24484242524861066, "max_leaves": 1156, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 510, "max_features": 0.12094682590862652, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 337147}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 1212, "max_features": 0.3129111648657632, "max_leaves": 779, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 288, "max_features": 0.6436380990499977, "max_leaves": 1823, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 0.3158919059422144, "max_leaves": 32767, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 792, "max_features": 1.0, "max_leaves": 67, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 1907, "max_features": 0.3728618389498168, "max_leaves": 11731, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 350, "max_features": 0.748250835121453, "max_leaves": 433, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 830258}}
|
||||
@@ -1,145 +0,0 @@
|
||||
task,fold,type,result,metric,params,info
|
||||
2dplanes,0,regression,0.946488,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
2dplanes,0,regression,0.936392,r2,{'_modeljson': 'rf/adult.json'},
|
||||
2dplanes,0,regression,0.940486,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
2dplanes,0,regression,0.924025,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
2dplanes,0,regression,0.911362,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
2dplanes,0,regression,0.944353,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
2dplanes,0,regression,0.932343,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
2dplanes,0,regression,0.946423,r2,{'_modeljson': 'rf/car.json'},
|
||||
2dplanes,0,regression,0.937309,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
2dplanes,0,regression,0.930126,r2,{'_modeljson': 'rf/default.json'},
|
||||
2dplanes,0,regression,0.945707,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
2dplanes,0,regression,0.923313,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
2dplanes,0,regression,0.930579,r2,{'_modeljson': 'rf/poker.json'},
|
||||
adult,0,binary,0.912946,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
adult,0,binary,0.91978,auc,{'_modeljson': 'rf/adult.json'},
|
||||
adult,0,binary,0.910127,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
adult,0,binary,0.910553,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
adult,0,binary,0.919662,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
adult,0,binary,0.915769,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
adult,0,binary,0.91003,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
adult,0,binary,0.914697,auc,{'_modeljson': 'rf/car.json'},
|
||||
adult,0,binary,0.911118,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
adult,0,binary,0.907368,auc,{'_modeljson': 'rf/default.json'},
|
||||
adult,0,binary,0.919216,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
adult,0,binary,0.910528,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
adult,0,binary,0.904508,auc,{'_modeljson': 'rf/poker.json'},
|
||||
Airlines,0,binary,0.687817,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Airlines,0,binary,0.712804,auc,{'_modeljson': 'rf/adult.json'},
|
||||
Airlines,0,binary,0.727357,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
Airlines,0,binary,0.705541,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
Airlines,0,binary,0.71012,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
Airlines,0,binary,0.722532,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Airlines,0,binary,0.709287,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
Airlines,0,binary,0.688678,auc,{'_modeljson': 'rf/car.json'},
|
||||
Airlines,0,binary,0.725288,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
Airlines,0,binary,0.657276,auc,{'_modeljson': 'rf/default.json'},
|
||||
Airlines,0,binary,0.708515,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
Airlines,0,binary,0.705826,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
Airlines,0,binary,0.699484,auc,{'_modeljson': 'rf/poker.json'},
|
||||
Albert,0,binary,0.712348,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Albert,0,binary,0.72836,auc,{'_modeljson': 'rf/adult.json'},
|
||||
Albert,0,binary,0.734105,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
Albert,0,binary,0.737119,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
Albert,0,binary,0.729216,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
Albert,0,binary,0.731546,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Albert,0,binary,0.734847,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
Albert,0,binary,0.713965,auc,{'_modeljson': 'rf/car.json'},
|
||||
Albert,0,binary,0.735372,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
Albert,0,binary,0.728232,auc,{'_modeljson': 'rf/default.json'},
|
||||
Albert,0,binary,0.726823,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
Albert,0,binary,0.735994,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
Amazon_employee_access,0,binary,0.728779,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Amazon_employee_access,0,binary,0.87801,auc,{'_modeljson': 'rf/adult.json'},
|
||||
Amazon_employee_access,0,binary,0.88085,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
Amazon_employee_access,0,binary,0.881869,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
Amazon_employee_access,0,binary,0.881463,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
Amazon_employee_access,0,binary,0.882723,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Amazon_employee_access,0,binary,0.88299,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
Amazon_employee_access,0,binary,0.808575,auc,{'_modeljson': 'rf/car.json'},
|
||||
Amazon_employee_access,0,binary,0.881209,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
Amazon_employee_access,0,binary,0.877507,auc,{'_modeljson': 'rf/default.json'},
|
||||
Amazon_employee_access,0,binary,0.875146,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
Amazon_employee_access,0,binary,0.878121,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
Amazon_employee_access,0,binary,0.886312,auc,{'_modeljson': 'rf/poker.json'},
|
||||
bng_breastTumor,0,regression,0.153657,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
bng_breastTumor,0,regression,0.156403,r2,{'_modeljson': 'rf/adult.json'},
|
||||
bng_breastTumor,0,regression,0.174569,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
bng_breastTumor,0,regression,0.0441869,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
bng_breastTumor,0,regression,0.157992,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
bng_breastTumor,0,regression,0.186635,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
bng_breastTumor,0,regression,0.0527547,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
bng_breastTumor,0,regression,0.158852,r2,{'_modeljson': 'rf/car.json'},
|
||||
bng_breastTumor,0,regression,0.150611,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
bng_breastTumor,0,regression,-0.02142,r2,{'_modeljson': 'rf/default.json'},
|
||||
bng_breastTumor,0,regression,0.183562,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
bng_breastTumor,0,regression,0.0414589,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
bng_breastTumor,0,regression,0.00390625,r2,{'_modeljson': 'rf/poker.json'},
|
||||
bng_pbc,0,regression,0.344043,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
bng_pbc,0,regression,0.402376,r2,{'_modeljson': 'rf/adult.json'},
|
||||
bng_pbc,0,regression,0.423262,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
bng_pbc,0,regression,0.386142,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
bng_pbc,0,regression,0.403857,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
bng_pbc,0,regression,0.413944,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
bng_pbc,0,regression,0.43206,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
bng_pbc,0,regression,0.348594,r2,{'_modeljson': 'rf/car.json'},
|
||||
bng_pbc,0,regression,0.427588,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
bng_pbc,0,regression,0.415337,r2,{'_modeljson': 'rf/default.json'},
|
||||
bng_pbc,0,regression,0.393936,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
bng_pbc,0,regression,0.415246,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
car,0,multiclass,-0.0575382,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
car,0,multiclass,-0.155878,neg_logloss,{'_modeljson': 'rf/adult.json'},
|
||||
car,0,multiclass,-0.0691041,neg_logloss,{'_modeljson': 'rf/Airlines.json'},
|
||||
car,0,multiclass,-0.156607,neg_logloss,{'_modeljson': 'rf/Albert.json'},
|
||||
car,0,multiclass,-0.156968,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
car,0,multiclass,-0.0692317,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
car,0,multiclass,-0.159856,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
car,0,multiclass,-0.046769,neg_logloss,{'_modeljson': 'rf/car.json'},
|
||||
car,0,multiclass,-0.0981933,neg_logloss,{'_modeljson': 'rf/connect-4.json'},
|
||||
car,0,multiclass,-0.0971712,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
car,0,multiclass,-0.0564843,neg_logloss,{'_modeljson': 'rf/dilbert.json'},
|
||||
car,0,multiclass,-0.157771,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
car,0,multiclass,-0.0511764,neg_logloss,{'_modeljson': 'rf/poker.json'},
|
||||
connect-4,0,multiclass,-0.725888,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
connect-4,0,multiclass,-0.576056,neg_logloss,{'_modeljson': 'rf/adult.json'},
|
||||
connect-4,0,multiclass,-0.48458,neg_logloss,{'_modeljson': 'rf/Airlines.json'},
|
||||
connect-4,0,multiclass,-0.505598,neg_logloss,{'_modeljson': 'rf/Albert.json'},
|
||||
connect-4,0,multiclass,-0.568184,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
connect-4,0,multiclass,-0.537511,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
connect-4,0,multiclass,-0.479022,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
connect-4,0,multiclass,-0.713123,neg_logloss,{'_modeljson': 'rf/car.json'},
|
||||
connect-4,0,multiclass,-0.475306,neg_logloss,{'_modeljson': 'rf/connect-4.json'},
|
||||
connect-4,0,multiclass,-0.518061,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
connect-4,0,multiclass,-0.599112,neg_logloss,{'_modeljson': 'rf/dilbert.json'},
|
||||
connect-4,0,multiclass,-0.503642,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
connect-4,0,multiclass,-0.57852,neg_logloss,{'_modeljson': 'rf/poker.json'},
|
||||
dilbert,0,multiclass,-0.557959,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
dilbert,0,multiclass,-0.294462,neg_logloss,{'_modeljson': 'rf/adult.json'},
|
||||
dilbert,0,multiclass,-0.293928,neg_logloss,{'_modeljson': 'rf/Airlines.json'},
|
||||
dilbert,0,multiclass,-0.299661,neg_logloss,{'_modeljson': 'rf/Albert.json'},
|
||||
dilbert,0,multiclass,-0.294668,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
dilbert,0,multiclass,-0.314706,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
dilbert,0,multiclass,-0.313807,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
dilbert,0,multiclass,-0.51482,neg_logloss,{'_modeljson': 'rf/car.json'},
|
||||
dilbert,0,multiclass,-0.293982,neg_logloss,{'_modeljson': 'rf/connect-4.json'},
|
||||
dilbert,0,multiclass,-0.343209,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
dilbert,0,multiclass,-0.2945,neg_logloss,{'_modeljson': 'rf/dilbert.json'},
|
||||
dilbert,0,multiclass,-0.298305,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
Dionis,0,multiclass,-3.55264,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Dionis,0,multiclass,-1.07117,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Dionis,0,multiclass,-0.784388,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
Dionis,0,multiclass,-0.580332,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
poker,0,regression,0.125176,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
poker,0,regression,0.148019,r2,{'_modeljson': 'rf/adult.json'},
|
||||
poker,0,regression,0.322507,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
poker,0,regression,0.172264,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
poker,0,regression,0.113673,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
poker,0,regression,0.243427,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
poker,0,regression,0.379662,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
poker,0,regression,0.133342,r2,{'_modeljson': 'rf/car.json'},
|
||||
poker,0,regression,0.296597,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
poker,0,regression,0.608532,r2,{'_modeljson': 'rf/default.json'},
|
||||
poker,0,regression,0.192625,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
poker,0,regression,0.172139,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
poker,0,regression,0.528869,r2,{'_modeljson': 'rf/poker.json'},
|
||||
|
@@ -1,221 +0,0 @@
|
||||
import sys
|
||||
import pickle
|
||||
from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer
|
||||
from sklearn.model_selection import train_test_split
|
||||
import pandas as pd
|
||||
from flaml import AutoML
|
||||
from flaml.default import (
|
||||
preprocess_and_suggest_hyperparams,
|
||||
suggest_hyperparams,
|
||||
suggest_learner,
|
||||
)
|
||||
from flaml.default import portfolio, regret
|
||||
|
||||
|
||||
def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
|
||||
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
# portfolio.main()
|
||||
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
# portfolio.main()
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
|
||||
|
||||
def test_build_portfolio(path="test/default", strategy="greedy"):
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
|
||||
|
||||
def test_iris(as_frame=True):
    """Fit AutoML on iris with data-driven starting points.

    Runs two fits: one with the default "data" starting points and one
    pointing at the local portfolio under test/default.
    """
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    automl = AutoML()
    common_settings = dict(
        time_budget=2,
        metric="accuracy",
        task="classification",
        log_file_name="test/iris.log",
        n_jobs=1,
    )
    for starting_points in ("data", "data:test/default"):
        automl.fit(X_train, y_train, starting_points=starting_points, **common_settings)
|
||||
|
||||
|
||||
def test_housing(as_frame=True):
    """Run a zero-iteration AutoML fit on california housing.

    max_iter=0 means no search is performed; only the data-driven
    starting configuration path is exercised.
    """
    X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=as_frame)
    automl = AutoML()
    automl.fit(
        X_train,
        y_train,
        time_budget=2,
        task="regression",
        estimator_list=["xgboost", "lgbm"],
        log_file_name="test/housing.log",
        n_jobs=1,
        starting_points="data",
        max_iter=0,
    )
|
||||
|
||||
|
||||
def test_regret():
    """Compute the binary-task regret table for the lgbm benchmark results."""
    # regret.main() parses sys.argv; hand it an explicit argument vector.
    sys.argv = [
        "regret.py",
        "--result_csv", "test/default/lgbm/results.csv",
        "--task_type", "binary",
        "--output", "test/default/lgbm/binary_regret.csv",
    ]
    regret.main()
|
||||
|
||||
|
||||
def test_suggest_classification():
    """Exercise zero-shot hyperparameter suggestion for classification.

    Covers suggest_hyperparams, preprocess_and_suggest_hyperparams and
    suggest_learner on breast-cancer (binary) and iris (multiclass) data,
    and round-trips predictions through the returned feature and label
    transformers.
    """
    location = "test/default"
    X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
    suggested = suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
    print(suggested)
    suggested = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
    print(suggested)
    suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
    print(suggested)

    X, y = load_iris(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    (
        hyperparams,
        estimator_class,
        X,
        y,
        feature_transformer,
        label_transformer,
    ) = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
    # persist the fitted feature transformer so it can be reloaded elsewhere
    with open("test/default/feature_transformer", "wb") as f:
        pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
    model = estimator_class(**hyperparams)  # estimator_class is LGBMClassifier
    model.fit(X, y)
    # apply the same preprocessing to the held-out split before predicting,
    # then map the encoded predictions back to the original label space
    X_test = feature_transformer.transform(X_test)
    y_pred = label_transformer.inverse_transform(pd.Series(model.predict(X_test).astype(int)))
    print(y_pred)
    suggested = suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
    print(suggested)
    suggested = preprocess_and_suggest_hyperparams(
        "classification", X_train, y_train, "xgb_limitdepth", location=location
    )
    print(suggested)
    suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
    # NOTE(review): presumably selects the better-suited learner of the two
    # candidates from the portfolio — confirm against flaml.default docs
    suggested = suggest_learner(
        "classification",
        X_train,
        y_train,
        estimator_list=["xgboost", "xgb_limitdepth"],
        location=location,
    )
    print(suggested)
|
||||
|
||||
|
||||
def test_suggest_regression():
    """Exercise zero-shot hyperparameter suggestion for regression learners."""
    location = "test/default"
    X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True)
    # same data, three suggestion calls differing in API entry and estimator
    for suggest_fn, estimator in (
        (suggest_hyperparams, "lgbm"),
        (preprocess_and_suggest_hyperparams, "xgboost"),
        (suggest_hyperparams, "xgb_limitdepth"),
    ):
        print(suggest_fn("regression", X_train, y_train, estimator, location=location))
    print(suggest_learner("regression", X_train, y_train, location=location))
|
||||
|
||||
|
||||
def test_rf():
    """Smoke-test the zero-shot RandomForest classifier and regressor."""
    from flaml.default import RandomForestRegressor, RandomForestClassifier

    # classifier on breast cancer, fitted on a small slice
    X_cls, y_cls = load_breast_cancer(return_X_y=True, as_frame=True)
    clf = RandomForestClassifier()
    clf.fit(X_cls[:100], y_cls[:100])
    clf.predict(X_cls)
    clf.predict_proba(X_cls)
    print(clf)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = RandomForestRegressor(default_location="test/default")
    reg.fit(X_reg[:100], y_reg[:100])
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_extratrees():
    """Smoke-test the zero-shot ExtraTrees classifier and regressor."""
    from flaml.default import ExtraTreesRegressor, ExtraTreesClassifier

    # classifier on iris, fitted on a small slice
    X_cls, y_cls = load_iris(return_X_y=True, as_frame=True)
    clf = ExtraTreesClassifier()
    clf.fit(X_cls[:100], y_cls[:100])
    clf.predict(X_cls)
    clf.predict_proba(X_cls)
    print(clf)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = ExtraTreesRegressor(default_location="test/default")
    reg.fit(X_reg[:100], y_reg[:100])
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_lgbm():
    """Smoke-test the zero-shot LGBM classifier and regressor."""
    from flaml.default import LGBMRegressor, LGBMClassifier

    # classifier on breast cancer; also request per-feature contributions
    X_cls, y_cls = load_breast_cancer(return_X_y=True, as_frame=True)
    clf = LGBMClassifier(n_jobs=1)
    clf.fit(X_cls, y_cls)
    clf.predict(X_cls, pred_contrib=True)
    clf.predict_proba(X_cls)
    print(clf.get_params())
    print(clf)
    print(clf.classes_)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = LGBMRegressor(default_location="test/default")
    reg.fit(X_reg, y_reg)
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_xgboost():
    """Smoke-test the zero-shot XGBoost classifier and regressor."""
    from flaml.default import XGBRegressor, XGBClassifier

    # classifier on breast cancer with max_depth=0, fitted on a small slice
    X_cls, y_cls = load_breast_cancer(return_X_y=True, as_frame=True)
    clf = XGBClassifier(max_depth=0)
    clf.fit(X_cls[:100], y_cls[:100])
    clf.predict(X_cls)
    clf.predict_proba(X_cls)
    print(clf)
    print(clf.classes_)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = XGBRegressor(default_location="test/default")
    reg.fit(X_reg[:100], y_reg[:100])
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_nobudget():
    """Verify zero-shot starting configs survive a fit with no time budget."""
    X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
    automl = AutoML()
    estimators = ["lgbm", "extra_tree", "rf"]
    automl.fit(
        X_train[:20],
        y_train[:20],
        estimator_list=estimators,
        max_iter=12,
        starting_points="data",
        log_file_name="test/default/no_budget.txt",
        log_type="all",
    )
    automl.fit(X_train[:20], y_train[:20], estimator_list=estimators)
    # a zero-shot config outside the search space must not degenerate
    # to the low-cost init config
    assert automl.best_config_per_estimator["extra_tree"]["n_estimators"] > 4
    # the zero-shot config {} must be left unmodified
    assert "criterion" not in automl.best_config_per_estimator["rf"]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, rebuild the portfolios shipped under flaml/default.
    test_build_portfolio("flaml/default")
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 2704, "max_depth": 2, "min_child_weight": 0.23751738294732322, "learning_rate": 0.019828117294812268, "subsample": 0.8798706041292946, "colsample_bylevel": 0.978891799553329, "colsample_bytree": 1.0, "reg_alpha": 0.3023181744217667, "reg_lambda": 101.10719177747677}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3573, "max_depth": 13, "min_child_weight": 2.921657581984971, "learning_rate": 0.00699976723859477, "subsample": 0.6110504706508572, "colsample_bylevel": 0.9998661537469163, "colsample_bytree": 0.5457693412489456, "reg_alpha": 0.05315763138176945, "reg_lambda": 23.067599600958623, "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3526, "max_depth": 13, "min_child_weight": 0.0994486725676356, "learning_rate": 0.0009765625, "subsample": 0.46123759274652554, "colsample_bylevel": 1.0, "colsample_bytree": 0.4498813776397717, "reg_alpha": 0.002599398546499414, "reg_lambda": 0.028336396854402753}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 5457, "max_depth": 6, "min_child_weight": 0.19978269031877885, "learning_rate": 0.003906732665632749, "subsample": 0.8207785234496902, "colsample_bylevel": 0.8438751931476698, "colsample_bytree": 0.42202862997585794, "reg_alpha": 0.017372558844968737, "reg_lambda": 0.03977802121721031}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 7782, "max_depth": 7, "min_child_weight": 0.3794874452608909, "learning_rate": 0.006733035771172325, "subsample": 1.0, "colsample_bylevel": 1.0, "colsample_bytree": 0.5611305922560855, "reg_alpha": 8.203853065625196, "reg_lambda": 56.48543538808782, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 1013, "max_depth": 15, "min_child_weight": 57.33124114425335, "learning_rate": 0.009706354607542536, "subsample": 1.0, "colsample_bylevel": 0.7925997002174675, "colsample_bytree": 0.874062117666267, "reg_alpha": 0.7965442116152655, "reg_lambda": 2.769937488341342, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 624, "max_depth": 3, "min_child_weight": 0.0017043575728019624, "learning_rate": 0.8481863978692453, "subsample": 0.9897901748446495, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0009765625, "reg_lambda": 0.008686469265798288}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 1499, "max_depth": 11, "min_child_weight": 0.07563529776156448, "learning_rate": 0.039042609221240955, "subsample": 0.7832981935783824, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0009765625, "reg_lambda": 23.513066752844153}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 405, "max_depth": 4, "min_child_weight": 0.2264977130755997, "learning_rate": 0.3390883186947167, "subsample": 0.8078627200173096, "colsample_bylevel": 0.8570282862730856, "colsample_bytree": 0.8280063772581445, "reg_alpha": 0.007634576038353066, "reg_lambda": 1.7101180066063097}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3234, "max_depth": 13, "min_child_weight": 0.07784911437942721, "learning_rate": 0.0565426521738442, "subsample": 1.0, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.007928962402687697, "reg_lambda": 3.881249823648859, "FLAML_sample_size": 830258}}
|
||||
@@ -1,116 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946567,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
2dplanes,0,regression,0.94503,{'_modeljson': 'xgblimit/adult.json'}
|
||||
2dplanes,0,regression,0.945074,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
2dplanes,0,regression,0.806694,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.945799,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.944103,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.945327,{'_modeljson': 'xgblimit/car.json'}
|
||||
2dplanes,0,regression,0.923926,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
2dplanes,0,regression,0.944454,{'_modeljson': 'xgblimit/default.json'}
|
||||
2dplanes,0,regression,0.945212,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
2dplanes,0,regression,0.910852,{'_modeljson': 'xgblimit/poker.json'}
|
||||
adult,0,binary,0.923082,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
adult,0,binary,0.932355,{'_modeljson': 'xgblimit/adult.json'}
|
||||
adult,0,binary,0.928373,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
adult,0,binary,0.927574,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.929427,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
adult,0,binary,0.92204,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
adult,0,binary,0.721115,{'_modeljson': 'xgblimit/car.json'}
|
||||
adult,0,binary,0.921465,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
adult,0,binary,0.931234,{'_modeljson': 'xgblimit/default.json'}
|
||||
adult,0,binary,0.927801,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
adult,0,binary,0.916878,{'_modeljson': 'xgblimit/poker.json'}
|
||||
Airlines,0,binary,0.699604,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
Airlines,0,binary,0.711053,{'_modeljson': 'xgblimit/adult.json'}
|
||||
Airlines,0,binary,0.732443,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
Airlines,0,binary,0.72875,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.725056,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.730476,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
Airlines,0,binary,0.71788,{'_modeljson': 'xgblimit/car.json'}
|
||||
Airlines,0,binary,0.72604,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
Airlines,0,binary,0.719845,{'_modeljson': 'xgblimit/default.json'}
|
||||
Airlines,0,binary,0.719302,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
Airlines,0,binary,0.684382,{'_modeljson': 'xgblimit/poker.json'}
|
||||
Albert,0,binary,0.743682,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
Albert,0,binary,0.759246,{'_modeljson': 'xgblimit/adult.json'}
|
||||
Albert,0,binary,0.766177,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
Albert,0,binary,0.74969,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.766961,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.764534,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
Albert,0,binary,0.753311,{'_modeljson': 'xgblimit/car.json'}
|
||||
Albert,0,binary,0.765229,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
Albert,0,binary,0.757802,{'_modeljson': 'xgblimit/default.json'}
|
||||
Albert,0,binary,0.7596,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
Albert,0,binary,0.761456,{'_modeljson': 'xgblimit/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.759779,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.876747,{'_modeljson': 'xgblimit/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.864954,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.894651,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.845645,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.789099,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.550859,{'_modeljson': 'xgblimit/car.json'}
|
||||
Amazon_employee_access,0,binary,0.870599,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgblimit/default.json'}
|
||||
Amazon_employee_access,0,binary,0.86385,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.864415,{'_modeljson': 'xgblimit/poker.json'}
|
||||
bng_breastTumor,0,regression,0.163382,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.1789,{'_modeljson': 'xgblimit/adult.json'}
|
||||
bng_breastTumor,0,regression,0.188483,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.159704,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.1953,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.191805,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.12139,{'_modeljson': 'xgblimit/car.json'}
|
||||
bng_breastTumor,0,regression,0.163165,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgblimit/default.json'}
|
||||
bng_breastTumor,0,regression,0.183899,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
bng_breastTumor,0,regression,0.108646,{'_modeljson': 'xgblimit/poker.json'}
|
||||
bng_pbc,0,regression,0.384556,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.42041,{'_modeljson': 'xgblimit/adult.json'}
|
||||
bng_pbc,0,regression,0.449808,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
bng_pbc,0,regression,0.409944,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.439854,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.457955,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.418702,{'_modeljson': 'xgblimit/car.json'}
|
||||
bng_pbc,0,regression,0.455731,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
bng_pbc,0,regression,0.436902,{'_modeljson': 'xgblimit/default.json'}
|
||||
bng_pbc,0,regression,0.423052,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
bng_pbc,0,regression,0.447478,{'_modeljson': 'xgblimit/poker.json'}
|
||||
car,0,multiclass,-0.18106,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
car,0,multiclass,-0.170386,{'_modeljson': 'xgblimit/adult.json'}
|
||||
car,0,multiclass,-0.169973,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
car,0,multiclass,-0.498314,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.230405,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.330863,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
car,0,multiclass,-8.16E-05,{'_modeljson': 'xgblimit/car.json'}
|
||||
car,0,multiclass,-0.0239037,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
car,0,multiclass,-0.010029,{'_modeljson': 'xgblimit/default.json'}
|
||||
car,0,multiclass,-0.00720156,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
car,0,multiclass,-0.00360416,{'_modeljson': 'xgblimit/poker.json'}
|
||||
connect-4,0,multiclass,-0.597091,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.484427,{'_modeljson': 'xgblimit/adult.json'}
|
||||
connect-4,0,multiclass,-0.387769,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.553347,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.425107,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.441974,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.410519,{'_modeljson': 'xgblimit/car.json'}
|
||||
connect-4,0,multiclass,-0.342773,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgblimit/default.json'}
|
||||
connect-4,0,multiclass,-0.416631,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.466644,{'_modeljson': 'xgblimit/poker.json'}
|
||||
dilbert,0,multiclass,-0.189149,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.184569,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
dilbert,0,multiclass,-0.0485906,{'_modeljson': 'xgblimit/car.json'}
|
||||
dilbert,0,multiclass,-0.0643938,{'_modeljson': 'xgblimit/default.json'}
|
||||
dilbert,0,multiclass,-0.0425865,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
poker,0,regression,0.194424,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
poker,0,regression,0.443714,{'_modeljson': 'xgblimit/adult.json'}
|
||||
poker,0,regression,0.837273,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
poker,0,regression,0.354783,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.749681,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
poker,0,regression,0.782336,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
poker,0,regression,0.640848,{'_modeljson': 'xgblimit/car.json'}
|
||||
poker,0,regression,0.924649,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
poker,0,regression,0.635679,{'_modeljson': 'xgblimit/default.json'}
|
||||
poker,0,regression,0.672338,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
poker,0,regression,0.92563,{'_modeljson': 'xgblimit/poker.json'}
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6705, "max_leaves": 24, "min_child_weight": 58.562722088466444, "learning_rate": 0.0009765625, "subsample": 0.8993009465247683, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.2679275019160531, "reg_lambda": 91.95034898844547}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 17309, "max_leaves": 1146, "min_child_weight": 0.0193980002033358, "learning_rate": 0.0009765625, "subsample": 0.4169778612218198, "colsample_bylevel": 1.0, "colsample_bytree": 0.5504959296065052, "reg_alpha": 0.00505548829948545, "reg_lambda": 21.287234956122028, "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6357, "max_leaves": 206, "min_child_weight": 1.9495322566288034, "learning_rate": 0.0068766724195393905, "subsample": 0.9451618245005704, "colsample_bylevel": 0.9030482524943064, "colsample_bytree": 0.9278972006416252, "reg_alpha": 0.01857648400903689, "reg_lambda": 6.021166480604588, "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 591, "max_leaves": 16651, "min_child_weight": 0.03356567864689129, "learning_rate": 0.002595066436678338, "subsample": 0.9114132805513452, "colsample_bylevel": 0.9503441844594458, "colsample_bytree": 0.5703338448066768, "reg_alpha": 0.010405212349127894, "reg_lambda": 0.05352660657433639}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 23282, "max_leaves": 19, "min_child_weight": 0.02198438885474473, "learning_rate": 0.001700636796132106, "subsample": 1.0, "colsample_bylevel": 0.8954745234489918, "colsample_bytree": 0.22331977285961732, "reg_alpha": 0.4115502489939291, "reg_lambda": 0.015523027968801352}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 4038, "max_leaves": 89, "min_child_weight": 0.23500921146599626, "learning_rate": 0.0039779941096963365, "subsample": 0.9421092355451888, "colsample_bylevel": 0.7772326835688742, "colsample_bytree": 0.6864341727912397, "reg_alpha": 4.8782018848557, "reg_lambda": 0.7531969031616396, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 32767, "max_leaves": 623, "min_child_weight": 0.03783048691639616, "learning_rate": 0.0021758863899615554, "subsample": 0.9086242379539484, "colsample_bylevel": 0.5880499360809446, "colsample_bytree": 1.0, "reg_alpha": 0.0037398450188259108, "reg_lambda": 16.894310259361305, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 765, "max_leaves": 6, "min_child_weight": 0.001, "learning_rate": 1.0, "subsample": 0.9833803894285497, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0012553728257619922, "reg_lambda": 0.03280542610559108}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6458, "max_leaves": 196, "min_child_weight": 0.020541449256787844, "learning_rate": 0.0067240405208345, "subsample": 0.5764514509827234, "colsample_bylevel": 1.0, "colsample_bytree": 0.9478632468968712, "reg_alpha": 0.08196899811780128, "reg_lambda": 1.3914579996946315}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 5739, "max_leaves": 5, "min_child_weight": 0.1359602026207002, "learning_rate": 0.14496176867613397, "subsample": 0.864897070662231, "colsample_bylevel": 0.01, "colsample_bytree": 0.9394057513384305, "reg_alpha": 0.001103317921178771, "reg_lambda": 0.1655504349283218}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6866, "max_leaves": 238, "min_child_weight": 0.1000665069590469, "learning_rate": 0.05522440252112267, "subsample": 0.9621433799637473, "colsample_bylevel": 0.8366787895853636, "colsample_bytree": 1.0, "reg_alpha": 0.002455941636379231, "reg_lambda": 0.02487031358204277, "FLAML_sample_size": 830258}}
|
||||
@@ -1,222 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946474,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
2dplanes,0,regression,0.849793,{'_modeljson': 'xgb/adult.json'}
|
||||
2dplanes,0,regression,0.940611,{'_modeljson': 'xgb/Albert.json'}
|
||||
2dplanes,0,regression,0.68908,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.945551,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.929904,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.944099,{'_modeljson': 'xgb/car.json'}
|
||||
2dplanes,0,regression,0.938336,{'_modeljson': 'xgb/connect-4.json'}
|
||||
2dplanes,0,regression,0.944454,{'_modeljson': 'xgb/default.json'}
|
||||
2dplanes,0,regression,0.945477,{'_modeljson': 'xgb/dilbert.json'}
|
||||
2dplanes,0,regression,0.91563,{'_modeljson': 'xgb/poker.json'}
|
||||
dilbert,0,multiclass,-0.362419,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.515024,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
dilbert,0,multiclass,-0.158604,{'_modeljson': 'xgb/car.json'}
|
||||
dilbert,0,multiclass,-0.0643938,{'_modeljson': 'xgb/default.json'}
|
||||
dilbert,0,multiclass,-0.0383872,{'_modeljson': 'xgb/dilbert.json'}
|
||||
dilbert,0,multiclass,-0.0611286,{'_modeljson': 'xgb/poker.json'}
|
||||
poker,0,regression,0.20821,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
poker,0,regression,0.206438,{'_modeljson': 'xgb/adult.json'}
|
||||
poker,0,regression,0.815665,{'_modeljson': 'xgb/Airlines.json'}
|
||||
poker,0,regression,0.857257,{'_modeljson': 'xgb/Albert.json'}
|
||||
poker,0,regression,0.362568,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.559622,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
poker,0,regression,0.922282,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
poker,0,regression,0.846139,{'_modeljson': 'xgb/car.json'}
|
||||
poker,0,regression,0.891631,{'_modeljson': 'xgb/connect-4.json'}
|
||||
poker,0,regression,0.635679,{'_modeljson': 'xgb/default.json'}
|
||||
poker,0,regression,0.377996,{'_modeljson': 'xgb/dilbert.json'}
|
||||
poker,0,regression,0.935986,{'_modeljson': 'xgb/poker.json'}
|
||||
adult,0,binary,0.918094,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
adult,0,binary,0.932468,{'_modeljson': 'xgb/adult.json'}
|
||||
adult,0,binary,0.92673,{'_modeljson': 'xgb/Airlines.json'}
|
||||
adult,0,binary,0.922077,{'_modeljson': 'xgb/Albert.json'}
|
||||
adult,0,binary,0.920837,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.92964,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
adult,0,binary,0.916531,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
adult,0,binary,0.884114,{'_modeljson': 'xgb/car.json'}
|
||||
adult,0,binary,0.917887,{'_modeljson': 'xgb/connect-4.json'}
|
||||
adult,0,binary,0.931234,{'_modeljson': 'xgb/default.json'}
|
||||
adult,0,binary,0.928861,{'_modeljson': 'xgb/dilbert.json'}
|
||||
adult,0,binary,0.909018,{'_modeljson': 'xgb/poker.json'}
|
||||
Airlines,0,binary,0.703353,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Airlines,0,binary,0.696962,{'_modeljson': 'xgb/adult.json'}
|
||||
Airlines,0,binary,0.73153,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Airlines,0,binary,0.731577,{'_modeljson': 'xgb/Albert.json'}
|
||||
Airlines,0,binary,0.725394,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.722896,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.716839,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Airlines,0,binary,0.715654,{'_modeljson': 'xgb/car.json'}
|
||||
Airlines,0,binary,0.73107,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Airlines,0,binary,0.719845,{'_modeljson': 'xgb/default.json'}
|
||||
Airlines,0,binary,0.71873,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Airlines,0,binary,0.676427,{'_modeljson': 'xgb/poker.json'}
|
||||
Albert,0,binary,0.742648,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Albert,0,binary,0.758723,{'_modeljson': 'xgb/adult.json'}
|
||||
Albert,0,binary,0.763066,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Albert,0,binary,0.768073,{'_modeljson': 'xgb/Albert.json'}
|
||||
Albert,0,binary,0.74349,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.764,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.767514,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Albert,0,binary,0.743392,{'_modeljson': 'xgb/car.json'}
|
||||
Albert,0,binary,0.766006,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Albert,0,binary,0.757802,{'_modeljson': 'xgb/default.json'}
|
||||
Albert,0,binary,0.746511,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Albert,0,binary,0.761985,{'_modeljson': 'xgb/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.727287,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.855441,{'_modeljson': 'xgb/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.85984,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.873629,{'_modeljson': 'xgb/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.897708,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.862679,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.872059,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.657192,{'_modeljson': 'xgb/car.json'}
|
||||
Amazon_employee_access,0,binary,0.877547,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgb/default.json'}
|
||||
Amazon_employee_access,0,binary,0.853361,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.859734,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_breastTumor,0,regression,0.184421,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.163226,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_breastTumor,0,regression,0.18037,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.177238,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_breastTumor,0,regression,-0.118976,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.195539,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.106337,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.149326,{'_modeljson': 'xgb/car.json'}
|
||||
bng_breastTumor,0,regression,0.161193,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgb/default.json'}
|
||||
bng_breastTumor,0,regression,0.186499,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_breastTumor,0,regression,-0.032219,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_pbc,0,regression,0.411719,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.409769,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_pbc,0,regression,0.450806,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_pbc,0,regression,0.458384,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_pbc,0,regression,0.236669,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.441873,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.462226,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.431868,{'_modeljson': 'xgb/car.json'}
|
||||
bng_pbc,0,regression,0.45678,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_pbc,0,regression,0.436902,{'_modeljson': 'xgb/default.json'}
|
||||
bng_pbc,0,regression,0.418839,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_pbc,0,regression,0.448148,{'_modeljson': 'xgb/poker.json'}
|
||||
car,0,multiclass,-0.38726,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
car,0,multiclass,-0.22547,{'_modeljson': 'xgb/adult.json'}
|
||||
car,0,multiclass,-0.208402,{'_modeljson': 'xgb/Airlines.json'}
|
||||
car,0,multiclass,-0.0256159,{'_modeljson': 'xgb/Albert.json'}
|
||||
car,0,multiclass,-0.627705,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.166328,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.0201057,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
car,0,multiclass,-8.45E-05,{'_modeljson': 'xgb/car.json'}
|
||||
car,0,multiclass,-0.0129025,{'_modeljson': 'xgb/connect-4.json'}
|
||||
car,0,multiclass,-0.010029,{'_modeljson': 'xgb/default.json'}
|
||||
car,0,multiclass,-0.00218674,{'_modeljson': 'xgb/dilbert.json'}
|
||||
car,0,multiclass,-0.00426392,{'_modeljson': 'xgb/poker.json'}
|
||||
connect-4,0,multiclass,-0.578339,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.489378,{'_modeljson': 'xgb/adult.json'}
|
||||
connect-4,0,multiclass,-0.406886,{'_modeljson': 'xgb/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.332411,{'_modeljson': 'xgb/Albert.json'}
|
||||
connect-4,0,multiclass,-0.636516,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.425947,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.354612,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.452201,{'_modeljson': 'xgb/car.json'}
|
||||
connect-4,0,multiclass,-0.338363,{'_modeljson': 'xgb/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgb/default.json'}
|
||||
connect-4,0,multiclass,-0.497404,{'_modeljson': 'xgb/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.592309,{'_modeljson': 'xgb/poker.json'}
|
||||
adult,0,binary,0.918094,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
adult,0,binary,0.932468,{'_modeljson': 'xgb/adult.json'}
|
||||
adult,0,binary,0.92673,{'_modeljson': 'xgb/Airlines.json'}
|
||||
adult,0,binary,0.922077,{'_modeljson': 'xgb/Albert.json'}
|
||||
adult,0,binary,0.920837,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.92964,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
adult,0,binary,0.916531,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
adult,0,binary,0.884114,{'_modeljson': 'xgb/car.json'}
|
||||
adult,0,binary,0.917887,{'_modeljson': 'xgb/connect-4.json'}
|
||||
adult,0,binary,0.931234,{'_modeljson': 'xgb/default.json'}
|
||||
adult,0,binary,0.928861,{'_modeljson': 'xgb/dilbert.json'}
|
||||
adult,0,binary,0.909018,{'_modeljson': 'xgb/poker.json'}
|
||||
Airlines,0,binary,0.703353,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Airlines,0,binary,0.696962,{'_modeljson': 'xgb/adult.json'}
|
||||
Airlines,0,binary,0.73153,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Airlines,0,binary,0.731577,{'_modeljson': 'xgb/Albert.json'}
|
||||
Airlines,0,binary,0.725394,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.722896,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.716839,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Airlines,0,binary,0.715654,{'_modeljson': 'xgb/car.json'}
|
||||
Airlines,0,binary,0.73107,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Airlines,0,binary,0.719845,{'_modeljson': 'xgb/default.json'}
|
||||
Airlines,0,binary,0.71873,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Airlines,0,binary,0.676427,{'_modeljson': 'xgb/poker.json'}
|
||||
Albert,0,binary,0.742648,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Albert,0,binary,0.758723,{'_modeljson': 'xgb/adult.json'}
|
||||
Albert,0,binary,0.763066,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Albert,0,binary,0.768073,{'_modeljson': 'xgb/Albert.json'}
|
||||
Albert,0,binary,0.74349,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.764,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.767514,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Albert,0,binary,0.743392,{'_modeljson': 'xgb/car.json'}
|
||||
Albert,0,binary,0.766006,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Albert,0,binary,0.757802,{'_modeljson': 'xgb/default.json'}
|
||||
Albert,0,binary,0.746511,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Albert,0,binary,0.761985,{'_modeljson': 'xgb/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.727287,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.855441,{'_modeljson': 'xgb/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.85984,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.873629,{'_modeljson': 'xgb/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.897708,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.862679,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.872059,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.657192,{'_modeljson': 'xgb/car.json'}
|
||||
Amazon_employee_access,0,binary,0.877547,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgb/default.json'}
|
||||
Amazon_employee_access,0,binary,0.853361,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.859734,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_breastTumor,0,regression,0.184421,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.163226,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_breastTumor,0,regression,0.18037,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.177238,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_breastTumor,0,regression,-0.118976,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.195539,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.106337,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.149326,{'_modeljson': 'xgb/car.json'}
|
||||
bng_breastTumor,0,regression,0.161193,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgb/default.json'}
|
||||
bng_breastTumor,0,regression,0.186499,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_breastTumor,0,regression,-0.032219,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_pbc,0,regression,0.411719,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.409769,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_pbc,0,regression,0.450806,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_pbc,0,regression,0.458384,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_pbc,0,regression,0.236669,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.441873,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.462226,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.431868,{'_modeljson': 'xgb/car.json'}
|
||||
bng_pbc,0,regression,0.45678,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_pbc,0,regression,0.436902,{'_modeljson': 'xgb/default.json'}
|
||||
bng_pbc,0,regression,0.418839,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_pbc,0,regression,0.448148,{'_modeljson': 'xgb/poker.json'}
|
||||
car,0,multiclass,-0.38726,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
car,0,multiclass,-0.22547,{'_modeljson': 'xgb/adult.json'}
|
||||
car,0,multiclass,-0.208402,{'_modeljson': 'xgb/Airlines.json'}
|
||||
car,0,multiclass,-0.0256159,{'_modeljson': 'xgb/Albert.json'}
|
||||
car,0,multiclass,-0.627705,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.166328,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.0201057,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
car,0,multiclass,-8.45E-05,{'_modeljson': 'xgb/car.json'}
|
||||
car,0,multiclass,-0.0129025,{'_modeljson': 'xgb/connect-4.json'}
|
||||
car,0,multiclass,-0.010029,{'_modeljson': 'xgb/default.json'}
|
||||
car,0,multiclass,-0.00218674,{'_modeljson': 'xgb/dilbert.json'}
|
||||
car,0,multiclass,-0.00426392,{'_modeljson': 'xgb/poker.json'}
|
||||
connect-4,0,multiclass,-0.578339,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.489378,{'_modeljson': 'xgb/adult.json'}
|
||||
connect-4,0,multiclass,-0.406886,{'_modeljson': 'xgb/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.332411,{'_modeljson': 'xgb/Albert.json'}
|
||||
connect-4,0,multiclass,-0.636516,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.425947,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.354612,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.452201,{'_modeljson': 'xgb/car.json'}
|
||||
connect-4,0,multiclass,-0.338363,{'_modeljson': 'xgb/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgb/default.json'}
|
||||
connect-4,0,multiclass,-0.497404,{'_modeljson': 'xgb/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.592309,{'_modeljson': 'xgb/poker.json'}
|
||||
|
@@ -1,14 +0,0 @@
|
||||
from flaml.automl.data import load_openml_dataset
|
||||
from flaml.default import LGBMRegressor
|
||||
from flaml.automl.ml import sklearn_metric_loss_score
|
||||
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")
|
||||
lgbm = LGBMRegressor()
|
||||
|
||||
hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(X_train, y_train)
|
||||
print(hyperparams)
|
||||
|
||||
lgbm.fit(X_train, y_train)
|
||||
y_pred = lgbm.predict(X_test)
|
||||
print("flamlized lgbm r2 =", 1 - sklearn_metric_loss_score("r2", y_pred, y_test))
|
||||
print(lgbm)
|
||||
@@ -1,13 +0,0 @@
|
||||
from flaml.automl.data import load_openml_dataset
|
||||
from flaml.default import XGBClassifier
|
||||
from flaml.automl.ml import sklearn_metric_loss_score
|
||||
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
|
||||
xgb = XGBClassifier()
|
||||
xgb.fit(X_train, y_train)
|
||||
y_pred = xgb.predict(X_test)
|
||||
print(
|
||||
"flamlized xgb accuracy =",
|
||||
1 - sklearn_metric_loss_score("accuracy", y_pred, y_test),
|
||||
)
|
||||
print(xgb)
|
||||
@@ -1,8 +0,0 @@
|
||||
def test_load_args_sub():
|
||||
from flaml.automl.nlp.huggingface.training_args import TrainingArgumentsForAuto
|
||||
|
||||
TrainingArgumentsForAuto.load_args_from_console()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_load_args_sub()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user