automl fit with starting points (#141)
* add starting point in fit
* add estimator best config
* add test
* add doc string
* when there are multiple points_to_evaluate in CFO, use the best one to start the local search; after that, use the low-cost partial config as the start point; then remove the points whose performance is worse than the converged result, and start local search from the remaining points, ordered by their performance.

Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
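A minimal sketch of the warm-start pattern this commit enables, mirroring the test added below: a first `fit` searches from scratch, `best_config_per_estimator` exposes the best configuration found per estimator, and a second `fit` resumes from those configurations via `starting_points`. This only uses the flaml AutoML calls exercised by the new test; the dataset and time budgets are illustrative, not prescriptive.

```python
from flaml import AutoML
from sklearn.datasets import load_iris

X_train, y_train = load_iris(return_X_y=True)

# First run: search from scratch for a few seconds.
automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train,
           task='classification', metric='accuracy', time_budget=3)

# Best config found so far for each estimator, e.g. {'lgbm': {...}, 'xgboost': {...}, ...}
starting_points = automl.best_config_per_estimator

# Second run: warm-start the search from those configs via `starting_points`.
new_automl = AutoML()
new_automl.fit(X_train=X_train, y_train=y_train,
               task='classification', metric='accuracy', time_budget=2,
               starting_points=starting_points)
```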
@@ -154,10 +154,10 @@ class TestAutoML(unittest.TestCase):
     def test_preprocess(self):
         automl = AutoML()
         X = pd.DataFrame({
-            'f1': [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
-            'f2': [3., 16., 10., 12., 3., 14., 11., 12., 5., 14., 20., 16., 15., 11.,],
-            'f3': ['a', 'b', 'a', 'c', 'c', 'b', 'b', 'b', 'b', 'a', 'b', 'e', 'e', 'a'],
-            'f4': [True, True, False, True, True, False, False, False, True, True, False, False, True, True],
+            'f1': [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
+            'f2': [3., 16., 10., 12., 3., 14., 11., 12., 5., 14., 20., 16., 15., 11.],
+            'f3': ['a', 'b', 'a', 'c', 'c', 'b', 'b', 'b', 'b', 'a', 'b', 'e', 'e', 'a'],
+            'f4': [True, True, False, True, True, False, False, False, True, True, False, False, True, True],
         })
         y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
 
@@ -476,6 +476,53 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_loss)
         print(automl_experiment.best_config_train_time)
 
+    def test_fit_w_starting_point(self, as_frame=True):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 3,
+            "metric": 'accuracy',
+            "task": 'classification',
+            "log_file_name": "test/iris.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
+        if as_frame:
+            # test drop column
+            X_train.columns = range(X_train.shape[1])
+            X_train[X_train.shape[1]] = np.zeros(len(y_train))
+        automl_experiment.fit(X_train=X_train, y_train=y_train,
+                              **automl_settings)
+        automl_val_accuracy = 1.0 - automl_experiment.best_loss
+        print('Best ML leaner:', automl_experiment.best_estimator)
+        print('Best hyperparmeter config:', automl_experiment.best_config)
+        print('Best accuracy on validation data: {0:.4g}'.format(automl_val_accuracy))
+        print('Training duration of best run: {0:.4g} s'.format(automl_experiment.best_config_train_time))
+
+        starting_points = automl_experiment.best_config_per_estimator
+        print('starting_points', starting_points)
+        automl_settings_resume = {
+            "time_budget": 2,
+            "metric": 'accuracy',
+            "task": 'classification',
+            "log_file_name": "test/iris_resume.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True,
+            "log_type": 'all',
+            "starting_points": starting_points,
+        }
+        new_automl_experiment = AutoML()
+        new_automl_experiment.fit(X_train=X_train, y_train=y_train,
+                                  **automl_settings_resume)
+
+        new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
+        print('Best ML leaner:', new_automl_experiment.best_estimator)
+        print('Best hyperparmeter config:', new_automl_experiment.best_config)
+        print('Best accuracy on validation data: {0:.4g}'.format(new_automl_val_accuracy))
+        print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))
+
 
 if __name__ == "__main__":
     unittest.main()
@@ -163,7 +163,7 @@ def _test_xgboost(method='BlendSearch'):
 
 
 def test_nested():
-    from flaml import tune
+    from flaml import tune, CFO
     search_space = {
         # test nested search space
         "cost_related": {
@@ -178,6 +178,27 @@ def test_nested():
-        tune.report(obj=obj)
+        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])
 
+    analysis = tune.run(
+        simple_func,
+        search_alg=CFO(
+            space=search_space, metric="obj", mode="min",
+            low_cost_partial_config={
+                "cost_related": {"a": 1}
+            },
+            points_to_evaluate=[
+                {"b": .99, "cost_related": {"a": 3}},
+                {"b": .99, "cost_related": {"a": 2}},
+                {"cost_related": {"a": 8}}
+            ],
+            metric_constraints=[("ab", "<=", 4)]),
+        local_dir='logs/',
+        num_samples=-1,
+        time_budget_s=.1)
+
+    best_trial = analysis.get_best_trial()
+    logger.info(f"CFO best config: {best_trial.config}")
+    logger.info(f"CFO best result: {best_trial.last_result}")
+
     analysis = tune.run(
         simple_func,
         config=search_space,
@@ -189,11 +210,11 @@ def test_nested():
         metric_constraints=[("ab", "<=", 4)],
         local_dir='logs/',
         num_samples=-1,
-        time_budget_s=1)
+        time_budget_s=.1)
 
     best_trial = analysis.get_best_trial()
-    logger.info(f"Best config: {best_trial.config}")
-    logger.info(f"Best result: {best_trial.last_result}")
+    logger.info(f"BlendSearch best config: {best_trial.config}")
+    logger.info(f"BlendSearch best result: {best_trial.last_result}")
 
 
 def test_xgboost_bs():