warning -> info for low cost partial config (#231)

* warning -> info for low cost partial config
#195, #110

* when n_estimators < 0, use trained_estimator's value

* log debug info

* test random seed

* remove "objective"; avoid ZeroDivisionError

* hp config to estimator params

* check type of searcher

* default n_jobs

* try import

* Update searchalgo_auto.py

* CLASSIFICATION

* auto_augment flag

* min_sample_size

* make catboost optional
This commit is contained in:
Chi Wang
2021-10-08 16:09:43 -07:00
committed by GitHub
parent a99e939404
commit f48ca2618f
22 changed files with 1938 additions and 1859 deletions

View File

@@ -1,6 +1,6 @@
from flaml.tune.space import unflatten_hierarchical
from flaml import AutoML
from sklearn.datasets import load_boston
from sklearn.datasets import fetch_california_housing
import os
import unittest
import logging
@@ -9,7 +9,6 @@ import io
class TestLogging(unittest.TestCase):
def test_logging_level(self):
from flaml import logger, logger_formatter
@@ -30,8 +29,8 @@ class TestLogging(unittest.TestCase):
automl = AutoML()
automl_settings = {
"time_budget": 1,
"metric": 'rmse',
"task": 'regression',
"metric": "rmse",
"task": "regression",
"log_file_name": training_log,
"log_training_metric": True,
"n_jobs": 1,
@@ -39,35 +38,42 @@ class TestLogging(unittest.TestCase):
"keep_search_state": True,
"learner_selector": "roundrobin",
}
X_train, y_train = load_boston(return_X_y=True)
X_train, y_train = fetch_california_housing(return_X_y=True)
n = len(y_train) >> 1
print(automl.model, automl.classes_, automl.predict(X_train))
automl.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:],
**automl_settings)
automl.fit(
X_train=X_train[:n],
y_train=y_train[:n],
X_val=X_train[n:],
y_val=y_train[n:],
**automl_settings
)
logger.info(automl.search_space)
logger.info(automl.low_cost_partial_config)
logger.info(automl.points_to_evaluate)
logger.info(automl.cat_hp_cost)
import optuna as ot
study = ot.create_study()
from flaml.tune.space import define_by_run_func, add_cost_to_space
sample = define_by_run_func(study.ask(), automl.search_space)
logger.info(sample)
logger.info(unflatten_hierarchical(sample, automl.search_space))
add_cost_to_space(
automl.search_space, automl.low_cost_partial_config,
automl.cat_hp_cost
automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost
)
logger.info(automl.search_space["ml"].categories)
config = automl.best_config.copy()
config['learner'] = automl.best_estimator
config["learner"] = automl.best_estimator
automl.trainable({"ml": config})
from flaml import tune, BlendSearch
from flaml.automl import size
from functools import partial
search_alg = BlendSearch(
metric='val_loss', mode='min',
metric="val_loss",
mode="min",
space=automl.search_space,
low_cost_partial_config=automl.low_cost_partial_config,
points_to_evaluate=automl.points_to_evaluate,
@@ -75,19 +81,25 @@ class TestLogging(unittest.TestCase):
prune_attr=automl.prune_attr,
min_resource=automl.min_resource,
max_resource=automl.max_resource,
config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
metric_constraints=automl.metric_constraints)
config_constraints=[
(partial(size, automl._state), "<=", automl._mem_thres)
],
metric_constraints=automl.metric_constraints,
)
analysis = tune.run(
automl.trainable, search_alg=search_alg, # verbose=2,
time_budget_s=1, num_samples=-1)
print(min(trial.last_result["val_loss"]
for trial in analysis.trials))
config = analysis.trials[-1].last_result['config']['ml']
automl._state._train_with_config(config['learner'], config)
automl.trainable,
search_alg=search_alg, # verbose=2,
time_budget_s=1,
num_samples=-1,
)
print(min(trial.last_result["val_loss"] for trial in analysis.trials))
config = analysis.trials[-1].last_result["config"]["ml"]
automl._state._train_with_config(config["learner"], config)
# Check if the log buffer is populated.
self.assertTrue(len(buf.getvalue()) > 0)
import pickle
with open('automl.pkl', 'wb') as f:
with open("automl.pkl", "wb") as f:
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
print(automl.__version__)