model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283)

if save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO.
retrain if using ray.
update ITER_HP in config after a trial is finished.
change prophet logging level.
example and notebook update.
allow settings to be passed to AutoML constructor. Are you planning to add multi-output-regression capability to FLAML #192 Is multi-tasking allowed? #277 can pass the automl setting to the constructor instead of requiring a derived class.
remove model_history.
checkpoint bug fix.

* model_history meaning save_best_model_per_estimator

* ITER_HP

* example update

* prophet logging level

* comment update in forecast notebook

* print format improvement

* allow settings to be passed to AutoML constructor

* checkpoint bug fix

* time limit for autohf regression test

* skip slow test on macos

* cleanup before del
This commit is contained in:
Chi Wang
2021-11-18 09:39:45 -08:00
committed by GitHub
parent e9551de3cc
commit 72caa2172d
22 changed files with 476 additions and 320 deletions

View File

@@ -216,7 +216,7 @@ class TestClassification(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("xgboost"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
@@ -253,7 +253,7 @@ class TestClassification(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("xgboost"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
except ImportError:
@@ -286,7 +286,7 @@ class TestClassification(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("large_lgbm"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
except ImportError:
@@ -314,7 +314,7 @@ class TestClassification(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("lrl2"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)

View File

@@ -198,7 +198,7 @@ class TestMultiClass(unittest.TestCase):
print(automl_experiment.classes_)
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("rf"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
automl_experiment = AutoML()
@@ -238,13 +238,13 @@ class TestMultiClass(unittest.TestCase):
print(automl_experiment.predict(X_train)[:5])
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("catboost"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
del automl_settings["metric"]
del automl_settings["model_history"]
del automl_settings["log_training_metric"]
automl_experiment = AutoML()
automl_experiment = AutoML(task="classification")
duration = automl_experiment.retrain_from_log(
log_file_name=automl_settings["log_file_name"],
X_train=X_train,
@@ -333,7 +333,7 @@ class TestMultiClass(unittest.TestCase):
print(automl_experiment.predict_proba(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("extra_tree"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
@@ -343,7 +343,7 @@ class TestMultiClass(unittest.TestCase):
learner_name="large_lgbm", learner_class=MyLargeLGBM
)
automl_settings = {
"time_budget": None,
"time_budget": -1,
"task": "classification",
"log_file_name": "test/classification_oom.log",
"estimator_list": ["large_lgbm"],

View File

@@ -56,7 +56,7 @@ class TestRegression(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("xgboost"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
print(get_output_from_log(automl_settings["log_file_name"], 1))
@@ -77,28 +77,6 @@ class TestRegression(unittest.TestCase):
time_budget=0,
)
def test_sparse_matrix_classification(self):
automl_experiment = AutoML()
automl_settings = {
"time_budget": 2,
"metric": "auto",
"task": "classification",
"log_file_name": "test/sparse_classification.log",
"split_type": "uniform",
"n_jobs": 1,
"model_history": True,
}
X_train = scipy.sparse.random(1554, 21, dtype=int)
y_train = np.random.randint(3, size=1554)
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
print(automl_experiment.classes_)
print(automl_experiment.predict_proba(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
def test_sparse_matrix_regression(self):
X_train = scipy.sparse.random(300, 900, density=0.0001)
y_train = np.random.uniform(size=300)
@@ -127,7 +105,7 @@ class TestRegression(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("rf"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
print(automl_experiment.best_config)
@@ -151,7 +129,7 @@ class TestRegression(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("xgboost"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
except ImportError:
@@ -176,7 +154,7 @@ class TestRegression(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("rf"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
@@ -209,7 +187,7 @@ class TestRegression(unittest.TestCase):
print(automl_experiment.predict(X_train))
print(automl_experiment.model)
print(automl_experiment.config_history)
print(automl_experiment.model_history)
print(automl_experiment.best_model_for_estimator("my_xgb2"))
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
print(automl_experiment.best_config)

View File

@@ -30,7 +30,6 @@ class TestTrainingLog(unittest.TestCase):
# "ensemble": True,
"keep_search_state": True,
"estimator_list": estimator_list,
"model_history": True,
}
X_train, y_train = fetch_california_housing(return_X_y=True)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
@@ -85,7 +84,7 @@ class TestTrainingLog(unittest.TestCase):
count += 1
self.assertGreater(count, 0)
automl_settings["log_file_name"] = None
automl_settings["log_file_name"] = ""
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
automl._selected.update(None, 0)
automl = AutoML()

View File

@@ -2,7 +2,6 @@ import unittest
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from flaml.automl import AutoML
from flaml.model import XGBoostSklearnEstimator
from flaml import tune

View File

@@ -2,7 +2,6 @@ import unittest
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from flaml.automl import AutoML
from flaml.model import XGBoostSklearnEstimator
from flaml import tune
@@ -44,7 +43,6 @@ def _test_simple(method=None, size_ratio=1.0):
# "metric": 'accuracy',
"task": "classification",
"log_file_name": f"test/xgboost2d_{dataset}_{method}_{final_size}.log",
# "model_history": True,
# "log_training_metric": True,
# "split_type": split_type,
"n_jobs": 1,

View File

@@ -1,3 +1,8 @@
import os
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
def test_hf_data():
try:
import ray
@@ -33,15 +38,15 @@ def test_hf_data():
automl_settings = {
"gpu_per_trial": 0,
"max_iter": 3,
"time_budget": 20,
"time_budget": 5,
"task": "seq-classification",
"metric": "accuracy",
"model_history": True,
"log_file_name": "seqclass.log",
}
automl_settings["custom_hpo_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 5,
"fp16": False,
}
@@ -51,7 +56,6 @@ def test_hf_data():
)
automl = AutoML()
automl.retrain_from_log(
log_file_name="flaml.log",
X_train=X_train,
y_train=y_train,
train_full=True,
@@ -71,10 +75,6 @@ def test_hf_data():
def _test_custom_data():
try:
import ray
except ImportError:
return
from flaml import AutoML
import pandas as pd

View File

@@ -1,8 +1,4 @@
def test_classification_head():
try:
import ray
except ImportError:
return
from flaml import AutoML
from datasets import load_dataset
@@ -24,15 +20,14 @@ def test_classification_head():
automl_settings = {
"gpu_per_trial": 0,
"max_iter": 3,
"time_budget": 20,
"time_budget": 5,
"task": "seq-classification",
"metric": "accuracy",
"model_history": True,
}
automl_settings["custom_hpo_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 5,
"fp16": False,
}

View File

@@ -1,8 +1,9 @@
import os
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
def test_cv():
try:
import ray
except ImportError:
return
from flaml import AutoML
from datasets import load_dataset
@@ -22,16 +23,15 @@ def test_cv():
automl_settings = {
"gpu_per_trial": 0,
"max_iter": 3,
"time_budget": 20,
"time_budget": 5,
"task": "seq-classification",
"metric": "accuracy",
"n_splits": 3,
"model_history": True,
}
automl_settings["custom_hpo_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 1,
"fp16": False,
}

View File

@@ -1,17 +1,18 @@
import os
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
def test_regression():
try:
import ray
except ImportError:
return
from flaml import AutoML
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[0:4]
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
)
dev_dataset = (
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[0:4]
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
)
custom_sent_keys = ["sentence1", "sentence2"]
@@ -27,16 +28,16 @@ def test_regression():
automl_settings = {
"gpu_per_trial": 0,
"max_iter": 3,
"time_budget": 20,
"max_iter": 2,
"time_budget": 5,
"task": "seq-regression",
"metric": "rmse",
"model_history": True,
"starting_points": {"transformer": {"num_train_epochs": 1}},
}
automl_settings["custom_hpo_args"] = {
"model_path": "google/electra-small-discriminator",
"output_dir": "data/output/",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 5,
"fp16": False,
}
@@ -44,3 +45,7 @@ def test_regression():
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
if __name__ == "main":
test_regression()