mirror of
https://github.com/microsoft/autogen.git
synced 2026-02-03 23:05:02 -05:00
model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283)
If save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO. Retrain if using ray. Update ITER_HP in config after a trial is finished. Change prophet logging level. Example and notebook update. Allow settings to be passed to the AutoML constructor. Related issues: "Are you planning to add multi-output-regression capability to FLAML" (#192) and "Is multi-tasking allowed?" (#277): one can pass the automl settings to the constructor instead of requiring a derived class. Remove model_history. Checkpoint bug fix. Squashed commits: * model_history meaning save_best_model_per_estimator; * ITER_HP; * example update; * prophet logging level; * comment update in forecast notebook; * print format improvement; * allow settings to be passed to AutoML constructor; * checkpoint bug fix; * time limit for autohf regression test; * skip slow test on macos; * cleanup before del.
This commit is contained in:
@@ -216,7 +216,7 @@ class TestClassification(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("xgboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
@@ -253,7 +253,7 @@ class TestClassification(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("xgboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
except ImportError:
|
||||
@@ -286,7 +286,7 @@ class TestClassification(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("large_lgbm"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
except ImportError:
|
||||
@@ -314,7 +314,7 @@ class TestClassification(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("lrl2"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
@@ -198,7 +198,7 @@ class TestMultiClass(unittest.TestCase):
|
||||
print(automl_experiment.classes_)
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("rf"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
automl_experiment = AutoML()
|
||||
@@ -238,13 +238,13 @@ class TestMultiClass(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train)[:5])
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("catboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
del automl_settings["metric"]
|
||||
del automl_settings["model_history"]
|
||||
del automl_settings["log_training_metric"]
|
||||
automl_experiment = AutoML()
|
||||
automl_experiment = AutoML(task="classification")
|
||||
duration = automl_experiment.retrain_from_log(
|
||||
log_file_name=automl_settings["log_file_name"],
|
||||
X_train=X_train,
|
||||
@@ -333,7 +333,7 @@ class TestMultiClass(unittest.TestCase):
|
||||
print(automl_experiment.predict_proba(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("extra_tree"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
@@ -343,7 +343,7 @@ class TestMultiClass(unittest.TestCase):
|
||||
learner_name="large_lgbm", learner_class=MyLargeLGBM
|
||||
)
|
||||
automl_settings = {
|
||||
"time_budget": None,
|
||||
"time_budget": -1,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/classification_oom.log",
|
||||
"estimator_list": ["large_lgbm"],
|
||||
@@ -56,7 +56,7 @@ class TestRegression(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("xgboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
print(get_output_from_log(automl_settings["log_file_name"], 1))
|
||||
@@ -77,28 +77,6 @@ class TestRegression(unittest.TestCase):
|
||||
time_budget=0,
|
||||
)
|
||||
|
||||
def test_sparse_matrix_classification(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"metric": "auto",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
"split_type": "uniform",
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train = scipy.sparse.random(1554, 21, dtype=int)
|
||||
y_train = np.random.randint(3, size=1554)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.classes_)
|
||||
print(automl_experiment.predict_proba(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
def test_sparse_matrix_regression(self):
|
||||
X_train = scipy.sparse.random(300, 900, density=0.0001)
|
||||
y_train = np.random.uniform(size=300)
|
||||
@@ -127,7 +105,7 @@ class TestRegression(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("rf"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
print(automl_experiment.best_config)
|
||||
@@ -151,7 +129,7 @@ class TestRegression(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("xgboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
except ImportError:
|
||||
@@ -176,7 +154,7 @@ class TestRegression(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("rf"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
@@ -209,7 +187,7 @@ class TestRegression(unittest.TestCase):
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.model_history)
|
||||
print(automl_experiment.best_model_for_estimator("my_xgb2"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
print(automl_experiment.best_config)
|
||||
@@ -30,7 +30,6 @@ class TestTrainingLog(unittest.TestCase):
|
||||
# "ensemble": True,
|
||||
"keep_search_state": True,
|
||||
"estimator_list": estimator_list,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = fetch_california_housing(return_X_y=True)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
@@ -85,7 +84,7 @@ class TestTrainingLog(unittest.TestCase):
|
||||
count += 1
|
||||
self.assertGreater(count, 0)
|
||||
|
||||
automl_settings["log_file_name"] = None
|
||||
automl_settings["log_file_name"] = ""
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
automl._selected.update(None, 0)
|
||||
automl = AutoML()
|
||||
@@ -2,7 +2,6 @@ import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
import numpy as np
|
||||
from flaml.automl import AutoML
|
||||
from flaml.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
@@ -2,7 +2,6 @@ import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
import numpy as np
|
||||
from flaml.automl import AutoML
|
||||
from flaml.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
@@ -44,7 +43,6 @@ def _test_simple(method=None, size_ratio=1.0):
|
||||
# "metric": 'accuracy',
|
||||
"task": "classification",
|
||||
"log_file_name": f"test/xgboost2d_{dataset}_{method}_{final_size}.log",
|
||||
# "model_history": True,
|
||||
# "log_training_metric": True,
|
||||
# "split_type": split_type,
|
||||
"n_jobs": 1,
|
||||
@@ -1,3 +1,8 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
def test_hf_data():
|
||||
try:
|
||||
import ray
|
||||
@@ -33,15 +38,15 @@ def test_hf_data():
|
||||
automl_settings = {
|
||||
"gpu_per_trial": 0,
|
||||
"max_iter": 3,
|
||||
"time_budget": 20,
|
||||
"time_budget": 5,
|
||||
"task": "seq-classification",
|
||||
"metric": "accuracy",
|
||||
"model_history": True,
|
||||
"log_file_name": "seqclass.log",
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
"fp16": False,
|
||||
}
|
||||
@@ -51,7 +56,6 @@ def test_hf_data():
|
||||
)
|
||||
automl = AutoML()
|
||||
automl.retrain_from_log(
|
||||
log_file_name="flaml.log",
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
train_full=True,
|
||||
@@ -71,10 +75,6 @@ def test_hf_data():
|
||||
|
||||
|
||||
def _test_custom_data():
|
||||
try:
|
||||
import ray
|
||||
except ImportError:
|
||||
return
|
||||
from flaml import AutoML
|
||||
|
||||
import pandas as pd
|
||||
@@ -1,8 +1,4 @@
|
||||
def test_classification_head():
|
||||
try:
|
||||
import ray
|
||||
except ImportError:
|
||||
return
|
||||
from flaml import AutoML
|
||||
|
||||
from datasets import load_dataset
|
||||
@@ -24,15 +20,14 @@ def test_classification_head():
|
||||
automl_settings = {
|
||||
"gpu_per_trial": 0,
|
||||
"max_iter": 3,
|
||||
"time_budget": 20,
|
||||
"time_budget": 5,
|
||||
"task": "seq-classification",
|
||||
"metric": "accuracy",
|
||||
"model_history": True,
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
"fp16": False,
|
||||
}
|
||||
@@ -1,8 +1,9 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
def test_cv():
|
||||
try:
|
||||
import ray
|
||||
except ImportError:
|
||||
return
|
||||
from flaml import AutoML
|
||||
|
||||
from datasets import load_dataset
|
||||
@@ -22,16 +23,15 @@ def test_cv():
|
||||
automl_settings = {
|
||||
"gpu_per_trial": 0,
|
||||
"max_iter": 3,
|
||||
"time_budget": 20,
|
||||
"time_budget": 5,
|
||||
"task": "seq-classification",
|
||||
"metric": "accuracy",
|
||||
"n_splits": 3,
|
||||
"model_history": True,
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
}
|
||||
@@ -1,17 +1,18 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
def test_regression():
|
||||
try:
|
||||
import ray
|
||||
except ImportError:
|
||||
return
|
||||
from flaml import AutoML
|
||||
|
||||
from datasets import load_dataset
|
||||
|
||||
train_dataset = (
|
||||
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[0:4]
|
||||
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
|
||||
)
|
||||
dev_dataset = (
|
||||
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[0:4]
|
||||
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
|
||||
)
|
||||
|
||||
custom_sent_keys = ["sentence1", "sentence2"]
|
||||
@@ -27,16 +28,16 @@ def test_regression():
|
||||
|
||||
automl_settings = {
|
||||
"gpu_per_trial": 0,
|
||||
"max_iter": 3,
|
||||
"time_budget": 20,
|
||||
"max_iter": 2,
|
||||
"time_budget": 5,
|
||||
"task": "seq-regression",
|
||||
"metric": "rmse",
|
||||
"model_history": True,
|
||||
"starting_points": {"transformer": {"num_train_epochs": 1}},
|
||||
}
|
||||
|
||||
automl_settings["custom_hpo_args"] = {
|
||||
"model_path": "google/electra-small-discriminator",
|
||||
"output_dir": "data/output/",
|
||||
"output_dir": "test/data/output/",
|
||||
"ckpt_per_epoch": 5,
|
||||
"fp16": False,
|
||||
}
|
||||
@@ -44,3 +45,7 @@ def test_regression():
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "main":
|
||||
test_regression()
|
||||
Reference in New Issue
Block a user