mirror of
https://github.com/microsoft/autogen.git
synced 2026-02-15 18:55:03 -05:00
metric constraints in flaml.automl (#479)
* metric constraints * revise docstr * fix docstr * improve docstr * Update flaml/automl.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update flaml/automl.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update flaml/automl.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * docstr Co-authored-by: Qingyun Wu <qxw5138@psu.edu> Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
159
test/automl/test_constraints.py
Normal file
159
test/automl/test_constraints.py
Normal file
@@ -0,0 +1,159 @@
|
||||
import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from flaml.automl import AutoML
|
||||
from flaml import tune
|
||||
|
||||
|
||||
# OpenML dataset name shared by both constraint tests below; the tests fall
# back to sklearn's bundled wine dataset when the download fails.
dataset = "credit-g"
||||
def test_metric_constraints():
    """Test AutoML with a metric constraint imposed via ``pred_time_limit``.

    Fits an xgboost-only AutoML run with a deliberately tiny prediction-time
    limit, then reuses the recorded search state (search space, starting
    points, constraints) to drive a standalone ``tune.run`` session.
    """
    # impose metric constraints via "pred_time_limit"
    automl = AutoML()

    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
        "log_file_name": f"test/constraints_{dataset}.log",
        "n_jobs": 1,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 1,
        # deliberately tiny limit so the constraint actually binds
        "pred_time_limit": 5.1e-05,
    }
    # sklearn.externals._arff was removed in scikit-learn >= 0.23; degrade
    # gracefully so the test still runs on modern scikit-learn versions.
    try:
        from sklearn.externals._arff import ArffException
    except ImportError:
        ArffException = ValueError

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # download failed (e.g. offline): fall back to a bundled dataset
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    # test split is unused in this test; only the training fold is needed
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.33, random_state=42
    )
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    # sanity-check that the best config is runnable through the trainable
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)

    from flaml.automl import size
    from functools import partial

    print("metric constraints used in automl", automl.metric_constraints)

    # reuse the fitted AutoML state for a standalone tuning run with the
    # same config and metric constraints
    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
||||
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val,
    weight_train,
    *args,
):
    """Composite loss that penalizes the validation/training loss gap.

    Returns ``val_loss * (1 + alpha) - alpha * train_loss`` as the objective,
    together with a dict of auxiliary metrics (``val_loss``,
    ``val_train_loss_gap``, ``pred_time``) that metric constraints can target.
    """
    import time

    from sklearn.metrics import log_loss

    alpha = 0.5
    # time one prediction pass, normalized per validation sample
    tic = time.time()
    proba_val = estimator.predict_proba(X_val)
    pred_time = (time.time() - tic) / len(X_val)

    val_loss = log_loss(y_val, proba_val, labels=labels, sample_weight=weight_val)
    proba_train = estimator.predict_proba(X_train)
    train_loss = log_loss(
        y_train, proba_train, labels=labels, sample_weight=weight_train
    )

    metrics = {
        "val_loss": val_loss,
        "val_train_loss_gap": val_loss - train_loss,
        "pred_time": pred_time,
    }
    return val_loss * (1 + alpha) - alpha * train_loss, metrics
||||
def test_metric_constraints_custom():
    """Test AutoML with metric constraints on a custom metric's extra outputs.

    Uses ``custom_metric`` as the objective and constrains its auxiliary
    metrics (``pred_time`` and ``val_train_loss_gap``), then reuses the
    recorded search state to drive a standalone ``tune.run`` session.
    """
    automl = AutoML()

    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
        "log_file_name": f"test/constraints_custom_{dataset}.log",
        "n_jobs": 1,
        "metric": custom_metric,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 1,
        # constrain the auxiliary metrics reported by custom_metric
        "metric_constraints": [
            ("pred_time", "<=", 5.1e-05),
            ("val_train_loss_gap", "<=", 0.05),
        ],
    }
    # sklearn.externals._arff was removed in scikit-learn >= 0.23; degrade
    # gracefully so the test still runs on modern scikit-learn versions.
    try:
        from sklearn.externals._arff import ArffException
    except ImportError:
        ArffException = ValueError

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # download failed (e.g. offline): fall back to a bundled dataset
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    # test split is unused in this test; only the training fold is needed
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.33, random_state=42
    )
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    # sanity-check that the best config is runnable through the trainable
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)

    from flaml.automl import size
    from functools import partial

    print("metric constraints in automl", automl.metric_constraints)
    # reuse the fitted AutoML state for a standalone tuning run with the
    # same config and metric constraints
    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
||||
if __name__ == "__main__":
    # NOTE(review): the tests above are module-level pytest-style functions,
    # not unittest.TestCase methods, so unittest.main() likely discovers
    # nothing when run directly — confirm whether pytest is the intended
    # runner. Left unchanged to preserve behavior.
    unittest.main()
|
||||
Reference in New Issue
Block a user