tune api for schedulers (#322)

* revise api and tests

* rename prune_attr

* update finetune notebook

* add scheduler test and notebook

* update tune api for scheduler

* remove scheduler notebook

* Update flaml/tune/tune.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* docstr

* fix imports

* clear notebook output

* fix ray import

* Update flaml/tune/tune.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* improve docstr

* Update flaml/searcher/blendsearch.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* remove redundant import

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
Qingyun Wu
2021-12-04 21:52:20 -05:00
committed by GitHub
parent 7d269435ae
commit 17b17d084f
15 changed files with 499 additions and 970 deletions

View File

@@ -71,7 +71,7 @@ def test_forecast_automl(budget=5):
) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
for config in config_history:
print(config)
print(automl.prune_attr)
print(automl.resource_attr)
print(automl.max_resource)
print(automl.min_resource)
@@ -210,7 +210,7 @@ def test_multivariate_forecast_num(budget=5):
) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
for config in config_history:
print(config)
print(automl.prune_attr)
print(automl.resource_attr)
print(automl.max_resource)
print(automl.min_resource)
@@ -341,7 +341,7 @@ def test_multivariate_forecast_cat(budget=5):
) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
for config in config_history:
print(config)
print(automl.prune_attr)
print(automl.resource_attr)
print(automl.max_resource)
print(automl.min_resource)

View File

@@ -64,7 +64,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
for config in config_history:
print(config)
print(automl.prune_attr)
print(automl.resource_attr)
print(automl.max_resource)
print(automl.min_resource)

View File

@@ -80,7 +80,7 @@ class TestLogging(unittest.TestCase):
low_cost_partial_config=low_cost_partial_config,
points_to_evaluate=automl.points_to_evaluate,
cat_hp_cost=automl.cat_hp_cost,
prune_attr=automl.prune_attr,
resource_attr=automl.resource_attr,
min_resource=automl.min_resource,
max_resource=automl.max_resource,
config_constraints=[

View File

@@ -71,7 +71,7 @@ def test_simple(method=None):
low_cost_partial_config=automl.low_cost_partial_config,
points_to_evaluate=automl.points_to_evaluate,
cat_hp_cost=automl.cat_hp_cost,
prune_attr=automl.prune_attr,
resource_attr=automl.resource_attr,
min_resource=automl.min_resource,
max_resource=automl.max_resource,
time_budget_s=automl._state.time_budget,

View File

@@ -239,7 +239,7 @@ def cifar10_main(
low_cost_partial_config={"num_epochs": 1},
max_resource=max_num_epochs,
min_resource=1,
report_intermediate_result=True,
scheduler="asha",
resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
local_dir="logs/",
num_samples=num_samples,

157
test/tune/test_scheduler.py Normal file
View File

@@ -0,0 +1,157 @@
"""Require: pip install flaml[test,ray]
"""
from logging import raiseExceptions
from flaml.scheduler.trial_scheduler import TrialScheduler
import numpy as np
from flaml import tune
import time
def rand_vector_unit_sphere(dim):
    """Draw a point uniformly at random from the surface of the (dim-1)-sphere.

    Sampling an isotropic Gaussian and normalizing it yields a direction
    that is uniformly distributed on the unit sphere.
    """
    gaussian_sample = np.random.normal(0, 1, dim)
    return gaussian_sample / np.linalg.norm(gaussian_sample)
def simple_obj(config, resource=10000):
    """Objective that reports a single final score for a config.

    Projects the config's (x, y, z) vector onto `resource` random unit
    directions (with the z-component forced non-negative), then reports a
    95% lower confidence bound of the mean projection via `tune.report`.
    """
    point = np.array([config["x"], config["y"], config["z"]])
    projections = []
    for _ in range(resource):
        direction = rand_vector_unit_sphere(3)
        direction[2] = abs(direction[2])
        projections.append(np.dot(point, direction))
    scores = np.array(projections)
    # 1.96 * std / sqrt(n): half-width of the 95% normal confidence interval.
    lower_bound = np.mean(scores) - 1.96 * np.std(scores) / np.sqrt(resource)
    tune.report(samplesize=resource, sphere_projection=lower_bound)
def obj_w_intermediate_report(resource, config):
    """Objective that reports an intermediate score every 100 samples.

    `resource` is the first parameter (before `config`) so callers can bind
    it with `functools.partial` and pass the result to `tune.run`.
    """
    point = np.array([config["x"], config["y"], config["z"]])
    projections = []
    for step in range(1, resource + 1):
        direction = rand_vector_unit_sphere(3)
        direction[2] = abs(direction[2])
        projections.append(np.dot(point, direction))
        if step % 100 == 0:
            scores = np.array(projections)
            # 95% lower confidence bound of the running mean projection.
            lower_bound = np.mean(scores) - 1.96 * np.std(scores) / np.sqrt(step)
            tune.report(samplesize=step, sphere_projection=lower_bound)
def obj_w_suggested_resource(resource_attr, config):
    """Run `simple_obj` with the resource amount the scheduler put in `config`.

    `resource_attr` names the config key (e.g. "samplesize") under which the
    FLAML scheduler suggests a resource value for this trial.
    """
    simple_obj(config, config[resource_attr])
def test_scheduler(scheduler=None):
    """Run a tune.run experiment with the given scheduler and return the best config.

    Args:
        scheduler: None (no scheduler), "flaml" (FLAML's resource scheduler),
            "asha", a flaml TrialScheduler instance, or a ray.tune scheduler
            instance. The objective function is chosen to match: the "flaml"
            path expects the scheduler to suggest the resource in the config,
            while "asha"/ray schedulers expect intermediate reports.

    Returns:
        The best config found, or None when the branch requires ray.tune
        and ray cannot be imported (the test is skipped in that case).
    """
    from functools import partial

    resource_attr = "samplesize"
    max_resource = 10000
    # specify the objective functions
    if scheduler is None:
        evaluation_obj = simple_obj
    elif scheduler == "flaml":
        # FLAML scheduler writes the suggested resource into the config.
        evaluation_obj = partial(obj_w_suggested_resource, resource_attr)
    elif scheduler == "asha" or isinstance(scheduler, TrialScheduler):
        # Pruning schedulers need intermediate results reported per trial.
        evaluation_obj = partial(obj_w_intermediate_report, max_resource)
    else:
        # Remaining possibility: a scheduler instance from ray.tune; this
        # requires ray to be importable, otherwise skip the test.
        try:
            from ray.tune.schedulers import TrialScheduler as RayTuneTrialScheduler
        except ImportError:
            print(
                "skip this condition, which may require TrialScheduler from ray tune, \
                as ray tune cannot be imported."
            )
            return
        if isinstance(scheduler, RayTuneTrialScheduler):
            evaluation_obj = partial(obj_w_intermediate_report, max_resource)
        else:
            # Unrecognized scheduler argument.
            raise ValueError
    analysis = tune.run(
        evaluation_obj,
        config={
            "x": tune.uniform(5, 20),
            "y": tune.uniform(0, 10),
            "z": tune.uniform(0, 10),
        },
        metric="sphere_projection",
        mode="max",
        verbose=1,
        resource_attr=resource_attr,
        scheduler=scheduler,
        max_resource=max_resource,
        min_resource=100,
        reduction_factor=2,
        time_budget_s=1,
        num_samples=500,
    )
    print("Best hyperparameters found were: ", analysis.best_config)
    # print(analysis.get_best_trial)
    return analysis.best_config
def test_no_scheduler():
    """Tune without any scheduler and print the error in the z coordinate."""
    config = test_scheduler()
    # z's optimum is its upper bound 10; the error is |10 - z| / 2.
    print("No scheduler, test error:", abs(10 / 2 - config["z"] / 2))
def test_asha_scheduler():
    """Tune with the built-in "asha" scheduler; skipped when ray is absent."""
    try:
        from ray.tune.schedulers import ASHAScheduler  # noqa: F401
    except ImportError:
        print("skip the test as ray tune cannot be imported.")
        return
    config = test_scheduler(scheduler="asha")
    print("Auto ASHA scheduler, test error:", abs(10 / 2 - config["z"] / 2))
def test_custom_scheduler():
    """Tune with a user-constructed ray.tune HyperBandScheduler.

    Skipped when ray cannot be imported. The custom scheduler uses the
    same time attribute ("samplesize") that the objective reports.
    """
    try:
        from ray.tune.schedulers import HyperBandScheduler
    except ImportError:
        print("skip the test as ray tune cannot be imported.")
        return
    my_scheduler = HyperBandScheduler(
        time_attr="samplesize", max_t=1000, reduction_factor=2
    )
    best_config = test_scheduler(scheduler=my_scheduler)
    # Fixed: the message previously said "Custom ASHA scheduler" even though
    # the scheduler constructed above is a HyperBandScheduler.
    print("Custom HyperBand scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
def test_custom_scheduler_default_time_attr():
    """Tune with an ASHAScheduler left at its default time attribute.

    Skipped when ray cannot be imported.
    """
    try:
        from ray.tune.schedulers import ASHAScheduler
    except ImportError:
        print("skip the test as ray tune cannot be imported.")
        return
    config = test_scheduler(scheduler=ASHAScheduler(max_t=10))
    print(
        "Custom ASHA scheduler (with ASHA default time attr), test error:",
        abs(10 / 2 - config["z"] / 2),
    )
def test_flaml_scheduler():
    """Tune with FLAML's built-in resource scheduler and print the z error."""
    best_config = test_scheduler(scheduler="flaml")
    # Fixed: added the colon after "test error" for consistency with the
    # messages printed by the sibling scheduler tests.
    print("FLAML scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
if __name__ == "__main__":
    # Run every scheduler variant in sequence when executed as a script.
    for _test_fn in (
        test_no_scheduler,
        test_asha_scheduler,
        test_custom_scheduler,
        test_custom_scheduler_default_time_attr,
        test_flaml_scheduler,
    ):
        _test_fn()

View File

@@ -83,7 +83,7 @@ def _test_xgboost(method="BlendSearch"):
mode="min",
max_resource=max_iter,
min_resource=1,
report_intermediate_result=True,
scheduler="asha",
# You can add "gpu": 0.1 to allocate GPUs
resources_per_trial={"cpu": 1},
local_dir="logs/",