diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index f281cb81a..03017777c 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -47,6 +47,7 @@ class ExperimentAnalysis(EA):


 def report(_metric=None, **kwargs):
+    """A function called by the HPO application to report final or intermediate results.
@@ -78,6 +79,10 @@ def report(_metric=None, **kwargs):
         _metric: Optional default anonymous metric for ``tune.report(value)``.
            (For compatibility with ray.tune.report)
         **kwargs: Any key value pair to be reported.
+
+    Raises:
+        StopIteration: Raised when not using ray (i.e., _use_ray=False)
+            and the trial has been signaled to stop.
    """
    global _use_ray
    global _verbose
@@ -109,9 +114,7 @@ def report(_metric=None, **kwargs):
        if _verbose > 2:
            logger.info(f"result: {result}")
        if trial.is_finished():
-            return None
-        else:
-            return True
+            raise StopIteration


 def run(
@@ -223,7 +226,7 @@ def run(
         reduction_factor: A float of the reduction factor used for incremental
            pruning.
         scheduler: A scheduler for executing the experiment. Can be None, 'flaml',
-            'asha' or a custom instance of the TrialScheduler class. Default is None:
+            'asha' (or its aliases 'async_hyperband' and 'asynchyperband') or a custom instance of the TrialScheduler class. Default is None:
            in this case when resource_attr is provided, the 'flaml' scheduler will be
            used, otherwise no scheduler will be used. When set 'flaml', an
            authentic scheduler implemented in FLAML will be used. It does not
@@ -236,9 +239,22 @@ def run(
            respectively. You can also provide a self-defined scheduler instance
            of the TrialScheduler class. When 'asha' or self-defined scheduler is
            used, you usually need to report intermediate results in the evaluation
-            function. Please find examples using different types of schedulers
+            function via 'tune.report()'. In addition, when 'use_ray' is not enabled,
+            you also need to stop the evaluation function early by explicitly
+            catching the `StopIteration` exception, as shown in the following example.
+            Please find more examples using different types of schedulers
            and how to set up the corresponding evaluation functions in
-            test/tune/test_scheduler.py. TODO: point to notebook examples.
+            test/tune/test_scheduler.py and test/tune/example_scheduler.py.
+            ```python
+            def easy_objective(config):
+                width, height = config["width"], config["height"]
+                for step in range(config["steps"]):
+                    intermediate_score = evaluation_fn(step, width, height)
+                    try:
+                        tune.report(iterations=step, mean_loss=intermediate_score)
+                    except StopIteration:
+                        return
+            ```
         search_alg: An instance of BlendSearch as the search algorithm
            to be used. The same instance can be used for iterative tuning.
            e.g.,
@@ -316,8 +332,7 @@ def run(
        flaml_scheduler_min_resource
    ) = flaml_scheduler_max_resource = flaml_scheduler_reduction_factor = None
    if scheduler in (None, "flaml"):
-
-        # when scheduler is set 'flaml', we will use a scheduler that is
+        # when scheduler is set 'flaml' or None, we will use a scheduler that is
        # authentic to the search algorithms in flaml. After setting up
        # the search algorithm accordingly, we need to set scheduler to
        # None in case it is later used in the trial runner.
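For context, here is a minimal end-to-end sketch of the new behavior, condensed from the `use_ray=False` code path of `test/tune/example_scheduler.py` added below: `tune.report()` now raises `StopIteration` when the scheduler decides to stop a trial, and the evaluation function exits by catching it.

```python
from flaml import tune


def evaluation_fn(step, width, height):
    # Toy objective borrowed from the example file: improves with width over steps.
    return (0.1 + width * step / 100) ** (-1) + height * 0.1


def easy_objective(config):
    width, height = config["width"], config["height"]
    for step in range(config["steps"]):
        intermediate_score = evaluation_fn(step, width, height)
        try:
            # Without ray (use_ray=False), report() raises StopIteration
            # once the trial has been signaled to stop by the scheduler.
            tune.report(iterations=step, mean_loss=intermediate_score)
        except StopIteration:
            return


analysis = tune.run(
    easy_objective,
    metric="mean_loss",
    mode="min",
    num_samples=10,
    scheduler="asha",  # "async_hyperband" / "asynchyperband" are accepted aliases
    resource_attr="iterations",  # the resource dimension reported via tune.report()
    max_resource=99,
    config={
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
    },
    use_ray=False,  # pruning surfaces as StopIteration in the trial itself
)
print("Best hyperparameters found were: ", analysis.best_config)
```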
@@ -388,7 +403,7 @@ def run(
            searcher.set_search_properties(metric, mode, config, setting)
        else:
            searcher.set_search_properties(metric, mode, config)
-    if scheduler == "asha":
+    if scheduler in ("asha", "asynchyperband", "async_hyperband"):
        params = {}
        # scheduler resource_dimension=resource_attr
        if resource_attr:
diff --git a/test/tune/example_scheduler.py b/test/tune/example_scheduler.py
new file mode 100644
index 000000000..5fc5e2854
--- /dev/null
+++ b/test/tune/example_scheduler.py
@@ -0,0 +1,107 @@
+from functools import partial
+import time
+
+
+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100) ** (-1) + height * 0.1
+
+
+def easy_objective(use_raytune, config):
+    if use_raytune:
+        from ray import tune
+    else:
+        from flaml import tune
+
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        try:
+            tune.report(iterations=step, mean_loss=intermediate_score)
+        except StopIteration:
+            return
+
+
+def test_tune_scheduler(smoke_test=True, use_ray=True, use_raytune=False):
+    import numpy as np
+    from flaml.searcher.blendsearch import BlendSearch
+
+    np.random.seed(100)
+    easy_objective_custom_tune = partial(easy_objective, use_raytune)
+    if use_raytune:
+        try:
+            from ray import tune
+        except ImportError:
+            print("ray[tune] is not installed, skipping test")
+            return
+        searcher = BlendSearch(
+            space={
+                "steps": 100,
+                "width": tune.uniform(0, 20),
+                "height": tune.uniform(-100, 100),
+                # This is an ignored parameter.
+                "activation": tune.choice(["relu", "tanh"]),
+                "test4": np.zeros((3, 1)),
+            }
+        )
+        analysis = tune.run(
+            easy_objective_custom_tune,
+            search_alg=searcher,
+            metric="mean_loss",
+            mode="min",
+            num_samples=10 if smoke_test else 100,
+            scheduler="asynchyperband",
+            config={
+                "steps": 100,
+                "width": tune.uniform(0, 20),
+                "height": tune.uniform(-100, 100),
+                # This is an ignored parameter.
+                "activation": tune.choice(["relu", "tanh"]),
+                "test4": np.zeros((3, 1)),
+            },
+        )
+    else:
+        from flaml import tune
+
+        searcher = BlendSearch(
+            space={
+                "steps": 100,
+                "width": tune.uniform(0, 20),
+                "height": tune.uniform(-100, 100),
+                # This is an ignored parameter.
+                "activation": tune.choice(["relu", "tanh"]),
+                "test4": np.zeros((3, 1)),
+            }
+        )
+        analysis = tune.run(
+            easy_objective_custom_tune,
+            search_alg=searcher,
+            metric="mean_loss",
+            mode="min",
+            num_samples=10 if smoke_test else 100,
+            scheduler="asynchyperband",
+            resource_attr="iterations",
+            max_resource=99,
+            # min_resource=1,
+            # reduction_factor=4,
+            config={
+                "steps": 100,
+                "width": tune.uniform(0, 20),
+                "height": tune.uniform(-100, 100),
+                # This is an ignored parameter.
+ "activation": tune.choice(["relu", "tanh"]), + "test4": np.zeros((3, 1)), + }, + use_ray=use_ray, + ) + + print("Best hyperparameters found were: ", analysis.best_config) + print("best results", analysis.best_result) + + +if __name__ == "__main__": + test_tune_scheduler(smoke_test=True, use_ray=True, use_raytune=True) + test_tune_scheduler(smoke_test=True, use_ray=True) + test_tune_scheduler(smoke_test=True, use_ray=False) diff --git a/test/tune/test_flaml_raytune_consistency.py b/test/tune/test_flaml_raytune_consistency.py index 4ac26001e..0e011279f 100644 --- a/test/tune/test_flaml_raytune_consistency.py +++ b/test/tune/test_flaml_raytune_consistency.py @@ -58,7 +58,6 @@ def _test_flaml_raytune_consistency( "skip _test_flaml_raytune_consistency because ray tune cannot be imported." ) return - np.random.seed(100) searcher = setup_searcher(searcher_name) analysis = tune.run( evaluate_config, # the function to evaluate a config @@ -78,7 +77,6 @@ def _test_flaml_raytune_consistency( flaml_time_in_results = [v["time_total_s"] for v in analysis.results.values()] print(analysis.best_trial.last_result) # the best trial's result - np.random.seed(100) searcher = setup_searcher(searcher_name) from ray.tune.suggest import ConcurrencyLimiter diff --git a/test/tune/test_scheduler.py b/test/tune/test_scheduler.py index e7b573882..52bf50092 100644 --- a/test/tune/test_scheduler.py +++ b/test/tune/test_scheduler.py @@ -15,7 +15,7 @@ def rand_vector_unit_sphere(dim): return vec / mag -def simple_obj(config, resource=10000): +def simple_obj(resource, config): config_value_vector = np.array([config["x"], config["y"], config["z"]]) score_sequence = [] for i in range(resource): @@ -41,23 +41,29 @@ def obj_w_intermediate_report(resource, config): score_avg = np.mean(np.array(score_sequence)) score_std = np.std(np.array(score_sequence)) score_lb = score_avg - 1.96 * score_std / np.sqrt(i + 1) - tune.report(samplesize=i + 1, sphere_projection=score_lb) + try: + tune.report(samplesize=i + 1, sphere_projection=score_lb) + except StopIteration: + return def obj_w_suggested_resource(resource_attr, config): resource = config[resource_attr] - simple_obj(config, resource) + simple_obj(resource, config) -def test_scheduler(scheduler=None): +def test_scheduler(scheduler=None, use_ray=False, time_budget_s=1): from functools import partial resource_attr = "samplesize" max_resource = 10000 - + min_resource = 1000 + reduction_factor = 2 + time_budget_s = time_budget_s # specify the objective functions if scheduler is None: - evaluation_obj = simple_obj + evaluation_obj = partial(simple_obj, max_resource) + min_resource = max_resource = reduction_factor = None elif scheduler == "flaml": evaluation_obj = partial(obj_w_suggested_resource, resource_attr) elif scheduler == "asha" or isinstance(scheduler, TrialScheduler): @@ -89,14 +95,17 @@ def test_scheduler(scheduler=None): resource_attr=resource_attr, scheduler=scheduler, max_resource=max_resource, - min_resource=100, - reduction_factor=2, - time_budget_s=1, + min_resource=min_resource, + reduction_factor=reduction_factor, + time_budget_s=time_budget_s, num_samples=500, + use_ray=use_ray, ) - print("Best hyperparameters found were: ", analysis.best_config) - # print(analysis.get_best_trial) + print( + f"{len(analysis.results)} trials finished \ + in {time_budget_s} seconds with {str(scheduler)} scheduler" + ) return analysis.best_config @@ -105,13 +114,15 @@ def test_no_scheduler(): print("No scheduler, test error:", abs(10 / 2 - best_config["z"] / 2)) -def 
+def test_asha_scheduler(use_ray=False, time_budget_s=1):
    try:
        from ray.tune.schedulers import ASHAScheduler
    except ImportError:
        print("skip the test as ray tune cannot be imported.")
        return
-    best_config = test_scheduler(scheduler="asha")
+    best_config = test_scheduler(
+        scheduler="asha", use_ray=use_ray, time_budget_s=time_budget_s
+    )
    print("Auto ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))


@@ -150,6 +161,7 @@ def test_flaml_scheduler():
 if __name__ == "__main__":
    test_no_scheduler()
    test_asha_scheduler()
+    test_asha_scheduler(use_ray=True, time_budget_s=3)
    test_custom_scheduler()
    test_custom_scheduler_default_time_attr()
    test_flaml_scheduler()
diff --git a/website/docs/Use-Cases/Tune-User-Defined-Function.md b/website/docs/Use-Cases/Tune-User-Defined-Function.md
index af8e18188..80224d559 100644
--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@@ -350,7 +350,9 @@ tune.run(.., scheduler=my_scheduler, ...)
 ```
 - Similar to the case where the `flaml` scheduler is used, you need to specify the resource dimension, use the resource dimension accordingly in your `evaluation_function`, and provide the necessary information needed for scheduling, such as `min_resource`, `max_resource` and `reduction_factor` (depending on the requirements of the specific scheduler).
-- Different from the case when the `flaml` scheduler is used, the amount of resources to use at each iteration is not suggested by the search algorithm through the `resource_attr` in a configuration. You need to specify the evaluation schedule explicitly by yourself in the `evaluation_function` and report intermediate results (using `tune.report()`) accordingly. In the following code example, we use the ASHA scheduler by setting `scheduler="asha"`, we specify `resource_attr`, `min_resource`, `min_resource` and `reduction_factor` the same way as in the previous example (when "flaml" is used as the scheduler). We perform the evaluation in a customized schedule.
+- Different from the case when the `flaml` scheduler is used, the amount of resources to use at each iteration is not suggested by the search algorithm through the `resource_attr` in a configuration. You need to specify the evaluation schedule explicitly in the `evaluation_function` and **report intermediate results (using `tune.report()`) accordingly**. In the following code example, we use the ASHA scheduler by setting `scheduler="asha"`. We specify `resource_attr`, `min_resource`, `max_resource` and `reduction_factor` the same way as in the previous example (when "flaml" is used as the scheduler). We perform the evaluation in a customized schedule.
+
+- Use the ray backend or not? You can choose whether to use the ray backend by specifying `use_ray=True` or `use_ray=False`. When the ray backend is not used, i.e., `use_ray=False`, you also need to stop the evaluation function early by explicitly catching the `StopIteration` exception, as shown in the last two lines of the evaluation function `obj_w_intermediate_report()` in the following code example.

 ```python
 def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, min_resource, max_resource, config):
@@ -370,7 +372,10 @@ def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, m
        y_test_predict = model.predict(X_test)
        test_loss = 1.0 - accuracy_score(y_test, y_test_predict)
        # need to report the resource attribute used and the corresponding intermediate results
-        tune.report(sample_size=resource, loss=test_loss)
+        try:
+            tune.report(sample_size=resource, loss=test_loss)
+        except StopIteration:
+            return

 resource_attr = "sample_size"
 min_resource = 1000