Class variables, cost_attr, and reproducibility (#587)

* test reproducibility

* cost_attr, _eps and penalty as instance variables
This commit is contained in:
Qingyun Wu
2022-06-14 00:52:42 -04:00
committed by GitHub
parent 5de3f54fd9
commit 65fa72d583
5 changed files with 173 additions and 20 deletions

View File

@@ -6,8 +6,10 @@
N_SPLITS = 5
RANDOM_SEED = 1
SPLIT_RATIO = 0.1
MEM_THRES = 4 * (1024 ** 3)
MEM_THRES = 4 * (1024**3)
SMALL_LARGE_THRES = 10000000
MIN_SAMPLE_TRAIN = 10000
CV_HOLDOUT_THRESHOLD = 100000
SAMPLE_MULTIPLY_FACTOR = 4
SEARCH_THREAD_EPS = 1.0
PENALTY = 1e10 # penalty term for constraints

View File

@@ -7,7 +7,6 @@ import numpy as np
import time
import pickle
try:
from ray import __version__ as ray_version
@@ -22,17 +21,19 @@ from ..tune import INCUMBENT_RESULT
from .search_thread import SearchThread
from .flow2 import FLOW2
from ..tune.space import add_cost_to_space, indexof, normalize, define_by_run_func
from ..tune.result import TIME_TOTAL_S
import logging
SEARCH_THREAD_EPS = 1.0
PENALTY = 1e10 # penalty term for constraints
logger = logging.getLogger(__name__)
class BlendSearch(Searcher):
"""class for BlendSearch algorithm."""
cost_attr = "time_total_s" # cost attribute in result
lagrange = "_lagrange" # suffix for lagrange-modified metric
penalty = 1e10 # penalty term for constraints
LocalSearch = FLOW2
def __init__(
@@ -56,6 +57,7 @@ class BlendSearch(Searcher):
] = None,
metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
seed: Optional[int] = 20,
cost_attr: Optional[str] = "auto",
experimental: Optional[bool] = False,
use_incumbent_result_in_evaluation=False,
):
@@ -102,8 +104,23 @@ class BlendSearch(Searcher):
metric_constraints: A list of metric constraints to be satisfied.
E.g., `[('precision', '>=', 0.9)]`. The sign can be ">=" or "<=".
seed: An integer of the random seed.
cost_attr: Choose from ["auto", None] to specify the attribute to evaluate the cost of different trials.
Default is "auto", which means that we will automatically choose the cost attribute to use (depending
on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
in our search algorithm.
experimental: A bool of whether to use experimental features.
"""
self._eps = SEARCH_THREAD_EPS
self._input_cost_attr = cost_attr
if cost_attr == "auto":
if time_budget_s is not None:
self.cost_attr = TIME_TOTAL_S
else:
self.cost_attr = None
else:
self.cost_attr = cost_attr
self.penalty = PENALTY # penalty term for constraints
self._metric, self._mode = metric, mode
self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
init_config = low_cost_partial_config or {}
@@ -263,6 +280,8 @@ class BlendSearch(Searcher):
self._time_used += now - self._start_time
self._start_time = now
self._set_deadline()
if self._input_cost_attr == "auto":
self.cost_attr = TIME_TOTAL_S
if "metric_target" in setting:
self._metric_target = setting.get("metric_target")
if "num_samples" in setting:
@@ -276,10 +295,14 @@ class BlendSearch(Searcher):
def _set_deadline(self):
if self._time_budget_s is not None:
self._deadline = self._time_budget_s + self._start_time
SearchThread.set_eps(self._time_budget_s)
self._set_eps()
else:
self._deadline = np.inf
def _set_eps(self):
    """set eps for search threads according to time budget"""
    # Proportional to the time budget but clamped to [1e-9, 1.0]; _eps is later
    # used as a positive lower bound on the cost denominator when a
    # SearchThread estimates its improvement speed.
    self._eps = max(min(self._time_budget_s / 1000.0, 1.0), 1e-9)
def _init_search(self):
"""initialize the search"""
self._start_time = time.time()
@@ -290,7 +313,7 @@ class BlendSearch(Searcher):
self._metric_target = np.inf * self._ls.metric_op
self._search_thread_pool = {
# id: int -> thread: SearchThread
0: SearchThread(self._ls.mode, self._gs)
0: SearchThread(self._ls.mode, self._gs, self.cost_attr, self._eps)
}
self._thread_count = 1 # total # threads created
self._init_used = self._ls.init_config is None
@@ -462,6 +485,7 @@ class BlendSearch(Searcher):
space=space,
),
self.cost_attr,
self._eps,
)
self._thread_count += 1
self._update_admissible_region(

View File

@@ -88,7 +88,9 @@ class FLOW2(Searcher):
self.best_config = flatten_dict(init_config)
self.resource_attr = resource_attr
self.min_resource = min_resource
self.resource_multiple_factor = resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
self.resource_multiple_factor = (
resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
)
self.cost_attr = cost_attr
self.max_resource = max_resource
self._resource = None
@@ -316,7 +318,7 @@ class FLOW2(Searcher):
self.best_obj = obj
self.best_config, self.step = self._configs[trial_id]
self.incumbent = self.normalize(self.best_config)
self.cost_incumbent = result.get(self.cost_attr)
self.cost_incumbent = result.get(self.cost_attr, 1)
if self._resource:
self._resource = self.best_config[self.resource_attr]
self._num_complete4incumbent = 0
@@ -339,7 +341,9 @@ class FLOW2(Searcher):
# proposed by current incumbent and no better
self._num_complete4incumbent += 1
cost = (
result.get(self.cost_attr) if result else self._trial_cost.get(trial_id)
result.get(self.cost_attr, 1)
if result
else self._trial_cost.get(trial_id)
)
if cost:
self._cost_complete4incumbent += cost
@@ -369,14 +373,14 @@ class FLOW2(Searcher):
if self._resource:
self._resource = config[self.resource_attr]
self.incumbent = self.normalize(self.best_config)
self.cost_incumbent = result.get(self.cost_attr)
self.cost_incumbent = result.get(self.cost_attr, 1)
self._cost_complete4incumbent = 0
self._num_complete4incumbent = 0
self._num_proposedby_incumbent = 0
self._num_allowed4incumbent = 2 * self.dim
self._proposed_by.clear()
self._iter_best_config = self.trial_count_complete
cost = result.get(self.cost_attr)
cost = result.get(self.cost_attr, 1)
# record the cost in case it is pruned and cost info is lost
self._trial_cost[trial_id] = cost

View File

@@ -22,13 +22,12 @@ logger = logging.getLogger(__name__)
class SearchThread:
"""Class of global or local search thread."""
_eps = 1.0
def __init__(
self,
mode: str = "min",
search_alg: Optional[Searcher] = None,
cost_attr: Optional[str] = "time_total_s",
eps: Optional[float] = 1.0,
):
"""When search_alg is omitted, use local search FLOW2."""
self._search_alg = search_alg
@@ -38,6 +37,7 @@ class SearchThread:
self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = getattr(
search_alg, "cost_incumbent", 0
)
self._eps = eps
self.cost_best2 = 0
self.obj_best1 = self.obj_best2 = getattr(
search_alg, "best_obj", np.inf
@@ -59,10 +59,6 @@ class SearchThread:
# remember const config
self._const = add_cost_to_space(self.space, {}, {})
@classmethod
def set_eps(cls, time_budget_s):
cls._eps = max(min(time_budget_s / 1000.0, 1.0), 1e-9)
def suggest(self, trial_id: str) -> Optional[Dict]:
"""Use the suggest() of the underlying search algorithm."""
if isinstance(self._search_alg, FLOW2):
@@ -107,7 +103,7 @@ class SearchThread:
self.speed = (
(self.obj_best2 - self.obj_best1)
/ self.running
/ (max(self.cost_total - self.cost_best2, SearchThread._eps))
/ (max(self.cost_total - self.cost_best2, self._eps))
)
else:
self.speed = 0
@@ -164,8 +160,9 @@ class SearchThread:
# rs is used in place of optuna sometimes
if not str(e).endswith("has already finished and can not be updated."):
raise e
if self.cost_attr in result and self.cost_last < result[self.cost_attr]:
self.cost_last = result[self.cost_attr]
new_cost = result.get(self.cost_attr, 1)
if self.cost_last < new_cost:
self.cost_last = new_cost
# self._update_speed()
@property

View File

@@ -0,0 +1,126 @@
from functools import partial
def _evaluation_fn(step, width, height):
return (0.1 + width * step / 100) ** (-1) + height * 0.1
def _easy_objective(use_raytune, config):
    """Iterative training function that reports one score per step to tune."""
    # Select the tune backend lazily so only the requested one is imported.
    if use_raytune:
        from ray import tune
    else:
        from flaml import tune
    # Hyperparameters for this trial.
    width = config["width"]
    height = config["height"]
    for step in range(config["steps"]):
        # Any arbitrary training procedure could go here.
        score = _evaluation_fn(step, width, height)
        try:
            # Feed the score back to tune.
            tune.report(iterations=step, mean_loss=score)
        except StopIteration:
            # The trial was asked to stop early; end the loop gracefully.
            print("Trial stopped", step)
            return
def test_tune(
    smoke_test=True, externally_setup_searcher=False, use_ray=False, use_raytune=False
):
    """Run a small tuning experiment, optionally with an external BlendSearch.

    When externally_setup_searcher is True, first verify how BlendSearch
    resolves its cost attribute for several constructor-argument combinations,
    then pass a freshly built searcher to tune.run. Returns the best config.
    """
    from flaml import tune
    from flaml.searcher.blendsearch import BlendSearch

    objective = partial(_easy_objective, use_raytune)
    search_space = {
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
    }
    if externally_setup_searcher:
        # (extra constructor kwargs, expected cost_attr, failure message)
        cost_attr_cases = [
            (
                dict(time_budget_s=5),
                "time_total_s",
                "when time_budget_s is provided, cost_attr should be time_total_s",
            ),
            (
                dict(num_samples=10),
                None,
                "when time_budget_s is not provided, cost_attr should be None.",
            ),
            (
                dict(num_samples=10, time_budget_s=5),
                "time_total_s",
                "As long as time_budget_s is provided and cost_attr not otherwise specified (i.e., using the default auto value), time_total_s is used as the cost_attr",
            ),
            (
                dict(num_samples=10, time_budget_s=5, cost_attr=None),
                None,
                "When the cost_attr is explicitly specified to be None, BS should use None as the cost_attr.",
            ),
        ]
        for extra_kwargs, expected, message in cost_attr_cases:
            searcher = BlendSearch(
                space=search_space, metric="mean_loss", mode="min", **extra_kwargs
            )
            assert searcher.cost_attr == expected, message
        # The searcher actually handed to tune.run below.
        searcher = BlendSearch(
            space=search_space,
            metric="mean_loss",
            mode="min",
        )
    else:
        searcher = None
    analysis = tune.run(
        objective,
        search_alg=searcher,
        metric="mean_loss",
        mode="min",
        num_samples=10,
        time_budget_s=5,
        use_ray=use_ray,
        config=search_space,
    )
    print("Best hyperparameters found were: ", analysis.best_config)
    print("best results", analysis.best_result)
    print("best results", analysis.results)
    return analysis.best_config
def test_reproducibility():
    """Two identical runs must produce identical best configurations."""
    # Default setup: tune.run builds the searcher internally.
    first = test_tune(smoke_test=True)
    second = test_tune(smoke_test=True)
    print(first)
    print(second)
    assert first == second, "flaml.tune not reproducible"
    # Same check with a BlendSearch constructed by the caller.
    first = test_tune(smoke_test=True, externally_setup_searcher=True)
    second = test_tune(smoke_test=True, externally_setup_searcher=True)
    print(first)
    print(second)
    assert (
        first == second
    ), "flaml.tune not reproducible when the searcher is set up externally"
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_reproducibility()