From 9bc32acafb86297de406f776c3331624dc2b8f31 Mon Sep 17 00:00:00 2001 From: Anonymous-submission-repo Date: Sun, 9 Oct 2022 11:39:29 -0400 Subject: [PATCH] first --- flaml/automl.py | 27 +++- flaml/nlp/huggingface/data_collator.py | 2 +- flaml/onlineml/README.md | 2 +- flaml/tune/README.md | 6 +- flaml/tune/analysis.py | 46 ++++++- flaml/tune/result.py | 3 + flaml/tune/searcher/blendsearch.py | 10 +- flaml/tune/searcher/flow2.py | 121 +++++++++++++++-- flaml/tune/searcher/search_thread.py | 11 +- flaml/tune/tune.py | 94 ++++++++++++- test/automl/test_lexiflow.py | 40 ++++++ .../data_prep/data_prep.py | 2 +- .../data_prep/data_prep.yaml | 2 +- .../submit_train_pipeline.py | 8 +- .../submit_tuner_pipeline.py | 29 ++-- test/pipeline_tuning_example/train/train.py | 20 ++- test/pipeline_tuning_example/train/train.yaml | 8 +- .../tuner/component_spec.yaml | 2 +- .../tuner/tuner_func.py | 11 +- test/tune/test_lexiflow.py | 128 ++++++++++++++++++ website/docs/Examples/Integrate - AzureML.md | 2 +- .../docs/Examples/Tune-AzureML-pipeline.md | 2 +- website/docs/Installation.md | 4 +- website/docs/Research.md | 2 +- 24 files changed, 511 insertions(+), 71 deletions(-) create mode 100644 test/automl/test_lexiflow.py create mode 100644 test/tune/test_lexiflow.py diff --git a/flaml/automl.py b/flaml/automl.py index f91074cac..cda468217 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -736,6 +736,7 @@ class AutoML(BaseEstimator): settings["use_ray"] = settings.get("use_ray", False) settings["metric_constraints"] = settings.get("metric_constraints", []) settings["cv_score_agg_func"] = settings.get("cv_score_agg_func", None) + settings["lexico_objectives"] = settings.get("lexico_objectives", None) settings["fit_kwargs_by_estimator"] = settings.get( "fit_kwargs_by_estimator", {} ) @@ -2149,6 +2150,7 @@ class AutoML(BaseEstimator): cv_score_agg_func=None, skip_transform=None, fit_kwargs_by_estimator=None, + lexico_objectives=None, **fit_kwargs, ): """Find a model for a given task. @@ -2402,6 +2404,19 @@ class AutoML(BaseEstimator): [TrainingArgumentsForAuto](nlp/huggingface/training_args). e.g., skip_transform: boolean, default=False | Whether to pre-process data prior to modeling. + + lexico_objectives: A dictionary with four elements. + It specifies the information needed to optimize multiple objectives with lexicographic preference. + e.g.,```lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"], + "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0,"pred_time":0.0}}``` + + "metrics" and "modes" are lists of str: "metrics" names the optimization objectives, + and "modes" specifies whether each objective is to be minimized or maximized. + Both lists are ordered by priority, from highest to lowest. + "tolerances" is a dictionary specifying the optimality tolerance of each objective. + "targets" is a dictionary specifying the optimization target of each objective. + If lexico_objectives is provided, the arguments metric and hpo_method will be ignored. + fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name. For TransformersEstimator, available fit_kwargs can be found from [TrainingArgumentsForAuto](nlp/huggingface/training_args).
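For context, a minimal usage sketch of the new `lexico_objectives` argument, modeled on the `test/automl/test_lexiflow.py` test added below; the dataset, budget, and estimator choice are illustrative, and `X_train`/`y_train` are assumed to be loaded already:

```python
from flaml import AutoML

# Prioritized objectives: minimize val_loss first; among configurations whose
# val_loss stays within 0.01 of the best found, minimize pred_time.
lexico_objectives = {
    "metrics": ["val_loss", "pred_time"],
    "modes": ["min", "min"],
    "tolerances": {"val_loss": 0.01, "pred_time": 0.0},
    "targets": {"val_loss": 0.0, "pred_time": 0.0},
}

automl = AutoML()
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="classification",
    estimator_list=["xgboost"],
    time_budget=100,  # seconds; illustrative
    lexico_objectives=lexico_objectives,  # forces hpo_method to "cfo" (see the hunk below)
)
```

As documented above, passing `lexico_objectives` causes the `metric` and `hpo_method` arguments to be ignored.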
@@ -2502,7 +2517,11 @@ class AutoML(BaseEstimator): self._settings.get("retrain_full") if retrain_full is None else retrain_full ) split_type = split_type or self._settings.get("split_type") - hpo_method = hpo_method or self._settings.get("hpo_method") + if lexico_objectives is None: + hpo_method = hpo_method or self._settings.get("hpo_method") + else: + hpo_method = "cfo" + learner_selector = learner_selector or self._settings.get("learner_selector") no_starting_points = starting_points is None if no_starting_points: @@ -2606,7 +2625,9 @@ class AutoML(BaseEstimator): self._state.cv_score_agg_func = cv_score_agg_func or self._settings.get( "cv_score_agg_func" ) - + self._state.lexico_objectives = lexico_objectives or self._settings.get( + "lexico_objectives" + ) self._retrain_in_budget = retrain_full == "budget" and ( eval_method == "holdout" and self._state.X_val is None ) @@ -2996,6 +3017,7 @@ class AutoML(BaseEstimator): metric_constraints=self.metric_constraints, seed=self._seed, time_budget_s=time_left, + lexico_objectives=self._state.lexico_objectives, ) else: # if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match @@ -3232,6 +3254,7 @@ class AutoML(BaseEstimator): ], metric_constraints=self.metric_constraints, seed=self._seed, + lexico_objectives=self._state.lexico_objectives, ) else: # if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match diff --git a/flaml/nlp/huggingface/data_collator.py b/flaml/nlp/huggingface/data_collator.py index b3911c291..330846198 100644 --- a/flaml/nlp/huggingface/data_collator.py +++ b/flaml/nlp/huggingface/data_collator.py @@ -11,7 +11,7 @@ from flaml.data import ( MULTICHOICECLASSIFICATION, SUMMARIZATION, SEQCLASSIFICATION, - SEQREGRESSION + SEQREGRESSION, ) diff --git a/flaml/onlineml/README.md b/flaml/onlineml/README.md index 9eabc11b6..751da0646 100644 --- a/flaml/onlineml/README.md +++ b/flaml/onlineml/README.md @@ -1,6 +1,6 @@ # ChaCha for Online AutoML -FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time; and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved. +FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time; and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved. 
For more technical details about *ChaCha*, please check our paper. diff --git a/flaml/tune/README.md b/flaml/tune/README.md index 29557e694..b1e57f79c 100644 --- a/flaml/tune/README.md +++ b/flaml/tune/README.md @@ -20,7 +20,7 @@ def evaluate_config(config): # and the cost could be related to certain hyperparameters # in this example, we assume it's proportional to x time.sleep(config['x']/100000) - # use tune.report to report the metric to optimize + # use tune.report to report the metric to optimize tune.report(metric=metric) analysis = tune.run( @@ -35,7 +35,7 @@ analysis = tune.run( num_samples=-1, # the maximal number of configs to try, -1 means infinite time_budget_s=60, # the time budget in seconds local_dir='logs/', # the local directory to store logs - # verbose=0, # verbosity + # verbose=0, # verbosity # use_ray=True, # uncomment when performing parallel tuning using ray ) @@ -59,7 +59,7 @@ def evaluate_config(config): # and the cost could be related to certain hyperparameters # in this example, we assume it's proportional to x time.sleep(config['x']/100000) - # use tune.report to report the metric to optimize + # use tune.report to report the metric to optimize tune.report(metric=metric) # provide a time budget (in seconds) for the tuning process diff --git a/flaml/tune/analysis.py b/flaml/tune/analysis.py index 11b446616..4aedfb543 100644 --- a/flaml/tune/analysis.py +++ b/flaml/tune/analysis.py @@ -17,7 +17,10 @@ # Copyright (c) Microsoft Corporation. from typing import Dict, Optional import numpy as np + +from flaml.tune import result from .trial import Trial +from collections import defaultdict import logging @@ -68,7 +71,6 @@ class ExperimentAnalysis: @property def results(self) -> Dict[str, Dict]: """Get the last result of all the trials of the experiment""" - return {trial.trial_id: trial.last_result for trial in self.trials} def _validate_metric(self, metric: str) -> str: @@ -89,6 +91,42 @@ class ExperimentAnalysis: raise ValueError("If set, `mode` has to be one of [min, max]") return mode or self.default_mode + def lexico_best(self, trials): + results = {index: trial.last_result for index, trial in enumerate(trials)} + metrics = self.lexico_objectives["metrics"] + modes = self.lexico_objectives["modes"] + f_best = {} + keys = list(results.keys()) + length = len(keys) + histories = defaultdict(list) + for time_index in range(length): + for objective, mode in zip(metrics, modes): + histories[objective].append( + results[keys[time_index]][objective] + if mode == "min" + else results[keys[time_index]][objective] * -1 + ) + obj_initial = self.lexico_objectives["metrics"][0] + feasible_index = [*range(len(histories[obj_initial]))] + # keep only trials within max(f_best + tolerance, target) of each higher-priority objective + for k_metric in self.lexico_objectives["metrics"]: + k_values = np.array(histories[k_metric]) + f_best[k_metric] = np.min(k_values.take(feasible_index)) + feasible_index_prior = np.where( + k_values + <= max( + [ + f_best[k_metric] + + self.lexico_objectives["tolerances"][k_metric], + self.lexico_objectives["targets"][k_metric], + ] + ) + )[0].tolist() + feasible_index = [ + val for val in feasible_index if val in feasible_index_prior + ] + best_trial = trials[feasible_index[-1]] + return best_trial + def get_best_trial( self, metric: Optional[str] = None, mode: Optional[str] = None, @@ -120,9 +158,11 @@ class ExperimentAnalysis: values are disregarded and these trials are never selected as the best trial.
""" + if self.lexico_objectives is not None: + best_trial = self.lexico_best(self.trials) + return best_trial metric = self._validate_metric(metric) mode = self._validate_mode(mode) - if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]: raise ValueError( "ExperimentAnalysis: attempting to get best trial for " @@ -138,7 +178,6 @@ class ExperimentAnalysis: for trial in self.trials: if metric not in trial.metric_analysis: continue - if scope in ["last", "avg", "last-5-avg", "last-10-avg"]: metric_score = trial.metric_analysis[metric][scope] else: @@ -158,7 +197,6 @@ class ExperimentAnalysis: elif (mode == "min") and (best_metric_score > metric_score): best_metric_score = metric_score best_trial = trial - if not best_trial: logger.warning( "Could not find best trial. Did you pass the correct `metric` " diff --git a/flaml/tune/result.py b/flaml/tune/result.py index 461c991f1..e9f1edc94 100644 --- a/flaml/tune/result.py +++ b/flaml/tune/result.py @@ -43,6 +43,9 @@ PID = "pid" # (Optional) Default (anonymous) metric when using tune.report(x) DEFAULT_METRIC = "_metric" +# (Optional) Default (anonymous) mode when using tune.report(x) +DEFAULT_MODE = "min" + # (Optional) Mean reward for current training iteration EPISODE_REWARD_MEAN = "episode_reward_mean" diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py index e22ae6055..841fa5ff5 100644 --- a/flaml/tune/searcher/blendsearch.py +++ b/flaml/tune/searcher/blendsearch.py @@ -63,6 +63,7 @@ class BlendSearch(Searcher): seed: Optional[int] = 20, cost_attr: Optional[str] = "auto", experimental: Optional[bool] = False, + lexico_objectives: Optional[dict] = None, use_incumbent_result_in_evaluation=False, ): """Constructor. @@ -127,6 +128,7 @@ class BlendSearch(Searcher): self.penalty = PENALTY # penalty term for constraints self._metric, self._mode = metric, mode self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation + self.lexico_objectives = lexico_objectives init_config = low_cost_partial_config or {} if not init_config: logger.info( @@ -176,6 +178,7 @@ class BlendSearch(Searcher): max_resource, reduction_factor, self.cost_attr, + self.lexico_objectives, seed, ) if global_search_alg is not None: @@ -480,11 +483,15 @@ class BlendSearch(Searcher): del self._subspace[trial_id] def _create_thread(self, config, result, space): + if self.lexico_objectives is None: + obj = result[self._ls.metric] + else: + obj = {k: result[k] for k in self.lexico_objectives["metrics"]} self._search_thread_pool[self._thread_count] = SearchThread( self._ls.mode, self._ls.create( config, - result[self._ls.metric], + obj, cost=result.get(self.cost_attr, 1), space=space, ), @@ -1044,6 +1051,7 @@ class BlendSearchTuner(BlendSearch, NNITuner): self._ls.max_resource, self._ls.resource_multiple_factor, cost_attr=self.cost_attr, + lexico_objectives=self.lexico_objectives, seed=self._ls.seed, ) if self._gs is not None: diff --git a/flaml/tune/searcher/flow2.py b/flaml/tune/searcher/flow2.py index 226f624ac..6d774b543 100644 --- a/flaml/tune/searcher/flow2.py +++ b/flaml/tune/searcher/flow2.py @@ -5,6 +5,7 @@ from typing import Dict, Optional, Tuple import numpy as np import logging +from collections import defaultdict try: from ray import __version__ as ray_version @@ -48,6 +49,7 @@ class FLOW2(Searcher): max_resource: Optional[float] = None, resource_multiple_factor: Optional[float] = None, cost_attr: Optional[str] = "time_total_s", + lexico_objectives=None, seed: Optional[int] = 20, ): """Constructor. 
@@ -90,13 +92,16 @@ class FLOW2(Searcher): self.best_config = flatten_dict(init_config) self.resource_attr = resource_attr self.min_resource = min_resource + self.lexico_objectives = lexico_objectives self.resource_multiple_factor = ( resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR ) self.cost_attr = cost_attr self.max_resource = max_resource self._resource = None + self._f_best = None self._step_lb = np.Inf + self._histories = None if space is not None: self._init_search() @@ -263,9 +268,22 @@ class FLOW2(Searcher): self.max_resource, self.resource_multiple_factor, self.cost_attr, + self.lexico_objectives, self.seed + 1, ) - flow2.best_obj = obj * self.metric_op # minimize internally + if self.lexico_objectives is not None: + flow2.best_obj = {} + for k, v in obj.items(): + flow2.best_obj[k] = ( + v * -1 + if self.lexico_objectives["modes"][ + self.lexico_objectives["metrics"].index(k) + ] + == "max" + else v + ) + else: + flow2.best_obj = obj * self.metric_op # minimize internally flow2.cost_incumbent = cost self.seed += 1 return flow2 @@ -303,6 +321,56 @@ class FLOW2(Searcher): self._init_search() return True + def lexico_compare(self, result) -> bool: + def update_fbest(): + obj_initial = self.lexico_objectives["metrics"][0] + feasible_index = [*range(len(self._histories[obj_initial]))] + for k_metric in self.lexico_objectives["metrics"]: + k_values = np.array(self._histories[k_metric]) + self._f_best[k_metric] = np.min(k_values.take(feasible_index)) + feasible_index_prior = np.where( + k_values + <= max( + [ + self._f_best[k_metric] + + self.lexico_objectives["tolerances"][k_metric], + self.lexico_objectives["targets"][k_metric], + ] + ) + )[0].tolist() + feasible_index = [ + val for val in feasible_index if val in feasible_index_prior + ] + + if self._histories is None: + self._histories, self._f_best = defaultdict(list), {} + for k in self.lexico_objectives["metrics"]: + self._histories[k].append(result[k]) + update_fbest() + return True + else: + for k in self.lexico_objectives["metrics"]: + self._histories[k].append(result[k]) + update_fbest() + for k_metric in self.lexico_objectives["metrics"]: + k_T = self.lexico_objectives["tolerances"][k_metric] + k_c = self.lexico_objectives["targets"][k_metric] + if (result[k_metric] < max([self._f_best[k_metric] + k_T, k_c])) and ( + self.best_obj[k_metric] < max([self._f_best[k_metric] + k_T, k_c]) + ): + continue + elif result[k_metric] < self.best_obj[k_metric]: + return True + else: + return False + for k_metr in self.lexico_objectives["metrics"]: + if result[k_metr] == self.best_obj[k_metr]: + continue + elif result[k_metr] < self.best_obj[k_metr]: + return True + else: + return False + def on_trial_complete( self, trial_id: str, result: Optional[Dict] = None, error: bool = False ): @@ -313,10 +381,28 @@ class FLOW2(Searcher): """ self.trial_count_complete += 1 if not error and result: - obj = result.get(self._metric) + obj = ( + result.get(self._metric) + if self.lexico_objectives is None + else {k: result[k] for k in self.lexico_objectives["metrics"]} + ) if obj: - obj *= self.metric_op - if self.best_obj is None or obj < self.best_obj: + obj = ( + { + k: obj[k] * -1 if m == "max" else obj[k] + for k, m in zip( + self.lexico_objectives["metrics"], + self.lexico_objectives["modes"], + ) + } + if isinstance(obj, dict) + else obj * self.metric_op + ) + if ( + self.best_obj is None + or (self.lexico_objectives is None and obj < self.best_obj) + or (self.lexico_objectives is not None and self.lexico_compare(obj)) + ): self.best_obj = 
obj self.best_config, self.step = self._configs[trial_id] self.incumbent = self.normalize(self.best_config) @@ -329,7 +415,6 @@ class FLOW2(Searcher): self._num_allowed4incumbent = 2 * self.dim self._proposed_by.clear() if self._K > 0: - # self._oldK must have been set when self._K>0 self.step *= np.sqrt(self._K / self._oldK) self.step = min(self.step, self.step_ub) self._iter_best_config = self.trial_count_complete @@ -340,7 +425,6 @@ class FLOW2(Searcher): self._trunc = max(self._trunc >> 1, 1) proposed_by = self._proposed_by.get(trial_id) if proposed_by == self.incumbent: - # proposed by current incumbent and no better self._num_complete4incumbent += 1 cost = ( result.get(self.cost_attr, 1) @@ -357,17 +441,34 @@ class FLOW2(Searcher): if self._num_complete4incumbent == self.dir and ( not self._resource or self._resource == self.max_resource ): - # check stuck condition if using max resource self._num_complete4incumbent -= 2 self._num_allowed4incumbent = max(self._num_allowed4incumbent, 2) def on_trial_result(self, trial_id: str, result: Dict): """Early update of incumbent.""" if result: - obj = result.get(self._metric) + obj = ( + result.get(self._metric) + if self.lexico_objectives is None + else {k: result[k] for k in self.lexico_objectives["metrics"]} + ) if obj: - obj *= self.metric_op - if self.best_obj is None or obj < self.best_obj: + obj = ( + { + k: obj[k] * -1 if m == "max" else obj[k] + for k, m in zip( + self.lexico_objectives["metrics"], + self.lexico_objectives["modes"], + ) + } + if isinstance(obj, dict) + else obj * self.metric_op + ) + if ( + self.best_obj is None + or (self.lexico_objectives is None and obj < self.best_obj) + or (self.lexico_objectives is not None and self.lexico_compare(obj)) + ): self.best_obj = obj config = self._configs[trial_id][0] if self.best_config != config: diff --git a/flaml/tune/searcher/search_thread.py b/flaml/tune/searcher/search_thread.py index d61089e49..9866abed9 100644 --- a/flaml/tune/searcher/search_thread.py +++ b/flaml/tune/searcher/search_thread.py @@ -137,7 +137,10 @@ class SearchThread: if result: self.cost_last = result.get(self.cost_attr, 1) self.cost_total += self.cost_last - if self._search_alg.metric in result: + if self._search_alg.metric in result and ( + not hasattr(self._search_alg, "lexico_objectives") + or self._search_alg.lexico_objectives is None + ): obj = result[self._search_alg.metric] * self._metric_op if obj < self.obj_best1 or self.best_result is None: self.cost_best2 = self.cost_best1 @@ -146,7 +149,11 @@ class SearchThread: self.obj_best1 = obj self.cost_best = self.cost_last self.best_result = result - self._update_speed() + if ( + not hasattr(self._search_alg, "lexico_objectives") + or self._search_alg.lexico_objectives is None + ): + self._update_speed() self.running -= 1 assert self.running >= 0 diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index 56e8dbaed..c6f333e80 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -7,6 +7,7 @@ import numpy as np import datetime import time import os +from collections import defaultdict try: from ray import __version__ as ray_version @@ -20,7 +21,7 @@ except (ImportError, AssertionError): from .analysis import ExperimentAnalysis as EA from .trial import Trial -from .result import DEFAULT_METRIC +from .result import DEFAULT_METRIC, DEFAULT_MODE import logging logger = logging.getLogger(__name__) @@ -33,16 +34,70 @@ _training_iteration = 0 INCUMBENT_RESULT = "__incumbent_result__" +def is_nan_or_inf(value): + return np.isnan(value) or np.isinf(value) + 
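+# When lexico_objectives is given, the subclass below chooses the best trial via lexico_best: trials are filtered objective by objective, keeping only those within max(f_best + tolerance, target) of each higher-priority objective, and the most recent trial surviving every filter is returned.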
+ class ExperimentAnalysis(EA): """Class for storing the experiment results.""" - def __init__(self, trials, metric, mode): + def __init__(self, trials, metric, mode, lexico_objectives): try: super().__init__(self, None, trials, metric, mode) except (TypeError, ValueError): self.trials = trials self.default_metric = metric or DEFAULT_METRIC - self.default_mode = mode + self.default_mode = mode or DEFAULT_MODE + self.lexico_objectives = lexico_objectives + + def lexico_best(self, trials): + results = {index: trial.last_result for index, trial in enumerate(trials)} + metrics = self.lexico_objectives["metrics"] + modes = self.lexico_objectives["modes"] + f_best = {} + keys = list(results.keys()) + length = len(keys) + histories = defaultdict(list) + for time_index in range(length): + for objective, mode in zip(metrics, modes): + histories[objective].append( + results[keys[time_index]][objective] + if mode == "min" + else results[keys[time_index]][objective] * -1 + ) + obj_initial = self.lexico_objectives["metrics"][0] + feasible_index = [*range(len(histories[obj_initial]))] + for k_metric in self.lexico_objectives["metrics"]: + k_values = np.array(histories[k_metric]) + f_best[k_metric] = np.min(k_values.take(feasible_index)) + feasible_index_prior = np.where( + k_values + <= max( + [ + f_best[k_metric] + + self.lexico_objectives["tolerances"][k_metric], + self.lexico_objectives["targets"][k_metric], + ] + ) + )[0].tolist() + feasible_index = [ + val for val in feasible_index if val in feasible_index_prior + ] + best_trial = trials[feasible_index[-1]] + return best_trial + + def get_best_trial( + self, + metric: Optional[str] = None, + mode: Optional[str] = None, + scope: str = "last", + filter_nan_and_inf: bool = True, + ) -> Optional[Trial]: + if self.lexico_objectives is not None: + best_trial = self.lexico_best(self.trials) + else: + best_trial = super().get_best_trial(metric, mode, scope, filter_nan_and_inf) + return best_trial def report(_metric=None, **kwargs): @@ -148,6 +203,7 @@ def run( max_failure: Optional[int] = 100, use_ray: Optional[bool] = False, use_incumbent_result_in_evaluation: Optional[bool] = None, + lexico_objectives: Optional[dict] = None, log_file_name: Optional[str] = None, **ray_args, ): @@ -300,6 +356,18 @@ def run( max_failure: int | the maximal consecutive number of failures to sample a trial before the tuning is terminated. use_ray: A boolean of whether to use ray as the backend. + lexico_objectives: A dictionary with four elements. + It specifies the information needed to optimize multiple objectives with lexicographic preference. + e.g.,```lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"], + "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0,"pred_time":0.0}}``` + + "metrics" and "modes" are lists of str: "metrics" names the optimization objectives, + and "modes" specifies whether each objective is to be minimized or maximized. + Both lists are ordered by priority, from highest to lowest. + "tolerances" is a dictionary specifying the optimality tolerance of each objective. + "targets" is a dictionary specifying the optimization target of each objective. + If lexico_objectives is provided, the arguments metric, mode, and search_alg will be ignored. + log_file_name: A string of the log file name. Default to None.
When set to None: if local_dir is not given, no log file is created; @@ -374,15 +442,21 @@ def run( try: import optuna as _ - SearchAlgorithm = BlendSearch + if lexico_objectives is None: + SearchAlgorithm = BlendSearch + else: + SearchAlgorithm = CFO except ImportError: SearchAlgorithm = CFO logger.warning( "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]" ) - + if lexico_objectives is None: + metric = metric or DEFAULT_METRIC + else: + metric = lexico_objectives["metrics"][0] or DEFAULT_METRIC search_alg = SearchAlgorithm( - metric=metric or DEFAULT_METRIC, + metric=metric, mode=mode, space=config, points_to_evaluate=points_to_evaluate, @@ -398,6 +472,7 @@ def run( config_constraints=config_constraints, metric_constraints=metric_constraints, use_incumbent_result_in_evaluation=use_incumbent_result_in_evaluation, + lexico_objectives=lexico_objectives, ) else: if metric is None or mode is None: @@ -532,7 +607,12 @@ def run( logger.warning( f"fail to sample a trial for {max_failure} times in a row, stopping." ) - analysis = ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode) + analysis = ExperimentAnalysis( + _runner.get_trials(), + metric=metric, + mode=mode, + lexico_objectives=lexico_objectives, + ) return analysis finally: # recover the global variables in case of nested run diff --git a/test/automl/test_lexiflow.py b/test/automl/test_lexiflow.py new file mode 100644 index 000000000..97da819e4 --- /dev/null +++ b/test/automl/test_lexiflow.py @@ -0,0 +1,40 @@ +from flaml import AutoML +from flaml.data import load_openml_dataset + + +def _test_lexiflow(): + + X_train, X_test, y_train, y_test = load_openml_dataset( + dataset_id=179, data_dir="test/data" + ) + + lexico_objectives = {} + lexico_objectives["metrics"] = ["val_loss", "pred_time"] + lexico_objectives["tolerances"] = {"val_loss": 0.01, "pred_time": 0.0} + lexico_objectives["targets"] = {"val_loss": 0.0, "pred_time": 0.0} + lexico_objectives["modes"] = ["min", "min"] + + automl = AutoML() + settings = { + "time_budget": 100, + "lexico_objectives": lexico_objectives, + "estimator_list": ["xgboost"], + "use_ray": True, + "task": "classification", + "max_iter": 10000000, + "train_time_limit": 60, + "verbose": 0, + "eval_method": "holdout", + "mem_thres": 128 * (1024**3), + "seed": 1, + } + automl.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test, **settings) + print(automl.predict(X_train)) + print(automl.model) + print(automl.config_history) + print(automl.best_iteration) + print(automl.best_estimator) + + +if __name__ == "__main__": + _test_lexiflow() diff --git a/test/pipeline_tuning_example/data_prep/data_prep.py b/test/pipeline_tuning_example/data_prep/data_prep.py index 6bd4f286c..aba4bf711 100644 --- a/test/pipeline_tuning_example/data_prep/data_prep.py +++ b/test/pipeline_tuning_example/data_prep/data_prep.py @@ -20,7 +20,7 @@ def main(): logger.info(" ".join(f"{k}={v}" for k, v in vars(args).items())) - data_path = os.path.join(args.data, 'data.csv') + data_path = os.path.join(args.data, "data.csv") df = pd.read_csv(data_path) train_df, test_df = train_test_split( diff --git a/test/pipeline_tuning_example/data_prep/data_prep.yaml b/test/pipeline_tuning_example/data_prep/data_prep.yaml index 0d9ca1cca..17da7ef34 100644 --- a/test/pipeline_tuning_example/data_prep/data_prep.yaml +++ b/test/pipeline_tuning_example/data_prep/data_prep.yaml @@ -19,7 +19,7 @@ environment: os: Linux command: >- - python data_prep.py + python data_prep.py --data {inputs.data} 
--test_train_ratio {inputs.test_train_ratio} --train_data {outputs.train_data} diff --git a/test/pipeline_tuning_example/submit_train_pipeline.py b/test/pipeline_tuning_example/submit_train_pipeline.py index ee380d16e..d01edef1a 100644 --- a/test/pipeline_tuning_example/submit_train_pipeline.py +++ b/test/pipeline_tuning_example/submit_train_pipeline.py @@ -83,10 +83,10 @@ def build_and_submit_aml_pipeline(config): ################################################ # load component functions ################################################ - data_prep_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR - / "data_prep/data_prep.yaml") - train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR - / "train/train.yaml") + data_prep_component = Component.from_yaml( + ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml" + ) + train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "train/train.yaml") ################################################ # build pipeline diff --git a/test/pipeline_tuning_example/submit_tuner_pipeline.py b/test/pipeline_tuning_example/submit_tuner_pipeline.py index d10fbf5ca..a9cfdbecc 100644 --- a/test/pipeline_tuning_example/submit_tuner_pipeline.py +++ b/test/pipeline_tuning_example/submit_tuner_pipeline.py @@ -14,16 +14,19 @@ def remote_run(): ################################################ # connect to your Azure ML workspace ################################################ - ws = Workspace(subscription_id=args.subscription_id, - resource_group=args.resource_group, - workspace_name=args.workspace) + ws = Workspace( + subscription_id=args.subscription_id, + resource_group=args.resource_group, + workspace_name=args.workspace, + ) ################################################ # load component functions ################################################ - pipeline_tuning_func = Component.from_yaml(ws, yaml_file=LOCAL_DIR - / "tuner/component_spec.yaml") + pipeline_tuning_func = Component.from_yaml( + ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml" + ) ################################################ # build pipeline @@ -44,6 +47,7 @@ def remote_run(): def local_run(): logger.info("Run tuner locally.") from tuner import tuner_func + tuner_func.tune_pipeline(concurrent_run=2) @@ -52,15 +56,18 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_mutually_exclusive_group(required=False) parser.add_argument( - "--subscription_id", type=str, help="your_subscription_id", required=False, + "--subscription_id", + type=str, + help="your_subscription_id", + required=False, ) parser.add_argument( - "--resource_group", type=str, help="your_resource_group", required=False) - parser.add_argument( - "--workspace", type=str, help="your_workspace", required=False) + "--resource_group", type=str, help="your_resource_group", required=False + ) + parser.add_argument("--workspace", type=str, help="your_workspace", required=False) - parser.add_argument('--remote', dest='remote', action='store_true') - parser.add_argument('--local', dest='remote', action='store_false') + parser.add_argument("--remote", dest="remote", action="store_true") + parser.add_argument("--local", dest="remote", action="store_false") parser.set_defaults(remote=True) args = parser.parse_args() diff --git a/test/pipeline_tuning_example/train/train.py b/test/pipeline_tuning_example/train/train.py index 510def5fc..ebf87f722 100644 --- a/test/pipeline_tuning_example/train/train.py +++ b/test/pipeline_tuning_example/train/train.py @@ -5,7 +5,7 @@ import pandas as pd from 
azureml.core import Run -class LightGBMCallbackHandler(): +class LightGBMCallbackHandler: def __init__(self): pass @@ -24,16 +24,22 @@ class LightGBMCallbackHandler(): def main(args): """Main function of the script.""" - train_path = os.path.join(args.train_data, 'data.csv') + train_path = os.path.join(args.train_data, "data.csv") print("traning_path:", train_path) - test_path = os.path.join(args.test_data, 'data.csv') + test_path = os.path.join(args.test_data, "data.csv") train_set = lgb.Dataset(train_path) test_set = lgb.Dataset(test_path) callbacks_handler = LightGBMCallbackHandler() - config = {"header": True, "objective": "binary", "label_column": 30, "metric": "binary_error", - "n_estimators": args.n_estimators, "learning_rate": args.learning_rate} + config = { + "header": True, + "objective": "binary", + "label_column": 30, + "metric": "binary_error", + "n_estimators": args.n_estimators, + "learning_rate": args.learning_rate, + } gbm = lgb.train( config, train_set, @@ -44,9 +50,9 @@ def main(args): ], ) - print('Saving model...') + print("Saving model...") # save model to file - gbm.save_model(os.path.join(args.model, 'model.txt')) + gbm.save_model(os.path.join(args.model, "model.txt")) if __name__ == "__main__": diff --git a/test/pipeline_tuning_example/train/train.yaml b/test/pipeline_tuning_example/train/train.yaml index e2acdb161..c989f0b40 100644 --- a/test/pipeline_tuning_example/train/train.yaml +++ b/test/pipeline_tuning_example/train/train.yaml @@ -4,9 +4,9 @@ name: classifier version: 0.0.1 display_name: Train lgbm classifier inputs: - train_data: + train_data: type: path - test_data: + test_data: type: path learning_rate: type: float @@ -20,8 +20,8 @@ environment: conda_dependencies_file: env.yaml os: Linux command: >- - python train.py - --train_data {inputs.train_data} + python train.py + --train_data {inputs.train_data} --test_data {inputs.test_data} --learning_rate {inputs.learning_rate} --n_estimators {inputs.n_estimators} diff --git a/test/pipeline_tuning_example/tuner/component_spec.yaml b/test/pipeline_tuning_example/tuner/component_spec.yaml index 55f132ddf..6bbad1bdc 100644 --- a/test/pipeline_tuning_example/tuner/component_spec.yaml +++ b/test/pipeline_tuning_example/tuner/component_spec.yaml @@ -9,4 +9,4 @@ environment: conda_dependencies_file: env.yaml os: Linux command: >- - python tuner/tuner_func.py + python tuner/tuner_func.py diff --git a/test/pipeline_tuning_example/tuner/tuner_func.py b/test/pipeline_tuning_example/tuner/tuner_func.py index 9ee0c614b..e633a386d 100644 --- a/test/pipeline_tuning_example/tuner/tuner_func.py +++ b/test/pipeline_tuning_example/tuner/tuner_func.py @@ -8,8 +8,7 @@ logger = logging.getLogger(__name__) def run_with_config(config: dict): - """Run the pipeline with a given config dict - """ + """Run the pipeline with a given config dict""" # pass the hyperparameters to AzureML jobs by overwriting the config file. 
overrides = [f"{key}={value}" for key, value in config.items()] @@ -24,25 +23,25 @@ def run_with_config(config: dict): while not stop: # get status status = run._core_run.get_status() - print(f'status: {status}') + print(f"status: {status}") # get metrics metrics = run._core_run.get_metrics(recursive=True) if metrics: run_metrics = list(metrics.values()) - new_metric = run_metrics[0]['eval_binary_error'] + new_metric = run_metrics[0]["eval_binary_error"] if type(new_metric) == list: new_metric = new_metric[-1] - print(f'eval_binary_error: {new_metric}') + print(f"eval_binary_error: {new_metric}") tune.report(eval_binary_error=new_metric) time.sleep(5) - if status == 'FAILED' or status == 'Completed': + if status == "FAILED" or status == "Completed": stop = True print("The run is terminated.") diff --git a/test/tune/test_lexiflow.py b/test/tune/test_lexiflow.py new file mode 100644 index 000000000..0e128547e --- /dev/null +++ b/test/tune/test_lexiflow.py @@ -0,0 +1,128 @@ +import torch +import thop +import torch.nn as nn +from flaml import tune +import torch.nn.functional as F +import torchvision +import numpy as np +import time +from ray import tune as raytune + +DEVICE = torch.device("cpu") +BATCHSIZE = 128 +N_TRAIN_EXAMPLES = BATCHSIZE * 30 +N_VALID_EXAMPLES = BATCHSIZE * 10 + + +def _test_lexiflow(): + train_dataset = torchvision.datasets.FashionMNIST( + "test/data", + train=True, + download=True, + transform=torchvision.transforms.ToTensor(), + ) + + train_loader = torch.utils.data.DataLoader( + torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))), + batch_size=BATCHSIZE, + shuffle=True, + ) + + val_dataset = torchvision.datasets.FashionMNIST( + "test/data", train=False, transform=torchvision.transforms.ToTensor() + ) + + val_loader = torch.utils.data.DataLoader( + torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))), + batch_size=BATCHSIZE, + shuffle=True, + ) + + def define_model(configuration): + n_layers = configuration["n_layers"] + layers = [] + in_features = 28 * 28 + for i in range(n_layers): + out_features = configuration["n_units_l{}".format(i)] + layers.append(nn.Linear(in_features, out_features)) + layers.append(nn.ReLU()) + p = configuration["dropout_{}".format(i)] + layers.append(nn.Dropout(p)) + in_features = out_features + layers.append(nn.Linear(in_features, 10)) + layers.append(nn.LogSoftmax(dim=1)) + return nn.Sequential(*layers) + + def train_model(model, optimizer, train_loader): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE) + optimizer.zero_grad() + F.nll_loss(model(data), target).backward() + optimizer.step() + + def eval_model(model, valid_loader): + model.eval() + correct = 0 + with torch.no_grad(): + for batch_idx, (data, target) in enumerate(valid_loader): + data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE) + pred = model(data).argmax(dim=1, keepdim=True) + correct += pred.eq(target.view_as(pred)).sum().item() + + accuracy = correct / N_VALID_EXAMPLES + flops, params = thop.profile( + model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False + ) + return np.log2(flops), 1 - accuracy, params + + def evaluate_function(configuration): + model = define_model(configuration).to(DEVICE) + optimizer = torch.optim.Adam(model.parameters(), configuration["lr"]) + n_epoch = configuration["n_epoch"] + for epoch in range(n_epoch): + train_model(model, optimizer, train_loader) + flops, error_rate, params = 
eval_model(model, val_loader) + return {"error_rate": error_rate, "flops": flops, "params": params} + + lexico_objectives = {} + lexico_objectives["metrics"] = ["error_rate", "flops"] + lexico_objectives["tolerances"] = {"error_rate": 0.02, "flops": 0.0} + lexico_objectives["targets"] = {"error_rate": 0.0, "flops": 0.0} + lexico_objectives["modes"] = ["min", "min"] + + search_space = { + "n_layers": raytune.randint(lower=1, upper=3), + "n_units_l0": raytune.randint(lower=4, upper=128), + "n_units_l1": raytune.randint(lower=4, upper=128), + "n_units_l2": raytune.randint(lower=4, upper=128), + "dropout_0": raytune.uniform(lower=0.2, upper=0.5), + "dropout_1": raytune.uniform(lower=0.2, upper=0.5), + "dropout_2": raytune.uniform(lower=0.2, upper=0.5), + "lr": raytune.loguniform(lower=1e-5, upper=1e-1), + "n_epoch": raytune.randint(lower=1, upper=20), + } + + low_cost_partial_config = { + "n_layers": 1, + "n_units_l0": 4, + "n_units_l1": 4, + "n_units_l2": 4, + "n_epoch": 1, + } + + analysis = tune.run( + evaluate_function, + num_samples=100000000, + time_budget_s=100, + config=search_space, + use_ray=False, + lexico_objectives=lexico_objectives, + low_cost_partial_config=low_cost_partial_config, + ) + result = analysis.best_result + print(result) + + +if __name__ == "__main__": + _test_lexiflow() diff --git a/website/docs/Examples/Integrate - AzureML.md b/website/docs/Examples/Integrate - AzureML.md index 430eda795..6a7e6a3c6 100644 --- a/website/docs/Examples/Integrate - AzureML.md +++ b/website/docs/Examples/Integrate - AzureML.md @@ -37,7 +37,7 @@ automl = AutoML() settings = { "time_budget": 60, # total running time in seconds "metric": "accuracy", # metric to optimize - "task": "classification", # task type + "task": "classification", # task type "log_file_name": "airlines_experiment.log", # flaml log file } experiment = mlflow.set_experiment("flaml") # the experiment name in AzureML workspace diff --git a/website/docs/Examples/Tune-AzureML-pipeline.md b/website/docs/Examples/Tune-AzureML-pipeline.md index d7228bc20..8954ae4cc 100644 --- a/website/docs/Examples/Tune-AzureML-pipeline.md +++ b/website/docs/Examples/Tune-AzureML-pipeline.md @@ -205,7 +205,7 @@ Overall, to tune the hyperparameters of the AzureML pipeline, run: ```bash # the training job will run remotely as an AzureML job in both choices -# run the tuning job locally +# run the tuning job locally python submit_tune.py --local # run the tuning job remotely python submit_tune.py --remote --subscription_id --resource_group --workspace diff --git a/website/docs/Installation.md b/website/docs/Installation.md index e902791cd..7cc37943a 100644 --- a/website/docs/Installation.md +++ b/website/docs/Installation.md @@ -79,10 +79,10 @@ You can use FLAML in .NET in the following ways: **Low-code** - [*Model Builder*](https://dotnet.microsoft.com/apps/machinelearning-ai/ml-dotnet/model-builder) - A Visual Studio extension for training ML models using FLAML. For more information on how to install the, see the [install Model Builder](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-model-builder?tabs=visual-studio-2022) guide. -- [*ML.NET CLI*](https://docs.microsoft.com/dotnet/machine-learning/automate-training-with-cli) - A dotnet CLI tool for training machine learning models using FLAML on Windows, MacOS, and Linux. For more information on how to install the ML.NET CLI, see the [install the ML.NET CLI](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-ml-net-cli?tabs=windows) guide. 
+- [*ML.NET CLI*](https://docs.microsoft.com/dotnet/machine-learning/automate-training-with-cli) - A dotnet CLI tool for training machine learning models using FLAML on Windows, MacOS, and Linux. For more information on how to install the ML.NET CLI, see the [install the ML.NET CLI](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-ml-net-cli?tabs=windows) guide. **Code-first** -- [*Microsoft.ML.AutoML*](https://www.nuget.org/packages/Microsoft.ML.AutoML/0.20.0-preview.22313.1) - NuGet package that provides direct access to the FLAML AutoML APIs that power low-code solutions like Model Builder and the ML.NET CLI. For more information on installing NuGet packages, see the install and use a NuGet package in [Visual Studio](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-in-visual-studio) or [dotnet CLI](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-using-the-dotnet-cli) guides. +- [*Microsoft.ML.AutoML*](https://www.nuget.org/packages/Microsoft.ML.AutoML/0.20.0-preview.22313.1) - NuGet package that provides direct access to the FLAML AutoML APIs that power low-code solutions like Model Builder and the ML.NET CLI. For more information on installing NuGet packages, see the install and use a NuGet package in [Visual Studio](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-in-visual-studio) or [dotnet CLI](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-using-the-dotnet-cli) guides. To get started with the ML.NET API and AutoML, see the [csharp-notebooks](https://github.com/dotnet/csharp-notebooks#machine-learning). \ No newline at end of file diff --git a/website/docs/Research.md b/website/docs/Research.md index 360aa0c52..2a875c54e 100644 --- a/website/docs/Research.md +++ b/website/docs/Research.md @@ -17,7 +17,7 @@ For technical details, please check our research publications. * [Economical Hyperparameter Optimization With Blended Search Strategy](https://www.microsoft.com/en-us/research/publication/economical-hyperparameter-optimization-with-blended-search-strategy/). Chi Wang, Qingyun Wu, Silu Huang, Amin Saied. ICLR 2021. * [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://aclanthology.org/2021.acl-long.178.pdf). Susan Xueqing Liu, Chi Wang. ACL 2021. * [ChaCha for Online AutoML](https://www.microsoft.com/en-us/research/publication/chacha-for-online-automl/). Qingyun Wu, Chi Wang, John Langford, Paul Mineiro and Marco Rossi. ICML 2021. -* [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021). +* [Fair AutoML](https://arxiv.org/abs/2111.06495). Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2111.06495 (2021). * [Mining Robust Default Configurations for Resource-constrained AutoML](https://arxiv.org/abs/2202.09927). Moe Kayali, Chi Wang. ArXiv preprint arXiv:2202.09927 (2022). Many researchers and engineers have contributed to the technology development. 
In alphabetical order: Vijay Aski, Sebastien Bubeck, Surajit Chaudhuri, Kevin Chen, Yi Wei Chen, Nadiia Chepurko, Ofer Dekel, Alex Deng, Anshuman Dutt, Nicolo Fusi, Jianfeng Gao, Johannes Gehrke, Niklas Gustafsson, Silu Huang, Moe Kayali, Dongwoo Kim, Christian Konig, John Langford, Menghao Li, Mingqin Li, Susan Xueqing Liu, Zhe Liu, Naveen Gaur, Paul Mineiro, Vivek Narasayya, Jake Radzikowski, Marco Rossi, Amin Saied, Neil Tenenholtz, Olga Vrousgou, Chi Wang, Yue Wang, Markus Weimer, Qingyun Wu, Qiufeng Yin, Haozhe Zhang, Minjia Zhang, XiaoYun Zhang, Eric Zhu, Rui Zhuang.
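The lexicographic selection rule used throughout this patch can be seen in isolation in the following standalone sketch of the filtering step shared by `ExperimentAnalysis.lexico_best` and `FLOW2.lexico_compare`'s `update_fbest`; the objective names, values, and tolerances are illustrative rather than taken from the patch:

```python
import numpy as np

# Reported (minimized) values per objective, one entry per completed trial,
# listed in priority order: error_rate first, then pred_time.
histories = {
    "error_rate": np.array([0.20, 0.15, 0.14, 0.16]),
    "pred_time": np.array([0.30, 0.50, 0.40, 0.20]),
}
tolerances = {"error_rate": 0.02, "pred_time": 0.0}
targets = {"error_rate": 0.0, "pred_time": 0.0}
metrics = ["error_rate", "pred_time"]

feasible_index = list(range(len(histories[metrics[0]])))
for k in metrics:
    k_values = histories[k]
    # best value seen so far among still-feasible trials
    f_best = np.min(k_values.take(feasible_index))
    # a trial stays feasible if it is within f_best + tolerance, or already
    # meets the absolute target, whichever bound is looser
    bound = max(f_best + tolerances[k], targets[k])
    feasible_index = [i for i in feasible_index if k_values[i] <= bound]

# error_rate keeps trials 1, 2, 3 (values within 0.14 + 0.02); pred_time then
# keeps only trial 3, which lexico_best would return as the best trial.
print(feasible_index)  # [3]
```

Higher-priority objectives thus act as soft constraints: a later objective is only optimized among trials that stay within the tolerance band (or meet the target) of every earlier one.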