diff --git a/flaml/automl.py b/flaml/automl.py index 91eaf2215..d6f23ce1a 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1546,11 +1546,12 @@ class AutoML(BaseEstimator): return points @property - def prune_attr(self) -> Optional[str]: - """Attribute for pruning + def resource_attr(self) -> Optional[str]: + """Attribute of the resource dimension. Returns: - A string for the sample size attribute or None + A string for the sample size attribute + (the resource attribute in AutoML) or None. """ return "FLAML_sample_size" if self._sample else None @@ -2178,7 +2179,7 @@ class AutoML(BaseEstimator): low_cost_partial_config=self.low_cost_partial_config, points_to_evaluate=self.points_to_evaluate, cat_hp_cost=self.cat_hp_cost, - prune_attr=self.prune_attr, + resource_attr=self.resource_attr, min_resource=self.min_resource, max_resource=self.max_resource, config_constraints=[ @@ -2326,11 +2327,11 @@ class AutoML(BaseEstimator): ) search_space = search_state.search_space if self._sample: - prune_attr = "FLAML_sample_size" + resource_attr = "FLAML_sample_size" min_resource = self._min_sample_size max_resource = self._state.data_size[0] else: - prune_attr = min_resource = max_resource = None + resource_attr = min_resource = max_resource = None learner_class = self._state.learner_classes.get(estimator) if "grid" == self._hpo_method: # for synthetic exp only points_to_evaluate = [] @@ -2362,7 +2363,7 @@ class AutoML(BaseEstimator): points_to_evaluate=points_to_evaluate, low_cost_partial_config=low_cost_partial_config, cat_hp_cost=search_state.cat_hp_cost, - prune_attr=prune_attr, + resource_attr=resource_attr, min_resource=min_resource, max_resource=max_resource, config_constraints=[ diff --git a/flaml/searcher/blendsearch.py b/flaml/searcher/blendsearch.py index 147d1c055..9de56c203 100644 --- a/flaml/searcher/blendsearch.py +++ b/flaml/searcher/blendsearch.py @@ -45,7 +45,7 @@ class BlendSearch(Searcher): evaluated_rewards: Optional[List] = None, time_budget_s: 
Union[int, float] = None, num_samples: Optional[int] = None, - prune_attr: Optional[str] = None, + resource_attr: Optional[str] = None, min_resource: Optional[float] = None, max_resource: Optional[float] = None, reduction_factor: Optional[float] = None, @@ -91,17 +91,10 @@ class BlendSearch(Searcher): points_to_evaluate. time_budget_s: int or float | Time budget in seconds. num_samples: int | The number of configs to try. - prune_attr: A string of the attribute used for pruning. - Not necessarily in space. - When prune_attr is in space, it is a hyperparameter, e.g., - 'n_iters', and the best value is unknown. - When prune_attr is not in space, it is a resource dimension, - e.g., 'sample_size', and the peak performance is assumed - to be at the max_resource. - min_resource: A float of the minimal resource to use for the - prune_attr; only valid if prune_attr is not in space. - max_resource: A float of the maximal resource to use for the - prune_attr; only valid if prune_attr is not in space. + resource_attr: A string to specify the resource dimension and the best + performance is assumed to be at the max_resource. + min_resource: A float of the minimal resource to use for the resource_attr. + max_resource: A float of the maximal resource to use for the resource_attr. reduction_factor: A float of the reduction factor used for incremental pruning. 
global_search_alg: A Searcher instance as the global search @@ -160,7 +153,7 @@ class BlendSearch(Searcher): metric, mode, space, - prune_attr, + resource_attr, min_resource, max_resource, reduction_factor, @@ -409,7 +402,7 @@ class BlendSearch(Searcher): if (objective - self._metric_target) * self._ls.metric_op < 0: self._metric_target = objective if self._ls.resource: - self._best_resource = config[self._ls.prune_attr] + self._best_resource = config[self._ls.resource_attr] if thread_id: if not self._metric_constraint_satisfied: # no point has been found to satisfy metric constraint @@ -637,7 +630,7 @@ class BlendSearch(Searcher): # return None config = self._search_thread_pool[choice].suggest(trial_id) if not choice and config is not None and self._ls.resource: - config[self._ls.prune_attr] = self.best_resource + config[self._ls.resource_attr] = self.best_resource elif choice and config is None: # local search thread finishes if self._search_thread_pool[choice].converged: @@ -975,7 +968,7 @@ class BlendSearchTuner(BlendSearch, NNITuner): self._ls.metric, self._mode, config, - self._ls.prune_attr, + self._ls.resource_attr, self._ls.min_resource, self._ls.max_resource, self._ls.resource_multiple_factor, diff --git a/flaml/searcher/flow2.py b/flaml/searcher/flow2.py index b881dadc4..b1e69c9b8 100644 --- a/flaml/searcher/flow2.py +++ b/flaml/searcher/flow2.py @@ -39,7 +39,7 @@ class FLOW2(Searcher): metric: Optional[str] = None, mode: Optional[str] = None, space: Optional[dict] = None, - prune_attr: Optional[str] = None, + resource_attr: Optional[str] = None, min_resource: Optional[float] = None, max_resource: Optional[float] = None, resource_multiple_factor: Optional[float] = 4, @@ -67,17 +67,10 @@ class FLOW2(Searcher): i.e., the relative cost of the three choices of 'tree_method' is 1, 1 and 2 respectively. space: A dictionary to specify the search space. - prune_attr: A string of the attribute used for pruning. - Not necessarily in space. 
- When prune_attr is in space, it is a hyperparameter, e.g., - 'n_iters', and the best value is unknown. - When prune_attr is not in space, it is a resource dimension, - e.g., 'sample_size', and the peak performance is assumed - to be at the max_resource. - min_resource: A float of the minimal resource to use for the - prune_attr; only valid if prune_attr is not in space. - max_resource: A float of the maximal resource to use for the - prune_attr; only valid if prune_attr is not in space. + resource_attr: A string to specify the resource dimension and the best + performance is assumed to be at the max_resource. + min_resource: A float of the minimal resource to use for the resource_attr. + max_resource: A float of the maximal resource to use for the resource_attr. resource_multiple_factor: A float of the multiplicative factor used for increasing resource. cost_attr: A string of the attribute used for cost. @@ -100,7 +93,7 @@ class FLOW2(Searcher): self.seed = seed self.init_config = init_config self.best_config = flatten_dict(init_config) - self.prune_attr = prune_attr + self.resource_attr = resource_attr self.min_resource = min_resource self.resource_multiple_factor = resource_multiple_factor or 4 self.cost_attr = cost_attr @@ -148,11 +141,15 @@ class FLOW2(Searcher): if not hier: self._space_keys = sorted(self._tunable_keys) self.hierarchical = hier - if self.prune_attr and self.prune_attr not in self._space and self.max_resource: + if ( + self.resource_attr + and self.resource_attr not in self._space + and self.max_resource + ): self.min_resource = self.min_resource or self._min_resource() self._resource = self._round(self.min_resource) if not hier: - self._space_keys.append(self.prune_attr) + self._space_keys.append(self.resource_attr) else: self._resource = None self.incumbent = {} @@ -252,7 +249,7 @@ class FLOW2(Searcher): if partial_config == self.init_config: self._reset_times += 1 if self._resource: - config[self.prune_attr] = self.min_resource + 
config[self.resource_attr] = self.min_resource return config, space def create( @@ -264,7 +261,7 @@ class FLOW2(Searcher): self.metric, self.mode, space, - self.prune_attr, + self.resource_attr, self.min_resource, self.max_resource, self.resource_multiple_factor, @@ -328,7 +325,7 @@ class FLOW2(Searcher): self.incumbent = self.normalize(self.best_config) self.cost_incumbent = result.get(self.cost_attr) if self._resource: - self._resource = self.best_config[self.prune_attr] + self._resource = self.best_config[self.resource_attr] self._num_complete4incumbent = 0 self._cost_complete4incumbent = 0 self._num_proposedby_incumbent = 0 @@ -377,7 +374,7 @@ class FLOW2(Searcher): if self.best_config != config: self.best_config = config if self._resource: - self._resource = config[self.prune_attr] + self._resource = config[self.resource_attr] self.incumbent = self.normalize(self.best_config) self.cost_incumbent = result.get(self.cost_attr) self._cost_complete4incumbent = 0 @@ -495,18 +492,18 @@ class FLOW2(Searcher): self._resource = self._round(self._resource * self.resource_multiple_factor) self.cost_incumbent *= self._resource / old_resource config = self.best_config.copy() - config[self.prune_attr] = self._resource + config[self.resource_attr] = self._resource self._direction_tried = None self._configs[trial_id] = (config, self.step) return unflatten_dict(config) def _project(self, config): - """project normalized config in the feasible region and set prune_attr""" + """project normalized config in the feasible region and set resource_attr""" for key in self._bounded_keys: value = config[key] config[key] = max(0, min(1, value)) if self._resource: - config[self.prune_attr] = self._resource + config[self.resource_attr] = self._resource @property def can_suggest(self) -> bool: @@ -525,7 +522,7 @@ class FLOW2(Searcher): keys = sorted(config.keys()) if self.hierarchical else self._space_keys for key in keys: value = config[key] - if key == self.prune_attr: + if key == 
self.resource_attr: value_list.append(value) else: # key must be in space @@ -556,7 +553,7 @@ class FLOW2(Searcher): """whether the incumbent can reach the incumbent of other.""" config1, config2 = self.best_config, other.best_config incumbent1, incumbent2 = self.incumbent, other.incumbent - if self._resource and config1[self.prune_attr] > config2[self.prune_attr]: + if self._resource and config1[self.resource_attr] > config2[self.resource_attr]: # resource will not decrease return False for key in self._unordered_cat_hp: diff --git a/flaml/tune/space.py b/flaml/tune/space.py index 91fe08868..d050eed1a 100644 --- a/flaml/tune/space.py +++ b/flaml/tune/space.py @@ -247,7 +247,7 @@ def normalize( config_norm = {} for key, value in config.items(): domain = space.get(key) - if domain is None: # e.g., prune_attr + if domain is None: # e.g., resource_attr config_norm[key] = value continue if not callable(getattr(domain, "get_sampler", None)): @@ -405,7 +405,7 @@ def denormalize( # Handle int (4.6 -> 5) if isinstance(domain, sample.Integer): config_denorm[key] = int(round(config_denorm[key])) - else: # prune_attr + else: # resource_attr config_denorm[key] = value return config_denorm diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index c98c3bc6b..b7726453a 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -17,6 +17,7 @@ try: except (ImportError, AssertionError): ray_import = False from .analysis import ExperimentAnalysis as EA + from .result import DEFAULT_METRIC import logging @@ -117,11 +118,11 @@ def run( time_budget_s: Union[int, float] = None, points_to_evaluate: Optional[List[dict]] = None, evaluated_rewards: Optional[List] = None, - prune_attr: Optional[str] = None, + resource_attr: Optional[str] = None, min_resource: Optional[float] = None, max_resource: Optional[float] = None, reduction_factor: Optional[float] = None, - report_intermediate_result: Optional[bool] = False, + scheduler: Optional = None, search_alg=None, verbose: Optional[int] = 2, 
local_dir: Optional[str] = None, @@ -205,21 +206,29 @@ def run( points_to_evaluate are 3.0 and 1.0 respectively and want to inform run() - prune_attr: A string of the attribute used for pruning. - Not necessarily in space. - When prune_attr is in space, it is a hyperparameter, e.g., - 'n_iters', and the best value is unknown. - When prune_attr is not in space, it is a resource dimension, - e.g., 'sample_size', and the peak performance is assumed - to be at the max_resource. - min_resource: A float of the minimal resource to use for the - prune_attr; only valid if prune_attr is not in space. - max_resource: A float of the maximal resource to use for the - prune_attr; only valid if prune_attr is not in space. + resource_attr: A string to specify the resource dimension used by + the scheduler via "scheduler". + min_resource: A float of the minimal resource to use for the resource_attr. + max_resource: A float of the maximal resource to use for the resource_attr. reduction_factor: A float of the reduction factor used for incremental pruning. - report_intermediate_result: A boolean of whether intermediate results - are reported. If so, early stopping and pruning can be used. + scheduler: A scheduler for executing the experiment. Can be None, 'flaml', + 'asha' or a custom instance of the TrialScheduler class. Default is None: + in this case when resource_attr is provided, the 'flaml' scheduler will be + used, otherwise no scheduler will be used. When set 'flaml', an + authentic scheduler implemented in FLAML will be used. It does not + require users to report intermediate results in training_function. + Find more details about this scheduler in this paper + https://arxiv.org/pdf/1911.04706.pdf. + When set 'asha', the input for arguments "resource_attr", + "min_resource", "max_resource" and "reduction_factor" will be passed + to ASHA's "time_attr", "max_t", "grace_period" and "reduction_factor" + respectively. 
You can also provide a self-defined scheduler instance + of the TrialScheduler class. When 'asha' or self-defined scheduler is + used, you usually need to report intermediate results in the training + function. Please find examples using different types of schedulers + and how to set up the corresponding training functions in + test/tune/test_scheduler.py. TODO: point to notebook examples. search_alg: An instance of BlendSearch as the search algorithm to be used. The same instance can be used for iterative tuning. e.g., @@ -295,6 +304,20 @@ def run( from ..searcher.blendsearch import BlendSearch if search_alg is None: + flaml_scheduler_resource_attr = ( + flaml_scheduler_min_resource + ) = flaml_scheduler_max_resource = flaml_scheduler_reduction_factor = None + if scheduler in (None, "flaml"): + + # when scheduler is set 'flaml', we will use a scheduler that is + # authentic to the search algorithms in flaml. After setting up + # the search algorithm accordingly, we need to set scheduler to + # None in case it is later used in the trial runner. 
+ flaml_scheduler_resource_attr = resource_attr + flaml_scheduler_min_resource = min_resource + flaml_scheduler_max_resource = max_resource + flaml_scheduler_reduction_factor = reduction_factor + scheduler = None search_alg = BlendSearch( metric=metric or DEFAULT_METRIC, mode=mode, @@ -305,10 +328,10 @@ def run( cat_hp_cost=cat_hp_cost, time_budget_s=time_budget_s, num_samples=num_samples, - prune_attr=prune_attr, - min_resource=min_resource, - max_resource=max_resource, - reduction_factor=reduction_factor, + resource_attr=flaml_scheduler_resource_attr, + min_resource=flaml_scheduler_min_resource, + max_resource=flaml_scheduler_max_resource, + reduction_factor=flaml_scheduler_reduction_factor, config_constraints=config_constraints, metric_constraints=metric_constraints, ) @@ -334,12 +357,11 @@ def run( searcher.set_search_properties(metric, mode, config, setting) else: searcher.set_search_properties(metric, mode, config) - scheduler = None - if report_intermediate_result: + if scheduler == "asha": params = {} - # scheduler resource_dimension=prune_attr - if prune_attr: - params["time_attr"] = prune_attr + # scheduler resource_dimension=resource_attr + if resource_attr: + params["time_attr"] = resource_attr if max_resource: params["max_t"] = max_resource if min_resource: diff --git a/flaml/version.py b/flaml/version.py index deded3247..3e2f46a3a 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "0.8.2" +__version__ = "0.9.0" diff --git a/notebook/flaml_finetune_transformer.ipynb b/notebook/flaml_finetune_transformer.ipynb index 01718b9cc..beae8aed3 100644 --- a/notebook/flaml_finetune_transformer.ipynb +++ b/notebook/flaml_finetune_transformer.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ }, { 
"cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -56,18 +56,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'input_ids': [101, 2023, 2003, 1037, 3231, 102], 'attention_mask': [1, 1, 1, 1, 1, 1]}" ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -101,14 +101,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n" + "Reusing dataset glue (/home/ec2-user/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n" ] } ], @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -130,62 +130,68 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": "HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))", "application/vnd.jupyter.widget-view+json": { + "model_id": "0dcf9ca8ce024a2b832606a6a3219b17", "version_major": 2, - "version_minor": 0, - "model_id": "ecc66e6795f848e0a41e6cf1ce37bdf2" - } + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))" + ] 
}, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))", "application/vnd.jupyter.widget-view+json": { + "model_id": "c58845729f0a4261830ad679891e7c77", "version_major": 2, - "version_minor": 0, - "model_id": "2d33fc70b80b403080ad8c0e77ed1891" - } + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))", "application/vnd.jupyter.widget-view+json": { + "model_id": "9716d177a40748008cc6089e3d52a1d5", "version_major": 2, - "version_minor": 0, - "model_id": "d2ab3feb1a354187abb2dded0ead404f" - } + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n" ] @@ -197,11 +203,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n", @@ -229,8 +234,9 @@ " 'sentence': \"Our friends won't buy this analysis, let alone the next one we propose.\"}" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ @@ -246,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, 
"outputs": [], "source": [ @@ -255,12 +261,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n", "- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", @@ -277,11 +283,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "DistilBertForSequenceClassification(\n", @@ -399,8 +404,9 @@ ")" ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], "source": [ @@ -416,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -425,11 +431,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "Metric(name: \"glue\", features: {'predictions': Value(dtype='int64', id=None), 'references': Value(dtype='int64', id=None)}, usage: \"\"\"\n", @@ -477,8 +482,9 @@ "\"\"\", stored examples: 0)" ] }, + "execution_count": 15, "metadata": {}, - "execution_count": 16 + "output_type": "execute_result" } ], "source": [ @@ -487,7 +493,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -507,7 +513,7 @@ }, { "cell_type": "code", - 
"execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -517,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -529,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -545,40 +551,70 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { + "name": "stdout", "output_type": "stream", - "name": "stderr", "text": [ - "/home/chiw/.local/lib/python3.8/site-packages/torch/nn/parallel/_functions.py:65: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn('Was asked to gather along dimension 0, but all '\n" + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/html": "\n
\n \n \n \n [ 2/804 : < :, Epoch 0.00/3]\n
\n \n \n \n \n \n \n \n \n \n
StepTraining Loss

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/home/chiw/.local/lib/python3.8/site-packages/torch/nn/parallel/_functions.py:65: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn('Was asked to gather along dimension 0, but all '\n" - ] - }, - { - "output_type": "execute_result", "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " \n", + " [1591/3207 1:03:06 < 1:04:11, 0.42 it/s, Epoch 1.49/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining Loss
5000.571000
10000.515400
15000.356100

" + ], "text/plain": [ - "TrainOutput(global_step=804, training_loss=0.3209413462017306, metrics={'train_runtime': 115.5328, 'train_samples_per_second': 6.959, 'total_flos': 238363718990580.0, 'epoch': 3.0, 'init_mem_cpu_alloc_delta': 2336600064, 'init_mem_gpu_alloc_delta': 268953088, 'init_mem_cpu_peaked_delta': 257929216, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 2381066240, 'train_mem_gpu_alloc_delta': 806788096, 'train_mem_cpu_peaked_delta': 186974208, 'train_mem_gpu_peaked_delta': 550790144})" + "" ] }, "metadata": {}, - "execution_count": 21 + "output_type": "display_data" } ], "source": [ @@ -586,6 +622,8 @@ ] }, { + "cell_type": "markdown", + "metadata": {}, "source": [ "## Hyperparameter Optimization\n", "\n", @@ -595,13 +633,11 @@ "### Step 1. Define training method\n", "\n", "We define a function `train_distilbert(config: dict)` that accepts a hyperparameter configuration dict `config`. The specific configs will be generated by flaml's search algorithm in a given search space.\n" - ], - "cell_type": "markdown", - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -656,6 +692,8 @@ ] }, { + "cell_type": "markdown", + "metadata": {}, "source": [ "### Step 2. Define the search\n", "\n", @@ -664,13 +702,11 @@ "- The `search_space` for our hyperparameters\n", "- The metric and the mode ('max' or 'min') for optimization\n", "- The constraints (`n_cpus`, `n_gpus`, `num_samples`, and `time_budget_s`)" - ], - "cell_type": "markdown", - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -687,7 +723,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -704,809 +740,141 @@ ] }, { + "cell_type": "markdown", + "metadata": {}, "source": [ "### Step 3. 
Launch with `flaml.tune.run`\n", "\n", "We are now ready to launch the tuning using `flaml.tune.run`:" - ], - "cell_type": "markdown", - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [ { + "name": "stdout", "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { "name": "stderr", + "output_type": "stream", "text": [ - "2021-05-07 02:35:57,130\tINFO services.py:1172 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265\u001b[39m\u001b[22m\n", - "2021-05-07 02:35:58,044\tWARNING function_runner.py:540 -- Function checkpointing is disabled. This may result in unexpected behavior when using checkpointing features or certain schedulers. To enable, set the train function arguments to be `func(config, checkpoint_dir=None)`.\n", - "Tuning started...\n" + "/home/ec2-user/miniconda3/envs/myflaml/lib/python3.8/site-packages/ray/_private/services.py:238: UserWarning: Not all Ray Dashboard dependencies were found. To use the dashboard please install Ray using `pip install ray[default]`. To disable this message, set RAY_DISABLE_IMPORT_WARNING env var to '1'.\n", + " warnings.warn(warning_message)\n", + "2021-12-01 23:35:54,348\tWARNING function_runner.py:558 -- Function checkpointing is disabled. 
This may result in unexpected behavior when using checkpointing features or certain schedulers. To enable, set the train function arguments to be `func(config, checkpoint_dir=None)`.\n" ] }, { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/html": "== Status ==
Memory usage on this node: 26.0/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 1/infinite (1 RUNNING)

" - }, - "metadata": {} + "name": "stdout", + "output_type": "stream", + "text": [ + "Tuning started...\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 4.3/7.7 GiB
Using FIFO scheduling algorithm.
Resources requested: 4.0/4 CPUs, 4.0/4 GPUs, 0.0/2.34 GiB heap, 0.0/1.17 GiB objects
Result logdir: /home/ec2-user/FLAML/notebook/logs/train_distilbert_2021-12-01_23-35-54
Number of trials: 1/infinite (1 RUNNING)

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 4.5/7.7 GiB
Using FIFO scheduling algorithm.
Resources requested: 4.0/4 CPUs, 4.0/4 GPUs, 0.0/2.34 GiB heap, 0.0/1.17 GiB objects
Result logdir: /home/ec2-user/FLAML/notebook/logs/train_distilbert_2021-12-01_23-35-54
Number of trials: 2/infinite (1 PENDING, 1 RUNNING)

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 4.6/7.7 GiB
Using FIFO scheduling algorithm.
Resources requested: 4.0/4 CPUs, 4.0/4 GPUs, 0.0/2.34 GiB heap, 0.0/1.17 GiB objects
Result logdir: /home/ec2-user/FLAML/notebook/logs/train_distilbert_2021-12-01_23-35-54
Number of trials: 2/infinite (1 PENDING, 1 RUNNING)

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "\u001b[2m\u001b[36m(pid=886303)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m Reusing dataset glue (/home/ec2-user/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n", " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 30.9/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 2/infinite (1 PENDING, 1 RUNNING)

" - }, - "metadata": {} - }, - { - "output_type": "stream", "name": "stdout", - "text": [ - "Trial train_distilbert_a0c303d0 completed. Last result: loss=0.5879864692687988,matthews_correlation=0.0\n", - "\u001b[2m\u001b[36m(pid=886302)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.2/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 3/infinite (1 PENDING, 1 RUNNING, 1 TERMINATED)

" - }, - "metadata": {} - }, - { "output_type": "stream", - "name": "stdout", "text": [ - "Trial train_distilbert_a0c303d1 completed. Last result: loss=0.6030182838439941,matthews_correlation=0.0\n", - "\u001b[2m\u001b[36m(pid=886305)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.4/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 4/infinite (1 PENDING, 1 RUNNING, 2 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_c39b2ef0 completed. Last result: loss=0.5865175724029541,matthews_correlation=0.0\n", - "\u001b[2m\u001b[36m(pid=886304)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.7/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 5/infinite (1 PENDING, 1 RUNNING, 3 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_f00776e2 completed. Last result: loss=0.5813134908676147,matthews_correlation=0.0\n", - "\u001b[2m\u001b[36m(pid=892770)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 32.0/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 6/infinite (1 PENDING, 1 RUNNING, 4 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_11ab3900 completed. Last result: loss=0.5855756998062134,matthews_correlation=0.0\n", - "\u001b[2m\u001b[36m(pid=897725)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 30.9/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 7/infinite (1 PENDING, 1 RUNNING, 5 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_353025b6 completed. Last result: loss=0.5316324830055237,matthews_correlation=0.38889272875750597\n", - "\u001b[2m\u001b[36m(pid=907288)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.3/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 8/infinite (1 PENDING, 1 RUNNING, 6 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_5728a1de completed. Last result: loss=0.5385054349899292,matthews_correlation=0.2805581766595423\n", - "\u001b[2m\u001b[36m(pid=908756)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.6/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 9/infinite (1 PENDING, 1 RUNNING, 7 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_9394c2e2 completed. Last result: loss=0.5391769409179688,matthews_correlation=0.3272948213494272\n", - "\u001b[2m\u001b[36m(pid=912284)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.9/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 10/infinite (1 PENDING, 1 RUNNING, 8 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_b6543fec completed. Last result: loss=0.5275164842605591,matthews_correlation=0.37917684067701946\n", - "\u001b[2m\u001b[36m(pid=914582)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.0/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 11/infinite (1 PENDING, 1 RUNNING, 9 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_0071f998 completed. Last result: loss=0.5162246823310852,matthews_correlation=0.417156672319181\n", - "\u001b[2m\u001b[36m(pid=918301)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.2/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 12/infinite (1 PENDING, 1 RUNNING, 10 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_2f830be6 completed. Last result: loss=0.5516289472579956,matthews_correlation=0.06558874629318973\n", - "\u001b[2m\u001b[36m(pid=920414)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.7/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 13/infinite (1 PENDING, 1 RUNNING, 11 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_7ce03f12 completed. Last result: loss=0.523731529712677,matthews_correlation=0.45354879777314566\n", - "\u001b[2m\u001b[36m(pid=925520)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 32.3/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 14/infinite (1 PENDING, 1 RUNNING, 12 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_aaab0508 completed. Last result: loss=0.5112878680229187,matthews_correlation=0.4508496945113286\n", - "\u001b[2m\u001b[36m(pid=929827)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.2/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 15/infinite (1 PENDING, 1 RUNNING, 13 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_14262454 completed. Last result: loss=0.5350601673126221,matthews_correlation=0.40085080763525827\n", - "\u001b[2m\u001b[36m(pid=934238)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.8/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 16/infinite (1 PENDING, 1 RUNNING, 14 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_6d211fe6 completed. Last result: loss=0.609851062297821,matthews_correlation=0.5268023551875569\n", - "\u001b[2m\u001b[36m(pid=942628)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.1/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 17/infinite (1 PENDING, 1 RUNNING, 15 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_c980bae4 completed. Last result: loss=0.5422758460044861,matthews_correlation=0.32496815807366203\n", - "\u001b[2m\u001b[36m(pid=945904)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 32.2/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 18/infinite (1 PENDING, 1 RUNNING, 16 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_6d0d29d6 completed. Last result: loss=0.9238015413284302,matthews_correlation=0.5494735380761103\n", - "\u001b[2m\u001b[36m(pid=973869)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.2/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 19/infinite (1 PENDING, 1 RUNNING, 17 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_b16ea82a completed. Last result: loss=0.5334658622741699,matthews_correlation=0.4513069078434825\n", - "\u001b[2m\u001b[36m(pid=978003)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.2/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 20/infinite (1 PENDING, 1 RUNNING, 18 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_eddf7cc0 completed. Last result: loss=0.9832845330238342,matthews_correlation=0.5699304939602442\n", - "\u001b[2m\u001b[36m(pid=1000417)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 31.4/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 21/infinite (1 PENDING, 1 RUNNING, 19 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial train_distilbert_43008974 completed. Last result: loss=0.8574612736701965,matthews_correlation=0.5200220944545176\n", - "\u001b[2m\u001b[36m(pid=1022436)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", - " 0%| | 0/9 [00:00", - "text/html": "== Status ==
Memory usage on this node: 32.0/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 22/infinite (22 TERMINATED)

" - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": "", - "text/html": "== Status ==
Memory usage on this node: 32.0/251.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/4 GPUs, 0.0/150.39 GiB heap, 0.0/47.22 GiB objects (0/1.0 accelerator_type:V100)
Result logdir: /home/chiw/FLAML/notebook/logs/train_distilbert_2021-05-07_02-35-58
Number of trials: 22/infinite (22 TERMINATED)
\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Trial name status loc adam_beta1 adam_beta2 adam_epsilon learning_rate num_train_epochs iter total time (s) loss matthews_correlation
train_distilbert_a0c303d0TERMINATED 0.939079 0.991865 7.96945e-08 5.61152e-06 1 1 55.69090.587986 0
train_distilbert_a0c303d1TERMINATED 0.811036 0.997214 2.05111e-09 2.05134e-06 1.44427 1 71.76630.603018 0
train_distilbert_c39b2ef0TERMINATED 0.909395 0.993715 1e-07 5.26543e-06 1 1 53.76190.586518 0
train_distilbert_f00776e2TERMINATED 0.968763 0.990019 4.38943e-08 5.98035e-06 1.02723 1 56.83820.581313 0
train_distilbert_11ab3900TERMINATED 0.962198 0.991838 7.09296e-08 5.06608e-06 1 1 54.02310.585576 0
train_distilbert_353025b6TERMINATED 0.91596 0.991892 8.95426e-08 6.21568e-06 2.15443 1 98.32330.531632 0.388893
train_distilbert_5728a1deTERMINATED 0.926933 0.993146 1e-07 1.00902e-05 1 1 55.37260.538505 0.280558
train_distilbert_9394c2e2TERMINATED 0.928106 0.990614 4.49975e-08 3.45674e-06 2.72935 1 121.388 0.539177 0.327295
train_distilbert_b6543fecTERMINATED 0.876896 0.992098 1e-07 7.01176e-06 1.59538 1 76.02440.527516 0.379177
train_distilbert_0071f998TERMINATED 0.955024 0.991687 7.39776e-08 5.50998e-06 2.90939 1 126.871 0.516225 0.417157
train_distilbert_2f830be6TERMINATED 0.886931 0.989628 7.6127e-08 4.37646e-06 1.53338 1 73.89340.551629 0.0655887
train_distilbert_7ce03f12TERMINATED 0.984053 0.993956 8.70144e-08 7.82557e-06 4.08775 1 174.027 0.523732 0.453549
train_distilbert_aaab0508TERMINATED 0.940707 0.993946 1e-07 8.91979e-06 3.40243 1 146.249 0.511288 0.45085
train_distilbert_14262454TERMINATED 0.99 0.991696 4.60093e-08 4.83405e-06 3.4954 1 152.008 0.53506 0.400851
train_distilbert_6d211fe6TERMINATED 0.959277 0.994556 5.40791e-08 1.17333e-05 6.64995 1 271.444 0.609851 0.526802
train_distilbert_c980bae4TERMINATED 0.99 0.993355 1e-07 5.21929e-06 2.51275 1 111.799 0.542276 0.324968
train_distilbert_6d0d29d6TERMINATED 0.965773 0.995182 9.9752e-08 1.15549e-05 13.694 1 527.944 0.923802 0.549474
train_distilbert_b16ea82aTERMINATED 0.952781 0.993931 2.93182e-08 1.19145e-05 3.2293 1 139.844 0.533466 0.451307
train_distilbert_eddf7cc0TERMINATED 0.99 0.997109 8.13498e-08 1.28515e-05 15.5807 1 614.789 0.983285 0.56993
train_distilbert_43008974TERMINATED 0.929089 0.993258 1e-07 1.03892e-05 12.0357 1 474.387 0.857461 0.520022
train_distilbert_b3408a4eTERMINATED 0.99 0.993809 4.67441e-08 1.10418e-05 11.9165 1 474.126 0.828205 0.526164
train_distilbert_cfbfb220TERMINATED 0.979454 0.9999 1e-07 1.49578e-05 20.3715


" - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "2021-05-07 03:42:30,035\tINFO tune.py:450 -- Total run time: 3992.00 seconds (3991.90 seconds for the tuning loop).\n" + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m To disable this warning, you can either:\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m To disable this warning, you can either:\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n", + "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] } ], @@ -1525,8 +893,7 @@ " metric=HP_METRIC,\n", " mode=MODE,\n", " low_cost_partial_config={\"num_train_epochs\": 1}),\n", - " report_intermediate_result=False,\n", - " # uncomment the following if report_intermediate_result = True\n", + " # uncomment the following if scheduler = 'auto',\n", " # max_resource=max_num_epoch, min_resource=1,\n", " resources_per_trial={\"gpu\": num_gpus, \"cpu\": num_cpus},\n", " local_dir='logs/',\n", @@ -1540,14 +907,17 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "n_trials=22\ntime=3999.769361972809\nBest model eval matthews_correlation: 0.5699\nBest model 
parameters: {'num_train_epochs': 15.580684188655825, 'learning_rate': 1.2851507818900338e-05, 'adam_epsilon': 8.134982521948352e-08, 'adam_beta1': 0.99, 'adam_beta2': 0.9971094424784387}\n" + "n_trials=22\n", + "time=3999.769361972809\n", + "Best model eval matthews_correlation: 0.5699\n", + "Best model parameters: {'num_train_epochs': 15.580684188655825, 'learning_rate': 1.2851507818900338e-05, 'adam_epsilon': 8.134982521948352e-08, 'adam_beta1': 0.99, 'adam_beta2': 0.9971094424784387}\n" ] } ], @@ -1561,6 +931,8 @@ ] }, { + "cell_type": "markdown", + "metadata": {}, "source": [ "## Next Steps\n", "\n", @@ -1568,15 +940,17 @@ "\n", "- Huggingface provides _Callbacks_ which can be used to insert the `flaml.tune.report` call inside the training loop\n", "- Make sure to set `do_eval=True` in the `TrainingArguments` provided to `Trainer` and adjust the evaluation frequency accordingly" - ], - "cell_type": "markdown", - "metadata": {} + ] } ], "metadata": { + "interpreter": { + "hash": "1cfcceddaeccda27c3cce104660d474924e2ba82887c0e8e481b6ede3743c483" + }, "kernelspec": { - "name": "python385jvsc74a57bd031f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6", - "display_name": "Python 3.8.5 64-bit" + "display_name": "Python 3.8.5 64-bit", + "language": "python", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1588,7 +962,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.12" }, "metadata": { "interpreter": { @@ -1598,4 +972,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebook/flaml_pytorch_cifar10.ipynb b/notebook/flaml_pytorch_cifar10.ipynb index ec89f929b..7e096607b 100644 --- a/notebook/flaml_pytorch_cifar10.ipynb +++ b/notebook/flaml_pytorch_cifar10.ipynb @@ -286,7 +286,7 @@ "metadata": {}, "outputs": [], "source": [ - "time_budget_s = 600 # time budget in seconds\n", + "time_budget_s = 3600 # time budget in seconds\n", 
"gpus_per_trial = 0.5 # number of gpus for each trial; 0.5 means two training jobs can share one gpu\n", "num_samples = 500 # maximal number of trials\n", "np.random.seed(7654321)" @@ -315,7 +315,7 @@ " low_cost_partial_config={\"num_epochs\": 1},\n", " max_resource=max_num_epoch,\n", " min_resource=1,\n", - " report_intermediate_result=True, # only set to True when intermediate results are reported by tune.report\n", + " scheduler=\"asha\", # need to use tune.report to report intermediate results in training_function \n", " resources_per_trial={\"cpu\": 1, \"gpu\": gpus_per_trial},\n", " local_dir='logs/',\n", " num_samples=num_samples,\n", @@ -325,24 +325,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#trials=44\n", - "time=1193.913584947586\n", - "Best trial config: {'l1': 8, 'l2': 8, 'lr': 0.0008818671030627281, 'num_epochs': 55.9513429004283, 'batch_size': 3}\n", - "Best trial final validation loss: 1.0694482081472874\n", - "Best trial final validation accuracy: 0.6389\n", - "Files already downloaded and verified\n", - "Files already downloaded and verified\n", - "Best trial test set accuracy: 0.6294\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"#trials={len(result.trials)}\")\n", "print(f\"time={time.time()-start_time}\")\n", @@ -390,7 +375,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.8.12" }, "metadata": { "interpreter": { diff --git a/test/automl/test_forecast.py b/test/automl/test_forecast.py index f582d1119..5befb5307 100644 --- a/test/automl/test_forecast.py +++ b/test/automl/test_forecast.py @@ -71,7 +71,7 @@ def test_forecast_automl(budget=5): ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget) for config in config_history: print(config) - print(automl.prune_attr) + print(automl.resource_attr) 
print(automl.max_resource) print(automl.min_resource) @@ -210,7 +210,7 @@ def test_multivariate_forecast_num(budget=5): ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget) for config in config_history: print(config) - print(automl.prune_attr) + print(automl.resource_attr) print(automl.max_resource) print(automl.min_resource) @@ -341,7 +341,7 @@ def test_multivariate_forecast_cat(budget=5): ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget) for config in config_history: print(config) - print(automl.prune_attr) + print(automl.resource_attr) print(automl.max_resource) print(automl.min_resource) diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index 515d569b1..33c9c15ce 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -64,7 +64,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): ) = get_output_from_log(filename=settings["log_file_name"], time_budget=6) for config in config_history: print(config) - print(automl.prune_attr) + print(automl.resource_attr) print(automl.max_resource) print(automl.min_resource) diff --git a/test/automl/test_python_log.py b/test/automl/test_python_log.py index c367600c1..3681fcd45 100644 --- a/test/automl/test_python_log.py +++ b/test/automl/test_python_log.py @@ -80,7 +80,7 @@ class TestLogging(unittest.TestCase): low_cost_partial_config=low_cost_partial_config, points_to_evaluate=automl.points_to_evaluate, cat_hp_cost=automl.cat_hp_cost, - prune_attr=automl.prune_attr, + resource_attr=automl.resource_attr, min_resource=automl.min_resource, max_resource=automl.max_resource, config_constraints=[ diff --git a/test/automl/test_xgboost2d.py b/test/automl/test_xgboost2d.py index 2c17850a0..992420cd4 100644 --- a/test/automl/test_xgboost2d.py +++ b/test/automl/test_xgboost2d.py @@ -71,7 +71,7 @@ def test_simple(method=None): low_cost_partial_config=automl.low_cost_partial_config, 
points_to_evaluate=automl.points_to_evaluate, cat_hp_cost=automl.cat_hp_cost, - prune_attr=automl.prune_attr, + resource_attr=automl.resource_attr, min_resource=automl.min_resource, max_resource=automl.max_resource, time_budget_s=automl._state.time_budget, diff --git a/test/tune/test_pytorch_cifar10.py b/test/tune/test_pytorch_cifar10.py index 3087f4548..2151bf281 100644 --- a/test/tune/test_pytorch_cifar10.py +++ b/test/tune/test_pytorch_cifar10.py @@ -239,7 +239,7 @@ def cifar10_main( low_cost_partial_config={"num_epochs": 1}, max_resource=max_num_epochs, min_resource=1, - report_intermediate_result=True, + scheduler="asha", resources_per_trial={"cpu": 1, "gpu": gpus_per_trial}, local_dir="logs/", num_samples=num_samples, diff --git a/test/tune/test_scheduler.py b/test/tune/test_scheduler.py new file mode 100644 index 000000000..906d03619 --- /dev/null +++ b/test/tune/test_scheduler.py @@ -0,0 +1,157 @@ +"""Require: pip install flaml[test,ray] +""" +from logging import raiseExceptions +from flaml.scheduler.trial_scheduler import TrialScheduler +import numpy as np +from flaml import tune +import time + + +def rand_vector_unit_sphere(dim): + """this function allows you to generate + points that uniformly distribute on + the (dim-1)-sphere. 
+ """ + vec = np.random.normal(0, 1, dim) + mag = np.linalg.norm(vec) + return vec / mag + + +def simple_obj(config, resource=10000): + config_value_vector = np.array([config["x"], config["y"], config["z"]]) + score_sequence = [] + for i in range(resource): + a = rand_vector_unit_sphere(3) + a[2] = abs(a[2]) + point_projection = np.dot(config_value_vector, a) + score_sequence.append(point_projection) + score_avg = np.mean(np.array(score_sequence)) + score_std = np.std(np.array(score_sequence)) + score_lb = score_avg - 1.96 * score_std / np.sqrt(resource) + tune.report(samplesize=resource, sphere_projection=score_lb) + + +def obj_w_intermediate_report(resource, config): + config_value_vector = np.array([config["x"], config["y"], config["z"]]) + score_sequence = [] + for i in range(resource): + a = rand_vector_unit_sphere(3) + a[2] = abs(a[2]) + point_projection = np.dot(config_value_vector, a) + score_sequence.append(point_projection) + if (i + 1) % 100 == 0: + score_avg = np.mean(np.array(score_sequence)) + score_std = np.std(np.array(score_sequence)) + score_lb = score_avg - 1.96 * score_std / np.sqrt(i + 1) + tune.report(samplesize=i + 1, sphere_projection=score_lb) + + +def obj_w_suggested_resource(resource_attr, config): + resource = config[resource_attr] + simple_obj(config, resource) + + +def test_scheduler(scheduler=None): + from functools import partial + + resource_attr = "samplesize" + max_resource = 10000 + + # specify the objective functions + if scheduler is None: + evaluation_obj = simple_obj + elif scheduler == "flaml": + evaluation_obj = partial(obj_w_suggested_resource, resource_attr) + elif scheduler == "asha" or isinstance(scheduler, TrialScheduler): + evaluation_obj = partial(obj_w_intermediate_report, max_resource) + else: + try: + from ray.tune.schedulers import TrialScheduler as RayTuneTrialScheduler + except ImportError: + print( + "skip this condition, which may require TrialScheduler from ray tune, \ + as ray tune cannot be imported." 
+ ) + return + if isinstance(scheduler, RayTuneTrialScheduler): + evaluation_obj = partial(obj_w_intermediate_report, max_resource) + else: + raise ValueError + + analysis = tune.run( + evaluation_obj, + config={ + "x": tune.uniform(5, 20), + "y": tune.uniform(0, 10), + "z": tune.uniform(0, 10), + }, + metric="sphere_projection", + mode="max", + verbose=1, + resource_attr=resource_attr, + scheduler=scheduler, + max_resource=max_resource, + min_resource=100, + reduction_factor=2, + time_budget_s=1, + num_samples=500, + ) + + print("Best hyperparameters found were: ", analysis.best_config) + # print(analysis.get_best_trial) + return analysis.best_config + + +def test_no_scheduler(): + best_config = test_scheduler() + print("No scheduler, test error:", abs(10 / 2 - best_config["z"] / 2)) + + +def test_asha_scheduler(): + try: + from ray.tune.schedulers import ASHAScheduler + except ImportError: + print("skip the test as ray tune cannot be imported.") + return + best_config = test_scheduler(scheduler="asha") + print("Auto ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2)) + + +def test_custom_scheduler(): + try: + from ray.tune.schedulers import HyperBandScheduler + except ImportError: + print("skip the test as ray tune cannot be imported.") + return + my_scheduler = HyperBandScheduler( + time_attr="samplesize", max_t=1000, reduction_factor=2 + ) + best_config = test_scheduler(scheduler=my_scheduler) + print("Custom ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2)) + + +def test_custom_scheduler_default_time_attr(): + try: + from ray.tune.schedulers import ASHAScheduler + except ImportError: + print("skip the test as ray tune cannot be imported.") + return + my_scheduler = ASHAScheduler(max_t=10) + best_config = test_scheduler(scheduler=my_scheduler) + print( + "Custom ASHA scheduler (with ASHA default time attr), test error:", + abs(10 / 2 - best_config["z"] / 2), + ) + + +def test_flaml_scheduler(): + best_config = 
test_scheduler(scheduler="flaml") + print("FLAML scheduler, test error", abs(10 / 2 - best_config["z"] / 2)) + + +if __name__ == "__main__": + test_no_scheduler() + test_asha_scheduler() + test_custom_scheduler() + test_custom_scheduler_default_time_attr() + test_flaml_scheduler() diff --git a/test/tune/test_tune.py b/test/tune/test_tune.py index d4bfbd24b..ab7232a08 100644 --- a/test/tune/test_tune.py +++ b/test/tune/test_tune.py @@ -83,7 +83,7 @@ def _test_xgboost(method="BlendSearch"): mode="min", max_resource=max_iter, min_resource=1, - report_intermediate_result=True, + scheduler="asha", # You can add "gpu": 0.1 to allocate GPUs resources_per_trial={"cpu": 1}, local_dir="logs/",