diff --git a/.gitignore b/.gitignore index b5aae3601..83d5baf49 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,5 @@ notebook/.azureml mlruns logs automl.pkl + +.idea/* diff --git a/docs/index.rst b/docs/index.rst index f56acbfb5..c4ae47a98 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,6 +38,13 @@ Tune :members: +NLP +------ + +.. autoclass:: flaml.nlp.AutoTransformers + :members: + + .. Indices and tables .. ================== diff --git a/flaml/nlp/README.md b/flaml/nlp/README.md new file mode 100644 index 000000000..217f91727 --- /dev/null +++ b/flaml/nlp/README.md @@ -0,0 +1,32 @@ +How to use AutoTransformers: + +```python +from flaml.nlp.autotransformers import AutoTransformers + +autohf = AutoTransformers() +preparedata_setting = { + "dataset_subdataset_name": "glue:rte", + "pretrained_model_size": "electra-base-discriminator:base", + "data_root_path": "data/", + "max_seq_length": 128, + } +autohf.prepare_data(**preparedata_setting) +autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1}, + "num_samples": -1, # unlimited sample size + "time_budget": 3600, + "ckpt_per_epoch": 1, + "fp16": False, + } +validation_metric, analysis = \ + autohf.fit(**autohf_settings,) + +``` + +The current use cases that are supported: +1. A simplified version of fine-tuning the GLUE dataset using HuggingFace; +2. For selecting better search space for fine-tuning the GLUE dataset; +3. Use the search algorithms in flaml for more efficient fine-tuning of HuggingFace; + +The use cases that can be supported in future: +1. HPO fine-tuning for text generation; +2. HPO fine-tuning for question answering; \ No newline at end of file diff --git a/flaml/nlp/__init__.py b/flaml/nlp/__init__.py new file mode 100644 index 000000000..34444752e --- /dev/null +++ b/flaml/nlp/__init__.py @@ -0,0 +1,2 @@ +from flaml.nlp.autotransformers import AutoTransformers +from flaml.nlp.result_analysis.azure_utils import AzureUtils, JobID diff --git a/flaml/nlp/autotransformers.py b/flaml/nlp/autotransformers.py new file mode 100644 index 000000000..cb76ebe32 --- /dev/null +++ b/flaml/nlp/autotransformers.py @@ -0,0 +1,852 @@ +import json +import os + +import torch +import transformers +import wandb + +from .dataset.dataprocess_auto import AutoEncodeText +import numpy as np + +from ray.tune import CLIReporter + +import time +import ray +import datasets +from datasets import load_dataset +from transformers.trainer_utils import IntervalStrategy, HPSearchBackend + +from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, TrainingArguments + +from .dataset.metric_auto import get_default_and_alternative_metric +from .dataset.submission_auto import auto_output_prediction +from .dataset.task_auto import get_default_task +from .hpo.grid_searchspace_auto import AutoGridSearchSpace +from .hpo.hpo_searchspace import AutoHPOSearchSpace +from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING +from .utils import PathUtils, _variable_override_default_alternative +from .hpo.searchalgo_auto import AutoSearchAlgorithm +from .hpo.scheduler_auto import AutoScheduler +from .result_analysis.wandb_utils import WandbUtils +from .result_analysis.azure_utils import JobID +from .utils import load_console_args + +from .huggingface.trainer import TrainerForAutoTransformers + +import logging + +transformers.logging.set_verbosity_error() +logger = logging.getLogger(__name__) +logger_formatter = logging.Formatter( + '[%(name)s: %(asctime)s] {%(lineno)d} 
%(levelname)s - %(message)s', + '%m-%d %H:%M:%S') + +task_list = [ + "seq-classification", + "regression", + "question-answering" +] + + +class AutoTransformers: + '''The AutoTransformers class + + Example: + + .. code-block:: python + + autohf = AutoTransformers() + autohf_settings = {"resources_per_trial": {"cpu": 1}, + "num_samples": -1, + "time_budget": 100000, + "ckpt_per_epoch": 1, + "fp16": False, + } + + validation_metric, analysis = autohf.fit(**autohf_settings) + + ''' + + @staticmethod + def _convert_dict_to_ray_tune_space(config_json, mode="grid"): + search_space = {} + + if mode == "grid": + for each_hp in config_json.keys(): + this_config = config_json[each_hp] + assert isinstance(this_config, dict) or isinstance(this_config, list), \ + "config of " + each_hp + " must be dict or list" + search_space[each_hp] = ray.tune.grid_search(this_config) + else: + for each_hp in config_json.keys(): + this_config = config_json[each_hp] + assert isinstance(this_config, dict) or isinstance(this_config, list), \ + "config of " + each_hp + " must be dict or list" + if isinstance(this_config, dict): + lower = this_config["l"] + upper = this_config["u"] + space = this_config["space"] + if space == "log": + search_space[each_hp] = ray.tune.loguniform(lower, upper) + elif space == "linear": + search_space[each_hp] = ray.tune.uniform(lower, upper) + elif space == "quniform": + search_space[each_hp] = ray.tune.quniform(lower, upper, this_config["interval"]) + else: + search_space[each_hp] = ray.tune.choice(this_config) + + return search_space + + def _set_search_space(self, + **custom_hpo_args): + search_space_dict_hpo = search_space_dict_grid = None + if self.jobid_config.mod == "grid": + search_space_grid_json = AutoGridSearchSpace.from_model_and_dataset_name(self.jobid_config.pre, + self.jobid_config.presz, + self.get_full_data_name(), + self.jobid_config.subdat, "grid") + search_space_dict_grid \ + = AutoTransformers._convert_dict_to_ray_tune_space(search_space_grid_json, mode="grid") + search_space_dict_hpo = search_space_dict_grid + if self.jobid_config.mod != "grid" and self.jobid_config.mod != "gridbert": + search_space_hpo_json \ + = AutoHPOSearchSpace.from_model_and_dataset_name(logger, + self.jobid_config.spa, + self.jobid_config.pre, + self.jobid_config.presz, + self.get_full_data_name(), + self.jobid_config.subdat, + **custom_hpo_args) + search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="hpo") + elif self.jobid_config.mod == "gridbert": + search_space_hpo_json = AutoGridSearchSpace.from_model_and_dataset_name( + "bert", + "base", + self.get_full_data_name(), + self.jobid_config.subdat, "grid") + search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="grid") + + """ + resolve the conflict in search_space_dict_hpo: only one of "max_steps" and "num_train_epochs" can exist + in the search space. If both exists, num_train_epochs is removed. 
Similarly, if "warmup_steps" and + "warmup_ratio" both exist, warmup_ratio is removed + """ + search_space_dict_hpo = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_hpo) + self._search_space_hpo = search_space_dict_hpo + if self.jobid_config.mod == "grid": + search_space_dict_grid = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_grid) + self._search_space_grid = search_space_dict_grid + else: + self._search_space_grid = None + + try: + self.ds_config = custom_hpo_args["ds_config"] + except KeyError: + self.ds_config = None + + def _wrapper(self, func, *args): # with star + return func(*args) + + def _get_split_name(self, data_raw, fold_name=None): + if fold_name: + return fold_name + fold_keys = data_raw.keys() + if fold_keys == {"train", "validation", "test"}: + return "train", "validation", "test" + for each_key in fold_keys: + for each_split_name in {"train", "validation", "test"}: + assert not (each_key.startswith(each_split_name) and each_key != each_split_name), \ + "Dataset split must be within {}, must be explicitly specified in dataset_config, e.g.," \ + "'fold_name': ['train', 'validation_matched', 'test_matched']. Please refer to the example in the " \ + "documentation of AutoTransformers.prepare_data()".format(",".join(fold_keys)) + return "train", "validation", "test" + + def prepare_data(self, + data_root_path, + jobid_config=None, + is_wandb_on=False, + server_name=None, + max_seq_length=128, + fold_name=None, + resplit_portion=None, + **custom_data_args): + '''Prepare data + + An example: + + preparedata_setting = { + "server_name": "tmdev", + "data_root_path": "data/", + "max_seq_length": 128, + "jobid_config": jobid_config, + "wandb_utils": wandb_utils, + "resplit_portion": {"source": ["train", "validation"], + "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]} + } + autohf.prepare_data(**preparedata_setting) + + Args: + server_name: + a string variable, which can be tmdev or azureml + data_root_path: + the root path for storing the checkpoints and output results, e.g., "data/" + jobid_config: + a JobID object describing the profile of job + wandb_utils: + a WandbUtils object for wandb operations + max_seq_length (optional): + max_seq_lckpt_per_epochength for the huggingface, this hyperparameter must be specified + at the data processing step + resplit_portion: + the proportion for resplitting the train and dev data when split_mode="resplit". + If args.resplit_mode = "rspt", resplit_portion is required + ''' + console_args = load_console_args(**custom_data_args) + self._max_seq_length = max_seq_length + self._server_name = server_name if server_name is not None else "tmdev" + self.jobid_config = jobid_config if jobid_config is not None else JobID(console_args) + self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on, + console_args=console_args, + jobid_config=self.jobid_config) + self.wandb_utils.set_wandb_per_run() + + self.path_utils = PathUtils(self.jobid_config, hpo_data_root_path=data_root_path) + + if self.jobid_config.spt == "rspt": + assert resplit_portion, "If split mode is 'rspt', the resplit_portion must be provided. 
Please " \ + "refer to the example in the documentation of AutoTransformers.prepare_data()" + if self.jobid_config.subdat: + data_raw = load_dataset(self.get_full_data_name(), self.jobid_config.subdat) + else: + data_raw = self._wrapper(load_dataset, *self.jobid_config.dat) + + self._train_name, self._dev_name, self._test_name = self._get_split_name(data_raw, fold_name=fold_name) + auto_tokentoids_config = {"max_seq_length": self._max_seq_length} + self._tokenizer = AutoTokenizer.from_pretrained(self.jobid_config.pre_full, use_fast=True) + + def autoencodetext_from_model_and_dataset_name(): + return AutoEncodeText.from_model_and_dataset_name( + data_raw, + self.jobid_config.pre_full, + self.get_full_data_name(), + self.jobid_config.subdat, + **auto_tokentoids_config) + + data_encoded = autoencodetext_from_model_and_dataset_name() + self._max_seq_length = 0 + """ + Update the max_seq_length to the minimum of the actual max seq length and the user defined max_seq_length + """ + for each_fold in data_encoded.keys(): + self._max_seq_length = max(self._max_seq_length, + max([sum(data_encoded[each_fold][x]['attention_mask']) for x in + range(len(data_encoded[each_fold]))])) + self._max_seq_length = int((self._max_seq_length + 15) / 16) * 16 + data_encoded = autoencodetext_from_model_and_dataset_name() + + if self.jobid_config.spt == "rspt": + all_folds_from_source = [] + assert "source" in resplit_portion.keys(), "Must specify the source for resplitting the dataset in" \ + "resplit_portion, which is a list of folder names, e.g., resplit_portion = {'source': ['train']}" + + source_fold_names = resplit_portion['source'] + for each_fold_name in source_fold_names: + this_fold_dataset = data_encoded[each_fold_name] + all_folds_from_source.append(this_fold_dataset) + + merged_folds_from_source = datasets.concatenate_datasets(all_folds_from_source) + merged_folds_from_source = merged_folds_from_source.shuffle(seed=self.jobid_config.sddt) + + assert "train" in resplit_portion.keys() and "validation" in resplit_portion.keys() \ + and "test" in resplit_portion.keys(), "train, validation, test must exist in resplit_portion" + + for key in ["train", "validation", "test"]: + target_fold_start, target_fold_end = \ + int(resplit_portion[key][0] * len(merged_folds_from_source)), \ + int(resplit_portion[key][1] * len(merged_folds_from_source)) + subfold_dataset = merged_folds_from_source.select( + [x for x in range(target_fold_start, target_fold_end)]).flatten_indices() + if key == "train": + self.train_dataset = subfold_dataset + elif key == "validation": + self.eval_dataset = subfold_dataset + else: + self.test_dataset = subfold_dataset + else: + self.train_dataset, self.eval_dataset, self.test_dataset \ + = data_encoded[self._train_name], data_encoded[self._dev_name], data_encoded[self._test_name] + + def _load_model(self, + checkpoint_path=None, + per_model_config=None): + + this_task = get_default_task(self.get_full_data_name(), self.jobid_config.subdat) + if this_task == "seq-classification": + self._num_labels = len(self.train_dataset.features["label"].names) + elif this_task == "regression": + self._num_labels = 1 + + if not checkpoint_path: + checkpoint_path = self.jobid_config.pre_full + + def get_this_model(): + return AutoModelForSequenceClassification.from_pretrained(checkpoint_path, config=model_config) + + def is_pretrained_model_in_classification_head_list(): + return self.jobid_config.pre in MODEL_CLASSIFICATION_HEAD_MAPPING.keys() + + def _set_model_config(): + if per_model_config and 
len(per_model_config) > 0: + model_config = AutoConfig.from_pretrained( + checkpoint_path, + num_labels=model_config_num_labels, + **per_model_config) + else: + model_config = AutoConfig.from_pretrained( + checkpoint_path, + num_labels=model_config_num_labels) + return model_config + + if this_task == "seq-classification": + num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels + if is_pretrained_model_in_classification_head_list(): + model_config_num_labels = num_labels_old + else: + model_config_num_labels = self._num_labels + model_config = _set_model_config() + + if is_pretrained_model_in_classification_head_list(): + if self._num_labels != num_labels_old: + this_model = get_this_model() + model_config.num_labels = self._num_labels + this_model.num_labels = self._num_labels + this_model.classifier = AutoSeqClassificationHead \ + .from_model_type_and_config(self.jobid_config.pre, + model_config) + else: + this_model = get_this_model() + else: + this_model = get_this_model() + + this_model.resize_token_embeddings(len(self._tokenizer)) + return this_model + elif this_task == "regression": + model_config = self._set_model_config(checkpoint_path, per_model_config, 1) + this_model = get_this_model() + return this_model + + def _get_metric_func(self): + if self.get_full_data_name() in ("glue", "super_glue"): + metric = datasets.load.load_metric(self.get_full_data_name(), self.jobid_config.subdat) + elif self.get_full_data_name() in ("squad", "squad_v2"): + metric = datasets.load.load_metric(self.get_full_data_name()) + else: + metric = datasets.load.load_metric(self.metric_name) + return metric + + def _compute_metrics_by_dataset_name(self, + eval_pred): + predictions, labels = eval_pred + predictions = np.squeeze(predictions) \ + if self.task_name == "regression" else np.argmax(predictions, axis=1) + metric_func = self._get_metric_func() + return metric_func.compute(predictions=predictions, references=labels) + + def _compute_checkpoint_freq(self, + num_train_epochs, + batch_size): + if "gpu" in self._resources_per_trial: + ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size + / self._resources_per_trial["gpu"] / self.ckpt_per_epoch) + 1 + else: + ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size + / self._resources_per_trial["cpu"] / self.ckpt_per_epoch) + 1 + + return ckpt_step_freq + + @staticmethod + def _separate_config(config): + training_args_config = {} + per_model_config = {} + + for key in config.keys(): + if key in TrainingArguments.__dict__.keys(): + training_args_config[key] = config[key] + else: + per_model_config[key] = config[key] + + return training_args_config, per_model_config + + def _objective(self, config, reporter, checkpoint_dir=None): + def model_init(): + return self._load_model() + + from transformers.trainer_utils import set_seed + set_seed(config["seed"]) + + training_args_config, per_model_config = AutoTransformers._separate_config(config) + this_model = self._load_model(per_model_config=per_model_config) + + trial_id = reporter.trial_id + self.path_utils.make_dir_per_trial(trial_id) + + ckpt_freq = self._compute_checkpoint_freq( + num_train_epochs=config["num_train_epochs"], + batch_size=config["per_device_train_batch_size"]) + + assert self.path_utils.ckpt_dir_per_trial + training_args = TrainingArguments( + output_dir=self.path_utils.ckpt_dir_per_trial, + do_eval=False, + per_device_eval_batch_size=32, + eval_steps=ckpt_freq, + evaluation_strategy=IntervalStrategy.STEPS, + 
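# save a checkpoint at the same step frequency as evaluation so the best evaluated step can be restored later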
save_steps=ckpt_freq, + save_total_limit=0, + fp16=self._fp16, + deepspeed=self.ds_config, + **training_args_config, + ) + + trainer = TrainerForAutoTransformers( + this_model, + training_args, + model_init=model_init, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + tokenizer=self._tokenizer, + compute_metrics=self._compute_metrics_by_dataset_name, + ) + trainer.logger = logger + trainer.trial_id = reporter.trial_id + + """ + create a wandb run. If os.environ["WANDB_MODE"] == "offline", run = None + """ + run = self.wandb_utils.set_wandb_per_trial() + if os.environ["WANDB_MODE"] == "online": + for each_hp in config: + wandb.log({each_hp: config[each_hp]}) + trainer.train() + trainer.evaluate(self.eval_dataset) + """ + If a wandb run was created, close the run after train and evaluate finish + """ + if run: + run.finish() + + def _verify_init_config(self, + **custom_hpo_args): + for key in custom_hpo_args.keys(): + if key == "points_to_evaluate": + for each_init_config in custom_hpo_args[key]: + for each_hp in each_init_config.keys(): + assert each_hp in self._search_space_hpo.keys(), \ + "points_to_evaluate hp must be within the search space" + + assert isinstance(each_init_config[each_hp], int) or \ + isinstance(each_init_config[each_hp], float) or \ + isinstance(each_init_config[each_hp], str) or \ + isinstance(each_init_config[each_hp], bool), " points_to_evaluate must be a scalar" + + assert isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Categorical) or \ + isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Float) or \ + isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Integer), \ + "Every hp space must either be categorical, integer or float" + + if isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Categorical): + assert each_init_config[each_hp] in self._search_space_hpo[each_hp].categories, \ + "points_to_evaluate {each_hp} value must be within the search space" + else: + assert self._search_space_hpo[each_hp].lower <= each_init_config[each_hp] <= \ + self._search_space_hpo[each_hp].upper, \ + "points_to_evaluate {each_hp} value must be within the search space" + + def _get_search_algo(self, + search_algo_name, + search_algo_args_mode, + **custom_hpo_args): + if search_algo_name in ("bs", "cfo"): + self._verify_init_config(**custom_hpo_args) + search_algo = AutoSearchAlgorithm.from_method_name( + search_algo_name, + search_algo_args_mode, + self._search_space_hpo, + **custom_hpo_args) + return search_algo + + @staticmethod + def _recover_checkpoint(tune_checkpoint_dir): + assert tune_checkpoint_dir + # Get subdirectory used for Huggingface. + subdirs = [ + os.path.join(tune_checkpoint_dir, name) + for name in os.listdir(tune_checkpoint_dir) + if os.path.isdir(os.path.join(tune_checkpoint_dir, name)) + ] + # There should only be 1 subdir. 
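# (typically a single directory named checkpoint-<global_step> written by the HuggingFace Trainer)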
+ assert len(subdirs) == 1, subdirs + return subdirs[0] + + def get_full_data_name(self): + return JobID.dataset_list_to_str(self.jobid_config.dat, "dat") + + def _save_ckpt_json(self, + best_ckpt): + json.dump({"best_ckpt": best_ckpt}, + open(os.path.join(self.path_utils.result_dir_per_run, + "save_ckpt_" + self.jobid_config.to_jobid_string() + ".json"), "w")) + + def _save_output_metric(self, + output_metrics): + json.dump(output_metrics, open( + os.path.join(self.path_utils.result_dir_per_run, + "output_metric_" + self.jobid_config.to_jobid_string() + ".json"), "w")) + + def _load_ckpt_json(self, + ckpt_dir=None, + **kwargs): + if not ckpt_dir: + ckpt_dir = os.path.join(self.path_utils.result_dir_per_run, + "save_ckpt_" + self.jobid_config.to_jobid_string() + ".json") + try: + ckpt_json = json.load(open(ckpt_dir)) + return ckpt_json["best_ckpt"] + except FileNotFoundError as err: + logger.error("Saved checkpoint not found. Please make sure checkpoint is stored under {}".format(ckpt_dir)) + raise err + + def _set_metric(self, custom_metric_name=None, custom_metric_mode_name=None): + default_metric, default_mode, all_metrics, all_modes = get_default_and_alternative_metric( + self.get_full_data_name(), + subdataset_name=self.jobid_config.subdat, + custom_metric_name=custom_metric_name, + custom_metric_mode_name=custom_metric_mode_name) + _variable_override_default_alternative(logger, + self, + "metric_name", + default_metric, + all_metrics, + custom_metric_name) + _variable_override_default_alternative(logger, + self, + "metric_mode_name", + default_mode, + all_modes, + custom_metric_mode_name) + self._all_metrics = all_metrics + self._all_modes = all_modes + + def _set_task(self): + self.task_name = get_default_task(self.get_full_data_name(), self.jobid_config.subdat) + + def fit_hf(self, + resources_per_trial, + num_samples, + time_budget, + custom_metric_name=None, + custom_metric_mode_name=None, + _fp16=True, + **custom_hpo_args + ): + '''Fine tuning the huggingface using HF's API Transformers.hyperparameter_search (for comparitive purpose). + Transformers.hyperparameter_search has the following disadvantages: + (1) it does not return tune.analysis.Analysis result, what is analysis used for + (2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function, + search space, etc. are defined inside of Transformers.hyperparameter_search. 
+ + An example: + autohf_settings = {"resources_per_trial": {"cpu": 1}, + "num_samples": 1, + "time_budget": 100000, + "ckpt_per_epoch": 1, + "fp16": False, + } + validation_metric, analysis = autohf.fit(**autohf_settings,) + + Args: + resources_per_trial: + A dict showing the resources used by each trial, + e.g., {"gpu": 4, "cpu": 4} + num_samples: + An int variable of the maximum number of trials + time_budget: + An int variable of the maximum time budget + custom_metric_name: + A string of the dataset name or a function, + e.g., 'accuracy', 'f1', 'loss', + custom_metric_mode_name: + A string of the mode name, + e.g., "max", "min", "last", "all" + fp16: + boolean, default = True | whether to use fp16 + custom_hpo_args: + The additional keyword arguments, e.g., + custom_hpo_args = {"points_to_evaluate": [{ + "num_train_epochs": 1, + "per_device_train_batch_size": 128, }]} + + Returns: + validation_metric: + a dict storing the validation score + ''' + + def model_init(): + return self._load_model() + + def ray_hp_space(trial): + return { + "learning_rate": ray.tune.loguniform(1e-6, 1e-4), + "num_train_epochs": ray.tune.choice(list(range(1, 6))), + "seed": ray.tune.quniform(1, 41, 1), + "per_device_train_batch_size": ray.tune.choice([4, 8, 16, 32, 64]), + } + + self._set_metric(custom_metric_name, custom_metric_mode_name) + self._set_task() + + training_args = TrainingArguments( + output_dir=self.path_utils.hpo_ckpt_path, + fp16=_fp16, + ) + this_model = self._load_model() + + trainer = TrainerForAutoTransformers( + this_model, + training_args, + model_init=model_init, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + tokenizer=self._tokenizer, + compute_metrics=self._compute_metrics_by_dataset_name, + ) + self.path_utils.make_dir_per_run() + + start_time = time.time() + best_run = trainer.hyperparameter_search( + n_trials=num_samples, + time_budget_s=time_budget, + hp_space=ray_hp_space, + backend=HPSearchBackend.RAY, + resources_per_trial=resources_per_trial) + duration = time.time() - start_time + self.last_run_duration = duration + + hp_dict = best_run.hyperparameters + hp_dict["seed"] = int(hp_dict["seed"]) + + best_training_args = TrainingArguments( + output_dir=self.path_utils.hpo_ckpt_path, + fp16=_fp16, + **hp_dict, + ) + + best_trainer = TrainerForAutoTransformers( + this_model, + best_training_args, + model_init=model_init, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + tokenizer=self._tokenizer, + compute_metrics=self._compute_metrics_by_dataset_name, + ) + + best_model_checkpoint_path = os.path.join(self.path_utils.hpo_ckpt_path, "hpo_hf") + if not os.path.exists(best_model_checkpoint_path): + os.mkdir(best_model_checkpoint_path) + best_trainer.train() + best_trainer.save_model(best_model_checkpoint_path) + self._save_ckpt_json(best_model_checkpoint_path) + validation_metric = best_trainer.evaluate() + + return validation_metric + + def fit(self, + num_samples, + time_budget, + custom_metric_name=None, + custom_metric_mode_name=None, + ckpt_per_epoch=1, + fp16=True, + verbose=1, + resources_per_trial={"gpu": 1, "cpu": 1}, + **custom_hpo_args): + '''Fine tuning the huggingface using the hpo setting + + An example: + autohf_settings = {"resources_per_trial": {"cpu": 1}, + "num_samples": 1, + "time_budget": 100000, + "ckpt_per_epoch": 1, + "fp16": False, + } + validation_metric, analysis = autohf.fit(**autohf_settings) + + Args: + resources_per_trial: + A dict showing the resources used by each trial, + e.g., {"gpu": 4, "cpu": 4} + 
num_samples: + An int variable of the maximum number of trials + time_budget: + An int variable of the maximum time budget + custom_metric_name: + A string of the dataset name or a function, + e.g., 'accuracy', 'f1', 'loss' + custom_metric_mode_name: + A string of the mode name, + e.g., "max", "min", "last", "all" + ckpt_per_epoch: + An integer value of number of checkpoints per epoch, default = 1 + verbose: + int, default=1 | Controls the verbosity, higher means more + messages + fp16: + boolean, default = True | whether to use fp16 + custom_hpo_args: + The additional keyword arguments, e.g., + custom_hpo_args = {"points_to_evaluate": [{ + "num_train_epochs": 1, + "per_device_train_batch_size": 128, }]} + + Returns: + validation_metric: + a dict storing the validation score + analysis: + a ray.tune.analysis.Analysis object storing the analysis results from tune.run + + ''' + self._resources_per_trial = resources_per_trial + self._set_metric(custom_metric_name, custom_metric_mode_name) + self._set_task() + self._fp16 = fp16 + ray.init(local_mode=True) + + self._set_search_space(**custom_hpo_args) + search_algo = self._get_search_algo(self.jobid_config.alg, self.jobid_config.arg, **custom_hpo_args) + scheduler = AutoScheduler.from_scheduler_name(self.jobid_config.pru) + self.ckpt_per_epoch = ckpt_per_epoch + self.path_utils.make_dir_per_run() + + logger.addHandler(logging.FileHandler(os.path.join(self.path_utils.log_dir_per_run, 'tune.log'))) + old_level = logger.getEffectiveLevel() + self._verbose = verbose + if verbose == 0: + logger.setLevel(logging.WARNING) + + assert self.path_utils.ckpt_dir_per_run + start_time = time.time() + + tune_config = self._search_space_hpo + tune_config["seed"] = self.jobid_config.sdhf + + analysis = ray.tune.run( + self._objective, + metric=self.metric_name, + mode=self.metric_mode_name, + name="ray_result", + resources_per_trial=resources_per_trial, + config=tune_config, + verbose=verbose, + local_dir=self.path_utils.ckpt_dir_per_run, + num_samples=num_samples, + time_budget_s=time_budget, + keep_checkpoints_num=1, + scheduler=scheduler, + search_alg=search_algo, + ) + duration = time.time() - start_time + self.last_run_duration = duration + logger.info("Total running time: {} seconds".format(duration)) + + ray.shutdown() + + best_trial = analysis.get_best_trial(scope="all", metric=self.metric_name, mode=self.metric_mode_name) + validation_metric = {"eval_" + self.metric_name + : best_trial.metric_analysis[self.metric_name][self.metric_mode_name]} + for x in range(len(self._all_metrics)): + validation_metric["eval_" + self._all_metrics[x]] \ + = best_trial.metric_analysis[self._all_metrics[x]][self._all_modes[x]] + + get_best_ckpt = analysis.get_best_checkpoint(best_trial, metric=self.metric_name, mode=self.metric_mode_name) + best_ckpt = AutoTransformers._recover_checkpoint(get_best_ckpt) + + self._save_ckpt_json(best_ckpt) + + if verbose == 0: + logger.setLevel(old_level) + + return validation_metric, analysis + + def predict(self, + ckpt_json_dir=None, + **kwargs): + '''Predict label for test data. + + An example: + predictions, test_metric = autohf.predict() + + Args: + ckpt_json_dir: + the checkpoint for the fine-tuned huggingface if you wish to override + the saved checkpoint in the training stage under self.path_utils._result_dir_per_run + + Returns: + A numpy array of shape n * 1 - - each element is a predicted class + label for an instance. 
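The second return value is the dict of metric values computed on the resplit test fold when the split mode is 'rspt'; it is None when predicting on the original test split, which has no labels.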
+ ''' + best_checkpoint = self._load_ckpt_json(ckpt_json_dir, **kwargs) + best_model = self._load_model(checkpoint_path=best_checkpoint) + training_args = TrainingArguments(per_device_eval_batch_size=1, + output_dir=self.path_utils.result_dir_per_run) + test_trainer = TrainerForAutoTransformers(best_model, training_args) + + if self.jobid_config.spt == "ori": + try: + self.test_dataset.remove_columns_("label") + except ValueError: + pass + + test_dataloader = test_trainer.get_test_dataloader(self.test_dataset) + predictions, labels, _ = test_trainer.prediction_loop(test_dataloader, description="Prediction") + predictions = np.squeeze(predictions) \ + if get_default_task(self.get_full_data_name(), self.jobid_config.subdat) == "regression" \ + else np.argmax(predictions, axis=1) + torch.cuda.empty_cache() + + if self.jobid_config.spt == "rspt": + assert labels is not None + metric = self._get_metric_func() + output_metric = metric.compute(predictions=predictions, references=labels) + self._save_output_metric(output_metric) + return predictions, output_metric + else: + return predictions, None + + def output_prediction(self, + predictions=None, + output_prediction_path=None, + output_zip_file_name=None): + """ + When using the original GLUE split, output the prediction on test data, + and prepare the .zip file for submission + + Example: + local_archive_path = self.autohf.output_prediction(predictions, + output_prediction_path= self.console_args.data_root_dir + "result/", + output_zip_file_name=azure_save_file_name) + + Args: + predictions: + a list of predictions, which is the output of AutoTransformers.predict() + output_prediction_path: + output path for the prediction + output_zip_file_name: + an string, which is the name of the output zip file + + Returns: + the path of the output .zip file + """ + return auto_output_prediction(self.get_full_data_name(), output_prediction_path, + output_zip_file_name, predictions, self.train_dataset, + self._dev_name, self.jobid_config.subdat) diff --git a/flaml/nlp/dataset/__init__.py b/flaml/nlp/dataset/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/flaml/nlp/dataset/dataprocess_auto.py b/flaml/nlp/dataset/dataprocess_auto.py new file mode 100644 index 000000000..ab3c49e3a --- /dev/null +++ b/flaml/nlp/dataset/dataprocess_auto.py @@ -0,0 +1,225 @@ +from collections import OrderedDict +from functools import partial + +from transformers import AutoTokenizer +from .sentence_keys_auto import get_sentence_keys + + +def inserting_sepp(sent, start, end, this_tokenizer): + return \ + sent[:start].rstrip() + " " + this_tokenizer.sep_token + " " + sent[start:end] \ + + " " + this_tokenizer.sep_token + " " + sent[end:].lstrip() + + +def tokenize_superglue_copa(this_example, + this_tokenizer, + dataset_name, + subdataset_name=None, + **kwargs): + return None + + +def tokenize_superglue_wic_gpt2(this_example, + this_tokenizer, + dataset_name, + subdataset_name=None, + **kwargs): + return None + + +def tokenize_superglue_wic(this_example, + this_tokenizer, + dataset_name, + subdataset_name=None, + **kwargs + ): + """ + tokenize the data from the wic task (word-in-context dataset), + e.g., sentence 1: "There's a lot of trash on the bed of the river" + sentence 2: "I keep a glass of water next to my bed when I sleep", + label = False (different word senses) + In the superglue data, the position of the word in sentence 1 and 2 are provided + What this function does is to update the span position after tokenization, based on each LM's own 
tokenizer, + The key is to insert an [SEP] before and after the original sentence, then feed it into the LM's tokenizer. + There are two challenges: + (1) Each LM's tokenizations are different, e.g., in XLNet's tokenizer, the paddings are on the left' + (2) Some LM's tokenization would add an underline symbol before the word, e.g., "There's a lot" + -> [_There, _', _s, _a, _lot] + When underline meets special char such as '"', "'", the tokenized sequence after adding [SEP] needs to be + aligned with the sequence tokenized without [SEP]. We use a two pointer algorithm for the alignment + """ + sent1, sent2 = this_example["sentence1"], this_example["sentence2"] + start1, end1 = this_example["start1"], this_example["end1"] + start2, end2 = this_example["start2"], this_example["end2"] + """ + Add [SEP] to the sentence + """ + altered_sent1 = inserting_sepp(sent1, start1, end1, this_tokenizer) + altered_sent2 = inserting_sepp(sent2, start2, end2, this_tokenizer) + input_ids_sepp = this_tokenizer(*(altered_sent1, altered_sent2), + padding="max_length", + max_length=1024, + truncation=True)["input_ids"] + data_pair = (sent1, sent2) + assert "max_seq_length" in kwargs, "max_seq_length must be provided for glue" + this_data = this_tokenizer(*data_pair, padding="max_length", max_length=kwargs["max_seq_length"], truncation=True) + input_ids = this_data["input_ids"] + which_sepp = 0 + + """ + span_start_end: a 2x2 array: + * (span_start_end[0][0], span_start_end[0][1]) are the spans of the position of the word in the first sentence + * (span_start_end[1][0], span_start_end[1][1]) are the spans of the position of the word in the second sentence + """ + span_start_end = [[-1, -1], [-1, -1]] + + ptr_sepp = ptr_nosepp = 0 + try: + padding_direction = this_tokenizer.padding_side + if padding_direction == "left": + padding_id = input_ids_sepp[0] + while input_ids_sepp[ptr_sepp] == padding_id: + ptr_sepp += 1 + while input_ids[ptr_nosepp] == padding_id: + ptr_nosepp += 1 + except KeyError: + pass + sep_id = this_tokenizer.convert_tokens_to_ids([this_tokenizer.sep_token])[0] + """ + use two pointers to align the tokenized sequence before and after adding [SEP]; + ptr_sepp: the pointer after adding; ptr_nosepp: the pointer without adding + """ + while ptr_sepp < len(input_ids_sepp) and ptr_nosepp < len(input_ids) and \ + input_ids_sepp[ptr_sepp] != 0 and input_ids[ptr_nosepp] != 0: + if input_ids_sepp[ptr_sepp] == input_ids[ptr_nosepp]: + ptr_sepp += 1 + ptr_nosepp += 1 + else: + if not (input_ids_sepp[ptr_sepp] == sep_id + or this_tokenizer.convert_ids_to_tokens([input_ids_sepp[ptr_sepp]])[0] in ('▁', '_')): + break + if input_ids_sepp[ptr_sepp] == sep_id: + span_start_end[int(which_sepp / 2)][which_sepp % 2] = ptr_nosepp + which_sepp += 1 + ptr_sepp += 1 + else: + ptr_sepp += 1 + """ + max_word_span is the maximum tokens of the word + It is set to 16 following deberta: + https://github.com/microsoft/DeBERTa/blob/master/DeBERTa/apps/tasks/superglue_tasks.py#L1054 + """ + max_word_span = 16 + word_indices = [] + for idx1 in range(2): + if span_start_end[idx1][1] < kwargs["max_seq_length"]: + first_span = [x for x in range(span_start_end[idx1][0], span_start_end[idx1][1]) + if x < kwargs["max_seq_length"]] + [0] * (max_word_span - span_start_end[idx1][1] + + span_start_end[idx1][0]) + word_indices.append(first_span) + this_data["word_spans"] = word_indices + return this_data + + +def tokenize_glue(this_example, + this_tokenizer, + dataset_name, + subdataset_name=None, + **kwargs): + sentence_keys = 
get_sentence_keys(dataset_name, subdataset_name) + + if len(sentence_keys) > 1: + sentence1_key, sentence2_key = sentence_keys[0], sentence_keys[1] + else: + sentence1_key = sentence_keys[0] + sentence2_key = None + + data_pair = ( + (this_example[sentence1_key],) if sentence2_key is None else ( + this_example[sentence1_key], this_example[sentence2_key]) + ) + assert "max_seq_length" in kwargs, "max_seq_length must be provided for glue" + return this_tokenizer(*data_pair, padding="max_length", max_length=kwargs["max_seq_length"], truncation=True) + + +TOKENIZER_MAPPING = OrderedDict( + [ + (("glue", "rte"), tokenize_glue), + (("glue", "mrpc"), tokenize_glue), + (("glue", "cola"), tokenize_glue), + (("glue", "wnli"), tokenize_glue), + (("glue", "stsb"), tokenize_glue), + (("glue", "sst2"), tokenize_glue), + (("glue", "mnli"), tokenize_glue), + (("glue", "qqp"), tokenize_glue), + (("glue", "qnli"), tokenize_glue), + (("super_glue", "wic"), tokenize_superglue_wic), + ] +) + + +class AutoEncodeText: + """ + This is a generic input text tokenization class that will be instantiated as one of the + tokenization classes of the library when created with the + `~flaml.nlp.dataset.AutoEncodeText.from_model_and_dataset_name` class method. + + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoEncodeText is designed to be instantiated " + "using the `AutoEncodeText.from_model_and_dataset_name(cls," + "data_raw,model_checkpoint_path,dataset_name,subdataset_name = None,**kwargs)` methods." + ) + + @classmethod + def from_model_and_dataset_name(cls, + data_raw, + model_checkpoint_path, + dataset_name, + subdataset_name=None, + **kwargs): + """ + Instantiate one of the input text tokenization classes from the raw data, model checkpoint path, dataset name + and sub dataset name. The raw data is used for creating a mapping function from the raw tokens to the + tokenized token ids. 
+ + Args: + data_raw: + The raw data (a datasets.Dataset object) + + model_checkpoint_path: + A string variable which specifies the model path, e.g., "google/electra-base-discriminator" + + dataset_name: + A string variable which is the dataset name, e.g., "glue" + + subdataset_name: + A string variable which is the sub dataset name,e.g., "rte" + + kwargs: + The values in kwargs of any keys will be used for the mapping function + + Examples: + >>> from datasets import load_dataset + >>> data_raw = load_dataset("glue", "rte") + >>> AutoEncodeText.from_model_and_dataset_name(data_raw, "google/electra-base-discriminator", ["glue"], "rte") + + """ + if (dataset_name, subdataset_name) in TOKENIZER_MAPPING.keys(): + this_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path, use_fast=True) + token_func = TOKENIZER_MAPPING[(dataset_name, subdataset_name)] + return data_raw.map( + partial(token_func, + this_tokenizer=this_tokenizer, + dataset_name=dataset_name, + subdataset_name=subdataset_name, + **kwargs), batched=False) + raise ValueError( + "Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n" + "Method name should be one of {}.".format( + dataset_name, subdataset_name, cls.__name__, ", ".join(c.__name__ for c in TOKENIZER_MAPPING.keys()) + ) + ) diff --git a/flaml/nlp/dataset/metric_auto.py b/flaml/nlp/dataset/metric_auto.py new file mode 100644 index 000000000..6dbb35524 --- /dev/null +++ b/flaml/nlp/dataset/metric_auto.py @@ -0,0 +1,70 @@ +# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py +from collections import OrderedDict + +metric_mode_mapping_glue = { + "cola": [("matthews_correlation", "max")], + "mnli": [("accuracy", "max")], + "mrpc": [("accuracy", "max"), ("f1", "max")], + "qnli": [("accuracy", "max")], + "qqp": [("accuracy", "max"), ("f1", "max")], + "rte": [("accuracy", "max")], + "sst2": [("accuracy", "max")], + "stsb": [("pearson", "max"), ("spearmanr", "max")], + "wnli": [("accuracy", "max")] +} + +metric_mode_mapping_squad = [("exact_match", "max"), ("f1", "max")] + +metric_mode_mapping_super_glue = { + "axb": [("matthews_correlation", "max")], + "cb": [("accuracy", "max"), ("f1", "max")], + "copa": [("accuracy", "max")], + "rte": [("accuracy", "max")], + "wic": [("accuracy", "max")], + "wsc": [("accuracy", "max")], + "wsc.fixed": [("accuracy", "max")], + "boolq": [("accuracy", "max")], + "axg": [("accuracy", "max")] +} + +metric_mode_mapping_imdb = [("accuracy", "max")] + +metric_mode_mapping_yelp = [("accuracy", "max")] + +METRIC_MAPPING = OrderedDict( + [ + ("squad", metric_mode_mapping_squad), + ("glue", metric_mode_mapping_glue), + ("super_glue", metric_mode_mapping_super_glue), + ("imdb", metric_mode_mapping_imdb), + ("yelp_review_full", metric_mode_mapping_yelp) + ] +) + + +def get_default_and_alternative_metric(dataset_name, + subdataset_name=None, + custom_metric_name=None, + custom_metric_mode_name=None): + if dataset_name not in METRIC_MAPPING.keys(): + assert custom_metric_name and custom_metric_mode_name, \ + "The dataset is not in {}, you must explicitly specify " \ + "the custom_metric_name and custom_metric_mode_name".format(",".join(METRIC_MAPPING.keys())) + eval_name_mapping = METRIC_MAPPING[dataset_name] + if isinstance(eval_name_mapping, dict): + assert subdataset_name and subdataset_name in eval_name_mapping, \ + "dataset_name and subdataset_name not correctly specified" + default_metric, default_mode = eval_name_mapping[subdataset_name][0] + all_metrics, all_mode \ + = [x[0] for x in 
eval_name_mapping[subdataset_name]] \ + + ["loss"], [x[1] for x in eval_name_mapping[subdataset_name]] + ["min"] + + return default_metric, default_mode, all_metrics, all_mode + else: + assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified" + + default_metric, default_mode = eval_name_mapping[0] + all_metrics, all_mode = [x[0] for x in eval_name_mapping] + ["loss"], \ + [x[1] for x in eval_name_mapping] + ["min"] + + return default_metric, default_mode, all_metrics, all_mode diff --git a/flaml/nlp/dataset/sentence_keys_auto.py b/flaml/nlp/dataset/sentence_keys_auto.py new file mode 100644 index 000000000..bcf098cf4 --- /dev/null +++ b/flaml/nlp/dataset/sentence_keys_auto.py @@ -0,0 +1,28 @@ +sentence_keys_glue = { + "cola": ["sentence"], + "mnli": ["premise", "hypothesis"], + "mrpc": ["sentence1", "sentence2"], + "qnli": ["sentence", "question"], + "qqp": ["question1", "question2"], + "rte": ["sentence1", "sentence2"], + "sst2": ["sentence"], + "stsb": ["sentence1", "sentence2"], + "wnli": ["sentence1", "sentence2"] +} + +sentence_keys_super_glue = { + "rte": ["hypothesis", "premise"], + "wic": ["sentence1", "sentence2"], + "wsc": ["text"] +} + + +def get_sentence_keys(dataset_name, subdataset_name=None): + eval_name_mapping = globals()["sentence_keys_" + dataset_name] + if isinstance(eval_name_mapping, dict): + assert subdataset_name and subdataset_name in eval_name_mapping, \ + "dataset_name and subdataset_name not correctly specified" + sentence_keys = eval_name_mapping[subdataset_name] + else: + sentence_keys = eval_name_mapping + return sentence_keys diff --git a/flaml/nlp/dataset/submission_auto.py b/flaml/nlp/dataset/submission_auto.py new file mode 100644 index 000000000..667419e35 --- /dev/null +++ b/flaml/nlp/dataset/submission_auto.py @@ -0,0 +1,126 @@ +import os +import shutil +from collections import OrderedDict + +file_name_mapping_glue = { + "ax": ["AX.tsv"], + "cola": ["CoLA.tsv"], + "mnli": ["MNLI-m.tsv", "MNLI-mm.tsv"], + "mrpc": ["MRPC.tsv"], + "qnli": ["QNLI.tsv"], + "qqp": ["QQP.tsv"], + "rte": ["RTE.tsv"], + "sst2": ["SST-2.tsv"], + "stsb": ["STS-B.tsv"], + "wnli": ["WNLI.tsv"] +} + +default_prediction_glue = { + "ax": ["entailment"], + "cola": ["0"], + "mnli": ["neutral", "neutral"], + "mrpc": ["0"], + "qnli": ["not_entailment"], + "qqp": ["0"], + "rte": ["not_entailment"], + "sst2": ["0"], + "stsb": ["0.0"], + "wnli": ["0"] +} + +test_size_glue = { + "ax": [1104], + "cola": [1064], + "mnli": [9796, 9847], + "mrpc": [1725], + "qnli": [5463], + "qqp": [390965], + "rte": [3000], + "sst2": [1821], + "stsb": [1379], + "wnli": [146] +} + + +def output_prediction_glue(output_path, output_dir_name, predictions, train_data, dev_name, subdataset_name): + output_dir = os.path.join(output_path, output_dir_name) + if os.path.exists(output_dir): + assert os.path.isdir(output_dir) + else: + os.mkdir(output_dir) + if subdataset_name != "stsb": + label_list = train_data.features["label"].names + + output_blank_tsv(output_dir) + for each_subdataset_name in file_name_mapping_glue.keys(): + for idx in range(len(file_name_mapping_glue[each_subdataset_name])): + each_file = file_name_mapping_glue[each_subdataset_name][idx] + if subdataset_name != "mnli": + is_match = subdataset_name == each_subdataset_name + else: + if dev_name == "validation_matched": + is_match = each_file == "MNLI-m.tsv" + else: + is_match = each_file == "MNLI-mm.tsv" + if is_match: + with open(os.path.join(output_dir, each_file), "w") as writer: + 
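# GLUE submission files are tab-separated, with an index column and a prediction column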
writer.write("index\tprediction\n") + for index, item in enumerate(predictions): + if subdataset_name == "stsb": + if item > 5.0: + item = 5.0 + writer.write(f"{index}\t{item:3.3f}\n") + else: + if subdataset_name in ("rte", "qnli", "mnli"): + item = label_list[item] + writer.write(f"{index}\t{item}\n") + else: + if int(item) == item: + item = int(item) + writer.write(f"{index}\t{item}\n") + else: + writer.write(f"{index}\t{item:3.3f}\n") + + shutil.make_archive(os.path.join(output_path, output_dir_name), 'zip', output_dir) + return os.path.join(output_path, output_dir_name + ".zip") + + +OUTPUT_PREDICTION_MAPPING = OrderedDict( + [ + ("glue", output_prediction_glue), + ] +) + + +def auto_output_prediction(dataset_name, + output_path, + output_dir_name, + predictions, + train_data, + dev_name, + subset_name): + if dataset_name in OUTPUT_PREDICTION_MAPPING.keys(): + return OUTPUT_PREDICTION_MAPPING[dataset_name](output_path, + output_dir_name, + predictions, + train_data, + dev_name, + subset_name) + else: + raise ValueError( + "Unrecognized dataset {}. \n" + "Should be one of {}.".format(dataset_name, ", ".join(c.__name__ for c in OUTPUT_PREDICTION_MAPPING.keys()) + ) + ) + + +def output_blank_tsv(output_dir): + for each_subdataset_name in file_name_mapping_glue.keys(): + for idx in range(len(file_name_mapping_glue[each_subdataset_name])): + each_file = file_name_mapping_glue[each_subdataset_name][idx] + default_prediction = default_prediction_glue[each_subdataset_name][idx] + test_size = test_size_glue[each_subdataset_name][idx] + with open(os.path.join(output_dir, each_file), "w") as writer: + writer.write("index\tprediction\n") + for index in range(test_size): + writer.write(f"{index}\t{default_prediction}\n") diff --git a/flaml/nlp/dataset/task_auto.py b/flaml/nlp/dataset/task_auto.py new file mode 100644 index 000000000..71419a463 --- /dev/null +++ b/flaml/nlp/dataset/task_auto.py @@ -0,0 +1,45 @@ +# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py + +from collections import OrderedDict + +task_mapping_glue = { + "cola": "seq-classification", + "mnli": "seq-classification", + "mrpc": "seq-classification", + "qnli": "seq-classification", + "qqp": "seq-classification", + "rte": "seq-classification", + "sst2": "seq-classification", + "stsb": "regression", + "wnli": "seq-classification" +} + +task_mapping_squad = "question-answering" + +task_mapping_super_glue = { + "wic": "seq-classification", + "rte": "seq-classification" +} + +TASK_MAPPING = OrderedDict( + [ + ("squad", task_mapping_squad), + ("glue", task_mapping_glue), + ("super_glue", task_mapping_super_glue), + ] +) + + +def get_default_task(dataset_name, subdataset_name=None): + assert dataset_name in TASK_MAPPING.keys(), "The dataset is not in {}, you must explicitly specify " \ + "the custom_metric_name and custom_metric_mode_name".format( + ",".join(TASK_MAPPING.keys())) + eval_name_mapping = TASK_MAPPING[dataset_name] + if isinstance(eval_name_mapping, dict): + assert subdataset_name and subdataset_name in eval_name_mapping, \ + "dataset_name and subdataset_name not correctly specified" + default_task = eval_name_mapping[subdataset_name] + else: + assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified" + default_task = eval_name_mapping + return default_task diff --git a/flaml/nlp/hpo/__init__.py b/flaml/nlp/hpo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/flaml/nlp/hpo/get_grid_search_space.py 
b/flaml/nlp/hpo/get_grid_search_space.py new file mode 100644 index 000000000..c6b1f0d1d --- /dev/null +++ b/flaml/nlp/hpo/get_grid_search_space.py @@ -0,0 +1,456 @@ +# lookup table for the grid configs in each pre-trained language huggingface for different tasks +import copy + + +def get_space_union_and_unique(search_space_common, search_space_unique, this_case_tags: list): + """ + get the recommended search configs for each pre-trained language models + + Args: + search_space_common: + the union of configs recommended by the LM for all cases; + search_space_unique: + the recommended config by the LM for a specific condition, e.g., small model + this_case_tags: + a list, which contains the tags describing the specific condition, e.g., ["small"] + """ + search_space_union = search_space_common.copy() + this_search_space = search_space_common.copy() + # enumerate over each case where the search space is different + # this difference can be the dataset or model size, etc. + is_included = False + from ..utils import merge_dicts + for each_case in search_space_unique.keys(): + from ..utils import _check_dict_keys_overlaps + if each_case in this_case_tags: + is_included = True + assert not _check_dict_keys_overlaps(this_search_space, search_space_unique[each_case]), \ + "the hyperparameters of common and unique search spaces should not have overlaps" + this_search_space.update(search_space_unique[each_case]) + search_space_union = merge_dicts(search_space_union, search_space_unique[each_case]) + if is_included: + return this_search_space + else: + if "other" in search_space_unique.keys(): + search_space_union = merge_dicts(search_space_union, search_space_unique["other"]) + return search_space_union + + +def get_deberta_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + DEBERTA: DECODING-ENHANCED BERT WITH DISENTANGLED ATTENTION: Table 9 + https://arxiv.org/abs/2006.03654 + """ + search_space_common = { + "cls_dropout": [0, 0.1, 0.15], + "warmup_steps": [50, 100, 500, 1000], + "per_device_train_batch_size": [16, 32, 48, 64], + "num_train_epochs": [10], + "adam_epsilon": [1e-6], + } + search_space_unique = { + "large": { + "learning_rate": [5e-6, 8e-6, 9e-6, 1e-5], + "weight_decay": [0.01], + }, + "base": { + "learning_rate": [1.5e-5, 2e-5, 3e-5, 4e-5], + } + } + return get_space_union_and_unique(search_space_common, search_space_unique, [model_size_type]) + + +def get_longformer_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + TODO: Longformer: The Long-Document Transformer + """ + if dataset_name == "glue": + return + + +def get_funnel_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing + https://arxiv.org/abs/2006.03236 + """ + search_space_common = {"learning_rate": [1e-5, 2e-5, 3e-5], + "hidden_dropout": [0.1], + "activation_dropout": [0.0], + "attention_dropout": [0.1], + "weight_decay": [0.01], + "warmup_ratio": [0.1], + "adam_epsilon": [1e-6], + } + search_space_unique = { + "imdb": { + "per_device_train_batch_size": [32], + "num_train_epochs": [5] + }, + "ag_news": { + "per_device_train_batch_size": [32], + "num_train_epochs": [3] + }, + "dbpedia_14": { + "per_device_train_batch_size": [64], + "num_train_epochs": [3] + }, + "yelp_polarity": { + "per_device_train_batch_size": [128], + "num_train_epochs": [3] + }, + "yelp_review_full": { + 
"per_device_train_batch_size": [128], + "num_train_epochs": [3] + }, + "amazon_polarity": { + "per_device_train_batch_size": [128], + "num_train_epochs": [3] + }, + "amazon_review_multi": { + "per_device_train_batch_size": [128], + "num_train_epochs": [3] + }, + "glue_rte": { + "per_device_train_batch_size": [16], + "num_train_epochs": [10] + }, + "glue_mrpc": { + "per_device_train_batch_size": [16], + "num_train_epochs": [10] + }, + "glue_stsb": { + "per_device_train_batch_size": [16], + "num_train_epochs": [10] + }, + "glue_cola": { + "per_device_train_batch_size": [16], + "num_train_epochs": [10] + }, + "glue_sst2": { + "per_device_train_batch_size": [32], + "num_train_epochs": [5] + }, + "glue_qnli": { + "per_device_train_batch_size": [32], + "num_train_epochs": [3] + }, + "glue_mnli": { + "per_device_train_batch_size": [64], + "num_train_epochs": [3] + }, + "glue_qqp": { + "per_device_train_batch_size": [64], + "num_train_epochs": [5] + } + } + from ..result_analysis.azure_utils import JobID + return get_space_union_and_unique(search_space_common, search_space_unique, + [JobID.get_full_data_name(dataset_name, subdataset_name)]) + + +def get_bert_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding + https://arxiv.org/pdf/1810.04805.pdf + """ + search_space_common = {} + search_space_unique = { + # Section 4.1: We use a batch size of 32 and fine-tune for 3 epochs over the data for all GLUE tasks. For each + # task, we selected the best fine-tuning learning rate (among 5e-5, 4e-5, 3e-5, and 2e-5) on the Dev set + "glue": { + "learning_rate": [5e-5, 4e-5, 3e-5, 2e-5], + "per_device_train_batch_size": [32], + "num_train_epochs": [3], + }, + # Section 4.2: We fine-tune for 3 epochs with a learning rate of 5e-5 and a batch size of 32 + "squad": { + "learning_rate": [5e-5], + "per_device_train_batch_size": [32], + "num_train_epochs": [2], + }, + # Section 4.3: We fine-tuned for 2 epochs with a learning rate of 5e-5 and a batch size of 48. + "squad_v2": { + "learning_rate": [5e-5], + "per_device_train_batch_size": [48], + "num_train_epochs": [2], + }, + # Section 4.4: We fine-tune the huggingface for 3 epochs with a learning rate of 2e-5 and a batch size of 16. + "swag": { + "learning_rate": [2e-5], + "per_device_train_batch_size": [16], + "num_train_epochs": [3], + }, + # Appedix A. The optimal hyperparameter values are task-specific, but we found the following + # range of possible values to work well across all tasks: + # - Batch size: 16, 32 + # - Learning rate (Adam): 5e-5, 3e-5, 2e-5 + # - Number of epochs: 2, 3, 4 + "other": { + "learning_rate": [5e-5, 3e-5, 2e-5], + "per_device_train_batch_size": [16, 32], + "num_train_epochs": [2, 3, 4], + } + } + return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name]) + + +def get_roberta_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + # RoBERTa: A Robustly Optimized BERT Pretraining Approach + # https://arxiv.org/pdf/1907.11692.pdf + search_space_common = { + "warmup_ratio": [0.06], + } + search_space_unique = { + # Table 10: Hyperparameters for finetuning RoBERTa-LARGE on RACE, SQuAD and GLUE. + # We consider a limited hyperparameter + # sweep for each task, with batch sizes ∈ {16, 32} + # and learning rates ∈ {1e−5, 2e−5, 3e−5}, with a + # linear warmup for the first 6% of steps followed by + # a linear decay to 0. 
+ "glue": { + "learning_rate": [1e-5, 2e-5, 3e-5], + "per_device_train_batch_size": [16, 32], + "weight_decay": [0.1], + "num_train_epochs": [10], + }, + "race": { + "learning_rate": [1e-5], + "per_device_train_batch_size": [16], + "weight_decay": [0.1], + "num_train_epochs": [4], + }, + "squad": { + "learning_rate": [1.5e-5], + "per_device_train_batch_size": [48], + "weight_decay": [0.01], + "num_train_epochs": [2], + } + } + return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name]) + + +def get_electra_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + ELECTRA: PRE-TRAINING TEXT ENCODERS AS DISCRIMINATORS RATHER THAN GENERATORS + https://arxiv.org/pdf/2003.10555.pdf + """ + assert model_size_type in ("small", "base", "large", "intermediate", "xlarge"), \ + "Electra paper has only provided hyperparameter for the small and base huggingface" + search_space_common = { + "learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4] if algo_mode == "grid" + else [3e-5, 5e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-3], + "weight_decay": [0.0], + "adam_epsilon": [1e-6], + "warmup_ratio": [0.1], + "per_device_train_batch_size": [32], + "hidden_dropout_prob": [0.1], + "attention_probs_dropout_prob": [0.1], + } + search_space_unique = { + # Appendix B: For Basesized models we searched for a learning + "squad": { + "num_train_epochs": [2] + }, + "squad_v2": { + "num_train_epochs": [2] + }, + "glue_stsb": { + "num_train_epochs": [10], + }, + "glue_rte": { + "num_train_epochs": [10], + }, + "glue_wnli": { + "num_train_epochs": [3], + }, + "glue_mrpc": { + "num_train_epochs": [3], + }, + "glue_cola": { + "num_train_epochs": [3], + }, + "glue_sst2": { + "num_train_epochs": [3], + }, + "glue_qnli": { + "num_train_epochs": [3], + }, + "glue_mnli": { + "num_train_epochs": [3], + }, + "glue_qqp": { + "num_train_epochs": [3], + } + } + from ..result_analysis.azure_utils import JobID + return get_space_union_and_unique(search_space_common, search_space_unique, + [JobID.get_full_data_name(dataset_name, subdataset_name), model_size_type]) + + +def get_mobilebert_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices + https://arxiv.org/pdf/2004.02984.pdf + """ + # To finetune the pre-trained models, we search the optimization hyperparameters + # in a search space including different batch sizes (16/32/48), learning + # rates ((1-10) * e-5), and the number of epochs (2-10) + search_space_common = { + "learning_rate": [x * 1e-5 for x in range(1, 11)], + "per_device_train_batch_size": [4, 8, 16, 32, 48], + "num_train_epochs": [x for x in range(2, 11)], + } + search_space_unique = {} + return get_space_union_and_unique(search_space_common, search_space_unique, []) + + +def get_albert_space(model_size_type=None, + dataset_name=None, + subdataset_name=None, + algo_mode=None): + """ + Hyperparameters for downstream tasks are shown in Table 14. We adapt these hyperparameters + from Liu et al. (2019), Devlin et al. (2019), and Yang et al. (2019). 
+ + LR BSZ ALBERT DR Classifier DR TS WS MSL + CoLA 1.00E-05 16 0 0.1 5336 320 512 + STS 2.00E-05 16 0 0.1 3598 214 512 + SST-2 1.00E-05 32 0 0.1 20935 1256 512 + MNLI 3.00E-05 128 0 0.1 10000 1000 512 + QNLI 1.00E-05 32 0 0.1 33112 1986 512 + QQP 5.00E-05 128 0.1 0.1 14000 1000 512 + RTE 3.00E-05 32 0.1 0.1 800 200 512 + MRPC 2.00E-05 32 0 0.1 800 200 512 + WNLI 2.00E-05 16 0.1 0.1 2000 250 512 + SQuAD v1.1 5.00E-05 48 0 0.1 3649 365 384 + SQuAD v2.0 3.00E-05 48 0 0.1 8144 814 512 + RACE 2.00E-05 32 0.1 0.1 12000 1000 512 + """ + search_space_common = { + } + search_space_unique = { + "glue_cola": { + "learning_rate": [1e-5], + "per_device_train_batch_size": [16], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [5336], + "warmup_steps": [320], + }, + "glue_stsb": { + "learning_rate": [2e-5], + "per_device_train_batch_size": [16], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [3598], + "warmup_steps": [214], + }, + "glue_sst2": { + "learning_rate": [1e-5], + "per_device_train_batch_size": [32], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [20935], + "warmup_steps": [1256], + }, + "glue_mnli": { + "learning_rate": [3e-5], + "per_device_train_batch_size": [128], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [10000], + "warmup_steps": [1000], + }, + "glue_qnli": { + "learning_rate": [1e-5], + "per_device_train_batch_size": [32], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [33112], + "warmup_steps": [1986], + }, + "glue_qqp": { + "learning_rate": [5e-5], + "per_device_train_batch_size": [128], + "attention_probs_dropout_prob": [0.1], + "classifier_dropout_prob": [0.1], + "max_steps": [14000], + "warmup_steps": [1000], + }, + "glue_rte": { + "learning_rate": [3e-5], + "per_device_train_batch_size": [32], + "attention_probs_dropout_prob": [0.1], + "classifier_dropout_prob": [0.1], + "max_steps": [800], + "warmup_steps": [200], + }, + "glue_mrpc": { + "learning_rate": [2e-5], + "per_device_train_batch_size": [32], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [800], + "warmup_steps": [200], + }, + "glue_wnli": { + "learning_rate": [2e-5], + "per_device_train_batch_size": [16], + "attention_probs_dropout_prob": [0.1], + "classifier_dropout_prob": [0.1], + "max_steps": [2000], + "warmup_steps": [250], + }, + "squad": { + "learning_rate": [5e-5], + "per_device_train_batch_size": [48], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [3649], + "warmup_steps": [365], + }, + "squad_v2": { + "learning_rate": [3e-5], + "per_device_train_batch_size": [48], + "attention_probs_dropout_prob": [0], + "classifier_dropout_prob": [0.1], + "max_steps": [8144], + "warmup_steps": [814], + }, + "race": { + "learning_rate": [2e-5], + "per_device_train_batch_size": [32], + "attention_probs_dropout_prob": [0.1], + "classifier_dropout_prob": [0.1], + "max_steps": [12000], + "warmup_steps": [1000], + }, + } + + # To finetune the pre-trained models, we search the optimization hyperparameters + # in a search space including different batch sizes (16/32/48), learning + # rates ((1-10) * e-5), and the number of epochs (2-10) + from ..result_analysis.azure_utils import JobID + return get_space_union_and_unique(search_space_common, search_space_unique, + [JobID.get_full_data_name(dataset_name, subdataset_name)]) diff --git 
a/flaml/nlp/hpo/grid_searchspace_auto.py b/flaml/nlp/hpo/grid_searchspace_auto.py new file mode 100644 index 000000000..654f569f2 --- /dev/null +++ b/flaml/nlp/hpo/grid_searchspace_auto.py @@ -0,0 +1,93 @@ +from collections import OrderedDict + +from .get_grid_search_space import \ + (get_electra_space, + get_bert_space, + get_roberta_space, + get_funnel_space, + get_deberta_space, + get_albert_space + ) + +GRID_SEARCH_SPACE_MAPPING = OrderedDict( + [ + ("electra", get_electra_space), + ("bert", get_bert_space), + ("roberta", get_roberta_space), + ("funnel", get_funnel_space), + ("deberta", get_deberta_space), + ("albert", get_albert_space), + ] +) + +HF_MODEL_LIST = [ + "bert", + "roberta", + "electra", + "xlnet", + "albert", + "distilbert", + "deberta", + "mobilebert", + "funnel" +] + + +class AutoGridSearchSpace: + """ + This is a class for getting the recommended grid search space of a pre-trained LM that will be + instantiated as one of the search spaces of the library when created with the + `~flaml.nlp.hpo.AutoGridSearchSpace.from_model_and_dataset_name` method. + + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoGridSearchSpace is designed to be instantiated " + "using the `AutoGridSearchSpace.from_config_and_method_name(cls, model_type, model_size_type," + "dataset_name,subdataset_name = None,algo_mode = None)` methods." + ) + + @classmethod + def from_model_and_dataset_name(cls, + model_type, + model_size_type, + dataset_name, + subdataset_name=None, + algo_mode=None): + """ + Instantiate one of the classes for getting the recommended grid search space of a pre-trained LM from + the model type, model size type, dataset name, sub dataset name and algorithm mode + + Args: + model_type: + A string variable which is the model type, e.g. 
"electra" + + model_size_type: + A string variable which is the size of the model, e.g., "small" + + dataset_name: + A string variable which is the dataset name, e.g., "glue" + + subdataset_name: + A string variable which is the sub dataset name,e.g., "rte" + + algo_mode: + A string variable which is the algorithm mode for grid search, e.g., "gridbert" + + Example: + >>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", "glue", "rte", "grid") + + """ + + if model_type in GRID_SEARCH_SPACE_MAPPING.keys(): + this_model_recommended_space = GRID_SEARCH_SPACE_MAPPING[model_type]( + model_size_type, dataset_name, subdataset_name, algo_mode) + return this_model_recommended_space + raise ValueError( + "Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n" + "Method name should be one of {}.".format( + model_type, dataset_name, cls.__name__, ", ".join(c.__name__ for c in GRID_SEARCH_SPACE_MAPPING.keys()) + ) + ) diff --git a/flaml/nlp/hpo/hpo_searchspace.py b/flaml/nlp/hpo/hpo_searchspace.py new file mode 100644 index 000000000..b19dac369 --- /dev/null +++ b/flaml/nlp/hpo/hpo_searchspace.py @@ -0,0 +1,242 @@ +from collections import OrderedDict + +from ..huggingface.trainer import TrainerForAutoTransformers +from ray import tune +from transformers import TrainingArguments + +from .grid_searchspace_auto import AutoGridSearchSpace + + +def hpo_space_custom(**custom_hpo_args): + assert "hpo_space" in custom_hpo_args + custom_search_space = custom_hpo_args["hpo_space"] + return custom_search_space + + +def bounded_gridunion(logger=None, + model_type=None, + model_size_type=None, + dataset_name=None, + subdataset_name=None, + **custom_hpo_args): + assert "bound" in custom_hpo_args + gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](logger, + model_type, + model_size_type, + dataset_name, + subdataset_name, + **custom_hpo_args) + for each_key in custom_hpo_args["bound"].keys(): + if "u" in custom_hpo_args["bound"][each_key]: + upper = custom_hpo_args["bound"][each_key]["u"] + else: + upper = 100000 + if "l" in custom_hpo_args["bound"][each_key]: + lower = custom_hpo_args["bound"][each_key]["l"] + else: + lower = -100000 + original_space = sorted(gridunion_space[each_key]) + upper_id = len(original_space) + for x in range(len(original_space)): + if original_space[x] > upper: + upper_id = x + break + lower_id = 0 + for x in range(len(original_space) - 1, -1, -1): + if original_space[x] < lower: + lower_id = x + break + gridunion_space[each_key] = original_space[lower_id:upper_id] + return gridunion_space + + +def hpo_space_gridunion(logger=None, + model_type=None, + model_size_type=None, + dataset_name=None, + subdataset_name=None, + **custom_hpo_args): + output_config = {} + for each_model_type in {"electra", "roberta", "bert"}: + # if each_model_type == model_type: continue + this_config = AutoGridSearchSpace.from_model_and_dataset_name( + each_model_type, model_size_type, dataset_name, subdataset_name, "hpo") + from ..utils import merge_dicts + output_config = merge_dicts(output_config, this_config) + default_values = {} + """ + adding the default configuration from transformers/training_args.py into hpo space + """ + training_args = TrainingArguments(output_dir=".") + for each_hp in output_config.keys(): + try: + default_values[each_hp] = [getattr(training_args, each_hp)] + except AttributeError: + pass + + output_config = merge_dicts(output_config, default_values) + + return output_config + + +def hpo_space_gridunion_smoke_test( + logger=None, + model_type=None, + 
model_size_type=None, + dataset_name=None, + subdataset_name=None, + **custom_hpo_args): + return {'learning_rate': [1e-5], + 'weight_decay': [0.0], + 'adam_epsilon': [1e-08], + 'warmup_ratio': [0.1], + 'per_device_train_batch_size': [2], + 'hidden_dropout_prob': [0.1], + 'attention_probs_dropout_prob': [0.1], + 'num_train_epochs': [0.1]} + + +def hpo_space_generic(logger=None, + model_type=None, + model_size_type=None, + dataset_name=None, + subdataset_name=None, + **custom_hpo_args): + output_config = { + "learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"}, + "num_train_epochs": {"l": 1.0, "u": 10.0, "space": "log"}, + "per_device_train_batch_size": [4, 8, 16, 32, 48], + "warmup_ratio": {"l": 0.0, "u": 0.3, "space": "linear"}, + "weight_decay": {"l": 0.0, "u": 0.3, "space": "linear"} + } + return output_config + + +def hpo_space_generic_grid(logger=None, + model_type=None, + model_size_type=None, + dataset_name=None, + subdataset_name=None, + **custom_hpo_args): + output_config = { + "learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4], + "num_train_epochs": [3, 10], + "per_device_train_batch_size": [16, 32], + "warmup_ratio": [0, 0.06, 0.1], + "weight_decay": [0, 0.1] + } + return output_config + + +def hpo_space_small(logger=None, + model_type=None, + model_size_type=None, + dataset_name=None, + subdataset_name=None, + **custom_hpo_args): + config_json = AutoGridSearchSpace.from_model_and_dataset_name( + model_type, model_size_type, dataset_name, subdataset_name, "hpo") + output_config = {} + + for each_hp in config_json.keys(): + if each_hp == "learning_rate": + if len(config_json[each_hp]) > 1: + output_config[each_hp] = {"l": 3e-5, "u": 1.5e-4, "space": "log"} + else: + output_config[each_hp] = config_json[each_hp] + elif each_hp == "num_train_epochs": + output_config[each_hp] = {"l": 2.0, "u": 4.0, "space": "linear"} + elif each_hp == "per_device_train_batch_size": + output_config[each_hp] = [16, 32, 64] + elif each_hp == "warmup_ratio": + output_config[each_hp] = {"l": 0.0, "u": 0.2, "space": "linear"} + elif each_hp == "weight_decay": + output_config[each_hp] = {"l": 0.0, "u": 0.3, "space": "linear"} + else: + output_config[each_hp] = config_json[each_hp] + + return output_config + + +HPO_SEARCH_SPACE_MAPPING = OrderedDict( + [ + ("uni", hpo_space_gridunion), + ("gnr", hpo_space_generic), + ("uni_test", hpo_space_gridunion_smoke_test), + ("cus", hpo_space_custom), + ("buni", bounded_gridunion) + ] +) + + +class AutoHPOSearchSpace: + """ + This is a class for getting the hpo search space based on the search space mode + (a string variable) instantiated as one of the HPO search spaces of the library when + created with the `~flaml.nlp.hpo.AutoHPOSearchSpace.from_model_and_dataset_name` method. + + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoHPOSearchSpace is designed to be instantiated " + "using the `AutoHPOSearchSpace.from_config_and_method_name(cls, logger,hpo_searchspace_name," + "model_type,model_size_type,dataset_name,subdataset_name = None,**custom_hpo_args)` methods." 
+ ) + + @classmethod + def from_model_and_dataset_name(cls, + logger, + hpo_searchspace_mode, + model_type, + model_size_type, + dataset_name, + subdataset_name=None, + **custom_hpo_args): + """ + Instantiate one of the classes for getting the hpo search space from the search space name, model type, + model size type, dataset name and sub dataset name + + Args: + logger: + Reference to the logger + + hpo_searchspace_mode: + A string variable which is name of the hpo search space, e.g., "uni" + + model_type: + A string variable which is the type of the model, e.g., "electra" + + model_size_type: + A string variable which is the type of the model size, e.g., "small" + + dataset_name: + A string variable which is the dataset name, e.g., "glue" + + subdataset_name: + A string variable which is the sub dataset name,e.g., "rte" + + custom_hpo_args: + Any additional keyword argument to be used for the function for the HPO search space + + Example: + >>> AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte") + """ + + if hpo_searchspace_mode in HPO_SEARCH_SPACE_MAPPING.keys(): + hpo_space = HPO_SEARCH_SPACE_MAPPING[hpo_searchspace_mode]( + logger, + model_type, + model_size_type, + dataset_name, + subdataset_name, + **custom_hpo_args) + return hpo_space + raise ValueError( + "Unrecognized method {},{} for this kind of AutoHPOSearchSpace: {}.\n" + "Method name should be one of {}.".format( + hpo_searchspace_mode, dataset_name, cls.__name__, + ", ".join(c.__name__ for c in HPO_SEARCH_SPACE_MAPPING.keys()) + ) + ) diff --git a/flaml/nlp/hpo/scheduler_auto.py b/flaml/nlp/hpo/scheduler_auto.py new file mode 100644 index 000000000..7e54afb65 --- /dev/null +++ b/flaml/nlp/hpo/scheduler_auto.py @@ -0,0 +1,51 @@ +from collections import OrderedDict +from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler, MedianStoppingRule + +SCHEDULER_MAPPING = OrderedDict( + [ + ("None", None), + ("asha", ASHAScheduler), + ("hb", HyperBandScheduler), + ] +) + + +class AutoScheduler: + """ + This is a class for getting the scheduler based on the scheduler name + (a string variable) instantiated as one of the schedulers of the library when + created with the `~flaml.nlp.hpo.AutoScheduler.from_scheduler_name` method. + + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoScheduler is designed to be instantiated " + "using the `AutoScheduler.from_scheduler_name(cls, scheduler_name, **kwargs)` methods." 
+ ) + + @classmethod + def from_scheduler_name(cls, scheduler_name, **kwargs): + """ + Instantiate one of the schedulers using the scheduler name + + Args: + scheduler_name: + A string variable for the scheduler name + + Example: + >>> AutoScheduler.from_scheduler_name("asha") + """ + + if scheduler_name in SCHEDULER_MAPPING.keys(): + try: + return SCHEDULER_MAPPING[scheduler_name](**kwargs) + except TypeError: + return None + raise ValueError( + "Unrecognized scheduler {} for this kind of AutoScheduler: {}.\n" + "Scheduler name should be one of {}.".format( + scheduler_name, cls.__name__, ", ".join(SCHEDULER_MAPPING.keys()) + ) + ) diff --git a/flaml/nlp/hpo/searchalgo_auto.py b/flaml/nlp/hpo/searchalgo_auto.py new file mode 100644 index 000000000..eec7d8e3a --- /dev/null +++ b/flaml/nlp/hpo/searchalgo_auto.py @@ -0,0 +1,182 @@ +import itertools +from collections import OrderedDict + +import ray +from ray.tune.suggest.optuna import OptunaSearch +from flaml import CFO, BlendSearch + +SEARCH_ALGO_MAPPING = OrderedDict( + [ + ("optuna", OptunaSearch), + ("cfo", CFO), + ("bs", BlendSearch), + ("grid", None), + ("gridbert", None), + ("rs", None) + ] +) + + +class AutoSearchAlgorithm: + """ + This is a class for getting the search algorithm based on the search algorithm name + (a string variable) instantiated as one of the algorithms of the library when + created with the `~flaml.nlp.hpo.AutoSearchAlgorithm.from_method_name` method. + + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoSearchAlgorithm is designed to be instantiated " + "using the `AutoSearchAlgorithm.from_method_name(cls, search_algo_name, search_algo_args_mode," + " hpo_search_space, **custom_hpo_args)` methods."
+ ) + + @classmethod + def from_method_name(cls, search_algo_name, search_algo_args_mode, hpo_search_space, **custom_hpo_args): + """ + Instantiating one of the search algorithm classes based on the search algorithm name, search algorithm + argument mode, hpo search space and other keyword args + + Args: + search_algo_name: + A string variable that specifies the search algorithm name, e.g., "bs" + + search_algo_args_mode: + A string variable that specifies the mode for the search algorithm args, e.g., "dft" means + initializing using the default mode + + hpo_search_space: + The hpo search space + + custom_hpo_args: + The customized arguments for the search algorithm (specified by user) + + Example: + >>> from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace + >>> search_space_hpo=AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte") + >>> search_algo = AutoSearchAlgorithm.from_method_name("bs", "cus", search_space_hpo, + {"points_to_evaluate": [{"learning_rate": 1e-5, "num_train_epochs": 10}]) + """ + + assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name" + if not search_algo_name: + search_algo_name = "grid" + if search_algo_name in SEARCH_ALGO_MAPPING.keys(): + try: + """ + filtering the customized args for hpo from custom_hpo_args, keep those + which are in the input variable name list of the constructor of + the algorithm, remove those which does not appear in the input variables + of the constructor function + """ + this_search_algo_kwargs = None + allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames + allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys() if + key in allowed_arguments} + + """ + If the search_algo_args_mode is "dft", set the args to the default args, e.g.,the default args for + BlendSearch is "low_cost_partial_config": {"num_train_epochs": min_epoch,"per_device_train_batch_size" + : max(hpo_search_space["per_device_train_batch_size"].categories)}, + """ + if search_algo_args_mode == "dft": + this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name]( + "dft", hpo_search_space=hpo_search_space, **allowed_custom_args) + elif search_algo_args_mode == "cus": + this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name]( + "cus", hpo_search_space=hpo_search_space, **allowed_custom_args) + + """ + returning the hpo algorithm with the arguments + """ + return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs) + except KeyError: + return None + raise ValueError( + "Unrecognized method {} for this kind of AutoSearchAlgorithm: {}.\n" + "Method name should be one of {}.".format( + search_algo_name, cls.__name__, ", ".join(c.__name__ for c in SEARCH_ALGO_MAPPING.keys()) + ) + ) + + @staticmethod + def grid2list(grid_config): + key_val_list = [[(key, each_val) for each_val in val_list['grid_search']] + for (key, val_list) in grid_config.items()] + config_list = [dict(x) for x in itertools.product(*key_val_list)] + return config_list + + +def get_search_algo_args_optuna(search_args_mode, hpo_search_space=None, **custom_hpo_args): + return {} + + +def default_search_algo_args_bs(search_args_mode, hpo_search_space=None, **custom_hpo_args): + assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name" + if "num_train_epochs" in hpo_search_space and \ + isinstance(hpo_search_space["num_train_epochs"], 
ray.tune.sample.Categorical): + min_epoch = min(hpo_search_space["num_train_epochs"].categories) + else: + assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float) + min_epoch = hpo_search_space["num_train_epochs"].lower + default_search_algo_args = { + "low_cost_partial_config": { + "num_train_epochs": min_epoch, + "per_device_train_batch_size": max(hpo_search_space["per_device_train_batch_size"].categories), + }, + } + if search_args_mode == "cus": + default_search_algo_args.update(custom_hpo_args) + return default_search_algo_args + + +def experiment_search_algo_args_bs(hpo_search_space=None): + if "num_train_epochs" in hpo_search_space and \ + isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical): + min_epoch = min(hpo_search_space["num_train_epochs"].categories) + else: + assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float) + min_epoch = hpo_search_space["num_train_epochs"].lower + default_search_algo_args = { + "low_cost_partial_config": { + "num_train_epochs": min_epoch, + }, + } + return default_search_algo_args + + +def default_search_algo_args_skopt(hpo_search_space=None): + return {} + + +def default_search_algo_args_dragonfly(hpo_search_space=None): + return {} + + +def default_search_algo_args_nevergrad(hpo_search_space=None): + return {} + + +def default_search_algo_args_hyperopt(hpo_search_space=None): + return {} + + +def default_search_algo_args_grid_search(search_args_mode, hpo_search_space=None, **custom_hpo_args): + return {} + + +def default_search_algo_args_random_search(search_args_mode, hpo_search_space=None, **custom_hpo_args): + return {} + + +DEFAULT_SEARCH_ALGO_ARGS_MAPPING = OrderedDict( + [ + ("optuna", get_search_algo_args_optuna), + ("cfo", default_search_algo_args_bs), + ("bs", default_search_algo_args_bs), + ("grid", default_search_algo_args_grid_search), + ("gridbert", default_search_algo_args_random_search) + ] +) diff --git a/flaml/nlp/huggingface/switch_head_auto.py b/flaml/nlp/huggingface/switch_head_auto.py new file mode 100644 index 000000000..d92f12fc3 --- /dev/null +++ b/flaml/nlp/huggingface/switch_head_auto.py @@ -0,0 +1,52 @@ +from collections import OrderedDict + +from transformers.models.electra.modeling_electra import ElectraClassificationHead +from transformers.models.roberta.modeling_roberta import RobertaClassificationHead + +MODEL_CLASSIFICATION_HEAD_MAPPING = OrderedDict( + [ + ("electra", ElectraClassificationHead), + ("roberta", RobertaClassificationHead), + ] +) + + +class AutoSeqClassificationHead: + """ + This is a class for getting classification head class based on the name of the LM + instantiated as one of the ClassificationHead classes of the library when + created with the `~flaml.nlp.huggingface.AutoSeqClassificationHead.from_model_type_and_config` method. + + This class cannot be instantiated directly using ``__init__()`` (throws an error). + """ + + def __init__(self): + raise EnvironmentError( + "AutoSeqClassificationHead is designed to be instantiated " + "using the `AutoSeqClassificationHead.from_model_type_and_config(cls, model_type, config)` methods." + ) + + @classmethod + def from_model_type_and_config(cls, model_type, config): + """ + Instantiate one of the classification head classes from the mode_type and model configuration. 
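+ Only model types registered in MODEL_CLASSIFICATION_HEAD_MAPPING above are supported
+ (currently "electra" and "roberta"); any other model_type raises a ValueError.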
+ + Args: + model_type: + A string, which desribes the model type, e.g., "electra" + config (:class:`~transformers.PretrainedConfig`): + The huggingface class of the model's configuration: + + Examples:: + >>> from transformers import AutoConfig + >>> model_config = AutoConfig.from_pretrained("google/electra-base-discriminator") + >>> AutoSeqClassificationHead.from_model_type_and_config("electra", model_config) + """ + if model_type in MODEL_CLASSIFICATION_HEAD_MAPPING.keys(): + return MODEL_CLASSIFICATION_HEAD_MAPPING[model_type](config) + raise ValueError( + "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" + "Model type should be one of {}.".format( + config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_CLASSIFICATION_HEAD_MAPPING.keys()) + ) + ) diff --git a/flaml/nlp/huggingface/trainer.py b/flaml/nlp/huggingface/trainer.py new file mode 100644 index 000000000..80a25d2ad --- /dev/null +++ b/flaml/nlp/huggingface/trainer.py @@ -0,0 +1,121 @@ +import copy +import os + +import transformers + +from ray import tune +import torch +from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR + +transformers.logging.set_verbosity_error() + + +class TrainerForAutoTransformers(transformers.Trainer): + """ + Overriding transformers.Trainer. + + Args: + huggingface (:class:`~transformers.PreTrainedModel` or :obj:`torch.nn.Module`, `optional`): + """ + + def get_optimizers( + self, num_training_steps + ): + self.current_optimizer, self.current_scheduler = super().get_optimizers(num_training_steps) + return (self.current_optimizer, self.current_scheduler) + + def evaluate(self, + eval_dataset=None): + """ + Overriding transformers.Trainer.evaluate by saving state with save_state + + Args: + eval_dataset: + the dataset to be evaluated + """ + import wandb + eval_dataloader = self.get_eval_dataloader(eval_dataset) + output = self.prediction_loop( + eval_dataloader, description="Evaluation") + self.log(output.metrics) + + self.save_state() + + for key in list(output.metrics.keys()): + if key.startswith("eval_"): + output.metrics[key[5:]] = output.metrics[key] + tune.report(**output.metrics) + + return output.metrics + + def save_state(self): + """ + Overriding transformers.Trainer.save_state. It is only through saving + the states can best_trial.get_best_checkpoint return a non-empty value. + """ + with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir: + self.args.output_dir = checkpoint_dir + # This is the directory name that Huggingface requires. 
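+ # i.e. "<ray trial checkpoint dir>/checkpoint-<global_step>": the model is written with
+ # transformers' PREFIX_CHECKPOINT_DIR naming inside the directory provided by
+ # tune.checkpoint_dir, and the optimizer / LR scheduler states are saved alongside it,
+ # which is what allows best_trial.get_best_checkpoint to return this folder later.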
+ output_dir = os.path.join( + self.args.output_dir, + f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}") + self.save_model(output_dir) + torch.save(self.optimizer.state_dict(), + os.path.join(output_dir, "optimizer.pt")) + torch.save(self.lr_scheduler.state_dict(), + os.path.join(output_dir, "scheduler.pt")) + + @staticmethod + def convert_num_train_epochs_to_max_steps( + num_train_epochs: int, + num_train_examples: int, + per_device_train_batch_size: int, + device_count: int): + return int(num_train_epochs * num_train_examples / per_device_train_batch_size / device_count) + + @staticmethod + def convert_max_steps_to_num_train_epochs( + max_steps: int, + num_train_examples: int, + per_device_train_batch_size: int, + device_count: int): + return float(max_steps * per_device_train_batch_size * device_count) / num_train_examples + + @staticmethod + def convert_warmup_ratio_to_warmup_steps( + warmup_ratio, + max_steps=None, + num_train_epochs=None, + num_train_examples=None, + per_device_train_batch_size=None, + device_count=None): + if max_steps: + return int(warmup_ratio * max_steps) + max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps( + num_train_epochs, + num_train_examples, + per_device_train_batch_size, + device_count) + return int(warmup_ratio * max_steps) + + @staticmethod + def convert_warmup_steps_to_warmup_ratio( + warmup_steps: int, + num_train_epochs: int, + num_train_examples: int, + per_device_train_batch_size: int, + device_count: int): + max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps( + num_train_epochs, + num_train_examples, + per_device_train_batch_size, + device_count) + return float(warmup_steps / max_steps) + + @staticmethod + def resolve_hp_conflict(search_space_dict): + if "max_steps" in search_space_dict and "num_train_epochs" in search_space_dict: + del search_space_dict["num_train_epochs"] + if "warmup_ratio" in search_space_dict and "warmup_steps" in search_space_dict: + del search_space_dict["warmup_ratio"] + return search_space_dict diff --git a/flaml/nlp/result_analysis/__init__.py b/flaml/nlp/result_analysis/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/flaml/nlp/result_analysis/azure_utils.py b/flaml/nlp/result_analysis/azure_utils.py new file mode 100644 index 000000000..7be3e5fd4 --- /dev/null +++ b/flaml/nlp/result_analysis/azure_utils.py @@ -0,0 +1,677 @@ +import re +import pathlib +import os +from azure.storage.blob import BlobServiceClient, ContainerClient +from transformers import AutoConfig + +from ..utils import get_wandb_azure_key +from datetime import datetime +from dataclasses import dataclass, field +from ..hpo.grid_searchspace_auto import HF_MODEL_LIST +import json + + +@dataclass +class JobID: + dat: list = field(default=None) + subdat: str = field(default=None) + mod: str = field(default=None) + spa: str = field(default=None) + arg: str = field(default=None) + alg: str = field(default=None) + pru: str = field(default=None) + pre_full: str = field(default=None) + pre: str = field(default=None) + presz: str = field(default=None) + spt: str = field(default=None) + rep: int = field(default=0) + sddt: int = field(default=None) + sdhf: int = field(default=None) + + def __init__(self, + console_args=None): + if console_args: + self.set_jobid_from_console_args(console_args) + + def set_unittest_config(self): + """ + set the JobID config for unit test + """ + self.dat = ["glue"] + self.subdat = "mrpc" + self.mod = "hpo" + self.spa = "uni_test" + self.arg = "dft" + 
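# "dft" above: build the search algorithm with its default arguments
+ # (see AutoSearchAlgorithm.from_method_name); "bs" below selects BlendSearch per SEARCH_ALGO_MAPPING. +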
self.alg = "bs" + self.pru = "None" + self.pre_full = "google/mobilebert-uncased" + self.pre = "mobilebert" + self.presz = "small" + self.spt = "rspt" + self.rep = 0 + self.sddt = 43 + self.sdhf = 42 + + def is_match(self, partial_jobid): + """ + return a boolean variable whether the current object matches the partial jobid defined + in partial_jobid. For example, + self = JobID(dat = ['glue'], + subdat = 'cola', + mod = 'bestnn', + spa = 'buni', + arg = 'cus', + alg = 'bs', + pru = 'None', + pre = 'funnel', + presz = 'xlarge', + spt = 'rspt', + rep = 0, + sddt = 43, + sdhf = 42) + partial_jobid1 = JobID(dat = ['glue'], + subdat = 'cola', + mod = 'hpo') + partial_jobid2 = JobID(dat = ['glue'], + subdat = 'cola', + mod = 'bestnn') + return False for partial_jobid1 and True for partial_jobid2 + """ + is_not_match = False + for key, val in partial_jobid.__dict__.items(): + if val is None: + continue + if getattr(self, key) != val: + is_not_match = True + return not is_not_match + + def to_wandb_string(self): + """ + preparing for the job ID for wandb + """ + field_dict = self.__dict__ + keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key], key) + if type(field_dict[key]) == list + else str(field_dict[key]) + for key in field_dict.keys() if not key.endswith("_full")]) + return keytoval_str + + def to_jobid_string(self): + """ + convert the current JobID into a blob name string which contains all the fields + """ + list_keys = list(JobID.__dataclass_fields__.keys()) + field_dict = self.__dict__ + keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key) + if type(field_dict[key]) == list + else key + "=" + str(field_dict[key]) + for key in list_keys if not key.endswith("_full")]) + return keytoval_str + + def to_partial_jobid_string(self): + """ + convert the current JobID into a blob name string which only contains the fields whose values are not "None" + """ + list_keys = list(JobID.__dataclass_fields__.keys()) + field_dict = self.__dict__ # field_dict contains fields whose values are not None + keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key) + if type(field_dict[key]) == list + else key + "=" + str(field_dict[key]) + for key in list_keys if key in field_dict.keys()]) + return keytoval_str + + @staticmethod + def blobname_to_jobid_dict(keytoval_str): + """ + converting an azure blobname to a JobID config, + e.g., blobname = "dat=glue_subdat=cola_mod=bestnn_spa=buni_arg=cus_ + alg=bs_pru=None_pre=funnel_presz=xlarge_spt=rspt_rep=0.json" + the converted jobid dict = {dat = ['glue'], subdat = 'cola', mod = 'bestnn', + spa = 'buni', arg = 'cus', alg = 'bs', pru = 'None', + pre = 'funnel', presz = 'xlarge', spt = 'rspt', + rep = 0, sddt = 43, sdhf = 42) + """ + field_keys = [key for key in + list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")] + regex_expression = ".*" + "_".join([key + "=(?P<" + key + ">.*)" for key in field_keys]) + ".(json|zip)" + result = re.search(regex_expression, keytoval_str) + if result: + result_dict = {} + for key in field_keys: + if key == "dat": + result_dict[key] = [result.group(key)] + elif key == "rep": + try: + result_dict[key] = int(result.group(key)) + except IndexError: + result_dict[key] = -1 + else: + result_dict[key] = result.group(key) + return result_dict + else: + return None + + @staticmethod + def dataset_list_to_str(dataset_name, key): + if key == "dat": + assert isinstance(dataset_name, list) + return "-".join(dataset_name) + else: + return dataset_name + + 
@staticmethod + def set_jobid_from_arg_list(self, + **jobid_list + ): + """ + set the jobid from a dict object + """ + + for key in jobid_list.keys(): + assert key in JobID.__dataclass_fields__.keys() + setattr(self, key, jobid_list[key]) + + @staticmethod + def convert_blobname_to_jobid(blobname): + """ + converting a blobname string to a JobID object + """ + jobconfig_dict = JobID.blobname_to_jobid_dict(blobname) + if jobconfig_dict: + jobconfig = JobID() + jobconfig.set_jobid_from_arg_list(**jobconfig_dict) + return jobconfig + else: + return None + + @staticmethod + def get_full_data_name(dataset_name, subdataset_name=None): + """ + convert a dataset name and sub dataset name to a full dataset name + """ + full_dataset_name = dataset_name + if subdataset_name: + full_dataset_name = full_dataset_name + "_" + subdataset_name + return full_dataset_name + + def get_jobid_full_data_name(self): + """ + get the full dataset name of the current JobID object + """ + return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat, "dat"), self.subdat) + + @staticmethod + def _extract_model_type_with_keywords_match(pre_full): + matched_model_type = [] + for each_model_type in HF_MODEL_LIST: + if each_model_type in pre_full: + matched_model_type.append(each_model_type) + assert len(matched_model_type) > 0 + return max(enumerate(matched_model_type), key=lambda x: len(x[1]))[1] + + @staticmethod + def extract_model_type(full_model_name): + model_config = AutoConfig.from_pretrained(full_model_name) + config_json_file = model_config.get_config_dict(full_model_name)[0] + try: + model_type = config_json_file["model_type"] + except KeyError: + model_type = JobID._extract_model_type_with_keywords_match() + return model_type + + def set_jobid_from_console_args(self, console_args): + self.dat = console_args.dataset_subdataset_name.split(":")[0].split(",") + self.subdat = console_args.dataset_subdataset_name.split(":")[1] + self.mod = console_args.algo_mode + self.spa = console_args.space_mode + self.arg = console_args.search_alg_args_mode + self.alg = console_args.algo_name + self.pru = console_args.pruner + self.pre_full = console_args.pretrained_model_size.split(":")[0] + self.pre = JobID.extract_model_type(self.pre_full) + self.presz = console_args.pretrained_model_size.split(":")[1] + self.spt = console_args.resplit_mode + self.rep = console_args.rep_id + self.sddt = console_args.seed_data + self.sdhf = console_args.seed_transformers + + @staticmethod + def legacy_old_blobname_to_new_blobname(self, + old_blobname): + spa_id2val = { + 0: "gnr", + 1: "uni" + } + alg_id2val = { + 0: "bs", + 1: "optuna", + 2: "cfo" + } + pre_id2val = { + 0: "xlnet-base-cased", + 1: "albert-large-v1", + 2: "distilbert-base-uncased", + 3: "microsoft/deberta-base", + 4: "funnel-transformer/small-base", + 5: "microsoft/deberta-large", + 6: "funnel-transformer/large-base", + 7: "funnel-transformer/intermediate-base", + 8: "funnel-transformer/xlarge-base" + } + presz_id2val = { + 0: "base", + 1: "small", + 2: "base", + 3: "base", + 4: "base", + 5: "large", + 6: "large", + 7: "intermediate", + 8: "xlarge" + } + spt_id2val = { + 0: "rspt", + 1: "ori" + } + result_grid = re.search(r".*_mod(el)?(?P\d+)_None_None(_spt(?P\d+))?_rep(?P\d+).log", + old_blobname) + result = re.search( + r".*_mod(el)?(?P\d+)_(alg)?(?P\d+)_(spa)?" 
+ r"(?P\d+)(_spt(?P\d+))?_rep(?P\d+).log", + old_blobname) + if result_grid: + dat = [old_blobname.split("/")[1].split("_")[0]] + subdat = old_blobname.split("/")[1].split("_")[1] + mod = "hpo" + spa = None + arg = None + alg = None + pru = None + pre = pre_id2val[int(result_grid.group("model_id"))] + presz = presz_id2val[int(result_grid.group("model_id"))] + try: + spt = spt_id2val[int(result_grid.group("split_id"))] + except KeyError: + spt = spt_id2val[0] + rep = None + self.set_jobid_from_arg_list(dat, subdat, mod, spa, arg, alg, pru, pre, presz, spt, rep) + return self.to_jobid_string() + if result: + dat = [old_blobname.split("/")[1].split("_")[0]] + subdat = old_blobname.split("/")[1].split("_")[1] + mod = "hpo" + spa = spa_id2val[int(result.group("space_id"))] + arg = "dft" + alg = alg_id2val[int(result.group("algo_id"))] + pru = "None" + pre = pre_id2val[int(result_grid.group("model_id"))] + presz = presz_id2val[int(result_grid.group("model_id"))] + try: + spt = spt_id2val[int(result_grid.group("split_id"))] + except KeyError: + spt = spt_id2val[0] + rep = int(result.group("rep_id")) + self.set_jobid_from_arg_list(dat, subdat, mod, spa, arg, alg, pru, pre, presz, spt, rep) + return self.to_jobid_string() + return None + + +class AzureUtils: + + def __init__(self, + root_log_path=None, + console_args=None, + jobid=None, + autohf=None): + if root_log_path: + self.root_log_path = root_log_path + else: + self.root_log_path = "logs_azure" + self.jobid = jobid + self.console_args = console_args + self.autohf = autohf + if console_args: + wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path) + self._container_name = container_name + self._azure_key = azure_key + + def _get_complete_connection_string(self): + return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \ + + self._azure_key + ";EndpointSuffix=core.windows.net" + + def _init_azure_clients(self): + connection_string = self._get_complete_connection_string() + container_client = ContainerClient.from_connection_string(conn_str=connection_string, + container_name=self._container_name) + return container_client + + def _init_blob_client(self, + local_file_path): + connection_string = self._get_complete_connection_string() + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path) + return blob_client + + def upload_local_file_to_azure(self, local_file_path): + blob_client = self._init_blob_client(local_file_path) + with open(local_file_path, "rb") as fin: + blob_client.upload_blob(fin, overwrite=True) + + def download_azure_blob(self, blobname): + blob_client = self._init_blob_client(blobname) + pathlib.Path(re.search("(?P^.*)/[^/]+$", blobname).group("parent_path")).mkdir( + parents=True, exist_ok=True) + with open(blobname, "wb") as fout: + fout.write(blob_client.download_blob().readall()) + + def write_exception(self): + result_json = { + "timestamp": datetime.now(), + } + local_file_path = self.generate_local_json_path() + self.create_local_json_and_upload(result_json, local_file_path) + + def extract_log_from_analysis(self, + analysis): + """ + Extracting a json object for storing the key information returned from tune.run + """ + json_log = [] + for each_trial in analysis.trials: + trial_id = each_trial.trial_id + start_time = each_trial.start_time + last_update_time = each_trial.last_update_time + config = each_trial.config + try: + metric_score = 
each_trial.metric_analysis["eval_" + analysis.default_metric] + time_stamp = each_trial.metric_analysis['timestamp'] + json_log.append({"trial_id": trial_id, + "start_time": start_time, + "last_update_time": last_update_time, + "config": config, + "metric_score": metric_score, + "time_stamp": time_stamp}) + except KeyError: + pass + return json_log + + def write_autohf_output(self, + json_log=None, + valid_metric=None, + predictions=None, + duration=None): + """ + write the key info from a job and upload to azure blob storage + """ + local_file_path = self.generate_local_json_path() + output_json = {} + if json_log: + output_json["val_log"] = json_log + if valid_metric: + output_json["valid_metric"] = valid_metric + if duration: + output_json["duration"] = duration + if len(output_json) > 0: + self.create_local_json_and_upload(output_json, local_file_path) + if predictions is not None: + self.create_local_prediction_and_upload(local_file_path, predictions) + + def generate_local_json_path(self): + """ + return a path string for storing the json file locally + """ + full_dataset_name = self.jobid.get_jobid_full_data_name() + jobid_str = self.jobid.to_jobid_string() + local_file_path = os.path.join(self.root_log_path, full_dataset_name, jobid_str + ".json") + pathlib.Path(os.path.join(self.root_log_path, full_dataset_name)).mkdir(parents=True, exist_ok=True) + return local_file_path + + def create_local_json_and_upload(self, result_json, local_file_path): + with open(local_file_path, "w") as fout: + fout.write(json.dumps(result_json)) + fout.flush() + self.upload_local_file_to_azure(local_file_path) + + def legacy_to_json(self): + container_client = self._init_azure_clients() + for old_blob in container_client.list_blobs(): + new_jobid_str = self.jobid.legacy_old_blobname_to_new_blobname(old_blob.name) + if new_jobid_str: + self.download_azure_blob(old_blob.name) + with open(old_blob.name, "r") as fin: + alllines = fin.readlines() + wandb_group_name = alllines[0].rstrip("\n:") + timestamp = re.search( + r"timestamp:(?P.*):", + alllines[1].strip("\n")).group("timestamp") + duration = re.search( + r"duration:(?P.*)$", + alllines[3].strip("\n")).group("duration") + sample_num = int(re.search( + r"sample_num: (?P\d+)$", + alllines[4].strip("\n")).group("sample_num")) + validation = {"accuracy": float(re.search( + "validation accuracy: (?P.*)$", + alllines[2].strip("\n")).group("validation"))} + test = None + if len(alllines) > 6: + result_test = re.search("test accuracy:(?P.*)$", alllines[6].strip("\n")) + if result_test: + test = json.loads(result_test.group("test")) + yml_file = None + if len(alllines) > 8: + if alllines[8].startswith("aml"): + yml_file = alllines[8].strip("\n") + new_json = {"wandb_group_name": wandb_group_name, + "validation": validation, + "test": test, + "timestamp": timestamp, + "duration": duration, + "sample_num": sample_num, + "yml_file": yml_file} + full_dataset_name = self.jobid.get_jobid_full_data_name() + new_blobname = os.path.join("logs_azure/", full_dataset_name, new_jobid_str + ".json") + self.create_local_json_and_upload(new_json, new_blobname) + + def create_local_prediction_and_upload(self, + local_json_file, + predictions): + """ + store predictions (a .zip file) locally and upload + """ + azure_save_file_name = local_json_file.split("/")[-1][:-5] + local_archive_path = self.autohf.output_prediction(predictions, + output_prediction_path=self.console_args.data_root_dir + "result/", + output_zip_file_name=azure_save_file_name) + 
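# upload the zipped predictions under the same jobid-derived name as the metrics json
+ # (the ".json" suffix was stripped above to form azure_save_file_name) +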
self.upload_local_file_to_azure(local_archive_path) + + def get_ranked_configs(self, metric_mode): + """ + extract the configs (ranked in descebding order by the score) for the azure file of the current object + (defined by self.jobid_config) + """ + azure_file_path = self.generate_local_json_path() + self.download_azure_blob(azure_file_path) + + json_log = json.load(open(azure_file_path, "r")) + assert "val_log" in json_log + + trialid_to_score = {} + trialid_to_config = {} + + for each_entry in json_log["val_log"]: + trial_id = each_entry["trial_id"] + config = each_entry["config"] + this_score = each_entry["metric_score"][metric_mode] + trialid_to_config[trial_id] = config + trialid_to_score[trial_id] = this_score + + sorted_trialid_to_score = sorted(trialid_to_score.items(), key=lambda x: x[1], reverse=True) + return [trialid_to_config[entry[0]] for entry in sorted_trialid_to_score] + + @staticmethod + def is_after_earliest_time(this_blob, earliest_time): + import pytz + utc = pytz.UTC + if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])): + return True + return False + + def get_blob_list_matching_partial_jobid(self, root_log_path, partial_jobid, earliest_time=None): + """ + get all blobs whose jobid configs match the partial_jobid + """ + blob_list = [] + container_client = self._init_azure_clients() + jobid_config = JobID() + for each_blob in container_client.list_blobs(): + if each_blob.name.startswith(root_log_path): + each_jobconfig = jobid_config.convert_blobname_to_jobid(each_blob.name) + is_append = False + if each_jobconfig: + if each_jobconfig.is_match(partial_jobid): + is_append = True + if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time): + is_append = False + if is_append: + blob_list.append((each_jobconfig, each_blob)) + return blob_list + + @staticmethod + def extract_config_and_score(blobname): + data_json = json.load(open(blobname, "r")) + return [(x['config'], x['metric_score']["max"], x['start_time']) for x in data_json['val_log']] + + def get_config_and_score_from_partial_jobid(self, + root_log_path, + partial_jobid, + group_attrs, + method, + earliest_time=None): + """ + get the best config and best score for each job matching the partial_jobid + """ + matched_blob_list = self.get_blob_list_matching_partial_jobid( + root_log_path, + partial_jobid, + earliest_time=earliest_time) + group_dict = {} + for (each_jobconfig, each_blob) in matched_blob_list: + self.download_azure_blob(each_blob.name) + config_and_score = AzureUtils.extract_config_and_score(each_blob.name) + if method == "unsorted": + sorted_config_and_score = config_and_score + elif method == "sort_time": + sorted_config_and_score = sorted(config_and_score, key=lambda x: x[2], reverse=False) + else: + sorted_config_and_score = sorted(config_and_score, key=lambda x: x[1], reverse=True) + group_attr_list = [] + for each_attr in group_attrs: + group_val = getattr(each_jobconfig, each_attr) + if isinstance(group_val, list): + group_attr_list.append(JobID.dataset_list_to_str(group_val, each_attr)) + else: + group_attr_list.append(group_val) + group_attr_tuple = tuple(group_attr_list) + group_dict.setdefault(group_attr_tuple, []) + group_dict[group_attr_tuple].append([(config, score, each_blob.name) + for (config, score, ts) in sorted_config_and_score]) + return group_dict + + def get_validation_perf(self, console_args=None, partial_jobid_config=None): + """ + get the validation score for all blobs matching the 
partial_jobid_config + """ + if partial_jobid_config.pre == "electra": + dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2", "qnli", "mnli"] + else: + dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2"] + dataset_vallist1 = [0] * len(dataset_namelist) + dataset_vallist2 = [0] * len(dataset_namelist) + + matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, + partial_jobid_config) + for (each_jobconfig, each_blob) in matched_blob_list: + subdat_name = each_jobconfig.subdat + self.download_azure_blob(each_blob.name) + data_json = json.load(open(each_blob.name, "r")) + print(len(data_json["val_log"])) + validation_metric = data_json['valid_metric'] + try: + dataset_idx = dataset_namelist.index(subdat_name) + dataset_vallist1[dataset_idx], dataset_vallist2[dataset_idx] \ + = self.get_validation_metricstr(validation_metric) + except ValueError: + pass + # print(" & ".join(dataset_vallist1)) + # print(", ,".join(dataset_vallist2)) + + def get_validation_metricstr(self, validation_metric): + """ + get a string representing validations for pasting to Google spreadsheet + """ + validation_str1 = validation_str2 = "" + is_first = True + for key in ["f1", "accuracy", "pearson", "spearmanr", "matthews_correlation"]: + if "eval_" + key in validation_metric.keys(): + if is_first: + validation_str1 += str("%.1f" % (validation_metric["eval_" + key] * 100)) + validation_str2 += str(validation_metric["eval_" + key] * 100) + is_first = False + else: + validation_str1 += "/" + str("%.1f" % (validation_metric["eval_" + key] * 100)) + validation_str2 += "," + str(validation_metric["eval_" + key] * 100) + return validation_str1, validation_str2 + + def get_test_perf(self, partial_jobid_config=None, result_root_dir=None): + """ + get the test scores for all blobs matching the partial_jobid_config + """ + import shutil + from flaml.nlp.dataset.submission_auto import file_name_mapping_glue, output_blank_tsv + matched_blob_list = self.get_blob_list_matching_partial_jobid("data/", partial_jobid_config) + partial_jobid_str = partial_jobid_config.to_partial_jobid_string() + output_dir = os.path.join(result_root_dir, partial_jobid_str) + if os.path.exists(output_dir): + assert os.path.isdir(output_dir) + else: + os.mkdir(output_dir) + output_blank_tsv(output_dir) + + for (each_jobconfig, each_blob) in matched_blob_list: + subdat_name = each_jobconfig.subdat + self.download_azure_blob(each_blob.name) + import zipfile + if os.path.exists(each_blob.name[:-4]): + assert os.path.isdir(each_blob.name[:-4]) + else: + os.mkdir(each_blob.name[:-4]) + with zipfile.ZipFile(each_blob.name, 'r') as zip_ref: + zip_ref.extractall(each_blob.name[:-4]) + src = os.path.join(each_blob.name[:-4], file_name_mapping_glue[subdat_name][0]) + dst = os.path.join(output_dir, file_name_mapping_glue[subdat_name][0]) + shutil.copy(src, dst) + shutil.make_archive(os.path.join(output_dir), 'zip', output_dir) + + def get_best_perf_config(self, console_args, jobid_config): + """ + get the config of the best performed trial + """ + matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, jobid_config) + try: + assert len(matched_blob_list) == 1 + except AssertionError: + import pdb + pdb.set_trace() + + each_jobconfig, each_blob = matched_blob_list[0] + self.download_azure_blob(each_blob.name) + data_json = json.load(open(each_blob.name, "r")) + + sorted_entries = sorted(data_json['val_log'], key=lambda x: x['metric_score']['max'], reverse=True) 
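+ # val_log entries are ranked by the best ("max") metric value each trial reached,
+ # so the first entry after sorting is the best-performing trial of this job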
+ best_config = sorted_entries[0]['config'] + if jobid_config.subdat != "mrpc": + best_score = sorted_entries[0]['metric_score']['max'] + else: + best_score = (data_json["valid_metric"]["eval_f1"], data_json["valid_metric"]["eval_accuracy"]) + return best_config, best_score diff --git a/flaml/nlp/result_analysis/generate_result_summary.py b/flaml/nlp/result_analysis/generate_result_summary.py new file mode 100644 index 000000000..efea1641a --- /dev/null +++ b/flaml/nlp/result_analysis/generate_result_summary.py @@ -0,0 +1,357 @@ +def extract_ranked_config_score(console_args, partial_config_dict): + from .azure_utils import AzureUtils + azure_utils = AzureUtils(console_args=console_args) + + for method, each_partial_config in partial_config_dict.items(): + dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(each_partial_config, + ["dat", "subdat"], method) + for each_dataset, configscorelist in dataset2configscorelist.items(): + for config_idx in range(len(configscorelist)): + avg_scores = configscorelist[config_idx][0][1] + top_config = configscorelist[config_idx][0][0] + print(avg_scores) + print(top_config) + # print(method + "," + str(each_dataset) + ",rep=" + str(config_idx)) + # print("avg score :" + str(avg_scores)) + # print(''.join(['{0}={1}\n'.format(key, top_config[key]) for key in sorted(top_config.keys())])) + + +def extract_sorted_config_list(dataset2configscorelist, topk): + dataset2topkconfigs = {} + for dataset, configscorelist in dataset2configscorelist.items(): + all_configscorelist = [] + for scorelist in configscorelist: + for item in scorelist: + if item[0] not in [x[0] for x in all_configscorelist]: + all_configscorelist.append(item) + sorted_all_configscorelist = sorted(all_configscorelist, key=lambda x: x[1], reverse=True) + topk_configs = [] + + for each_hp in ("learning_rate", "num_train_epochs", "per_device_train_batch_size", "warmup_ratio", + "weight_decay", "adam_epsilon"): + topk_configs.append((each_hp, [sorted_all_configscorelist[x][0][each_hp] for x in range(topk)])) + topk_configs.append(("perf", [sorted_all_configscorelist[x][1] for x in range(topk)])) + + dataset2topkconfigs[dataset] = topk_configs + return dataset2topkconfigs + + +def dict2tuple(this_dict): + tuple_list = [] + for key in sorted(this_dict.keys()): + tuple_list.append(this_dict[key]) + return tuple(tuple_list) + + +def merge_configscore_list(small_dataset2configscorelist): + dataset2merged_configscorelist = {} + for (dataset, each_configscore_list) in small_dataset2configscorelist.items(): + merged_configscore_list = {} + for rep_id in range(len(each_configscore_list)): + for each_configscore_entry in each_configscore_list[rep_id]: + is_exist = False + for configscore in merged_configscore_list.keys(): + if configscore[0] == each_configscore_entry[0]: + is_exist = True + break + if is_exist is False: + merged_configscore_list[dict2tuple(each_configscore_entry[0])] = each_configscore_entry[1] + dataset2merged_configscorelist[dataset] = merged_configscore_list + return dataset2merged_configscorelist + + +def get_result(console_args, partial_jobid_config): + from .azure_utils import AzureUtils, JobID + azure_utils = AzureUtils(console_args=console_args) + dataset2configscorelist = \ + azure_utils.get_config_and_score_from_partial_config( + console_args.azure_root_log_path, + partial_jobid_config, + ["dat", "subdat"], + "hpo") + for dataset, configscore_list in dataset2configscorelist.items(): + for rep_id in range(len(configscore_list)): + config_dict = 
configscore_list[rep_id][0][0] + score = configscore_list[rep_id][0][1] + print(dataset, rep_id) + print_config(config_dict) + print(score) + print() + + +def print_config(config_dict): + for key in sorted(config_dict.keys()): + if key in ("attention_probs_dropout_prob", "hidden_dropout_prob", "seed"): + continue + if key == "per_device_train_batch_size": + short_key = "batch_size" + elif key == "num_train_epochs": + short_key = "epochs" + else: + short_key = key + print(short_key, config_dict[key]) + + +def compare_small_vs_large(console_args): + from .azure_utils import AzureUtils, JobID + azure_utils = AzureUtils(console_args=console_args) + + partial_jobid_config = JobID() + partial_jobid_config.pre = "deberta" + partial_jobid_config.mod = "hpo" + partial_jobid_config.spa = "uni" + partial_jobid_config.presz = "base" + + small_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config, + ["dat", "subdat"], "list") + + small_mergedconfiglist = merge_configscore_list(small_dataset2configscorelist) + + partial_jobid_config = JobID() + partial_jobid_config.pre = "deberta" + partial_jobid_config.mod = "hpo" + partial_jobid_config.spa = "uni" + partial_jobid_config.presz = "large" + + large_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config, + ["dat", "subdat"], "hpo") + + large_mergedconfiglist = merge_configscore_list(large_dataset2configscorelist) + + for (each_dataset, merged_small_configlist) in small_mergedconfiglist.items(): + merged_large_configlist = large_mergedconfiglist[each_dataset] + print(each_dataset) + print() + for (each_tuple, large_score) in sorted(merged_large_configlist.items(), key=lambda x: x[1], reverse=True): + # small_score = merged_small_configlist[each_tuple] + is_in_onlysmall = each_tuple in small_mergedconfiglist[each_dataset] + for each_val in each_tuple: + print(each_val, end=", ") + print(large_score, is_in_onlysmall, sep=",") + print() + for (each_tuple, small_score) in \ + sorted(small_mergedconfiglist[each_dataset].items(), key=lambda x: x[1], reverse=True): + is_in_large = each_tuple in large_mergedconfiglist[each_dataset] + for each_val in each_tuple: + print(each_val, end=", ") + print(small_score, is_in_large, sep=",") + + +def check_conflict(console_args, partial_jobid_config_list): + from .azure_utils import AzureUtils, JobID + azure_utils = AzureUtils(console_args=console_args) + for each_partial_config in partial_jobid_config_list: + dataset2configscorelist = \ + azure_utils.get_config_and_score_from_partial_config( + console_args.azure_root_log_path, + each_partial_config, + ["dat", "subdat"], + "unsorted") + for (dataset, configscorelists) in dataset2configscorelist.items(): + config2score = {} + for each_configscorelist in configscorelists: + for (config, score, blobname) in each_configscorelist: + config_dict = dict2tuple(config) + try: + config2score[config_dict].append((score, blobname)) + except KeyError: + config2score.setdefault(config_dict, []) + config2score[config_dict].append((score, blobname)) + dup_keys = [config for config in config2score.keys() if len(config2score[config]) > 1] + dupkey_count = [len(set([y[0] for y in config2score[x]])) for x in dup_keys] + print(dataset) + print(len(config2score)) + print(len(dupkey_count)) + print(dupkey_count) + + +def print_cfo(console_args): + from .azure_utils import JobID, AzureUtils + jobid_config = JobID() + jobid_config.mod = "bestnn" + jobid_config.spa = "buni" + jobid_config.alg = "bs" + 
jobid_config.pre = "funnel" + jobid_config.presz = "xlarge" + + for each_rep in range(3): + jobid_config.rep = each_rep + azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config) + + dataset2configscorelist = \ + azure_utils.get_config_and_score_from_partial_config( + console_args.azure_root_log_path, + jobid_config, + ["dat", "subdat"], + "sort_time") + dataset = ('glue', 'mrpc') + configscorelist = dataset2configscorelist[dataset] + count = 0 + print(dataset) + for (config, score, blobname) in sorted(configscorelist[0], key=lambda x: x[1], reverse=True)[0:1]: + print(count) + print(score) + print_config(config) + print() + count += 1 + + +def download_validation(console_args, result_root_dir): + from .azure_utils import JobID, AzureUtils + partial_jobid_config = JobID() + partial_jobid_config.mod = "grid" + partial_jobid_config.pre = "roberta" + partial_jobid_config.presz = "base" + # partial_jobid_config.alg = "optuna" + # partial_jobid_config.pru = "asha" + partial_jobid_config.rep = 0 + + azure_utils = AzureUtils(console_args=console_args, jobid=partial_jobid_config) + azure_utils.get_validation_perf(console_args=console_args, partial_jobid_config=partial_jobid_config) + azure_utils.get_test_perf(partial_jobid_config, result_root_dir) + + +def get_result_str(jobid_config, val_score, test_score, best_config, subdat2config=None, mode="grid"): + result_str = jobid_config.subdat.upper() + "," + if jobid_config.alg: + result_str += jobid_config.alg.upper().replace("OPTUNA", "Optuna") + if jobid_config.pru is not None and jobid_config.pru != "None": + result_str += "+" + jobid_config.pru.upper() + if jobid_config.subdat != "mrpc": + result_str += ",rep " + str(jobid_config.rep) + " & " + str( + "%.1f" % (val_score * 100)) + " & " + str(test_score) + else: + result_str += ",rep " + str(jobid_config.rep) + " & " + str( + "%.1f" % (val_score[0] * 100)) + "/" + str( + "%.1f" % (val_score[1] * 100)) + " & " + str(test_score) + for hp in ["learning_rate", "warmup_ratio", "per_device_train_batch_size", "hidden_dropout", "attention_dropout", + "weight_decay"]: + if hp not in best_config: + result_str += " & " + else: + if mode == "hpo": + if best_config[hp] > 1.2 * subdat2config[jobid_config.subdat][hp]: + wrap_left = "\\cellcolor{green!85}{" + elif best_config[hp] > subdat2config[jobid_config.subdat][hp]: + wrap_left = "\\cellcolor{green!15}{" + elif best_config[hp] < subdat2config[jobid_config.subdat][hp] / 1.2: + wrap_left = "\\cellcolor{red!85}{" + else: + wrap_left = "\\cellcolor{red!15}{" + wrap_right = "}" + else: + wrap_left = wrap_right = "" + if hp == "per_device_train_batch_size" or hp == "learning_rate": + wrap_left = wrap_right = "" + if hp == "learning_rate": + result_str += " & " + wrap_left + "{:.1e}".format(best_config[hp]) + wrap_right + elif hp == "per_device_train_batch_size": + result_str += " & " + wrap_left + str(best_config[hp]) + wrap_right + else: + result_str += " & " + wrap_left + str("%.3f" % best_config[hp]) + wrap_right + return result_str + "\\\\" + + +def extract_grid(console_args, jobid_config, overfitting_subdat, test_scores): + from .azure_utils import JobID, AzureUtils + key2printstr = {} + subdat2config = {} + for idx in range(len(overfitting_subdat)): + jobid_config.subdat = overfitting_subdat[idx] + jobid_config.mod = "grid" + jobid_config.rep = 0 + azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config) + best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config) + best_config["hidden_dropout"] = 0.1 
+ best_config["attention_dropout"] = 0.1 + test_score = test_scores[idx] + key2printstr[jobid_config.subdat.upper() + ", grid"] = get_result_str(jobid_config, val_score, + test_score, best_config) + subdat2config[jobid_config.subdat] = best_config + print() + for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]): + print(printstr) + return subdat2config + + +def extract_hpo( + console_args, + jobid_config, + overfitting_subdat, + overfitting_alg, + overfitting_pru, + overfitting_rep, + subdat2config, + test_scores): + from .azure_utils import AzureUtils + key2printstr = {} + for idx in range(len(overfitting_subdat)): + jobid_config.subdat = overfitting_subdat[idx] + jobid_config.alg = overfitting_alg[idx] + jobid_config.pru = overfitting_pru[idx] + jobid_config.rep = overfitting_rep[idx] + azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config) + best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config) + test_score = test_scores[idx] + key2printstr[jobid_config.subdat.upper() + "," + jobid_config.alg.upper() + "," + + jobid_config.pru + ",rep " + str(jobid_config.rep)] \ + = get_result_str(jobid_config, val_score, test_score, best_config, subdat2config, mode="hpo") + + for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]): + print(printstr) + + +def extract_roberta_overfitting_configs(console_args): + from .azure_utils import JobID, AzureUtils + jobid_config = JobID() + jobid_config.pre = "roberta" + jobid_config.presz = "base" + + overfitting_subdat = ["rte", "mrpc", "cola", "sst2", "stsb"] + test_scores = ["73.1", "91.4/88.5", "61.4", "96", "89.5/88.7"] + subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores) + + jobid_config = JobID() + jobid_config.pre = "roberta" + jobid_config.presz = "base" + + overfitting_subdat = ["rte", "rte", "rte", "mrpc", "mrpc", "mrpc", "sst2", + "rte", "mrpc", "mrpc", "stsb", "sst2", "sst2", + "rte", "rte", "mrpc", "mrpc", "sst2", "sst2"] + overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "rs", "rs", + "rs", "rs", "rs", "rs", "rs", "rs", + "optuna", "optuna", "optuna", "optuna", "optuna", "optuna"] + overfitting_pru = ["None", "None", "None", "None", "None", "None", "None", + "asha", "asha", "asha", "asha", "asha", "asha", + "asha", "asha", "asha", "asha", "asha", "asha"] + overfitting_rep = [0, 1, 2, 0, 1, 2, 0, + 1, 0, 2, 2, 1, 2, + 1, 2, 0, 1, 1, 2] + test_scores = ["71.5", "72.3", "72.2", "90.5/87.1", "90.5/87.4", "90.5/87.2", "95.6", + "72.4", "90.7/87.4", "91.0/87.9", "89.4/88.8", "95.2", "95.7", + "72.4", "72.4", "90.8/87.4", "90.3/86.5", "95.1", "95.8"] + extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep, + subdat2config, test_scores) + + +def extract_electra_overfitting_configs(console_args): + from .azure_utils import JobID, AzureUtils + jobid_config = JobID() + jobid_config.pre = "electra" + jobid_config.presz = "base" + + overfitting_subdat = ["rte", "qnli", "cola"] + test_scores = ["74.4", "93.2", "64.8"] + subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores) + + jobid_config = JobID() + jobid_config.pre = "electra" + jobid_config.presz = "base" + + overfitting_subdat = ["rte", "rte", "qnli", "cola", "qnli", "cola"] + overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "optuna"] + overfitting_pru = ["None", "None", "None", "asha", "asha", "asha"] + overfitting_rep = [0, 1, 0, 2, 0, 0] + test_scores = ["73.8", "74.3", "92.8", "64.7", "92.9", 
"63.6"] + extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep, + subdat2config, test_scores) diff --git a/flaml/nlp/result_analysis/wandb_utils.py b/flaml/nlp/result_analysis/wandb_utils.py new file mode 100644 index 000000000..84535dc3d --- /dev/null +++ b/flaml/nlp/result_analysis/wandb_utils.py @@ -0,0 +1,71 @@ +import os +from ..utils import get_wandb_azure_key +import subprocess +import wandb +import hashlib +from time import time + + +class WandbUtils: + + # Documentation on the wandb setting: + # There are two ways to initialize wandb in tune.run: + # (1) using WandbLoggerCallback, by adding the following argument to tune.run: + # callbacks=[WandbLoggerCallback( + # project="hpo", + # api_key = os.environ["WANDB_API_KEY"], + # group = os.environ["WANDB_RUN_GROUP"], + # log_config=True)] + # (2) using wandb_mixin decorator (the current implementation) + # The current implementation uses (2) because (1) has the following bug. + # In Ray 1.2, when using WandbLoggerCallback + setting time limit using the time_budget_s argument, + # A bug exists which is the previous run will not clear the cache after tune.run returns. After the + # later run has already starts, some zombie trials in the previous run remain in the memory and never stop. + # This bug can be reproduced by switching to (1) by adding the above callbacks argument + # and removing the wandb_mixin decorator + # https://docs.ray.io/en/master/tune/tutorials/tune-wandb.html + + def __init__(self, + is_wandb_on=None, + console_args=None, + jobid_config=None): + if is_wandb_on: + wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path) + subprocess.run(["wandb", "login", "--relogin", wandb_key]) + os.environ["WANDB_API_KEY"] = wandb_key + os.environ["WANDB_MODE"] = "online" + else: + os.environ["WANDB_MODE"] = "disabled" + self.jobid_config = jobid_config + + def set_wandb_per_trial(self): + print("before wandb.init\n\n\n") + if os.environ["WANDB_MODE"] == "online": + os.environ["WANDB_SILENT"] = "false" + return wandb.init(project=self.jobid_config.get_jobid_full_data_name(), + group=self.wandb_group_name, + name=str(WandbUtils._get_next_trial_ids()), + settings=wandb.Settings( + _disable_stats=True), + reinit=False) + else: + return None + + @staticmethod + def _get_next_trial_ids(): + hash = hashlib.sha1() + hash.update(str(time()).encode('utf-8')) + return "trial_" + hash.hexdigest()[:3] + + def set_wandb_per_run(self): + os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id() + self.wandb_group_name = os.environ["WANDB_RUN_GROUP"] + if os.environ["WANDB_MODE"] == "online": + os.environ["WANDB_SILENT"] = "false" + return wandb.init(project=self.jobid_config.get_jobid_full_data_name(), + group=os.environ["WANDB_RUN_GROUP"], + settings=wandb.Settings( + _disable_stats=True), + reinit=False) + else: + return None diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py new file mode 100644 index 000000000..c6f11f787 --- /dev/null +++ b/flaml/nlp/utils.py @@ -0,0 +1,155 @@ +import argparse +import json +import os +import pathlib +import re +from dataclasses import dataclass, field + + +def dataset_subdataset_name_format_check(val_str): + regex = re.compile(r"^[^:]*:[^:]*$") + if not regex.match(val_str): + raise argparse.ArgumentTypeError("dataset_subdataset_name must be in the format {data_name}:{subdata_name}") + return val_str + + +def pretrained_model_size_format_check(val_str): + regex = 
re.compile(r"^[^:]*:(small|base|large|xlarge)") + if not regex.match(val_str): + raise argparse.ArgumentTypeError("pretrained_model_size must be in the format {model_name}:{model_size}," + "where {model_name} is the name from huggingface.co/models, {model_size}" + "is chosen from small, base, large, xlarge") + return val_str + + +def load_console_args(**custom_data_args): + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument('--server_name', type=str, help='server name', required=False, + choices=["tmdev", "dgx", "azureml"], default="tmdev") + arg_parser.add_argument('--algo_mode', type=str, help='hpo or grid search', required=False, + choices=["grid", "gridbert", "hpo", "hfhpo", "list_s", "list", "bestnn"], default="hpo") + arg_parser.add_argument('--data_root_dir', type=str, help='data dir', required=False, default="data/") + arg_parser.add_argument('--dataset_subdataset_name', type=dataset_subdataset_name_format_check, + help='dataset and subdataset name', required=False, default=None) + arg_parser.add_argument('--space_mode', type=str, help='space mode', required=False, + choices=["gnr", "uni", "uni_test", "cus", "buni"], default="uni") + arg_parser.add_argument('--search_alg_args_mode', type=str, help='search algorithm args mode', required=False, + choices=["dft", "exp", "cus"], default="dft") + arg_parser.add_argument('--algo_name', type=str, help='algorithm', required=False, + choices=["bs", "optuna", "cfo", "rs"], default="bs") + arg_parser.add_argument('--pruner', type=str, help='pruner', required=False, + choices=["asha", "None"], default="None") + arg_parser.add_argument('--pretrained_model_size', type=pretrained_model_size_format_check, + help='pretrained model', required=False, default=None) + arg_parser.add_argument('--sample_num', type=int, help='sample num', required=False, default=None) + arg_parser.add_argument('--time_budget', type=int, help='time budget', required=False, default=None) + arg_parser.add_argument('--time_as_grid', type=int, help='time as grid search', required=False, default=None) + arg_parser.add_argument('--rep_id', type=int, help='rep id', required=False, default=0) + arg_parser.add_argument('--azure_key', type=str, help='azure key', required=False, default=None) + arg_parser.add_argument('--resplit_mode', type=str, help='resplit mode', required=False, + choices=["rspt", "ori"], default="ori") + arg_parser.add_argument('--ds_config', type=str, help='deep speed config file path', + required=False, default=None) + arg_parser.add_argument('--yml_file', type=str, help='yml file path', required=False, default="test.yml") + arg_parser.add_argument('--key_path', type=str, help='path for key.json', required=False, default=None) + arg_parser.add_argument('--root_log_path', type=str, help='root path for log', required=False, default="logs_azure") + arg_parser.add_argument('--round_idx', type=int, help='round idx for acl experiments', required=False, default=0) + arg_parser.add_argument('--seed_data', type=int, help='seed of data shuffling', required=False, default=43) + arg_parser.add_argument('--seed_transformers', type=int, help='seed of transformers', required=False, default=42) + args, unknown = arg_parser.parse_known_args() + + for each_key in custom_data_args.keys(): + if args.__contains__(each_key): + try: + check_key_format_func = globals()[each_key + "_format_check"] + check_key_format_func(custom_data_args[each_key]) + except KeyError: + pass + setattr(args, each_key, custom_data_args[each_key]) + return args + + +def 
get_wandb_azure_key(key_path): + key_json = json.load(open(os.path.join(key_path, "key.json"), "r")) + wandb_key = key_json["wandb_key"] + azure_key = key_json["azure_key"] + azure_container_name = key_json["container_name"] + return wandb_key, azure_key, azure_container_name + + +def merge_dicts(dict1, dict2): + for key2 in dict2.keys(): + if key2 in dict1: + dict1_vals = set(dict1[key2]) + dict2_vals = set(dict2[key2]) + dict1[key2] = list(dict1_vals.union(dict2_vals)) + else: + dict1[key2] = dict2[key2] + return dict1 + + +def _check_dict_keys_overlaps(dict1: dict, dict2: dict): + dict1_keys = set(dict1.keys()) + dict2_keys = set(dict2.keys()) + return len(dict1_keys.intersection(dict2_keys)) > 0 + + +def _variable_override_default_alternative(logger, obj_ref, var_name, default_value, all_values, overriding_value=None): + """ + Setting the value of var. If overriding_value is specified, var is set to overriding_value; + If overriding_value is not specified, var is set to default_value meanwhile showing all_values + """ + assert isinstance(all_values, list) + if overriding_value: + setattr(obj_ref, var_name, overriding_value) + logger.warning("The value for {} is specified as {}".format(var_name, overriding_value)) + else: + setattr(obj_ref, var_name, default_value) + logger.warning("The value for {} is not specified, setting it to the default value {}. " + "Alternatively, you can set it to {}".format(var_name, default_value, ",".join(all_values))) + + +@dataclass +class PathUtils: + hpo_ckpt_path: str = field(metadata={"help": "the directory for hpo output"}) + hpo_result_path: str = field(metadata={"help": "the directory for hpo result"}) + hpo_log_path: str = field(metadata={"help": "the directory for log"}) + hpo_config_path: str = field(metadata={"help": "the directory for log"}) + + log_dir_per_run: str = field(metadata={"help": "log directory for each run."}) + result_dir_per_run: str = field(metadata={"help": "result directory for each run."}) + ckpt_dir_per_run: str = field(metadata={"help": "checkpoint directory for each run."}) + ckpt_dir_per_trial: str = field(metadata={"help": "checkpoint directory for each trial."}) + + def __init__(self, + jobid_config, + hpo_data_root_path, + ): + self.jobid_config = jobid_config + self.hpo_data_root_path = hpo_data_root_path + self.hpo_ckpt_path = os.path.join(hpo_data_root_path, "checkpoint") + self.hpo_result_path = os.path.join(hpo_data_root_path, "result") + self.hpo_log_path = self.hpo_result_path + + @staticmethod + def init_and_make_one_dir(dir_path): + assert dir_path + if not os.path.exists(dir_path): + pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True) + + def make_dir_per_run(self): + jobid_str = self.jobid_config.to_jobid_string() + self.ckpt_dir_per_run = os.path.join(self.hpo_ckpt_path, jobid_str) + PathUtils.init_and_make_one_dir(self.ckpt_dir_per_run) + + self.result_dir_per_run = os.path.join(self.hpo_result_path, jobid_str) + PathUtils.init_and_make_one_dir(self.result_dir_per_run) + + self.log_dir_per_run = os.path.join(self.hpo_log_path, jobid_str) + PathUtils.init_and_make_one_dir(self.log_dir_per_run) + + def make_dir_per_trial(self, trial_id): + jobid_str = self.jobid_config.to_jobid_string() + ckpt_dir_per_run = os.path.join(self.hpo_ckpt_path, jobid_str) + self.ckpt_dir_per_trial = os.path.join(ckpt_dir_per_run, jobid_str, trial_id) + PathUtils.init_and_make_one_dir(self.ckpt_dir_per_trial) diff --git a/notebook/flaml_autohf.ipynb b/notebook/flaml_autohf.ipynb new file mode 100644 index 
000000000..7985500aa --- /dev/null +++ b/notebook/flaml_autohf.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "1. Electra Example" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/setup.py b/setup.py index 5567f162c..167839653 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,11 @@ setuptools.setup( "optuna==2.3.0", "vowpalwabbit", "openml", + "transformers==4.4.1", + "wandb==0.10.26", + "torch==1.8.1", + "datasets==1.4.1", + "azure-storage-blob" ], "blendsearch": [ "optuna==2.3.0" diff --git a/test/hf/run_analysis.py b/test/hf/run_analysis.py new file mode 100644 index 000000000..655780a5a --- /dev/null +++ b/test/hf/run_analysis.py @@ -0,0 +1,75 @@ +'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray] +''' +# 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 +import argparse +from flaml.nlp.result_analysis.azure_utils import JobID + + +def create_partial_config_bestnn(): + jobid_config = JobID() + # funnel xlarge + # jobid_config.mod = "bestnn" + jobid_config.spa = "uni" + # jobid_config.arg = "cus" + # jobid_config.alg = "cfo" + jobid_config.pre = "funnel" + jobid_config.presz = "xlarge" + # funnel small + # jobid_config.mod = "list" + # jobid_config.pre = "funnel" + # jobid_config.presz = "small" + # jobid_config.rep = 0 + + # # deberta large + # jobid_config.mod = "bestnn" + # jobid_config.spa = "uni" + # jobid_config.arg = "cus" + # jobid_config.alg = "cfo" + # jobid_config.pre = "deberta" + # jobid_config.presz = "large" + + # # deberta base + # jobid_config.mod = "hpo" + # jobid_config.pre = "deberta" + # jobid_config.presz = "base" + # jobid_config.rep = 0 + + # # deberta large + # jobid_config.mod = "hpo" + # jobid_config.pre = "deberta" + # jobid_config.presz = "large" + + return jobid_config + + +def create_partial_config_list(): + jobid_config = JobID() + jobid_config.mod = "list" + jobid_config.spa = "uni" + jobid_config.presz = "xlarge" + return jobid_config + + +def create_partial_config_hpo(): + jobid_config = JobID() + jobid_config.mod = "hpo" + jobid_config.spa = "uni" + return jobid_config + + +if __name__ == "__main__": + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../") + arg_parser.add_argument('--azure_root_log_path', type=str, + help='root log path of blob storage', required=True, default="logs_azure/") + args = arg_parser.parse_args() + + partial_config_large = create_partial_config_bestnn() + from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \ + print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs + + # get_result(args, partial_config_large) + # check_conflict(args, [partial_config_large]) + download_validation(args, "/data/xliu127/projects/hyperopt/data/result/") + + # extract_roberta_overfitting_configs(args) diff --git 
a/test/hf/run_autohf.py b/test/hf/run_autohf.py new file mode 100644 index 000000000..7d0256949 --- /dev/null +++ b/test/hf/run_autohf.py @@ -0,0 +1,285 @@ +'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray] +''' +# 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 +import os +import shutil + +from flaml.nlp import AutoTransformers +from flaml.nlp import AzureUtils, JobID +from flaml.nlp.utils import load_console_args + +global azure_log_path +global azure_key + + +def get_resplit_portion(jobid_config): + if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}: + return {"source": ["train", "validation_matched"], "train": [0, 0.8], "validation": [0.8, 0.9], + "test": [0.9, 1.0]} + else: + return {"source": ["train", "validation"], "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]} + + +def get_preparedata_setting(args, jobid_config): + preparedata_setting = { + "server_name": args.server_name, + "data_root_path": args.data_root_dir, + "max_seq_length": 128, + "jobid_config": jobid_config, + "is_wandb_on": True + } + if jobid_config.spt == 'rspt': + preparedata_setting["resplit_portion"] = get_resplit_portion(jobid_config) + if ("albert" == jobid_config.pre and jobid_config.dat == ["squad"]) or \ + ("funnel" in jobid_config.pre and jobid_config.dat[0] in {"imdb", "yelp_review_full", "yelp_polarity", + "amazon_polarity", "amazon_review_multi"}): + preparedata_setting["max_seq_length"] = 512 + if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli": + preparedata_setting["fold_name"] = ['train', 'validation_matched', 'test_matched'] + return preparedata_setting + + +def get_autohf_settings(args, **custom_args): + autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1}, + "num_samples": args.sample_num, + "time_budget": args.time_budget, + "ckpt_per_epoch": 1, + } + for other_attr in ["ds_config", "rep_id"]: + if hasattr(args, other_attr): + autohf_settings[other_attr] = getattr(args, other_attr) + else: + autohf_settings[other_attr] = None + if len(custom_args) > 0: + autohf_settings.update(custom_args) + return autohf_settings + + +def rm_home_result(): + from os.path import expanduser + home = expanduser("~") + if os.path.exists(home + "/ray_results/"): + shutil.rmtree(home + "/ray_results/") + + +def get_best_base_config(args, jobid_config, autohf): + import copy + import re + args_small = copy.deepcopy(args) + args_small.algo_name = "optuna" + args_small.search_alg_args_mode = "dft" + args_small.algo_mode = "hpo" + args_small.space_mode = "uni" + args_small.pruner = "None" + + if "funnel" not in args_small.pretrained_model_size: + args_small.algo_mode = "hpo" + else: + args_small.algo_mode = "list" + args_small.sample_num = 10000 + args_small.time_budget = 3600 + args_small.rep_id = 0 + jobid_config_small = JobID(args_small) + if jobid_config_small.pre == "deberta": + jobid_config_small.presz = "base" + else: + jobid_config_small.presz = "small" + jobid_config_small.pre_full = re.sub("(xlarge|large|intermediate)", jobid_config_small.presz, + jobid_config_small.pre_full) + azure_utils_small = AzureUtils( + console_args=args_small, + jobid=jobid_config_small, + autohf=autohf) + preparedata_setting = get_preparedata_setting(args, jobid_config) + autohf.prepare_data(**preparedata_setting) + autohf.set_metric() + + best_config = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)[0] + return best_config + + +def search_base_and_search_lower_lr(args, jobid_config, autohf): + best_config = get_best_base_config(args, 
jobid_config, autohf) + + import copy + args_large = copy.deepcopy(args) + args_large.time_budget = args.time_budget - 3600 + args_large.sample_num = 100000 + args_large.algo_name = args.algo_name + args_large.search_alg_args_mode = "cus" + args_large.space_mode = "buni" + args_large.pruner = "None" + jobid_config_large = JobID(args_large) + jobid_config_large.presz = jobid_config.presz + jobid_config_large.pre_full = jobid_config.pre_full + azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf) + + _test_hpo(args_large, + jobid_config_large, + autohf, + azure_utils_large, + autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config], + "bound": {"learning_rate": { + "u": best_config["learning_rate"]}}})) + + +def search_base_and_search_around_best(args, jobid_config, autohf): + args.algo_name = "bs" + args.search_alg_args_mode = "dft" + args.spa = "uni" + args.pru = "None" + best_config = get_best_base_config(args, jobid_config, autohf) + + import copy + args_large = copy.deepcopy(args) + args_large.time_budget = args.time_budget - 3600 + args_large.sample_num = 100000 + args_large.algo_name = "cfo" + args_large.search_alg_args_mode = "cus" + args_large.space_mode = "uni" + jobid_config_large = JobID(args_large) + jobid_config_large.presz = jobid_config.presz + jobid_config_large.pre_full = jobid_config.pre_full + azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf) + + _test_hpo(args_large, + jobid_config_large, + autohf, + azure_utils_large, + autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config]})) + + +def evaluate_configs(autohf, args, ranked_all_configs): + import copy + this_args = copy.deepcopy(args) + this_args.time_budget = 100000 + this_args.sample_num = int(len(ranked_all_configs)) + this_args.search_alg_args_mode = "cus" + jobid_config = JobID(this_args) + azure_utils_large = AzureUtils(console_args=this_args, jobid=jobid_config, autohf=autohf) + _test_hpo(this_args, + jobid_config, + autohf, + azure_utils_large, + autohf_settings=get_autohf_settings(this_args, **{"points_to_evaluate": ranked_all_configs})) + + +def convert_config_to_different_size(origin_config, mode): + import re + import copy + if mode == "small": + new_config = copy.deepcopy(origin_config) + if new_config.pre == "funnel": + new_config.mod = "list" + else: + new_config.mod = "hpo" + if new_config.pre == "funnel": + new_config.presz = "small" + else: + new_config.presz = "base" + new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full) + elif mode == "large": + new_config = copy.deepcopy(origin_config) + new_config.mod = "hpo" + if new_config.pre == "funnel": + new_config.presz = "xlarge" + new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full) + else: + new_config.presz = "large" + new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full) + + return new_config + + +def evaluate_small_best_configs_on_large(large_args, autohf): + jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small") + jobid_config_small.rep = 0 + azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf) + ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name) + evaluate_configs(large_args, ranked_all_small_configs[:int(len(ranked_all_small_configs) / 2)]) + + +def add_dict_item_to_list(this_list, this_dict): + is_exist = 
len([x for x in this_list if x == this_dict]) > 0 + if not is_exist: + this_list.append(this_dict) + return this_list + + +def evaluate_large_best_configs_on_small(small_args, autohf): + jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large") + autohf.jobid_config = jobid_config_large + autohf.set_metric() + all_configs_from_large = [] + for rep_id in range(3): + jobid_config_large.rep = rep_id + azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf) + ranked_all_large_configs = azure_utils_large.get_ranked_configs(autohf.metric_mode_name) + for each_config in ranked_all_large_configs: + all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config) + jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small") + jobid_config_small.rep = 0 + azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf) + ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name) + for each_config in ranked_all_small_configs: + all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config) + + evaluate_configs(autohf, small_args, list(all_configs_from_large)) + + +def _test_hpo(args, + jobid_config, + autohf, + azure_utils=None, + autohf_settings=None, + ): + try: + if not azure_utils: + azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf) + preparedata_setting = get_preparedata_setting(args, jobid_config) + autohf.prepare_data(**preparedata_setting) + + analysis = validation_metric = test_metric = None + if not autohf_settings: + autohf_settings = get_autohf_settings(args) + if args.algo_mode != "hfhpo": + validation_metric, analysis = autohf.fit(**autohf_settings, ) + else: + autohf.fit_hf(**autohf_settings) + + if jobid_config.spt == "ori": + predictions, test_metric = autohf.predict() + if validation_metric: + test_metric.update({"validation": validation_metric}) + else: + predictions = None + if test_metric: + validation_metric.update({"test": test_metric}) + + if analysis is not None: + json_log = azure_utils.extract_log_from_analysis(analysis) + else: + json_log = None + azure_utils.write_autohf_output(json_log=json_log, + valid_metric=validation_metric, + predictions=predictions, + duration=autohf.last_run_duration) + + except AssertionError: + azure_utils.write_exception() + rm_home_result() + + +if __name__ == "__main__": + autohf = AutoTransformers() + args = load_console_args() + jobid_config = JobID(args) + + if args.algo_mode in ("hpo", "hfhpo", "grid", "gridbert"): + _test_hpo(args, jobid_config, autohf) + elif args.algo_mode == "bestnn": + search_base_and_search_lower_lr(args, jobid_config, autohf) + elif args.algo_mode == "list": + evaluate_small_best_configs_on_large(args, autohf) + elif args.algo_mode == "list_s": + evaluate_large_best_configs_on_small(args, autohf) diff --git a/test/hf/test_mobilebert.py b/test/hf/test_mobilebert.py new file mode 100644 index 000000000..dba768a76 --- /dev/null +++ b/test/hf/test_mobilebert.py @@ -0,0 +1,62 @@ +'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray] +''' +# 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 + +global azure_log_path +global azure_key + + +def get_preparedata_setting(jobid_config): + preparedata_setting = { + "server_name": "tmdev", + "data_root_path": "data/", + "max_seq_length": 128, + "jobid_config": jobid_config, + "resplit_portion": {"source": ["train", "validation"], + 
"train": [0, 0.8], + "validation": [0.8, 0.9], + "test": [0.9, 1.0]} + } + return preparedata_setting + + +def get_autohf_settings(): + autohf_settings = {"resources_per_trial": {"cpu": 1}, + "num_samples": 1, + "time_budget": 100000, + "ckpt_per_epoch": 1, + "fp16": False, + } + return autohf_settings + + +def test_hpo(): + try: + import ray + except ImportError: + return + + from flaml.nlp import AutoTransformers + from flaml.nlp import JobID + + jobid_config = JobID() + jobid_config.set_unittest_config() + autohf = AutoTransformers() + + try: + preparedata_setting = get_preparedata_setting(jobid_config) + autohf.prepare_data(**preparedata_setting) + + autohf_settings = get_autohf_settings() + validation_metric, analysis = autohf.fit(**autohf_settings, ) + + predictions, test_metric = autohf.predict() + if test_metric: + validation_metric.update({"test": test_metric}) + + except AssertionError: + pass + + +if __name__ == "__main__": + test_hpo()