automate huggingface transformer
This commit is contained in:
Xueqing Liu
2021-06-09 11:37:03 -04:00
committed by GitHub
parent e031c2eb7d
commit a4049ad9b6
29 changed files with 4316 additions and 0 deletions

2
.gitignore vendored

@@ -153,3 +153,5 @@ notebook/.azureml
mlruns
logs
automl.pkl
.idea/*


@@ -38,6 +38,13 @@ Tune
    :members:

NLP
------

.. autoclass:: flaml.nlp.AutoTransformers
    :members:

.. Indices and tables
.. ==================

32
flaml/nlp/README.md Normal file

@@ -0,0 +1,32 @@
How to use AutoTransformers:
```python
from flaml.nlp.autotransformers import AutoTransformers

autohf = AutoTransformers()
preparedata_setting = {
    "dataset_subdataset_name": "glue:rte",
    "pretrained_model_size": "electra-base-discriminator:base",
    "data_root_path": "data/",
    "max_seq_length": 128,
}
autohf.prepare_data(**preparedata_setting)

autohf_settings = {
    "resources_per_trial": {"gpu": 1, "cpu": 1},
    "num_samples": -1,  # unlimited sample size
    "time_budget": 3600,
    "ckpt_per_epoch": 1,
    "fp16": False,
}
validation_metric, analysis = autohf.fit(**autohf_settings)
```
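After `fit()` completes, predictions for the test split can be obtained from the same `autohf` instance. A minimal sketch, assuming the resplit ("rspt") mode, in which test labels are available:

```python
predictions, test_metric = autohf.predict()
```

When the original GLUE split is used instead, the test labels are hidden, so `predict()` returns `(predictions, None)`, and `output_prediction()` can be used to prepare a .zip file for GLUE submission.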
The current use cases that are supported:

1. A simplified version of fine-tuning the GLUE dataset using HuggingFace;
2. Selecting a better search space for fine-tuning the GLUE dataset;
3. Using the search algorithms in FLAML for more efficient fine-tuning of HuggingFace models.

The use cases that can be supported in the future:

1. HPO fine-tuning for text generation;
2. HPO fine-tuning for question answering.

2
flaml/nlp/__init__.py Normal file

@@ -0,0 +1,2 @@
from flaml.nlp.autotransformers import AutoTransformers
from flaml.nlp.result_analysis.azure_utils import AzureUtils, JobID


@@ -0,0 +1,852 @@
import json
import os
import torch
import transformers
import wandb
from .dataset.dataprocess_auto import AutoEncodeText
import numpy as np
from ray.tune import CLIReporter
import time
import ray
import datasets
from datasets import load_dataset
from transformers.trainer_utils import IntervalStrategy, HPSearchBackend
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, TrainingArguments
from .dataset.metric_auto import get_default_and_alternative_metric
from .dataset.submission_auto import auto_output_prediction
from .dataset.task_auto import get_default_task
from .hpo.grid_searchspace_auto import AutoGridSearchSpace
from .hpo.hpo_searchspace import AutoHPOSearchSpace
from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
from .utils import PathUtils, _variable_override_default_alternative
from .hpo.searchalgo_auto import AutoSearchAlgorithm
from .hpo.scheduler_auto import AutoScheduler
from .result_analysis.wandb_utils import WandbUtils
from .result_analysis.azure_utils import JobID
from .utils import load_console_args
from .huggingface.trainer import TrainerForAutoTransformers
import logging
transformers.logging.set_verbosity_error()
logger = logging.getLogger(__name__)
logger_formatter = logging.Formatter(
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
'%m-%d %H:%M:%S')
task_list = [
"seq-classification",
"regression",
"question-answering"
]
class AutoTransformers:
'''The AutoTransformers class
Example:
.. code-block:: python
autohf = AutoTransformers()
autohf_settings = {"resources_per_trial": {"cpu": 1},
"num_samples": -1,
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
}
validation_metric, analysis = autohf.fit(**autohf_settings)
'''
@staticmethod
def _convert_dict_to_ray_tune_space(config_json, mode="grid"):
search_space = {}
if mode == "grid":
for each_hp in config_json.keys():
this_config = config_json[each_hp]
assert isinstance(this_config, dict) or isinstance(this_config, list), \
"config of " + each_hp + " must be dict or list"
search_space[each_hp] = ray.tune.grid_search(this_config)
else:
for each_hp in config_json.keys():
this_config = config_json[each_hp]
assert isinstance(this_config, dict) or isinstance(this_config, list), \
"config of " + each_hp + " must be dict or list"
if isinstance(this_config, dict):
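# a dict describes a continuous range with keys "l" (lower), "u" (upper) and "space" ("log", "linear" or "quniform")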
lower = this_config["l"]
upper = this_config["u"]
space = this_config["space"]
if space == "log":
search_space[each_hp] = ray.tune.loguniform(lower, upper)
elif space == "linear":
search_space[each_hp] = ray.tune.uniform(lower, upper)
elif space == "quniform":
search_space[each_hp] = ray.tune.quniform(lower, upper, this_config["interval"])
else:
search_space[each_hp] = ray.tune.choice(this_config)
return search_space
def _set_search_space(self,
**custom_hpo_args):
search_space_dict_hpo = search_space_dict_grid = None
if self.jobid_config.mod == "grid":
search_space_grid_json = AutoGridSearchSpace.from_model_and_dataset_name(self.jobid_config.pre,
self.jobid_config.presz,
self.get_full_data_name(),
self.jobid_config.subdat, "grid")
search_space_dict_grid \
= AutoTransformers._convert_dict_to_ray_tune_space(search_space_grid_json, mode="grid")
search_space_dict_hpo = search_space_dict_grid
if self.jobid_config.mod != "grid" and self.jobid_config.mod != "gridbert":
search_space_hpo_json \
= AutoHPOSearchSpace.from_model_and_dataset_name(logger,
self.jobid_config.spa,
self.jobid_config.pre,
self.jobid_config.presz,
self.get_full_data_name(),
self.jobid_config.subdat,
**custom_hpo_args)
search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="hpo")
elif self.jobid_config.mod == "gridbert":
search_space_hpo_json = AutoGridSearchSpace.from_model_and_dataset_name(
"bert",
"base",
self.get_full_data_name(),
self.jobid_config.subdat, "grid")
search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="grid")
"""
resolve the conflict in search_space_dict_hpo: only one of "max_steps" and "num_train_epochs" can exist
in the search space. If both exist, num_train_epochs is removed. Similarly, if "warmup_steps" and
"warmup_ratio" both exist, warmup_ratio is removed
"""
search_space_dict_hpo = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_hpo)
self._search_space_hpo = search_space_dict_hpo
if self.jobid_config.mod == "grid":
search_space_dict_grid = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_grid)
self._search_space_grid = search_space_dict_grid
else:
self._search_space_grid = None
try:
self.ds_config = custom_hpo_args["ds_config"]
except KeyError:
self.ds_config = None
def _wrapper(self, func, *args):  # call func with the positional args unpacked
return func(*args)
def _get_split_name(self, data_raw, fold_name=None):
if fold_name:
return fold_name
fold_keys = data_raw.keys()
if fold_keys == {"train", "validation", "test"}:
return "train", "validation", "test"
for each_key in fold_keys:
for each_split_name in {"train", "validation", "test"}:
assert not (each_key.startswith(each_split_name) and each_key != each_split_name), \
"Dataset split must be within {}, must be explicitly specified in dataset_config, e.g.," \
"'fold_name': ['train', 'validation_matched', 'test_matched']. Please refer to the example in the " \
"documentation of AutoTransformers.prepare_data()".format(",".join(fold_keys))
return "train", "validation", "test"
def prepare_data(self,
data_root_path,
jobid_config=None,
is_wandb_on=False,
server_name=None,
max_seq_length=128,
fold_name=None,
resplit_portion=None,
**custom_data_args):
'''Prepare data
An example:
preparedata_setting = {
"server_name": "tmdev",
"data_root_path": "data/",
"max_seq_length": 128,
"jobid_config": jobid_config,
"wandb_utils": wandb_utils,
"resplit_portion": {"source": ["train", "validation"],
"train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]}
}
autohf.prepare_data(**preparedata_setting)
Args:
server_name:
a string variable, which can be tmdev or azureml
data_root_path:
the root path for storing the checkpoints and output results, e.g., "data/"
jobid_config:
a JobID object describing the profile of job
is_wandb_on:
a boolean of whether wandb is turned on
max_seq_length (optional):
the maximum sequence length for the HuggingFace tokenizer; this hyperparameter must be specified
at the data processing step
resplit_portion:
the proportion for resplitting the train and dev data when the split mode is "rspt" (resplit).
If the split mode is "rspt", resplit_portion is required
'''
console_args = load_console_args(**custom_data_args)
self._max_seq_length = max_seq_length
self._server_name = server_name if server_name is not None else "tmdev"
self.jobid_config = jobid_config if jobid_config is not None else JobID(console_args)
self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on,
console_args=console_args,
jobid_config=self.jobid_config)
self.wandb_utils.set_wandb_per_run()
self.path_utils = PathUtils(self.jobid_config, hpo_data_root_path=data_root_path)
if self.jobid_config.spt == "rspt":
assert resplit_portion, "If split mode is 'rspt', the resplit_portion must be provided. Please " \
"refer to the example in the documentation of AutoTransformers.prepare_data()"
if self.jobid_config.subdat:
data_raw = load_dataset(self.get_full_data_name(), self.jobid_config.subdat)
else:
data_raw = self._wrapper(load_dataset, *self.jobid_config.dat)
self._train_name, self._dev_name, self._test_name = self._get_split_name(data_raw, fold_name=fold_name)
auto_tokentoids_config = {"max_seq_length": self._max_seq_length}
self._tokenizer = AutoTokenizer.from_pretrained(self.jobid_config.pre_full, use_fast=True)
def autoencodetext_from_model_and_dataset_name():
return AutoEncodeText.from_model_and_dataset_name(
data_raw,
self.jobid_config.pre_full,
self.get_full_data_name(),
self.jobid_config.subdat,
**auto_tokentoids_config)
data_encoded = autoencodetext_from_model_and_dataset_name()
self._max_seq_length = 0
"""
Update the max_seq_length to the minimum of the actual max seq length and the user defined max_seq_length
"""
for each_fold in data_encoded.keys():
self._max_seq_length = max(self._max_seq_length,
max([sum(data_encoded[each_fold][x]['attention_mask']) for x in
range(len(data_encoded[each_fold]))]))
self._max_seq_length = int((self._max_seq_length + 15) / 16) * 16
data_encoded = autoencodetext_from_model_and_dataset_name()
if self.jobid_config.spt == "rspt":
all_folds_from_source = []
assert "source" in resplit_portion.keys(), "Must specify the source for resplitting the dataset in" \
"resplit_portion, which is a list of folder names, e.g., resplit_portion = {'source': ['train']}"
source_fold_names = resplit_portion['source']
for each_fold_name in source_fold_names:
this_fold_dataset = data_encoded[each_fold_name]
all_folds_from_source.append(this_fold_dataset)
merged_folds_from_source = datasets.concatenate_datasets(all_folds_from_source)
merged_folds_from_source = merged_folds_from_source.shuffle(seed=self.jobid_config.sddt)
assert "train" in resplit_portion.keys() and "validation" in resplit_portion.keys() \
and "test" in resplit_portion.keys(), "train, validation, test must exist in resplit_portion"
for key in ["train", "validation", "test"]:
target_fold_start, target_fold_end = \
int(resplit_portion[key][0] * len(merged_folds_from_source)), \
int(resplit_portion[key][1] * len(merged_folds_from_source))
subfold_dataset = merged_folds_from_source.select(
[x for x in range(target_fold_start, target_fold_end)]).flatten_indices()
if key == "train":
self.train_dataset = subfold_dataset
elif key == "validation":
self.eval_dataset = subfold_dataset
else:
self.test_dataset = subfold_dataset
else:
self.train_dataset, self.eval_dataset, self.test_dataset \
= data_encoded[self._train_name], data_encoded[self._dev_name], data_encoded[self._test_name]
def _load_model(self,
checkpoint_path=None,
per_model_config=None):
this_task = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
if this_task == "seq-classification":
self._num_labels = len(self.train_dataset.features["label"].names)
elif this_task == "regression":
self._num_labels = 1
if not checkpoint_path:
checkpoint_path = self.jobid_config.pre_full
def get_this_model():
return AutoModelForSequenceClassification.from_pretrained(checkpoint_path, config=model_config)
def is_pretrained_model_in_classification_head_list():
return self.jobid_config.pre in MODEL_CLASSIFICATION_HEAD_MAPPING.keys()
def _set_model_config():
if per_model_config and len(per_model_config) > 0:
model_config = AutoConfig.from_pretrained(
checkpoint_path,
num_labels=model_config_num_labels,
**per_model_config)
else:
model_config = AutoConfig.from_pretrained(
checkpoint_path,
num_labels=model_config_num_labels)
return model_config
if this_task == "seq-classification":
num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
if is_pretrained_model_in_classification_head_list():
model_config_num_labels = num_labels_old
else:
model_config_num_labels = self._num_labels
model_config = _set_model_config()
if is_pretrained_model_in_classification_head_list():
if self._num_labels != num_labels_old:
this_model = get_this_model()
model_config.num_labels = self._num_labels
this_model.num_labels = self._num_labels
this_model.classifier = AutoSeqClassificationHead \
.from_model_type_and_config(self.jobid_config.pre,
model_config)
else:
this_model = get_this_model()
else:
this_model = get_this_model()
this_model.resize_token_embeddings(len(self._tokenizer))
return this_model
elif this_task == "regression":
model_config = self._set_model_config(checkpoint_path, per_model_config, 1)
this_model = get_this_model()
return this_model
def _get_metric_func(self):
if self.get_full_data_name() in ("glue", "super_glue"):
metric = datasets.load.load_metric(self.get_full_data_name(), self.jobid_config.subdat)
elif self.get_full_data_name() in ("squad", "squad_v2"):
metric = datasets.load.load_metric(self.get_full_data_name())
else:
metric = datasets.load.load_metric(self.metric_name)
return metric
def _compute_metrics_by_dataset_name(self,
eval_pred):
predictions, labels = eval_pred
predictions = np.squeeze(predictions) \
if self.task_name == "regression" else np.argmax(predictions, axis=1)
metric_func = self._get_metric_func()
return metric_func.compute(predictions=predictions, references=labels)
def _compute_checkpoint_freq(self,
num_train_epochs,
batch_size):
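# number of steps between two checkpoints: the steps for min(num_train_epochs, 1) epochs,
# divided by the number of devices and by ckpt_per_epoch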
if "gpu" in self._resources_per_trial:
ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
/ self._resources_per_trial["gpu"] / self.ckpt_per_epoch) + 1
else:
ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
/ self._resources_per_trial["cpu"] / self.ckpt_per_epoch) + 1
return ckpt_step_freq
@staticmethod
def _separate_config(config):
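# split a sampled config into the hyperparameters accepted by TrainingArguments and those belonging to the model config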
training_args_config = {}
per_model_config = {}
for key in config.keys():
if key in TrainingArguments.__dict__.keys():
training_args_config[key] = config[key]
else:
per_model_config[key] = config[key]
return training_args_config, per_model_config
def _objective(self, config, reporter, checkpoint_dir=None):
def model_init():
return self._load_model()
from transformers.trainer_utils import set_seed
set_seed(config["seed"])
training_args_config, per_model_config = AutoTransformers._separate_config(config)
this_model = self._load_model(per_model_config=per_model_config)
trial_id = reporter.trial_id
self.path_utils.make_dir_per_trial(trial_id)
ckpt_freq = self._compute_checkpoint_freq(
num_train_epochs=config["num_train_epochs"],
batch_size=config["per_device_train_batch_size"])
assert self.path_utils.ckpt_dir_per_trial
training_args = TrainingArguments(
output_dir=self.path_utils.ckpt_dir_per_trial,
do_eval=False,
per_device_eval_batch_size=32,
eval_steps=ckpt_freq,
evaluation_strategy=IntervalStrategy.STEPS,
save_steps=ckpt_freq,
save_total_limit=0,
fp16=self._fp16,
deepspeed=self.ds_config,
**training_args_config,
)
trainer = TrainerForAutoTransformers(
this_model,
training_args,
model_init=model_init,
train_dataset=self.train_dataset,
eval_dataset=self.eval_dataset,
tokenizer=self._tokenizer,
compute_metrics=self._compute_metrics_by_dataset_name,
)
trainer.logger = logger
trainer.trial_id = reporter.trial_id
"""
create a wandb run. If os.environ["WANDB_MODE"] == "offline", run = None
"""
run = self.wandb_utils.set_wandb_per_trial()
if os.environ["WANDB_MODE"] == "online":
for each_hp in config:
wandb.log({each_hp: config[each_hp]})
trainer.train()
trainer.evaluate(self.eval_dataset)
"""
If a wandb run was created, close the run after train and evaluate finish
"""
if run:
run.finish()
def _verify_init_config(self,
**custom_hpo_args):
for key in custom_hpo_args.keys():
if key == "points_to_evaluate":
for each_init_config in custom_hpo_args[key]:
for each_hp in each_init_config.keys():
assert each_hp in self._search_space_hpo.keys(), \
"points_to_evaluate hp must be within the search space"
assert isinstance(each_init_config[each_hp], int) or \
isinstance(each_init_config[each_hp], float) or \
isinstance(each_init_config[each_hp], str) or \
isinstance(each_init_config[each_hp], bool), " points_to_evaluate must be a scalar"
assert isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Categorical) or \
isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Float) or \
isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Integer), \
"Every hp space must either be categorical, integer or float"
if isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Categorical):
assert each_init_config[each_hp] in self._search_space_hpo[each_hp].categories, \
"points_to_evaluate {} value must be within the search space".format(each_hp)
else:
assert self._search_space_hpo[each_hp].lower <= each_init_config[each_hp] <= \
self._search_space_hpo[each_hp].upper, \
"points_to_evaluate {} value must be within the search space".format(each_hp)
def _get_search_algo(self,
search_algo_name,
search_algo_args_mode,
**custom_hpo_args):
if search_algo_name in ("bs", "cfo"):
self._verify_init_config(**custom_hpo_args)
search_algo = AutoSearchAlgorithm.from_method_name(
search_algo_name,
search_algo_args_mode,
self._search_space_hpo,
**custom_hpo_args)
return search_algo
@staticmethod
def _recover_checkpoint(tune_checkpoint_dir):
assert tune_checkpoint_dir
# Get subdirectory used for Huggingface.
subdirs = [
os.path.join(tune_checkpoint_dir, name)
for name in os.listdir(tune_checkpoint_dir)
if os.path.isdir(os.path.join(tune_checkpoint_dir, name))
]
# There should only be 1 subdir.
assert len(subdirs) == 1, subdirs
return subdirs[0]
def get_full_data_name(self):
return JobID.dataset_list_to_str(self.jobid_config.dat, "dat")
def _save_ckpt_json(self,
best_ckpt):
json.dump({"best_ckpt": best_ckpt},
open(os.path.join(self.path_utils.result_dir_per_run,
"save_ckpt_" + self.jobid_config.to_jobid_string() + ".json"), "w"))
def _save_output_metric(self,
output_metrics):
json.dump(output_metrics, open(
os.path.join(self.path_utils.result_dir_per_run,
"output_metric_" + self.jobid_config.to_jobid_string() + ".json"), "w"))
def _load_ckpt_json(self,
ckpt_dir=None,
**kwargs):
if not ckpt_dir:
ckpt_dir = os.path.join(self.path_utils.result_dir_per_run,
"save_ckpt_" + self.jobid_config.to_jobid_string() + ".json")
try:
ckpt_json = json.load(open(ckpt_dir))
return ckpt_json["best_ckpt"]
except FileNotFoundError as err:
logger.error("Saved checkpoint not found. Please make sure checkpoint is stored under {}".format(ckpt_dir))
raise err
def _set_metric(self, custom_metric_name=None, custom_metric_mode_name=None):
default_metric, default_mode, all_metrics, all_modes = get_default_and_alternative_metric(
self.get_full_data_name(),
subdataset_name=self.jobid_config.subdat,
custom_metric_name=custom_metric_name,
custom_metric_mode_name=custom_metric_mode_name)
_variable_override_default_alternative(logger,
self,
"metric_name",
default_metric,
all_metrics,
custom_metric_name)
_variable_override_default_alternative(logger,
self,
"metric_mode_name",
default_mode,
all_modes,
custom_metric_mode_name)
self._all_metrics = all_metrics
self._all_modes = all_modes
def _set_task(self):
self.task_name = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
def fit_hf(self,
resources_per_trial,
num_samples,
time_budget,
custom_metric_name=None,
custom_metric_mode_name=None,
_fp16=True,
**custom_hpo_args
):
'''Fine-tuning HuggingFace models using HF's API Transformers.hyperparameter_search (for comparative purposes).
Transformers.hyperparameter_search has the following disadvantages:
(1) it does not return a tune.analysis.Analysis result;
(2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function,
search space, etc. are defined inside of Transformers.hyperparameter_search.
An example:
autohf_settings = {"resources_per_trial": {"cpu": 1},
"num_samples": 1,
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
}
validation_metric = autohf.fit_hf(**autohf_settings)
Args:
resources_per_trial:
A dict showing the resources used by each trial,
e.g., {"gpu": 4, "cpu": 4}
num_samples:
An int variable of the maximum number of trials
time_budget:
An int variable of the maximum time budget
custom_metric_name:
A string of the metric name or a function,
e.g., 'accuracy', 'f1', 'loss',
custom_metric_mode_name:
A string of the mode name,
e.g., "max", "min", "last", "all"
fp16:
boolean, default = True | whether to use fp16
custom_hpo_args:
The additional keyword arguments, e.g.,
custom_hpo_args = {"points_to_evaluate": [{
"num_train_epochs": 1,
"per_device_train_batch_size": 128, }]}
Returns:
validation_metric:
a dict storing the validation score
'''
def model_init():
return self._load_model()
def ray_hp_space(trial):
return {
"learning_rate": ray.tune.loguniform(1e-6, 1e-4),
"num_train_epochs": ray.tune.choice(list(range(1, 6))),
"seed": ray.tune.quniform(1, 41, 1),
"per_device_train_batch_size": ray.tune.choice([4, 8, 16, 32, 64]),
}
self._set_metric(custom_metric_name, custom_metric_mode_name)
self._set_task()
training_args = TrainingArguments(
output_dir=self.path_utils.hpo_ckpt_path,
fp16=_fp16,
)
this_model = self._load_model()
trainer = TrainerForAutoTransformers(
this_model,
training_args,
model_init=model_init,
train_dataset=self.train_dataset,
eval_dataset=self.eval_dataset,
tokenizer=self._tokenizer,
compute_metrics=self._compute_metrics_by_dataset_name,
)
self.path_utils.make_dir_per_run()
start_time = time.time()
best_run = trainer.hyperparameter_search(
n_trials=num_samples,
time_budget_s=time_budget,
hp_space=ray_hp_space,
backend=HPSearchBackend.RAY,
resources_per_trial=resources_per_trial)
duration = time.time() - start_time
self.last_run_duration = duration
hp_dict = best_run.hyperparameters
hp_dict["seed"] = int(hp_dict["seed"])
best_training_args = TrainingArguments(
output_dir=self.path_utils.hpo_ckpt_path,
fp16=_fp16,
**hp_dict,
)
best_trainer = TrainerForAutoTransformers(
this_model,
best_training_args,
model_init=model_init,
train_dataset=self.train_dataset,
eval_dataset=self.eval_dataset,
tokenizer=self._tokenizer,
compute_metrics=self._compute_metrics_by_dataset_name,
)
best_model_checkpoint_path = os.path.join(self.path_utils.hpo_ckpt_path, "hpo_hf")
if not os.path.exists(best_model_checkpoint_path):
os.mkdir(best_model_checkpoint_path)
best_trainer.train()
best_trainer.save_model(best_model_checkpoint_path)
self._save_ckpt_json(best_model_checkpoint_path)
validation_metric = best_trainer.evaluate()
return validation_metric
def fit(self,
num_samples,
time_budget,
custom_metric_name=None,
custom_metric_mode_name=None,
ckpt_per_epoch=1,
fp16=True,
verbose=1,
resources_per_trial={"gpu": 1, "cpu": 1},
**custom_hpo_args):
'''Fine-tuning HuggingFace models using the HPO setting
An example:
autohf_settings = {"resources_per_trial": {"cpu": 1},
"num_samples": 1,
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
}
validation_metric, analysis = autohf.fit(**autohf_settings)
Args:
resources_per_trial:
A dict showing the resources used by each trial,
e.g., {"gpu": 4, "cpu": 4}
num_samples:
An int variable of the maximum number of trials
time_budget:
An int variable of the maximum time budget
custom_metric_name:
A string of the metric name or a function,
e.g., 'accuracy', 'f1', 'loss'
custom_metric_mode_name:
A string of the mode name,
e.g., "max", "min", "last", "all"
ckpt_per_epoch:
An integer value of the number of checkpoints per epoch, default = 1
verbose:
int, default=1 | Controls the verbosity, higher means more
messages
fp16:
boolean, default = True | whether to use fp16
custom_hpo_args:
The additional keyword arguments, e.g.,
custom_hpo_args = {"points_to_evaluate": [{
"num_train_epochs": 1,
"per_device_train_batch_size": 128, }]}
Returns:
validation_metric:
a dict storing the validation score
analysis:
a ray.tune.analysis.Analysis object storing the analysis results from tune.run
'''
self._resources_per_trial = resources_per_trial
self._set_metric(custom_metric_name, custom_metric_mode_name)
self._set_task()
self._fp16 = fp16
ray.init(local_mode=True)
self._set_search_space(**custom_hpo_args)
search_algo = self._get_search_algo(self.jobid_config.alg, self.jobid_config.arg, **custom_hpo_args)
scheduler = AutoScheduler.from_scheduler_name(self.jobid_config.pru)
self.ckpt_per_epoch = ckpt_per_epoch
self.path_utils.make_dir_per_run()
logger.addHandler(logging.FileHandler(os.path.join(self.path_utils.log_dir_per_run, 'tune.log')))
old_level = logger.getEffectiveLevel()
self._verbose = verbose
if verbose == 0:
logger.setLevel(logging.WARNING)
assert self.path_utils.ckpt_dir_per_run
start_time = time.time()
tune_config = self._search_space_hpo
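# add the fixed training seed from the job config so that it is passed to the objective of every trial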
tune_config["seed"] = self.jobid_config.sdhf
analysis = ray.tune.run(
self._objective,
metric=self.metric_name,
mode=self.metric_mode_name,
name="ray_result",
resources_per_trial=resources_per_trial,
config=tune_config,
verbose=verbose,
local_dir=self.path_utils.ckpt_dir_per_run,
num_samples=num_samples,
time_budget_s=time_budget,
keep_checkpoints_num=1,
scheduler=scheduler,
search_alg=search_algo,
)
duration = time.time() - start_time
self.last_run_duration = duration
logger.info("Total running time: {} seconds".format(duration))
ray.shutdown()
best_trial = analysis.get_best_trial(scope="all", metric=self.metric_name, mode=self.metric_mode_name)
validation_metric = {"eval_" + self.metric_name
: best_trial.metric_analysis[self.metric_name][self.metric_mode_name]}
for x in range(len(self._all_metrics)):
validation_metric["eval_" + self._all_metrics[x]] \
= best_trial.metric_analysis[self._all_metrics[x]][self._all_modes[x]]
get_best_ckpt = analysis.get_best_checkpoint(best_trial, metric=self.metric_name, mode=self.metric_mode_name)
best_ckpt = AutoTransformers._recover_checkpoint(get_best_ckpt)
self._save_ckpt_json(best_ckpt)
if verbose == 0:
logger.setLevel(old_level)
return validation_metric, analysis
def predict(self,
ckpt_json_dir=None,
**kwargs):
'''Predict label for test data.
An example:
predictions, test_metric = autohf.predict()
Args:
ckpt_json_dir:
the checkpoint for the fine-tuned HuggingFace model if you wish to override
the saved checkpoint in the training stage under self.path_utils._result_dir_per_run
Returns:
A numpy array of shape n * 1; each element is a predicted class
label for an instance.
'''
best_checkpoint = self._load_ckpt_json(ckpt_json_dir, **kwargs)
best_model = self._load_model(checkpoint_path=best_checkpoint)
training_args = TrainingArguments(per_device_eval_batch_size=1,
output_dir=self.path_utils.result_dir_per_run)
test_trainer = TrainerForAutoTransformers(best_model, training_args)
if self.jobid_config.spt == "ori":
try:
self.test_dataset.remove_columns_("label")
except ValueError:
pass
test_dataloader = test_trainer.get_test_dataloader(self.test_dataset)
predictions, labels, _ = test_trainer.prediction_loop(test_dataloader, description="Prediction")
predictions = np.squeeze(predictions) \
if get_default_task(self.get_full_data_name(), self.jobid_config.subdat) == "regression" \
else np.argmax(predictions, axis=1)
torch.cuda.empty_cache()
if self.jobid_config.spt == "rspt":
assert labels is not None
metric = self._get_metric_func()
output_metric = metric.compute(predictions=predictions, references=labels)
self._save_output_metric(output_metric)
return predictions, output_metric
else:
return predictions, None
def output_prediction(self,
predictions=None,
output_prediction_path=None,
output_zip_file_name=None):
"""
When using the original GLUE split, output the prediction on test data,
and prepare the .zip file for submission
Example:
local_archive_path = self.autohf.output_prediction(predictions,
output_prediction_path= self.console_args.data_root_dir + "result/",
output_zip_file_name=azure_save_file_name)
Args:
predictions:
a list of predictions, which is the output of AutoTransformers.predict()
output_prediction_path:
output path for the prediction
output_zip_file_name:
a string, which is the name of the output zip file
Returns:
the path of the output .zip file
"""
return auto_output_prediction(self.get_full_data_name(), output_prediction_path,
output_zip_file_name, predictions, self.train_dataset,
self._dev_name, self.jobid_config.subdat)


@@ -0,0 +1,225 @@
from collections import OrderedDict
from functools import partial
from transformers import AutoTokenizer
from .sentence_keys_auto import get_sentence_keys
def inserting_sepp(sent, start, end, this_tokenizer):
return \
sent[:start].rstrip() + " " + this_tokenizer.sep_token + " " + sent[start:end] \
+ " " + this_tokenizer.sep_token + " " + sent[end:].lstrip()
def tokenize_superglue_copa(this_example,
this_tokenizer,
dataset_name,
subdataset_name=None,
**kwargs):
return None
def tokenize_superglue_wic_gpt2(this_example,
this_tokenizer,
dataset_name,
subdataset_name=None,
**kwargs):
return None
def tokenize_superglue_wic(this_example,
this_tokenizer,
dataset_name,
subdataset_name=None,
**kwargs
):
"""
tokenize the data from the wic task (word-in-context dataset),
e.g., sentence 1: "There's a lot of trash on the bed of the river"
sentence 2: "I keep a glass of water next to my bed when I sleep",
label = False (different word senses)
In the superglue data, the positions of the word in sentences 1 and 2 are provided.
What this function does is to update the span positions after tokenization, based on each LM's own tokenizer.
The key is to insert a [SEP] token before and after the target word, then feed the sentence into the LM's tokenizer.
There are two challenges:
(1) Each LM's tokenization is different, e.g., in XLNet's tokenizer, the paddings are on the left;
(2) Some LMs' tokenizers add an underline symbol before the word, e.g., "There's a lot"
-> [_There, _', _s, _a, _lot]
When the underline meets a special char such as '"' or "'", the tokenized sequence after adding [SEP] needs to be
aligned with the sequence tokenized without [SEP]. We use a two-pointer algorithm for the alignment.
"""
sent1, sent2 = this_example["sentence1"], this_example["sentence2"]
start1, end1 = this_example["start1"], this_example["end1"]
start2, end2 = this_example["start2"], this_example["end2"]
"""
Add [SEP] to the sentence
"""
altered_sent1 = inserting_sepp(sent1, start1, end1, this_tokenizer)
altered_sent2 = inserting_sepp(sent2, start2, end2, this_tokenizer)
input_ids_sepp = this_tokenizer(*(altered_sent1, altered_sent2),
padding="max_length",
max_length=1024,
truncation=True)["input_ids"]
data_pair = (sent1, sent2)
assert "max_seq_length" in kwargs, "max_seq_length must be provided for glue"
this_data = this_tokenizer(*data_pair, padding="max_length", max_length=kwargs["max_seq_length"], truncation=True)
input_ids = this_data["input_ids"]
which_sepp = 0
"""
span_start_end: a 2x2 array:
* (span_start_end[0][0], span_start_end[0][1]) are the spans of the position of the word in the first sentence
* (span_start_end[1][0], span_start_end[1][1]) are the spans of the position of the word in the second sentence
"""
span_start_end = [[-1, -1], [-1, -1]]
ptr_sepp = ptr_nosepp = 0
try:
padding_direction = this_tokenizer.padding_side
if padding_direction == "left":
padding_id = input_ids_sepp[0]
while input_ids_sepp[ptr_sepp] == padding_id:
ptr_sepp += 1
while input_ids[ptr_nosepp] == padding_id:
ptr_nosepp += 1
except KeyError:
pass
sep_id = this_tokenizer.convert_tokens_to_ids([this_tokenizer.sep_token])[0]
"""
use two pointers to align the tokenized sequence before and after adding [SEP];
ptr_sepp: the pointer after adding; ptr_nosepp: the pointer without adding
"""
while ptr_sepp < len(input_ids_sepp) and ptr_nosepp < len(input_ids) and \
input_ids_sepp[ptr_sepp] != 0 and input_ids[ptr_nosepp] != 0:
if input_ids_sepp[ptr_sepp] == input_ids[ptr_nosepp]:
ptr_sepp += 1
ptr_nosepp += 1
else:
if not (input_ids_sepp[ptr_sepp] == sep_id
or this_tokenizer.convert_ids_to_tokens([input_ids_sepp[ptr_sepp]])[0] in ('', '_')):
break
if input_ids_sepp[ptr_sepp] == sep_id:
span_start_end[int(which_sepp / 2)][which_sepp % 2] = ptr_nosepp
which_sepp += 1
ptr_sepp += 1
else:
ptr_sepp += 1
"""
max_word_span is the maximum tokens of the word
It is set to 16 following deberta:
https://github.com/microsoft/DeBERTa/blob/master/DeBERTa/apps/tasks/superglue_tasks.py#L1054
"""
max_word_span = 16
word_indices = []
for idx1 in range(2):
if span_start_end[idx1][1] < kwargs["max_seq_length"]:
first_span = [x for x in range(span_start_end[idx1][0], span_start_end[idx1][1])
if x < kwargs["max_seq_length"]] + [0] * (max_word_span - span_start_end[idx1][1]
+ span_start_end[idx1][0])
word_indices.append(first_span)
this_data["word_spans"] = word_indices
return this_data
def tokenize_glue(this_example,
this_tokenizer,
dataset_name,
subdataset_name=None,
**kwargs):
sentence_keys = get_sentence_keys(dataset_name, subdataset_name)
if len(sentence_keys) > 1:
sentence1_key, sentence2_key = sentence_keys[0], sentence_keys[1]
else:
sentence1_key = sentence_keys[0]
sentence2_key = None
data_pair = (
(this_example[sentence1_key],) if sentence2_key is None else (
this_example[sentence1_key], this_example[sentence2_key])
)
assert "max_seq_length" in kwargs, "max_seq_length must be provided for glue"
return this_tokenizer(*data_pair, padding="max_length", max_length=kwargs["max_seq_length"], truncation=True)
TOKENIZER_MAPPING = OrderedDict(
[
(("glue", "rte"), tokenize_glue),
(("glue", "mrpc"), tokenize_glue),
(("glue", "cola"), tokenize_glue),
(("glue", "wnli"), tokenize_glue),
(("glue", "stsb"), tokenize_glue),
(("glue", "sst2"), tokenize_glue),
(("glue", "mnli"), tokenize_glue),
(("glue", "qqp"), tokenize_glue),
(("glue", "qnli"), tokenize_glue),
(("super_glue", "wic"), tokenize_superglue_wic),
]
)
class AutoEncodeText:
"""
This is a generic input text tokenization class that will be instantiated as one of the
tokenization classes of the library when created with the
`~flaml.nlp.dataset.AutoEncodeText.from_model_and_dataset_name` class method.
This class cannot be instantiated directly using ``__init__()`` (throws an error).
"""
def __init__(self):
raise EnvironmentError(
"AutoEncodeText is designed to be instantiated "
"using the `AutoEncodeText.from_model_and_dataset_name(cls,"
"data_raw,model_checkpoint_path,dataset_name,subdataset_name = None,**kwargs)` methods."
)
@classmethod
def from_model_and_dataset_name(cls,
data_raw,
model_checkpoint_path,
dataset_name,
subdataset_name=None,
**kwargs):
"""
Instantiate one of the input text tokenization classes from the raw data, model checkpoint path, dataset name
and sub dataset name. The raw data is used for creating a mapping function from the raw tokens to the
tokenized token ids.
Args:
data_raw:
The raw data (a datasets.Dataset object)
model_checkpoint_path:
A string variable which specifies the model path, e.g., "google/electra-base-discriminator"
dataset_name:
A string variable which is the dataset name, e.g., "glue"
subdataset_name:
A string variable which is the sub dataset name,e.g., "rte"
kwargs:
The values in kwargs of any keys will be used for the mapping function
Examples:
>>> from datasets import load_dataset
>>> data_raw = load_dataset("glue", "rte")
>>> AutoEncodeText.from_model_and_dataset_name(data_raw, "google/electra-base-discriminator", ["glue"], "rte")
"""
if (dataset_name, subdataset_name) in TOKENIZER_MAPPING.keys():
this_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path, use_fast=True)
token_func = TOKENIZER_MAPPING[(dataset_name, subdataset_name)]
return data_raw.map(
partial(token_func,
this_tokenizer=this_tokenizer,
dataset_name=dataset_name,
subdataset_name=subdataset_name,
**kwargs), batched=False)
raise ValueError(
"Unrecognized dataset {},{} for this kind of AutoEncodeText: {}.\n"
"Dataset name should be one of {}.".format(
dataset_name, subdataset_name, cls.__name__, ", ".join(str(c) for c in TOKENIZER_MAPPING.keys())
)
)


@@ -0,0 +1,70 @@
# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py
from collections import OrderedDict
metric_mode_mapping_glue = {
"cola": [("matthews_correlation", "max")],
"mnli": [("accuracy", "max")],
"mrpc": [("accuracy", "max"), ("f1", "max")],
"qnli": [("accuracy", "max")],
"qqp": [("accuracy", "max"), ("f1", "max")],
"rte": [("accuracy", "max")],
"sst2": [("accuracy", "max")],
"stsb": [("pearson", "max"), ("spearmanr", "max")],
"wnli": [("accuracy", "max")]
}
metric_mode_mapping_squad = [("exact_match", "max"), ("f1", "max")]
metric_mode_mapping_super_glue = {
"axb": [("matthews_correlation", "max")],
"cb": [("accuracy", "max"), ("f1", "max")],
"copa": [("accuracy", "max")],
"rte": [("accuracy", "max")],
"wic": [("accuracy", "max")],
"wsc": [("accuracy", "max")],
"wsc.fixed": [("accuracy", "max")],
"boolq": [("accuracy", "max")],
"axg": [("accuracy", "max")]
}
metric_mode_mapping_imdb = [("accuracy", "max")]
metric_mode_mapping_yelp = [("accuracy", "max")]
METRIC_MAPPING = OrderedDict(
[
("squad", metric_mode_mapping_squad),
("glue", metric_mode_mapping_glue),
("super_glue", metric_mode_mapping_super_glue),
("imdb", metric_mode_mapping_imdb),
("yelp_review_full", metric_mode_mapping_yelp)
]
)
def get_default_and_alternative_metric(dataset_name,
subdataset_name=None,
custom_metric_name=None,
custom_metric_mode_name=None):
if dataset_name not in METRIC_MAPPING.keys():
assert custom_metric_name and custom_metric_mode_name, \
"The dataset is not in {}, you must explicitly specify " \
"the custom_metric_name and custom_metric_mode_name".format(",".join(METRIC_MAPPING.keys()))
eval_name_mapping = METRIC_MAPPING[dataset_name]
if isinstance(eval_name_mapping, dict):
assert subdataset_name and subdataset_name in eval_name_mapping, \
"dataset_name and subdataset_name not correctly specified"
default_metric, default_mode = eval_name_mapping[subdataset_name][0]
all_metrics, all_mode \
= [x[0] for x in eval_name_mapping[subdataset_name]] \
+ ["loss"], [x[1] for x in eval_name_mapping[subdataset_name]] + ["min"]
return default_metric, default_mode, all_metrics, all_mode
else:
assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"
default_metric, default_mode = eval_name_mapping[0]
all_metrics, all_mode = [x[0] for x in eval_name_mapping] + ["loss"], \
[x[1] for x in eval_name_mapping] + ["min"]
return default_metric, default_mode, all_metrics, all_mode


@@ -0,0 +1,28 @@
sentence_keys_glue = {
"cola": ["sentence"],
"mnli": ["premise", "hypothesis"],
"mrpc": ["sentence1", "sentence2"],
"qnli": ["sentence", "question"],
"qqp": ["question1", "question2"],
"rte": ["sentence1", "sentence2"],
"sst2": ["sentence"],
"stsb": ["sentence1", "sentence2"],
"wnli": ["sentence1", "sentence2"]
}
sentence_keys_super_glue = {
"rte": ["hypothesis", "premise"],
"wic": ["sentence1", "sentence2"],
"wsc": ["text"]
}
def get_sentence_keys(dataset_name, subdataset_name=None):
eval_name_mapping = globals()["sentence_keys_" + dataset_name]
if isinstance(eval_name_mapping, dict):
assert subdataset_name and subdataset_name in eval_name_mapping, \
"dataset_name and subdataset_name not correctly specified"
sentence_keys = eval_name_mapping[subdataset_name]
else:
sentence_keys = eval_name_mapping
return sentence_keys


@@ -0,0 +1,126 @@
import os
import shutil
from collections import OrderedDict
file_name_mapping_glue = {
"ax": ["AX.tsv"],
"cola": ["CoLA.tsv"],
"mnli": ["MNLI-m.tsv", "MNLI-mm.tsv"],
"mrpc": ["MRPC.tsv"],
"qnli": ["QNLI.tsv"],
"qqp": ["QQP.tsv"],
"rte": ["RTE.tsv"],
"sst2": ["SST-2.tsv"],
"stsb": ["STS-B.tsv"],
"wnli": ["WNLI.tsv"]
}
default_prediction_glue = {
"ax": ["entailment"],
"cola": ["0"],
"mnli": ["neutral", "neutral"],
"mrpc": ["0"],
"qnli": ["not_entailment"],
"qqp": ["0"],
"rte": ["not_entailment"],
"sst2": ["0"],
"stsb": ["0.0"],
"wnli": ["0"]
}
test_size_glue = {
"ax": [1104],
"cola": [1064],
"mnli": [9796, 9847],
"mrpc": [1725],
"qnli": [5463],
"qqp": [390965],
"rte": [3000],
"sst2": [1821],
"stsb": [1379],
"wnli": [146]
}
def output_prediction_glue(output_path, output_dir_name, predictions, train_data, dev_name, subdataset_name):
output_dir = os.path.join(output_path, output_dir_name)
if os.path.exists(output_dir):
assert os.path.isdir(output_dir)
else:
os.mkdir(output_dir)
if subdataset_name != "stsb":
label_list = train_data.features["label"].names
output_blank_tsv(output_dir)
for each_subdataset_name in file_name_mapping_glue.keys():
for idx in range(len(file_name_mapping_glue[each_subdataset_name])):
each_file = file_name_mapping_glue[each_subdataset_name][idx]
if subdataset_name != "mnli":
is_match = subdataset_name == each_subdataset_name
else:
if dev_name == "validation_matched":
is_match = each_file == "MNLI-m.tsv"
else:
is_match = each_file == "MNLI-mm.tsv"
if is_match:
with open(os.path.join(output_dir, each_file), "w") as writer:
writer.write("index\tprediction\n")
for index, item in enumerate(predictions):
if subdataset_name == "stsb":
if item > 5.0:
item = 5.0
writer.write(f"{index}\t{item:3.3f}\n")
else:
if subdataset_name in ("rte", "qnli", "mnli"):
item = label_list[item]
writer.write(f"{index}\t{item}\n")
else:
if int(item) == item:
item = int(item)
writer.write(f"{index}\t{item}\n")
else:
writer.write(f"{index}\t{item:3.3f}\n")
shutil.make_archive(os.path.join(output_path, output_dir_name), 'zip', output_dir)
return os.path.join(output_path, output_dir_name + ".zip")
OUTPUT_PREDICTION_MAPPING = OrderedDict(
[
("glue", output_prediction_glue),
]
)
def auto_output_prediction(dataset_name,
output_path,
output_dir_name,
predictions,
train_data,
dev_name,
subset_name):
if dataset_name in OUTPUT_PREDICTION_MAPPING.keys():
return OUTPUT_PREDICTION_MAPPING[dataset_name](output_path,
output_dir_name,
predictions,
train_data,
dev_name,
subset_name)
else:
raise ValueError(
"Unrecognized dataset {}. \n"
"Should be one of {}.".format(dataset_name, ", ".join(OUTPUT_PREDICTION_MAPPING.keys())
)
)
def output_blank_tsv(output_dir):
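# write a default prediction for every GLUE task so that the submission zip always contains all the required .tsv files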
for each_subdataset_name in file_name_mapping_glue.keys():
for idx in range(len(file_name_mapping_glue[each_subdataset_name])):
each_file = file_name_mapping_glue[each_subdataset_name][idx]
default_prediction = default_prediction_glue[each_subdataset_name][idx]
test_size = test_size_glue[each_subdataset_name][idx]
with open(os.path.join(output_dir, each_file), "w") as writer:
writer.write("index\tprediction\n")
for index in range(test_size):
writer.write(f"{index}\t{default_prediction}\n")


@@ -0,0 +1,45 @@
# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py
from collections import OrderedDict
task_mapping_glue = {
"cola": "seq-classification",
"mnli": "seq-classification",
"mrpc": "seq-classification",
"qnli": "seq-classification",
"qqp": "seq-classification",
"rte": "seq-classification",
"sst2": "seq-classification",
"stsb": "regression",
"wnli": "seq-classification"
}
task_mapping_squad = "question-answering"
task_mapping_super_glue = {
"wic": "seq-classification",
"rte": "seq-classification"
}
TASK_MAPPING = OrderedDict(
[
("squad", task_mapping_squad),
("glue", task_mapping_glue),
("super_glue", task_mapping_super_glue),
]
)
def get_default_task(dataset_name, subdataset_name=None):
assert dataset_name in TASK_MAPPING.keys(), "The dataset is not in {}, so its default task " \
"cannot be inferred".format(
",".join(TASK_MAPPING.keys()))
eval_name_mapping = TASK_MAPPING[dataset_name]
if isinstance(eval_name_mapping, dict):
assert subdataset_name and subdataset_name in eval_name_mapping, \
"dataset_name and subdataset_name not correctly specified"
default_task = eval_name_mapping[subdataset_name]
else:
assert isinstance(eval_name_mapping, str), "dataset_name and subdataset_name not correctly specified"
default_task = eval_name_mapping
return default_task


@@ -0,0 +1,456 @@
# lookup table for the grid configs of each pre-trained language model for different tasks
import copy
def get_space_union_and_unique(search_space_common, search_space_unique, this_case_tags: list):
"""
get the recommended search configs for each pre-trained language model
Args:
search_space_common:
the union of configs recommended by the LM for all cases;
search_space_unique:
the recommended config by the LM for a specific condition, e.g., small model
this_case_tags:
a list, which contains the tags describing the specific condition, e.g., ["small"]
"""
search_space_union = search_space_common.copy()
this_search_space = search_space_common.copy()
# enumerate over each case where the search space is different
# this difference can be the dataset or model size, etc.
is_included = False
from ..utils import merge_dicts
for each_case in search_space_unique.keys():
from ..utils import _check_dict_keys_overlaps
if each_case in this_case_tags:
is_included = True
assert not _check_dict_keys_overlaps(this_search_space, search_space_unique[each_case]), \
"the hyperparameters of common and unique search spaces should not have overlaps"
this_search_space.update(search_space_unique[each_case])
search_space_union = merge_dicts(search_space_union, search_space_unique[each_case])
if is_included:
return this_search_space
else:
if "other" in search_space_unique.keys():
search_space_union = merge_dicts(search_space_union, search_space_unique["other"])
return search_space_union
def get_deberta_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
DEBERTA: DECODING-ENHANCED BERT WITH DISENTANGLED ATTENTION: Table 9
https://arxiv.org/abs/2006.03654
"""
search_space_common = {
"cls_dropout": [0, 0.1, 0.15],
"warmup_steps": [50, 100, 500, 1000],
"per_device_train_batch_size": [16, 32, 48, 64],
"num_train_epochs": [10],
"adam_epsilon": [1e-6],
}
search_space_unique = {
"large": {
"learning_rate": [5e-6, 8e-6, 9e-6, 1e-5],
"weight_decay": [0.01],
},
"base": {
"learning_rate": [1.5e-5, 2e-5, 3e-5, 4e-5],
}
}
return get_space_union_and_unique(search_space_common, search_space_unique, [model_size_type])
def get_longformer_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
TODO: Longformer: The Long-Document Transformer
"""
if dataset_name == "glue":
return
def get_funnel_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing
https://arxiv.org/abs/2006.03236
"""
search_space_common = {"learning_rate": [1e-5, 2e-5, 3e-5],
"hidden_dropout": [0.1],
"activation_dropout": [0.0],
"attention_dropout": [0.1],
"weight_decay": [0.01],
"warmup_ratio": [0.1],
"adam_epsilon": [1e-6],
}
search_space_unique = {
"imdb": {
"per_device_train_batch_size": [32],
"num_train_epochs": [5]
},
"ag_news": {
"per_device_train_batch_size": [32],
"num_train_epochs": [3]
},
"dbpedia_14": {
"per_device_train_batch_size": [64],
"num_train_epochs": [3]
},
"yelp_polarity": {
"per_device_train_batch_size": [128],
"num_train_epochs": [3]
},
"yelp_review_full": {
"per_device_train_batch_size": [128],
"num_train_epochs": [3]
},
"amazon_polarity": {
"per_device_train_batch_size": [128],
"num_train_epochs": [3]
},
"amazon_review_multi": {
"per_device_train_batch_size": [128],
"num_train_epochs": [3]
},
"glue_rte": {
"per_device_train_batch_size": [16],
"num_train_epochs": [10]
},
"glue_mrpc": {
"per_device_train_batch_size": [16],
"num_train_epochs": [10]
},
"glue_stsb": {
"per_device_train_batch_size": [16],
"num_train_epochs": [10]
},
"glue_cola": {
"per_device_train_batch_size": [16],
"num_train_epochs": [10]
},
"glue_sst2": {
"per_device_train_batch_size": [32],
"num_train_epochs": [5]
},
"glue_qnli": {
"per_device_train_batch_size": [32],
"num_train_epochs": [3]
},
"glue_mnli": {
"per_device_train_batch_size": [64],
"num_train_epochs": [3]
},
"glue_qqp": {
"per_device_train_batch_size": [64],
"num_train_epochs": [5]
}
}
from ..result_analysis.azure_utils import JobID
return get_space_union_and_unique(search_space_common, search_space_unique,
[JobID.get_full_data_name(dataset_name, subdataset_name)])
def get_bert_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
https://arxiv.org/pdf/1810.04805.pdf
"""
search_space_common = {}
search_space_unique = {
# Section 4.1: We use a batch size of 32 and fine-tune for 3 epochs over the data for all GLUE tasks. For each
# task, we selected the best fine-tuning learning rate (among 5e-5, 4e-5, 3e-5, and 2e-5) on the Dev set
"glue": {
"learning_rate": [5e-5, 4e-5, 3e-5, 2e-5],
"per_device_train_batch_size": [32],
"num_train_epochs": [3],
},
# Section 4.2: We fine-tune for 3 epochs with a learning rate of 5e-5 and a batch size of 32
"squad": {
"learning_rate": [5e-5],
"per_device_train_batch_size": [32],
"num_train_epochs": [2],
},
# Section 4.3: We fine-tuned for 2 epochs with a learning rate of 5e-5 and a batch size of 48.
"squad_v2": {
"learning_rate": [5e-5],
"per_device_train_batch_size": [48],
"num_train_epochs": [2],
},
# Section 4.4: We fine-tune the model for 3 epochs with a learning rate of 2e-5 and a batch size of 16.
"swag": {
"learning_rate": [2e-5],
"per_device_train_batch_size": [16],
"num_train_epochs": [3],
},
# Appendix A. The optimal hyperparameter values are task-specific, but we found the following
# range of possible values to work well across all tasks:
# - Batch size: 16, 32
# - Learning rate (Adam): 5e-5, 3e-5, 2e-5
# - Number of epochs: 2, 3, 4
"other": {
"learning_rate": [5e-5, 3e-5, 2e-5],
"per_device_train_batch_size": [16, 32],
"num_train_epochs": [2, 3, 4],
}
}
return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name])
def get_roberta_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
# RoBERTa: A Robustly Optimized BERT Pretraining Approach
# https://arxiv.org/pdf/1907.11692.pdf
search_space_common = {
"warmup_ratio": [0.06],
}
search_space_unique = {
# Table 10: Hyperparameters for finetuning RoBERTa-LARGE on RACE, SQuAD and GLUE.
# We consider a limited hyperparameter
# sweep for each task, with batch sizes ∈ {16, 32}
# and learning rates ∈ {1e5, 2e5, 3e5}, with a
# linear warmup for the first 6% of steps followed by
# a linear decay to 0.
"glue": {
"learning_rate": [1e-5, 2e-5, 3e-5],
"per_device_train_batch_size": [16, 32],
"weight_decay": [0.1],
"num_train_epochs": [10],
},
"race": {
"learning_rate": [1e-5],
"per_device_train_batch_size": [16],
"weight_decay": [0.1],
"num_train_epochs": [4],
},
"squad": {
"learning_rate": [1.5e-5],
"per_device_train_batch_size": [48],
"weight_decay": [0.01],
"num_train_epochs": [2],
}
}
return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name])
def get_electra_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
ELECTRA: PRE-TRAINING TEXT ENCODERS AS DISCRIMINATORS RATHER THAN GENERATORS
https://arxiv.org/pdf/2003.10555.pdf
"""
assert model_size_type in ("small", "base", "large", "intermediate", "xlarge"), \
"Electra paper has only provided hyperparameter for the small and base huggingface"
search_space_common = {
"learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4] if algo_mode == "grid"
else [3e-5, 5e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-3],
"weight_decay": [0.0],
"adam_epsilon": [1e-6],
"warmup_ratio": [0.1],
"per_device_train_batch_size": [32],
"hidden_dropout_prob": [0.1],
"attention_probs_dropout_prob": [0.1],
}
search_space_unique = {
# Appendix B: For Base-sized models we searched for a learning rate
"squad": {
"num_train_epochs": [2]
},
"squad_v2": {
"num_train_epochs": [2]
},
"glue_stsb": {
"num_train_epochs": [10],
},
"glue_rte": {
"num_train_epochs": [10],
},
"glue_wnli": {
"num_train_epochs": [3],
},
"glue_mrpc": {
"num_train_epochs": [3],
},
"glue_cola": {
"num_train_epochs": [3],
},
"glue_sst2": {
"num_train_epochs": [3],
},
"glue_qnli": {
"num_train_epochs": [3],
},
"glue_mnli": {
"num_train_epochs": [3],
},
"glue_qqp": {
"num_train_epochs": [3],
}
}
from ..result_analysis.azure_utils import JobID
return get_space_union_and_unique(search_space_common, search_space_unique,
[JobID.get_full_data_name(dataset_name, subdataset_name), model_size_type])
def get_mobilebert_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices
https://arxiv.org/pdf/2004.02984.pdf
"""
# To finetune the pre-trained models, we search the optimization hyperparameters
# in a search space including different batch sizes (16/32/48), learning
# rates ((1-10) * e-5), and the number of epochs (2-10)
search_space_common = {
"learning_rate": [x * 1e-5 for x in range(1, 11)],
"per_device_train_batch_size": [4, 8, 16, 32, 48],
"num_train_epochs": [x for x in range(2, 11)],
}
search_space_unique = {}
return get_space_union_and_unique(search_space_common, search_space_unique, [])
def get_albert_space(model_size_type=None,
dataset_name=None,
subdataset_name=None,
algo_mode=None):
"""
Hyperparameters for downstream tasks are shown in Table 14. We adapt these hyperparameters
from Liu et al. (2019), Devlin et al. (2019), and Yang et al. (2019).
LR BSZ ALBERT DR Classifier DR TS WS MSL
CoLA 1.00E-05 16 0 0.1 5336 320 512
STS 2.00E-05 16 0 0.1 3598 214 512
SST-2 1.00E-05 32 0 0.1 20935 1256 512
MNLI 3.00E-05 128 0 0.1 10000 1000 512
QNLI 1.00E-05 32 0 0.1 33112 1986 512
QQP 5.00E-05 128 0.1 0.1 14000 1000 512
RTE 3.00E-05 32 0.1 0.1 800 200 512
MRPC 2.00E-05 32 0 0.1 800 200 512
WNLI 2.00E-05 16 0.1 0.1 2000 250 512
SQuAD v1.1 5.00E-05 48 0 0.1 3649 365 384
SQuAD v2.0 3.00E-05 48 0 0.1 8144 814 512
RACE 2.00E-05 32 0.1 0.1 12000 1000 512
"""
search_space_common = {
}
search_space_unique = {
"glue_cola": {
"learning_rate": [1e-5],
"per_device_train_batch_size": [16],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [5336],
"warmup_steps": [320],
},
"glue_stsb": {
"learning_rate": [2e-5],
"per_device_train_batch_size": [16],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [3598],
"warmup_steps": [214],
},
"glue_sst2": {
"learning_rate": [1e-5],
"per_device_train_batch_size": [32],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [20935],
"warmup_steps": [1256],
},
"glue_mnli": {
"learning_rate": [3e-5],
"per_device_train_batch_size": [128],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [10000],
"warmup_steps": [1000],
},
"glue_qnli": {
"learning_rate": [1e-5],
"per_device_train_batch_size": [32],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [33112],
"warmup_steps": [1986],
},
"glue_qqp": {
"learning_rate": [5e-5],
"per_device_train_batch_size": [128],
"attention_probs_dropout_prob": [0.1],
"classifier_dropout_prob": [0.1],
"max_steps": [14000],
"warmup_steps": [1000],
},
"glue_rte": {
"learning_rate": [3e-5],
"per_device_train_batch_size": [32],
"attention_probs_dropout_prob": [0.1],
"classifier_dropout_prob": [0.1],
"max_steps": [800],
"warmup_steps": [200],
},
"glue_mrpc": {
"learning_rate": [2e-5],
"per_device_train_batch_size": [32],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [800],
"warmup_steps": [200],
},
"glue_wnli": {
"learning_rate": [2e-5],
"per_device_train_batch_size": [16],
"attention_probs_dropout_prob": [0.1],
"classifier_dropout_prob": [0.1],
"max_steps": [2000],
"warmup_steps": [250],
},
"squad": {
"learning_rate": [5e-5],
"per_device_train_batch_size": [48],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [3649],
"warmup_steps": [365],
},
"squad_v2": {
"learning_rate": [3e-5],
"per_device_train_batch_size": [48],
"attention_probs_dropout_prob": [0],
"classifier_dropout_prob": [0.1],
"max_steps": [8144],
"warmup_steps": [814],
},
"race": {
"learning_rate": [2e-5],
"per_device_train_batch_size": [32],
"attention_probs_dropout_prob": [0.1],
"classifier_dropout_prob": [0.1],
"max_steps": [12000],
"warmup_steps": [1000],
},
}
# To finetune the pre-trained models, we search the optimization hyperparameters
# in a search space including different batch sizes (16/32/48), learning
# rates ((1-10) * 1e-5), and the number of epochs (2-10)
from ..result_analysis.azure_utils import JobID
return get_space_union_and_unique(search_space_common, search_space_unique,
[JobID.get_full_data_name(dataset_name, subdataset_name)])

View File

@@ -0,0 +1,93 @@
from collections import OrderedDict
from .get_grid_search_space import \
(get_electra_space,
get_bert_space,
get_roberta_space,
get_funnel_space,
get_deberta_space,
get_albert_space
)
GRID_SEARCH_SPACE_MAPPING = OrderedDict(
[
("electra", get_electra_space),
("bert", get_bert_space),
("roberta", get_roberta_space),
("funnel", get_funnel_space),
("deberta", get_deberta_space),
("albert", get_albert_space),
]
)
HF_MODEL_LIST = [
"bert",
"roberta",
"electra",
"xlnet",
"albert",
"distilbert",
"deberta",
"mobilebert",
"funnel"
]
class AutoGridSearchSpace:
"""
This is a class for getting the recommended grid search space of a pre-trained LM that will be
instantiated as one of the search spaces of the library when created with the
`~flaml.nlp.hpo.AutoGridSearchSpace.from_model_and_dataset_name` method.
This class cannot be instantiated directly using ``__init__()`` (throws an error).
"""
def __init__(self):
raise EnvironmentError(
"AutoGridSearchSpace is designed to be instantiated "
"using the `AutoGridSearchSpace.from_config_and_method_name(cls, model_type, model_size_type,"
"dataset_name,subdataset_name = None,algo_mode = None)` methods."
)
@classmethod
def from_model_and_dataset_name(cls,
model_type,
model_size_type,
dataset_name,
subdataset_name=None,
algo_mode=None):
"""
Instantiate one of the classes for getting the recommended grid search space of a pre-trained LM from
the model type, model size type, dataset name, sub dataset name and algorithm mode
Args:
model_type:
A string variable which is the model type, e.g. "electra"
model_size_type:
A string variable which is the size of the model, e.g., "small"
dataset_name:
A string variable which is the dataset name, e.g., "glue"
subdataset_name:
A string variable which is the sub dataset name, e.g., "rte"
algo_mode:
A string variable which is the algorithm mode for grid search, e.g., "gridbert"
Example:
>>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", "glue", "rte", "grid")
"""
if model_type in GRID_SEARCH_SPACE_MAPPING.keys():
this_model_recommended_space = GRID_SEARCH_SPACE_MAPPING[model_type](
model_size_type, dataset_name, subdataset_name, algo_mode)
return this_model_recommended_space
raise ValueError(
"Unrecognized model type {} and dataset {} for this kind of AutoGridSearchSpace: {}.\n"
"Model type should be one of {}.".format(
model_type, dataset_name, cls.__name__, ", ".join(GRID_SEARCH_SPACE_MAPPING.keys())
)
)
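# A minimal usage sketch of the class above. The model/size/dataset names are
# example values (the same ones used in the docstring); the returned object is
# a dict mapping each hyperparameter name to its list of grid values, as the
# per-model functions in get_grid_search_space.py produce.
if __name__ == "__main__":
    electra_rte_grid = AutoGridSearchSpace.from_model_and_dataset_name(
        "electra", "small", "glue", "rte", "grid")
    for hp_name, hp_values in electra_rte_grid.items():
        print(hp_name, hp_values)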

View File

@@ -0,0 +1,242 @@
from collections import OrderedDict
from ..huggingface.trainer import TrainerForAutoTransformers
from ray import tune
from transformers import TrainingArguments
from .grid_searchspace_auto import AutoGridSearchSpace
def hpo_space_custom(**custom_hpo_args):
assert "hpo_space" in custom_hpo_args
custom_search_space = custom_hpo_args["hpo_space"]
return custom_search_space
def bounded_gridunion(logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
subdataset_name=None,
**custom_hpo_args):
assert "bound" in custom_hpo_args
gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](logger,
model_type,
model_size_type,
dataset_name,
subdataset_name,
**custom_hpo_args)
for each_key in custom_hpo_args["bound"].keys():
if "u" in custom_hpo_args["bound"][each_key]:
upper = custom_hpo_args["bound"][each_key]["u"]
else:
upper = 100000
if "l" in custom_hpo_args["bound"][each_key]:
lower = custom_hpo_args["bound"][each_key]["l"]
else:
lower = -100000
original_space = sorted(gridunion_space[each_key])
upper_id = len(original_space)
for x in range(len(original_space)):
if original_space[x] > upper:
upper_id = x
break
lower_id = 0
for x in range(len(original_space) - 1, -1, -1):
if original_space[x] < lower:
lower_id = x
break
gridunion_space[each_key] = original_space[lower_id:upper_id]
return gridunion_space
def hpo_space_gridunion(logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
subdataset_name=None,
**custom_hpo_args):
output_config = {}
for each_model_type in {"electra", "roberta", "bert"}:
# if each_model_type == model_type: continue
this_config = AutoGridSearchSpace.from_model_and_dataset_name(
each_model_type, model_size_type, dataset_name, subdataset_name, "hpo")
from ..utils import merge_dicts
output_config = merge_dicts(output_config, this_config)
default_values = {}
"""
adding the default configuration from transformers/training_args.py into hpo space
"""
training_args = TrainingArguments(output_dir=".")
for each_hp in output_config.keys():
try:
default_values[each_hp] = [getattr(training_args, each_hp)]
except AttributeError:
pass
output_config = merge_dicts(output_config, default_values)
return output_config
def hpo_space_gridunion_smoke_test(
logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
subdataset_name=None,
**custom_hpo_args):
return {'learning_rate': [1e-5],
'weight_decay': [0.0],
'adam_epsilon': [1e-08],
'warmup_ratio': [0.1],
'per_device_train_batch_size': [2],
'hidden_dropout_prob': [0.1],
'attention_probs_dropout_prob': [0.1],
'num_train_epochs': [0.1]}
def hpo_space_generic(logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
subdataset_name=None,
**custom_hpo_args):
output_config = {
"learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"},
"num_train_epochs": {"l": 1.0, "u": 10.0, "space": "log"},
"per_device_train_batch_size": [4, 8, 16, 32, 48],
"warmup_ratio": {"l": 0.0, "u": 0.3, "space": "linear"},
"weight_decay": {"l": 0.0, "u": 0.3, "space": "linear"}
}
return output_config
def hpo_space_generic_grid(logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
subdataset_name=None,
**custom_hpo_args):
output_config = {
"learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
"num_train_epochs": [3, 10],
"per_device_train_batch_size": [16, 32],
"warmup_ratio": [0, 0.06, 0.1],
"weight_decay": [0, 0.1]
}
return output_config
def hpo_space_small(logger=None,
model_type=None,
model_size_type=None,
dataset_name=None,
subdataset_name=None,
**custom_hpo_args):
config_json = AutoGridSearchSpace.from_model_and_dataset_name(
model_type, model_size_type, dataset_name, subdataset_name, "hpo")
output_config = {}
for each_hp in config_json.keys():
if each_hp == "learning_rate":
if len(config_json[each_hp]) > 1:
output_config[each_hp] = {"l": 3e-5, "u": 1.5e-4, "space": "log"}
else:
output_config[each_hp] = config_json[each_hp]
elif each_hp == "num_train_epochs":
output_config[each_hp] = {"l": 2.0, "u": 4.0, "space": "linear"}
elif each_hp == "per_device_train_batch_size":
output_config[each_hp] = [16, 32, 64]
elif each_hp == "warmup_ratio":
output_config[each_hp] = {"l": 0.0, "u": 0.2, "space": "linear"}
elif each_hp == "weight_decay":
output_config[each_hp] = {"l": 0.0, "u": 0.3, "space": "linear"}
else:
output_config[each_hp] = config_json[each_hp]
return output_config
HPO_SEARCH_SPACE_MAPPING = OrderedDict(
[
("uni", hpo_space_gridunion),
("gnr", hpo_space_generic),
("uni_test", hpo_space_gridunion_smoke_test),
("cus", hpo_space_custom),
("buni", bounded_gridunion)
]
)
class AutoHPOSearchSpace:
"""
This is a class for getting the hpo search space based on the search space mode
(a string variable) instantiated as one of the HPO search spaces of the library when
created with the `~flaml.nlp.hpo.AutoHPOSearchSpace.from_model_and_dataset_name` method.
This class cannot be instantiated directly using ``__init__()`` (throws an error).
"""
def __init__(self):
raise EnvironmentError(
"AutoHPOSearchSpace is designed to be instantiated "
"using the `AutoHPOSearchSpace.from_config_and_method_name(cls, logger,hpo_searchspace_name,"
"model_type,model_size_type,dataset_name,subdataset_name = None,**custom_hpo_args)` methods."
)
@classmethod
def from_model_and_dataset_name(cls,
logger,
hpo_searchspace_mode,
model_type,
model_size_type,
dataset_name,
subdataset_name=None,
**custom_hpo_args):
"""
Instantiate one of the classes for getting the hpo search space from the search space name, model type,
model size type, dataset name and sub dataset name
Args:
logger:
Reference to the logger
hpo_searchspace_mode:
A string variable which is name of the hpo search space, e.g., "uni"
model_type:
A string variable which is the type of the model, e.g., "electra"
model_size_type:
A string variable which is the type of the model size, e.g., "small"
dataset_name:
A string variable which is the dataset name, e.g., "glue"
subdataset_name:
A string variable which is the sub dataset name, e.g., "rte"
custom_hpo_args:
Any additional keyword argument to be used for the function for the HPO search space
Example:
>>> AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte")
"""
if hpo_searchspace_mode in HPO_SEARCH_SPACE_MAPPING.keys():
hpo_space = HPO_SEARCH_SPACE_MAPPING[hpo_searchspace_mode](
logger,
model_type,
model_size_type,
dataset_name,
subdataset_name,
**custom_hpo_args)
return hpo_space
raise ValueError(
"Unrecognized search space mode {} and dataset {} for this kind of AutoHPOSearchSpace: {}.\n"
"Search space mode should be one of {}.".format(
hpo_searchspace_mode, dataset_name, cls.__name__,
", ".join(HPO_SEARCH_SPACE_MAPPING.keys())
)
)
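# A minimal usage sketch of the bounded grid-union mode ("buni"). The "bound"
# keyword follows the format consumed by bounded_gridunion above: an optional
# lower ("l") and/or upper ("u") cap per hyperparameter, applied to the sorted
# grid-union values. The logger and the model/dataset names are placeholders.
if __name__ == "__main__":
    import logging
    example_logger = logging.getLogger(__name__)
    bounded_space = AutoHPOSearchSpace.from_model_and_dataset_name(
        example_logger, "buni", "electra", "base", "glue", "rte",
        bound={"learning_rate": {"l": 1e-5, "u": 1e-4}})
    # prints the grid-union learning rates after the bound has been applied
    print(bounded_space["learning_rate"])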

View File

@@ -0,0 +1,51 @@
from collections import OrderedDict
from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler, MedianStoppingRule
SCHEDULER_MAPPING = OrderedDict(
[
("None", None),
("asha", ASHAScheduler),
("hb", HyperBandScheduler),
]
)
class AutoScheduler:
"""
This is a class for getting the scheduler based on the scheduler name
(a string variable) instantiated as one of the schedulers of the library when
created with the `~flaml.nlp.hpo.AutoScheduler.from_scheduler_name` method.
This class cannot be instantiated directly using ``__init__()`` (throws an error).
"""
def __init__(self):
raise EnvironmentError(
"AutoScheduler is designed to be instantiated "
"using the `AutoScheduler.from_scheduler_name(cls, scheduler_name, **kwargs)` methods."
)
@classmethod
def from_scheduler_name(cls, scheduler_name, **kwargs):
"""
Instantiate one of the schedulers using the scheduler names
Args:
scheduler_name:
A string variable for the scheduler name
Example:
>>> AutoScheduler.from_scheduler_name("asha")
"""
if scheduler_name in SCHEDULER_MAPPING.keys():
try:
return SCHEDULER_MAPPING[scheduler_name](**kwargs)
except TypeError:
return None
raise ValueError(
"Unrecognized scheduler {} for this kind of AutoScheduler: {}.\n"
"Scheduler name should be one of {}.".format(
scheduler_name, cls.__name__, ", ".join(SCHEDULER_MAPPING.keys())
)
)
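# A minimal sketch of the lookup and fallback behavior above: "asha" yields an
# ASHAScheduler instance, whereas "None" maps to None, which triggers the
# TypeError branch and therefore returns None.
if __name__ == "__main__":
    print(AutoScheduler.from_scheduler_name("asha", max_t=10))
    print(AutoScheduler.from_scheduler_name("None"))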

View File

@@ -0,0 +1,182 @@
import itertools
from collections import OrderedDict
import ray
from ray.tune.suggest.optuna import OptunaSearch
from flaml import CFO, BlendSearch
SEARCH_ALGO_MAPPING = OrderedDict(
[
("optuna", OptunaSearch),
("cfo", CFO),
("bs", BlendSearch),
("grid", None),
("gridbert", None),
("rs", None)
]
)
class AutoSearchAlgorithm:
"""
This is a class for getting the search algorithm based on the search algorithm name
(a string variable) instantiated as one of the algorithms of the library when
created with the `~flaml.nlp.hpo.AutoSearchAlgorithm.from_method_name` method.
This class cannot be instantiated directly using ``__init__()`` (throws an error).
"""
def __init__(self):
raise EnvironmentError(
"AutoSearchAlgorithm is designed to be instantiated "
"using the `AutoSearchAlgorithm.from_method_name(cls, search_algo_name, search_algo_args_mode,"
" hpo_search_space, **custom_hpo_args)` methods."
)
@classmethod
def from_method_name(cls, search_algo_name, search_algo_args_mode, hpo_search_space, **custom_hpo_args):
"""
Instantiating one of the search algorithm classes based on the search algorithm name, search algorithm
argument mode, hpo search space and other keyword args
Args:
search_algo_name:
A string variable that specifies the search algorithm name, e.g., "bs"
search_algo_args_mode:
A string variable that specifies the mode for the search algorithm args, e.g., "dft" means
initializing using the default mode
hpo_search_space:
The hpo search space
custom_hpo_args:
The customized arguments for the search algorithm (specified by user)
Example:
>>> from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace
>>> search_space_hpo = AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte")
>>> search_algo = AutoSearchAlgorithm.from_method_name(
...     "bs", "cus", search_space_hpo,
...     points_to_evaluate=[{"learning_rate": 1e-5, "num_train_epochs": 10}])
"""
assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
if not search_algo_name:
search_algo_name = "grid"
if search_algo_name in SEARCH_ALGO_MAPPING.keys():
if SEARCH_ALGO_MAPPING[search_algo_name] is None:
# grid search, random search and gridbert do not need a search algorithm object
return None
try:
"""
filter the customized hpo args in custom_hpo_args: keep only those that appear
in the argument list of the constructor of the chosen algorithm and drop the rest
"""
this_search_algo_kwargs = None
allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames
allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys() if
key in allowed_arguments}
"""
If the search_algo_args_mode is "dft", set the args to the default args, e.g.,the default args for
BlendSearch is "low_cost_partial_config": {"num_train_epochs": min_epoch,"per_device_train_batch_size"
: max(hpo_search_space["per_device_train_batch_size"].categories)},
"""
if search_algo_args_mode == "dft":
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"dft", hpo_search_space=hpo_search_space, **allowed_custom_args)
elif search_algo_args_mode == "cus":
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"cus", hpo_search_space=hpo_search_space, **allowed_custom_args)
"""
returning the hpo algorithm with the arguments
"""
return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs)
except KeyError:
return None
raise ValueError(
"Unrecognized method {} for this kind of AutoSearchAlgorithm: {}.\n"
"Method name should be one of {}.".format(
search_algo_name, cls.__name__, ", ".join(SEARCH_ALGO_MAPPING.keys())
)
)
@staticmethod
def grid2list(grid_config):
key_val_list = [[(key, each_val) for each_val in val_list['grid_search']]
for (key, val_list) in grid_config.items()]
config_list = [dict(x) for x in itertools.product(*key_val_list)]
return config_list
def get_search_algo_args_optuna(search_args_mode, hpo_search_space=None, **custom_hpo_args):
return {}
def default_search_algo_args_bs(search_args_mode, hpo_search_space=None, **custom_hpo_args):
assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
if "num_train_epochs" in hpo_search_space and \
isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
min_epoch = min(hpo_search_space["num_train_epochs"].categories)
else:
assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
min_epoch = hpo_search_space["num_train_epochs"].lower
default_search_algo_args = {
"low_cost_partial_config": {
"num_train_epochs": min_epoch,
"per_device_train_batch_size": max(hpo_search_space["per_device_train_batch_size"].categories),
},
}
if search_args_mode == "cus":
default_search_algo_args.update(custom_hpo_args)
return default_search_algo_args
def experiment_search_algo_args_bs(hpo_search_space=None):
if "num_train_epochs" in hpo_search_space and \
isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
min_epoch = min(hpo_search_space["num_train_epochs"].categories)
else:
assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
min_epoch = hpo_search_space["num_train_epochs"].lower
default_search_algo_args = {
"low_cost_partial_config": {
"num_train_epochs": min_epoch,
},
}
return default_search_algo_args
def default_search_algo_args_skopt(hpo_search_space=None):
return {}
def default_search_algo_args_dragonfly(hpo_search_space=None):
return {}
def default_search_algo_args_nevergrad(hpo_search_space=None):
return {}
def default_search_algo_args_hyperopt(hpo_search_space=None):
return {}
def default_search_algo_args_grid_search(search_args_mode, hpo_search_space=None, **custom_hpo_args):
return {}
def default_search_algo_args_random_search(search_args_mode, hpo_search_space=None, **custom_hpo_args):
return {}
DEFAULT_SEARCH_ALGO_ARGS_MAPPING = OrderedDict(
[
("optuna", get_search_algo_args_optuna),
("cfo", default_search_algo_args_bs),
("bs", default_search_algo_args_bs),
("grid", default_search_algo_args_grid_search),
("gridbert", default_search_algo_args_random_search)
]
)
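# A minimal sketch of AutoSearchAlgorithm.grid2list: it expects the ray.tune
# "grid_search" encoding and enumerates the cartesian product as plain config
# dicts (2 x 2 = 4 configurations in this toy grid).
if __name__ == "__main__":
    toy_grid = {
        "learning_rate": {"grid_search": [1e-5, 3e-5]},
        "num_train_epochs": {"grid_search": [3, 10]},
    }
    for each_config in AutoSearchAlgorithm.grid2list(toy_grid):
        print(each_config)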

View File

@@ -0,0 +1,52 @@
from collections import OrderedDict
from transformers.models.electra.modeling_electra import ElectraClassificationHead
from transformers.models.roberta.modeling_roberta import RobertaClassificationHead
MODEL_CLASSIFICATION_HEAD_MAPPING = OrderedDict(
[
("electra", ElectraClassificationHead),
("roberta", RobertaClassificationHead),
]
)
class AutoSeqClassificationHead:
"""
This is a class for getting classification head class based on the name of the LM
instantiated as one of the ClassificationHead classes of the library when
created with the `~flaml.nlp.huggingface.AutoSeqClassificationHead.from_model_type_and_config` method.
This class cannot be instantiated directly using ``__init__()`` (throws an error).
"""
def __init__(self):
raise EnvironmentError(
"AutoSeqClassificationHead is designed to be instantiated "
"using the `AutoSeqClassificationHead.from_model_type_and_config(cls, model_type, config)` methods."
)
@classmethod
def from_model_type_and_config(cls, model_type, config):
"""
Instantiate one of the classification head classes from the mode_type and model configuration.
Args:
model_type:
A string which describes the model type, e.g., "electra"
config (:class:`~transformers.PretrainedConfig`):
The huggingface class of the model's configuration:
Examples::
>>> from transformers import AutoConfig
>>> model_config = AutoConfig.from_pretrained("google/electra-base-discriminator")
>>> AutoSeqClassificationHead.from_model_type_and_config("electra", model_config)
"""
if model_type in MODEL_CLASSIFICATION_HEAD_MAPPING.keys():
return MODEL_CLASSIFICATION_HEAD_MAPPING[model_type](config)
raise ValueError(
"Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
"Model type should be one of {}.".format(
config.__class__, cls.__name__, ", ".join(MODEL_CLASSIFICATION_HEAD_MAPPING.keys())
)
)

View File

@@ -0,0 +1,121 @@
import copy
import os
import transformers
from ray import tune
import torch
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
transformers.logging.set_verbosity_error()
class TrainerForAutoTransformers(transformers.Trainer):
"""
Overriding transformers.Trainer.
Args:
model (:class:`~transformers.PreTrainedModel` or :obj:`torch.nn.Module`, `optional`):
the model to be trained, as in transformers.Trainer
"""
def get_optimizers(
self, num_training_steps
):
self.current_optimizer, self.current_scheduler = super().get_optimizers(num_training_steps)
return (self.current_optimizer, self.current_scheduler)
def evaluate(self,
eval_dataset=None):
"""
Overriding transformers.Trainer.evaluate by saving state with save_state
Args:
eval_dataset:
the dataset to be evaluated
"""
import wandb
eval_dataloader = self.get_eval_dataloader(eval_dataset)
output = self.prediction_loop(
eval_dataloader, description="Evaluation")
self.log(output.metrics)
self.save_state()
for key in list(output.metrics.keys()):
if key.startswith("eval_"):
output.metrics[key[5:]] = output.metrics[key]
tune.report(**output.metrics)
return output.metrics
def save_state(self):
"""
Overriding transformers.Trainer.save_state. Only by saving the state
can best_trial.get_best_checkpoint return a non-empty value.
"""
with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir:
self.args.output_dir = checkpoint_dir
# This is the directory name that Huggingface requires.
output_dir = os.path.join(
self.args.output_dir,
f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
self.save_model(output_dir)
torch.save(self.optimizer.state_dict(),
os.path.join(output_dir, "optimizer.pt"))
torch.save(self.lr_scheduler.state_dict(),
os.path.join(output_dir, "scheduler.pt"))
@staticmethod
def convert_num_train_epochs_to_max_steps(
num_train_epochs: int,
num_train_examples: int,
per_device_train_batch_size: int,
device_count: int):
return int(num_train_epochs * num_train_examples / per_device_train_batch_size / device_count)
@staticmethod
def convert_max_steps_to_num_train_epochs(
max_steps: int,
num_train_examples: int,
per_device_train_batch_size: int,
device_count: int):
return float(max_steps * per_device_train_batch_size * device_count) / num_train_examples
@staticmethod
def convert_warmup_ratio_to_warmup_steps(
warmup_ratio,
max_steps=None,
num_train_epochs=None,
num_train_examples=None,
per_device_train_batch_size=None,
device_count=None):
if max_steps:
return int(warmup_ratio * max_steps)
max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
num_train_epochs,
num_train_examples,
per_device_train_batch_size,
device_count)
return int(warmup_ratio * max_steps)
@staticmethod
def convert_warmup_steps_to_warmup_ratio(
warmup_steps: int,
num_train_epochs: int,
num_train_examples: int,
per_device_train_batch_size: int,
device_count: int):
max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
num_train_epochs,
num_train_examples,
per_device_train_batch_size,
device_count)
return float(warmup_steps / max_steps)
@staticmethod
def resolve_hp_conflict(search_space_dict):
if "max_steps" in search_space_dict and "num_train_epochs" in search_space_dict:
del search_space_dict["num_train_epochs"]
if "warmup_ratio" in search_space_dict and "warmup_steps" in search_space_dict:
del search_space_dict["warmup_ratio"]
return search_space_dict
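# A worked sketch of the step/epoch conversion helpers above; the dataset size
# (2490 examples, roughly the size of the GLUE-RTE training split) is only an
# illustrative assumption.
if __name__ == "__main__":
    max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
        num_train_epochs=3,
        num_train_examples=2490,
        per_device_train_batch_size=32,
        device_count=1)
    print(max_steps)  # int(3 * 2490 / 32 / 1) = 233
    warmup_steps = TrainerForAutoTransformers.convert_warmup_ratio_to_warmup_steps(
        0.1, max_steps=max_steps)
    print(warmup_steps)  # int(0.1 * 233) = 23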

View File

View File

@@ -0,0 +1,677 @@
import re
import pathlib
import os
from azure.storage.blob import BlobServiceClient, ContainerClient
from transformers import AutoConfig
from ..utils import get_wandb_azure_key
from datetime import datetime
from dataclasses import dataclass, field
from ..hpo.grid_searchspace_auto import HF_MODEL_LIST
import json
@dataclass
class JobID:
dat: list = field(default=None)
subdat: str = field(default=None)
mod: str = field(default=None)
spa: str = field(default=None)
arg: str = field(default=None)
alg: str = field(default=None)
pru: str = field(default=None)
pre_full: str = field(default=None)
pre: str = field(default=None)
presz: str = field(default=None)
spt: str = field(default=None)
rep: int = field(default=0)
sddt: int = field(default=None)
sdhf: int = field(default=None)
def __init__(self,
console_args=None):
if console_args:
self.set_jobid_from_console_args(console_args)
def set_unittest_config(self):
"""
set the JobID config for unit test
"""
self.dat = ["glue"]
self.subdat = "mrpc"
self.mod = "hpo"
self.spa = "uni_test"
self.arg = "dft"
self.alg = "bs"
self.pru = "None"
self.pre_full = "google/mobilebert-uncased"
self.pre = "mobilebert"
self.presz = "small"
self.spt = "rspt"
self.rep = 0
self.sddt = 43
self.sdhf = 42
def is_match(self, partial_jobid):
"""
return a boolean indicating whether the current JobID matches the partial JobID
defined in partial_jobid. For example,
self = JobID(dat = ['glue'],
subdat = 'cola',
mod = 'bestnn',
spa = 'buni',
arg = 'cus',
alg = 'bs',
pru = 'None',
pre = 'funnel',
presz = 'xlarge',
spt = 'rspt',
rep = 0,
sddt = 43,
sdhf = 42)
partial_jobid1 = JobID(dat = ['glue'],
subdat = 'cola',
mod = 'hpo')
partial_jobid2 = JobID(dat = ['glue'],
subdat = 'cola',
mod = 'bestnn')
return False for partial_jobid1 and True for partial_jobid2
"""
is_not_match = False
for key, val in partial_jobid.__dict__.items():
if val is None:
continue
if getattr(self, key) != val:
is_not_match = True
return not is_not_match
def to_wandb_string(self):
"""
convert the current JobID into the string used to name the wandb run group
"""
field_dict = self.__dict__
keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key], key)
if type(field_dict[key]) == list
else str(field_dict[key])
for key in field_dict.keys() if not key.endswith("_full")])
return keytoval_str
def to_jobid_string(self):
"""
convert the current JobID into a blob name string which contains all the fields
"""
list_keys = list(JobID.__dataclass_fields__.keys())
field_dict = self.__dict__
keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key)
if type(field_dict[key]) == list
else key + "=" + str(field_dict[key])
for key in list_keys if not key.endswith("_full")])
return keytoval_str
def to_partial_jobid_string(self):
"""
convert the current JobID into a blob name string which only contains the fields that have been assigned a value
"""
list_keys = list(JobID.__dataclass_fields__.keys())
field_dict = self.__dict__ # field_dict contains fields whose values are not None
keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key)
if type(field_dict[key]) == list
else key + "=" + str(field_dict[key])
for key in list_keys if key in field_dict.keys()])
return keytoval_str
@staticmethod
def blobname_to_jobid_dict(keytoval_str):
"""
converting an azure blobname to a JobID config,
e.g., blobname = "dat=glue_subdat=cola_mod=bestnn_spa=buni_arg=cus_
alg=bs_pru=None_pre=funnel_presz=xlarge_spt=rspt_rep=0_sddt=43_sdhf=42.json"
the converted jobid dict = {dat: ['glue'], subdat: 'cola', mod: 'bestnn',
spa: 'buni', arg: 'cus', alg: 'bs', pru: 'None',
pre: 'funnel', presz: 'xlarge', spt: 'rspt',
rep: 0, sddt: 43, sdhf: 42}
"""
field_keys = [key for key in
list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")]
regex_expression = ".*" + "_".join([key + "=(?P<" + key + ">.*)" for key in field_keys]) + r"\.(json|zip)"
result = re.search(regex_expression, keytoval_str)
if result:
result_dict = {}
for key in field_keys:
if key == "dat":
result_dict[key] = [result.group(key)]
elif key == "rep":
try:
result_dict[key] = int(result.group(key))
except (IndexError, ValueError):
result_dict[key] = -1
else:
result_dict[key] = result.group(key)
return result_dict
else:
return None
@staticmethod
def dataset_list_to_str(dataset_name, key):
if key == "dat":
assert isinstance(dataset_name, list)
return "-".join(dataset_name)
else:
return dataset_name
def set_jobid_from_arg_list(self,
**jobid_list
):
"""
set the fields of the current JobID from keyword arguments
"""
for key in jobid_list.keys():
assert key in JobID.__dataclass_fields__.keys()
setattr(self, key, jobid_list[key])
@staticmethod
def convert_blobname_to_jobid(blobname):
"""
converting a blobname string to a JobID object
"""
jobconfig_dict = JobID.blobname_to_jobid_dict(blobname)
if jobconfig_dict:
jobconfig = JobID()
jobconfig.set_jobid_from_arg_list(**jobconfig_dict)
return jobconfig
else:
return None
@staticmethod
def get_full_data_name(dataset_name, subdataset_name=None):
"""
convert a dataset name and sub dataset name to a full dataset name
"""
full_dataset_name = dataset_name
if subdataset_name:
full_dataset_name = full_dataset_name + "_" + subdataset_name
return full_dataset_name
def get_jobid_full_data_name(self):
"""
get the full dataset name of the current JobID object
"""
return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat, "dat"), self.subdat)
@staticmethod
def _extract_model_type_with_keywords_match(pre_full):
matched_model_type = []
for each_model_type in HF_MODEL_LIST:
if each_model_type in pre_full:
matched_model_type.append(each_model_type)
assert len(matched_model_type) > 0
return max(enumerate(matched_model_type), key=lambda x: len(x[1]))[1]
@staticmethod
def extract_model_type(full_model_name):
model_config = AutoConfig.from_pretrained(full_model_name)
config_json_file = model_config.get_config_dict(full_model_name)[0]
try:
model_type = config_json_file["model_type"]
except KeyError:
model_type = JobID._extract_model_type_with_keywords_match(full_model_name)
return model_type
def set_jobid_from_console_args(self, console_args):
self.dat = console_args.dataset_subdataset_name.split(":")[0].split(",")
self.subdat = console_args.dataset_subdataset_name.split(":")[1]
self.mod = console_args.algo_mode
self.spa = console_args.space_mode
self.arg = console_args.search_alg_args_mode
self.alg = console_args.algo_name
self.pru = console_args.pruner
self.pre_full = console_args.pretrained_model_size.split(":")[0]
self.pre = JobID.extract_model_type(self.pre_full)
self.presz = console_args.pretrained_model_size.split(":")[1]
self.spt = console_args.resplit_mode
self.rep = console_args.rep_id
self.sddt = console_args.seed_data
self.sdhf = console_args.seed_transformers
def legacy_old_blobname_to_new_blobname(self,
old_blobname):
spa_id2val = {
0: "gnr",
1: "uni"
}
alg_id2val = {
0: "bs",
1: "optuna",
2: "cfo"
}
pre_id2val = {
0: "xlnet-base-cased",
1: "albert-large-v1",
2: "distilbert-base-uncased",
3: "microsoft/deberta-base",
4: "funnel-transformer/small-base",
5: "microsoft/deberta-large",
6: "funnel-transformer/large-base",
7: "funnel-transformer/intermediate-base",
8: "funnel-transformer/xlarge-base"
}
presz_id2val = {
0: "base",
1: "small",
2: "base",
3: "base",
4: "base",
5: "large",
6: "large",
7: "intermediate",
8: "xlarge"
}
spt_id2val = {
0: "rspt",
1: "ori"
}
result_grid = re.search(r".*_mod(el)?(?P<model_id>\d+)_None_None(_spt(?P<split_id>\d+))?_rep(?P<rep_id>\d+).log",
old_blobname)
result = re.search(
r".*_mod(el)?(?P<model_id>\d+)_(alg)?(?P<algo_id>\d+)_(spa)?"
r"(?P<space_id>\d+)(_spt(?P<split_id>\d+))?_rep(?P<rep_id>\d+).log",
old_blobname)
if result_grid:
dat = [old_blobname.split("/")[1].split("_")[0]]
subdat = old_blobname.split("/")[1].split("_")[1]
mod = "hpo"
spa = None
arg = None
alg = None
pru = None
pre = pre_id2val[int(result_grid.group("model_id"))]
presz = presz_id2val[int(result_grid.group("model_id"))]
try:
spt = spt_id2val[int(result_grid.group("split_id"))]
except (KeyError, TypeError):
spt = spt_id2val[0]
rep = None
self.set_jobid_from_arg_list(dat=dat, subdat=subdat, mod=mod, spa=spa, arg=arg, alg=alg,
pru=pru, pre=pre, presz=presz, spt=spt, rep=rep)
return self.to_jobid_string()
if result:
dat = [old_blobname.split("/")[1].split("_")[0]]
subdat = old_blobname.split("/")[1].split("_")[1]
mod = "hpo"
spa = spa_id2val[int(result.group("space_id"))]
arg = "dft"
alg = alg_id2val[int(result.group("algo_id"))]
pru = "None"
pre = pre_id2val[int(result.group("model_id"))]
presz = presz_id2val[int(result.group("model_id"))]
try:
spt = spt_id2val[int(result.group("split_id"))]
except (KeyError, TypeError):
spt = spt_id2val[0]
rep = int(result.group("rep_id"))
self.set_jobid_from_arg_list(dat=dat, subdat=subdat, mod=mod, spa=spa, arg=arg, alg=alg,
pru=pru, pre=pre, presz=presz, spt=spt, rep=rep)
return self.to_jobid_string()
return None
class AzureUtils:
def __init__(self,
root_log_path=None,
console_args=None,
jobid=None,
autohf=None):
if root_log_path:
self.root_log_path = root_log_path
else:
self.root_log_path = "logs_azure"
self.jobid = jobid
self.console_args = console_args
self.autohf = autohf
if console_args:
wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path)
self._container_name = container_name
self._azure_key = azure_key
def _get_complete_connection_string(self):
return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \
+ self._azure_key + ";EndpointSuffix=core.windows.net"
def _init_azure_clients(self):
connection_string = self._get_complete_connection_string()
container_client = ContainerClient.from_connection_string(conn_str=connection_string,
container_name=self._container_name)
return container_client
def _init_blob_client(self,
local_file_path):
connection_string = self._get_complete_connection_string()
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path)
return blob_client
def upload_local_file_to_azure(self, local_file_path):
blob_client = self._init_blob_client(local_file_path)
with open(local_file_path, "rb") as fin:
blob_client.upload_blob(fin, overwrite=True)
def download_azure_blob(self, blobname):
blob_client = self._init_blob_client(blobname)
pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(
parents=True, exist_ok=True)
with open(blobname, "wb") as fout:
fout.write(blob_client.download_blob().readall())
def write_exception(self):
result_json = {
"timestamp": datetime.now(),
}
local_file_path = self.generate_local_json_path()
self.create_local_json_and_upload(result_json, local_file_path)
def extract_log_from_analysis(self,
analysis):
"""
Extracting a json object for storing the key information returned from tune.run
"""
json_log = []
for each_trial in analysis.trials:
trial_id = each_trial.trial_id
start_time = each_trial.start_time
last_update_time = each_trial.last_update_time
config = each_trial.config
try:
metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric]
time_stamp = each_trial.metric_analysis['timestamp']
json_log.append({"trial_id": trial_id,
"start_time": start_time,
"last_update_time": last_update_time,
"config": config,
"metric_score": metric_score,
"time_stamp": time_stamp})
except KeyError:
pass
return json_log
def write_autohf_output(self,
json_log=None,
valid_metric=None,
predictions=None,
duration=None):
"""
write the key info from a job and upload to azure blob storage
"""
local_file_path = self.generate_local_json_path()
output_json = {}
if json_log:
output_json["val_log"] = json_log
if valid_metric:
output_json["valid_metric"] = valid_metric
if duration:
output_json["duration"] = duration
if len(output_json) > 0:
self.create_local_json_and_upload(output_json, local_file_path)
if predictions is not None:
self.create_local_prediction_and_upload(local_file_path, predictions)
def generate_local_json_path(self):
"""
return a path string for storing the json file locally
"""
full_dataset_name = self.jobid.get_jobid_full_data_name()
jobid_str = self.jobid.to_jobid_string()
local_file_path = os.path.join(self.root_log_path, full_dataset_name, jobid_str + ".json")
pathlib.Path(os.path.join(self.root_log_path, full_dataset_name)).mkdir(parents=True, exist_ok=True)
return local_file_path
def create_local_json_and_upload(self, result_json, local_file_path):
with open(local_file_path, "w") as fout:
fout.write(json.dumps(result_json))
fout.flush()
self.upload_local_file_to_azure(local_file_path)
def legacy_to_json(self):
container_client = self._init_azure_clients()
for old_blob in container_client.list_blobs():
new_jobid_str = self.jobid.legacy_old_blobname_to_new_blobname(old_blob.name)
if new_jobid_str:
self.download_azure_blob(old_blob.name)
with open(old_blob.name, "r") as fin:
alllines = fin.readlines()
wandb_group_name = alllines[0].rstrip("\n:")
timestamp = re.search(
r"timestamp:(?P<timestamp>.*):",
alllines[1].strip("\n")).group("timestamp")
duration = re.search(
r"duration:(?P<duration>.*)$",
alllines[3].strip("\n")).group("duration")
sample_num = int(re.search(
r"sample_num: (?P<sample_num>\d+)$",
alllines[4].strip("\n")).group("sample_num"))
validation = {"accuracy": float(re.search(
"validation accuracy: (?P<validation>.*)$",
alllines[2].strip("\n")).group("validation"))}
test = None
if len(alllines) > 6:
result_test = re.search("test accuracy:(?P<test>.*)$", alllines[6].strip("\n"))
if result_test:
test = json.loads(result_test.group("test"))
yml_file = None
if len(alllines) > 8:
if alllines[8].startswith("aml"):
yml_file = alllines[8].strip("\n")
new_json = {"wandb_group_name": wandb_group_name,
"validation": validation,
"test": test,
"timestamp": timestamp,
"duration": duration,
"sample_num": sample_num,
"yml_file": yml_file}
full_dataset_name = self.jobid.get_jobid_full_data_name()
new_blobname = os.path.join("logs_azure/", full_dataset_name, new_jobid_str + ".json")
self.create_local_json_and_upload(new_json, new_blobname)
def create_local_prediction_and_upload(self,
local_json_file,
predictions):
"""
store predictions (a .zip file) locally and upload
"""
azure_save_file_name = local_json_file.split("/")[-1][:-5]
local_archive_path = self.autohf.output_prediction(predictions,
output_prediction_path=self.console_args.data_root_dir + "result/",
output_zip_file_name=azure_save_file_name)
self.upload_local_file_to_azure(local_archive_path)
def get_ranked_configs(self, metric_mode):
"""
extract the configs (ranked in descending order by the score) from the azure file of the current object
(defined by self.jobid)
"""
azure_file_path = self.generate_local_json_path()
self.download_azure_blob(azure_file_path)
json_log = json.load(open(azure_file_path, "r"))
assert "val_log" in json_log
trialid_to_score = {}
trialid_to_config = {}
for each_entry in json_log["val_log"]:
trial_id = each_entry["trial_id"]
config = each_entry["config"]
this_score = each_entry["metric_score"][metric_mode]
trialid_to_config[trial_id] = config
trialid_to_score[trial_id] = this_score
sorted_trialid_to_score = sorted(trialid_to_score.items(), key=lambda x: x[1], reverse=True)
return [trialid_to_config[entry[0]] for entry in sorted_trialid_to_score]
@staticmethod
def is_after_earliest_time(this_blob, earliest_time):
import pytz
utc = pytz.UTC
if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])):
return True
return False
def get_blob_list_matching_partial_jobid(self, root_log_path, partial_jobid, earliest_time=None):
"""
get all blobs whose jobid configs match the partial_jobid
"""
blob_list = []
container_client = self._init_azure_clients()
jobid_config = JobID()
for each_blob in container_client.list_blobs():
if each_blob.name.startswith(root_log_path):
each_jobconfig = jobid_config.convert_blobname_to_jobid(each_blob.name)
is_append = False
if each_jobconfig:
if each_jobconfig.is_match(partial_jobid):
is_append = True
if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time):
is_append = False
if is_append:
blob_list.append((each_jobconfig, each_blob))
return blob_list
@staticmethod
def extract_config_and_score(blobname):
data_json = json.load(open(blobname, "r"))
return [(x['config'], x['metric_score']["max"], x['start_time']) for x in data_json['val_log']]
def get_config_and_score_from_partial_jobid(self,
root_log_path,
partial_jobid,
group_attrs,
method,
earliest_time=None):
"""
get the best config and best score for each job matching the partial_jobid
"""
matched_blob_list = self.get_blob_list_matching_partial_jobid(
root_log_path,
partial_jobid,
earliest_time=earliest_time)
group_dict = {}
for (each_jobconfig, each_blob) in matched_blob_list:
self.download_azure_blob(each_blob.name)
config_and_score = AzureUtils.extract_config_and_score(each_blob.name)
if method == "unsorted":
sorted_config_and_score = config_and_score
elif method == "sort_time":
sorted_config_and_score = sorted(config_and_score, key=lambda x: x[2], reverse=False)
else:
sorted_config_and_score = sorted(config_and_score, key=lambda x: x[1], reverse=True)
group_attr_list = []
for each_attr in group_attrs:
group_val = getattr(each_jobconfig, each_attr)
if isinstance(group_val, list):
group_attr_list.append(JobID.dataset_list_to_str(group_val, each_attr))
else:
group_attr_list.append(group_val)
group_attr_tuple = tuple(group_attr_list)
group_dict.setdefault(group_attr_tuple, [])
group_dict[group_attr_tuple].append([(config, score, each_blob.name)
for (config, score, ts) in sorted_config_and_score])
return group_dict
def get_validation_perf(self, console_args=None, partial_jobid_config=None):
"""
get the validation score for all blobs matching the partial_jobid_config
"""
if partial_jobid_config.pre == "electra":
dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2", "qnli", "mnli"]
else:
dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2"]
dataset_vallist1 = [0] * len(dataset_namelist)
dataset_vallist2 = [0] * len(dataset_namelist)
matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path,
partial_jobid_config)
for (each_jobconfig, each_blob) in matched_blob_list:
subdat_name = each_jobconfig.subdat
self.download_azure_blob(each_blob.name)
data_json = json.load(open(each_blob.name, "r"))
print(len(data_json["val_log"]))
validation_metric = data_json['valid_metric']
try:
dataset_idx = dataset_namelist.index(subdat_name)
dataset_vallist1[dataset_idx], dataset_vallist2[dataset_idx] \
= self.get_validation_metricstr(validation_metric)
except ValueError:
pass
# print(" & ".join(dataset_vallist1))
# print(", ,".join(dataset_vallist2))
def get_validation_metricstr(self, validation_metric):
"""
get a string representing validations for pasting to Google spreadsheet
"""
validation_str1 = validation_str2 = ""
is_first = True
for key in ["f1", "accuracy", "pearson", "spearmanr", "matthews_correlation"]:
if "eval_" + key in validation_metric.keys():
if is_first:
validation_str1 += str("%.1f" % (validation_metric["eval_" + key] * 100))
validation_str2 += str(validation_metric["eval_" + key] * 100)
is_first = False
else:
validation_str1 += "/" + str("%.1f" % (validation_metric["eval_" + key] * 100))
validation_str2 += "," + str(validation_metric["eval_" + key] * 100)
return validation_str1, validation_str2
def get_test_perf(self, partial_jobid_config=None, result_root_dir=None):
"""
get the test scores for all blobs matching the partial_jobid_config
"""
import shutil
from flaml.nlp.dataset.submission_auto import file_name_mapping_glue, output_blank_tsv
matched_blob_list = self.get_blob_list_matching_partial_jobid("data/", partial_jobid_config)
partial_jobid_str = partial_jobid_config.to_partial_jobid_string()
output_dir = os.path.join(result_root_dir, partial_jobid_str)
if os.path.exists(output_dir):
assert os.path.isdir(output_dir)
else:
os.mkdir(output_dir)
output_blank_tsv(output_dir)
for (each_jobconfig, each_blob) in matched_blob_list:
subdat_name = each_jobconfig.subdat
self.download_azure_blob(each_blob.name)
import zipfile
if os.path.exists(each_blob.name[:-4]):
assert os.path.isdir(each_blob.name[:-4])
else:
os.mkdir(each_blob.name[:-4])
with zipfile.ZipFile(each_blob.name, 'r') as zip_ref:
zip_ref.extractall(each_blob.name[:-4])
src = os.path.join(each_blob.name[:-4], file_name_mapping_glue[subdat_name][0])
dst = os.path.join(output_dir, file_name_mapping_glue[subdat_name][0])
shutil.copy(src, dst)
shutil.make_archive(os.path.join(output_dir), 'zip', output_dir)
def get_best_perf_config(self, console_args, jobid_config):
"""
get the config of the best performed trial
"""
matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, jobid_config)
assert len(matched_blob_list) == 1, \
"there must be exactly one blob matching the given jobid config, found {}".format(len(matched_blob_list))
each_jobconfig, each_blob = matched_blob_list[0]
self.download_azure_blob(each_blob.name)
data_json = json.load(open(each_blob.name, "r"))
sorted_entries = sorted(data_json['val_log'], key=lambda x: x['metric_score']['max'], reverse=True)
best_config = sorted_entries[0]['config']
if jobid_config.subdat != "mrpc":
best_score = sorted_entries[0]['metric_score']['max']
else:
best_score = (data_json["valid_metric"]["eval_f1"], data_json["valid_metric"]["eval_accuracy"])
return best_config, best_score
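# A minimal round-trip sketch for JobID: serialize a fully specified JobID into
# a blob-name string, parse it back, and match it against a partial JobID. No
# Azure connection is needed for this part; the unit-test config is used as the
# example.
if __name__ == "__main__":
    full_jobid = JobID()
    full_jobid.set_unittest_config()
    blob_name = full_jobid.to_jobid_string() + ".json"
    print(blob_name)
    parsed_jobid = JobID.convert_blobname_to_jobid(blob_name)
    partial_jobid = JobID()
    partial_jobid.subdat = "mrpc"
    print(parsed_jobid.is_match(partial_jobid))  # True: only the set fields are compared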

View File

@@ -0,0 +1,357 @@
def extract_ranked_config_score(console_args, partial_config_dict):
from .azure_utils import AzureUtils
azure_utils = AzureUtils(console_args=console_args)
for method, each_partial_config in partial_config_dict.items():
dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(each_partial_config,
["dat", "subdat"], method)
for each_dataset, configscorelist in dataset2configscorelist.items():
for config_idx in range(len(configscorelist)):
avg_scores = configscorelist[config_idx][0][1]
top_config = configscorelist[config_idx][0][0]
print(avg_scores)
print(top_config)
# print(method + "," + str(each_dataset) + ",rep=" + str(config_idx))
# print("avg score :" + str(avg_scores))
# print(''.join(['{0}={1}\n'.format(key, top_config[key]) for key in sorted(top_config.keys())]))
def extract_sorted_config_list(dataset2configscorelist, topk):
dataset2topkconfigs = {}
for dataset, configscorelist in dataset2configscorelist.items():
all_configscorelist = []
for scorelist in configscorelist:
for item in scorelist:
if item[0] not in [x[0] for x in all_configscorelist]:
all_configscorelist.append(item)
sorted_all_configscorelist = sorted(all_configscorelist, key=lambda x: x[1], reverse=True)
topk_configs = []
for each_hp in ("learning_rate", "num_train_epochs", "per_device_train_batch_size", "warmup_ratio",
"weight_decay", "adam_epsilon"):
topk_configs.append((each_hp, [sorted_all_configscorelist[x][0][each_hp] for x in range(topk)]))
topk_configs.append(("perf", [sorted_all_configscorelist[x][1] for x in range(topk)]))
dataset2topkconfigs[dataset] = topk_configs
return dataset2topkconfigs
def dict2tuple(this_dict):
tuple_list = []
for key in sorted(this_dict.keys()):
tuple_list.append(this_dict[key])
return tuple(tuple_list)
def merge_configscore_list(small_dataset2configscorelist):
dataset2merged_configscorelist = {}
for (dataset, each_configscore_list) in small_dataset2configscorelist.items():
merged_configscore_list = {}
for rep_id in range(len(each_configscore_list)):
for each_configscore_entry in each_configscore_list[rep_id]:
is_exist = False
for configscore in merged_configscore_list.keys():
if configscore[0] == each_configscore_entry[0]:
is_exist = True
break
if is_exist is False:
merged_configscore_list[dict2tuple(each_configscore_entry[0])] = each_configscore_entry[1]
dataset2merged_configscorelist[dataset] = merged_configscore_list
return dataset2merged_configscorelist
def get_result(console_args, partial_jobid_config):
from .azure_utils import AzureUtils, JobID
azure_utils = AzureUtils(console_args=console_args)
dataset2configscorelist = \
azure_utils.get_config_and_score_from_partial_config(
console_args.azure_root_log_path,
partial_jobid_config,
["dat", "subdat"],
"hpo")
for dataset, configscore_list in dataset2configscorelist.items():
for rep_id in range(len(configscore_list)):
config_dict = configscore_list[rep_id][0][0]
score = configscore_list[rep_id][0][1]
print(dataset, rep_id)
print_config(config_dict)
print(score)
print()
def print_config(config_dict):
for key in sorted(config_dict.keys()):
if key in ("attention_probs_dropout_prob", "hidden_dropout_prob", "seed"):
continue
if key == "per_device_train_batch_size":
short_key = "batch_size"
elif key == "num_train_epochs":
short_key = "epochs"
else:
short_key = key
print(short_key, config_dict[key])
def compare_small_vs_large(console_args):
from .azure_utils import AzureUtils, JobID
azure_utils = AzureUtils(console_args=console_args)
partial_jobid_config = JobID()
partial_jobid_config.pre = "deberta"
partial_jobid_config.mod = "hpo"
partial_jobid_config.spa = "uni"
partial_jobid_config.presz = "base"
small_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config,
["dat", "subdat"], "list")
small_mergedconfiglist = merge_configscore_list(small_dataset2configscorelist)
partial_jobid_config = JobID()
partial_jobid_config.pre = "deberta"
partial_jobid_config.mod = "hpo"
partial_jobid_config.spa = "uni"
partial_jobid_config.presz = "large"
large_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config,
["dat", "subdat"], "hpo")
large_mergedconfiglist = merge_configscore_list(large_dataset2configscorelist)
for (each_dataset, merged_small_configlist) in small_mergedconfiglist.items():
merged_large_configlist = large_mergedconfiglist[each_dataset]
print(each_dataset)
print()
for (each_tuple, large_score) in sorted(merged_large_configlist.items(), key=lambda x: x[1], reverse=True):
# small_score = merged_small_configlist[each_tuple]
is_in_onlysmall = each_tuple in small_mergedconfiglist[each_dataset]
for each_val in each_tuple:
print(each_val, end=", ")
print(large_score, is_in_onlysmall, sep=",")
print()
for (each_tuple, small_score) in \
sorted(small_mergedconfiglist[each_dataset].items(), key=lambda x: x[1], reverse=True):
is_in_large = each_tuple in large_mergedconfiglist[each_dataset]
for each_val in each_tuple:
print(each_val, end=", ")
print(small_score, is_in_large, sep=",")
def check_conflict(console_args, partial_jobid_config_list):
from .azure_utils import AzureUtils, JobID
azure_utils = AzureUtils(console_args=console_args)
for each_partial_config in partial_jobid_config_list:
dataset2configscorelist = \
azure_utils.get_config_and_score_from_partial_config(
console_args.azure_root_log_path,
each_partial_config,
["dat", "subdat"],
"unsorted")
for (dataset, configscorelists) in dataset2configscorelist.items():
config2score = {}
for each_configscorelist in configscorelists:
for (config, score, blobname) in each_configscorelist:
config_dict = dict2tuple(config)
try:
config2score[config_dict].append((score, blobname))
except KeyError:
config2score.setdefault(config_dict, [])
config2score[config_dict].append((score, blobname))
dup_keys = [config for config in config2score.keys() if len(config2score[config]) > 1]
dupkey_count = [len(set([y[0] for y in config2score[x]])) for x in dup_keys]
print(dataset)
print(len(config2score))
print(len(dupkey_count))
print(dupkey_count)
def print_cfo(console_args):
from .azure_utils import JobID, AzureUtils
jobid_config = JobID()
jobid_config.mod = "bestnn"
jobid_config.spa = "buni"
jobid_config.alg = "bs"
jobid_config.pre = "funnel"
jobid_config.presz = "xlarge"
for each_rep in range(3):
jobid_config.rep = each_rep
azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
dataset2configscorelist = \
azure_utils.get_config_and_score_from_partial_config(
console_args.azure_root_log_path,
jobid_config,
["dat", "subdat"],
"sort_time")
dataset = ('glue', 'mrpc')
configscorelist = dataset2configscorelist[dataset]
count = 0
print(dataset)
for (config, score, blobname) in sorted(configscorelist[0], key=lambda x: x[1], reverse=True)[0:1]:
print(count)
print(score)
print_config(config)
print()
count += 1
def download_validation(console_args, result_root_dir):
from .azure_utils import JobID, AzureUtils
partial_jobid_config = JobID()
partial_jobid_config.mod = "grid"
partial_jobid_config.pre = "roberta"
partial_jobid_config.presz = "base"
# partial_jobid_config.alg = "optuna"
# partial_jobid_config.pru = "asha"
partial_jobid_config.rep = 0
azure_utils = AzureUtils(console_args=console_args, jobid=partial_jobid_config)
azure_utils.get_validation_perf(console_args=console_args, partial_jobid_config=partial_jobid_config)
azure_utils.get_test_perf(partial_jobid_config, result_root_dir)
def get_result_str(jobid_config, val_score, test_score, best_config, subdat2config=None, mode="grid"):
result_str = jobid_config.subdat.upper() + ","
if jobid_config.alg:
result_str += jobid_config.alg.upper().replace("OPTUNA", "Optuna")
if jobid_config.pru is not None and jobid_config.pru != "None":
result_str += "+" + jobid_config.pru.upper()
if jobid_config.subdat != "mrpc":
result_str += ",rep " + str(jobid_config.rep) + " & " + str(
"%.1f" % (val_score * 100)) + " & " + str(test_score)
else:
result_str += ",rep " + str(jobid_config.rep) + " & " + str(
"%.1f" % (val_score[0] * 100)) + "/" + str(
"%.1f" % (val_score[1] * 100)) + " & " + str(test_score)
for hp in ["learning_rate", "warmup_ratio", "per_device_train_batch_size", "hidden_dropout", "attention_dropout",
"weight_decay"]:
if hp not in best_config:
result_str += " & "
else:
if mode == "hpo":
if best_config[hp] > 1.2 * subdat2config[jobid_config.subdat][hp]:
wrap_left = "\\cellcolor{green!85}{"
elif best_config[hp] > subdat2config[jobid_config.subdat][hp]:
wrap_left = "\\cellcolor{green!15}{"
elif best_config[hp] < subdat2config[jobid_config.subdat][hp] / 1.2:
wrap_left = "\\cellcolor{red!85}{"
else:
wrap_left = "\\cellcolor{red!15}{"
wrap_right = "}"
else:
wrap_left = wrap_right = ""
if hp == "per_device_train_batch_size" or hp == "learning_rate":
wrap_left = wrap_right = ""
if hp == "learning_rate":
result_str += " & " + wrap_left + "{:.1e}".format(best_config[hp]) + wrap_right
elif hp == "per_device_train_batch_size":
result_str += " & " + wrap_left + str(best_config[hp]) + wrap_right
else:
result_str += " & " + wrap_left + str("%.3f" % best_config[hp]) + wrap_right
return result_str + "\\\\"
def extract_grid(console_args, jobid_config, overfitting_subdat, test_scores):
from .azure_utils import JobID, AzureUtils
key2printstr = {}
subdat2config = {}
for idx in range(len(overfitting_subdat)):
jobid_config.subdat = overfitting_subdat[idx]
jobid_config.mod = "grid"
jobid_config.rep = 0
azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config)
best_config["hidden_dropout"] = 0.1
best_config["attention_dropout"] = 0.1
test_score = test_scores[idx]
key2printstr[jobid_config.subdat.upper() + ", grid"] = get_result_str(jobid_config, val_score,
test_score, best_config)
subdat2config[jobid_config.subdat] = best_config
print()
for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]):
print(printstr)
return subdat2config
def extract_hpo(
console_args,
jobid_config,
overfitting_subdat,
overfitting_alg,
overfitting_pru,
overfitting_rep,
subdat2config,
test_scores):
from .azure_utils import AzureUtils
key2printstr = {}
for idx in range(len(overfitting_subdat)):
jobid_config.subdat = overfitting_subdat[idx]
jobid_config.alg = overfitting_alg[idx]
jobid_config.pru = overfitting_pru[idx]
jobid_config.rep = overfitting_rep[idx]
azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config)
test_score = test_scores[idx]
key2printstr[jobid_config.subdat.upper() + "," + jobid_config.alg.upper() + ","
+ jobid_config.pru + ",rep " + str(jobid_config.rep)] \
= get_result_str(jobid_config, val_score, test_score, best_config, subdat2config, mode="hpo")
for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]):
print(printstr)
def extract_roberta_overfitting_configs(console_args):
from .azure_utils import JobID, AzureUtils
jobid_config = JobID()
jobid_config.pre = "roberta"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "mrpc", "cola", "sst2", "stsb"]
test_scores = ["73.1", "91.4/88.5", "61.4", "96", "89.5/88.7"]
subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores)
jobid_config = JobID()
jobid_config.pre = "roberta"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "rte", "rte", "mrpc", "mrpc", "mrpc", "sst2",
"rte", "mrpc", "mrpc", "stsb", "sst2", "sst2",
"rte", "rte", "mrpc", "mrpc", "sst2", "sst2"]
overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "rs", "rs",
"rs", "rs", "rs", "rs", "rs", "rs",
"optuna", "optuna", "optuna", "optuna", "optuna", "optuna"]
overfitting_pru = ["None", "None", "None", "None", "None", "None", "None",
"asha", "asha", "asha", "asha", "asha", "asha",
"asha", "asha", "asha", "asha", "asha", "asha"]
overfitting_rep = [0, 1, 2, 0, 1, 2, 0,
1, 0, 2, 2, 1, 2,
1, 2, 0, 1, 1, 2]
test_scores = ["71.5", "72.3", "72.2", "90.5/87.1", "90.5/87.4", "90.5/87.2", "95.6",
"72.4", "90.7/87.4", "91.0/87.9", "89.4/88.8", "95.2", "95.7",
"72.4", "72.4", "90.8/87.4", "90.3/86.5", "95.1", "95.8"]
extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep,
subdat2config, test_scores)
def extract_electra_overfitting_configs(console_args):
from .azure_utils import JobID, AzureUtils
jobid_config = JobID()
jobid_config.pre = "electra"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "qnli", "cola"]
test_scores = ["74.4", "93.2", "64.8"]
subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores)
jobid_config = JobID()
jobid_config.pre = "electra"
jobid_config.presz = "base"
overfitting_subdat = ["rte", "rte", "qnli", "cola", "qnli", "cola"]
overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "optuna"]
overfitting_pru = ["None", "None", "None", "asha", "asha", "asha"]
overfitting_rep = [0, 1, 0, 2, 0, 0]
test_scores = ["73.8", "74.3", "92.8", "64.7", "92.9", "63.6"]
extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep,
subdat2config, test_scores)

View File

@@ -0,0 +1,71 @@
import os
from ..utils import get_wandb_azure_key
import subprocess
import wandb
import hashlib
from time import time
class WandbUtils:
# Documentation on the wandb setting:
# There are two ways to initialize wandb in tune.run:
# (1) using WandbLoggerCallback, by adding the following argument to tune.run:
# callbacks=[WandbLoggerCallback(
# project="hpo",
# api_key = os.environ["WANDB_API_KEY"],
# group = os.environ["WANDB_RUN_GROUP"],
# log_config=True)]
# (2) using the wandb_mixin decorator (the current implementation)
# The current implementation uses (2) because (1) has the following bug.
# In Ray 1.2, when WandbLoggerCallback is used together with a time limit set via the time_budget_s argument,
# the previous run does not clear its cache after tune.run returns. After the next run has started,
# zombie trials from the previous run remain in memory and never stop.
# This bug can be reproduced by switching to (1), i.e., adding the above callbacks argument
# and removing the wandb_mixin decorator.
# https://docs.ray.io/en/master/tune/tutorials/tune-wandb.html
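# A minimal, illustrative sketch of approach (2) under Ray 1.2; the trainable name train_example,
# the logged metric, and the wandb project/key values are hypothetical, not this module's exact code:
#     from ray import tune
#     from ray.tune.integration.wandb import wandb_mixin
#     import wandb
#
#     @wandb_mixin
#     def train_example(config):
#         loss = config["learning_rate"] * 100  # placeholder computation
#         wandb.log({"loss": loss})
#         tune.report(loss=loss)
#
#     tune.run(train_example,
#              config={"learning_rate": 1e-5,
#                      "wandb": {"project": "hpo", "api_key_file": "wandb_key_file.txt"}})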
def __init__(self,
is_wandb_on=None,
console_args=None,
jobid_config=None):
if is_wandb_on:
wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path)
subprocess.run(["wandb", "login", "--relogin", wandb_key])
os.environ["WANDB_API_KEY"] = wandb_key
os.environ["WANDB_MODE"] = "online"
else:
os.environ["WANDB_MODE"] = "disabled"
self.jobid_config = jobid_config
def set_wandb_per_trial(self):
print("before wandb.init\n\n\n")
if os.environ["WANDB_MODE"] == "online":
os.environ["WANDB_SILENT"] = "false"
return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
group=self.wandb_group_name,
name=str(WandbUtils._get_next_trial_ids()),
settings=wandb.Settings(
_disable_stats=True),
reinit=False)
else:
return None
@staticmethod
def _get_next_trial_ids():
sha = hashlib.sha1()
sha.update(str(time()).encode('utf-8'))
return "trial_" + sha.hexdigest()[:3]
def set_wandb_per_run(self):
os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id()
self.wandb_group_name = os.environ["WANDB_RUN_GROUP"]
if os.environ["WANDB_MODE"] == "online":
os.environ["WANDB_SILENT"] = "false"
return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
group=os.environ["WANDB_RUN_GROUP"],
settings=wandb.Settings(
_disable_stats=True),
reinit=False)
else:
return None

155
flaml/nlp/utils.py Normal file
View File

@@ -0,0 +1,155 @@
import argparse
import json
import os
import pathlib
import re
from dataclasses import dataclass, field
def dataset_subdataset_name_format_check(val_str):
regex = re.compile(r"^[^:]*:[^:]*$")
if not regex.match(val_str):
raise argparse.ArgumentTypeError("dataset_subdataset_name must be in the format {data_name}:{subdata_name}")
return val_str
def pretrained_model_size_format_check(val_str):
regex = re.compile(r"^[^:]*:(small|base|large|xlarge)")
if not regex.match(val_str):
raise argparse.ArgumentTypeError("pretrained_model_size must be in the format {model_name}:{model_size},"
"where {model_name} is the name from huggingface.co/models, {model_size}"
"is chosen from small, base, large, xlarge")
return val_str
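# Illustrative checks of the two format validators above (the argument values are examples only):
#     dataset_subdataset_name_format_check("glue:rte")                        # ok, returns "glue:rte"
#     pretrained_model_size_format_check("electra-base-discriminator:base")   # ok
#     pretrained_model_size_format_check("electra-base-discriminator")        # raises ArgumentTypeError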
def load_console_args(**custom_data_args):
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--server_name', type=str, help='server name', required=False,
choices=["tmdev", "dgx", "azureml"], default="tmdev")
arg_parser.add_argument('--algo_mode', type=str, help='hpo or grid search', required=False,
choices=["grid", "gridbert", "hpo", "hfhpo", "list_s", "list", "bestnn"], default="hpo")
arg_parser.add_argument('--data_root_dir', type=str, help='data dir', required=False, default="data/")
arg_parser.add_argument('--dataset_subdataset_name', type=dataset_subdataset_name_format_check,
help='dataset and subdataset name', required=False, default=None)
arg_parser.add_argument('--space_mode', type=str, help='space mode', required=False,
choices=["gnr", "uni", "uni_test", "cus", "buni"], default="uni")
arg_parser.add_argument('--search_alg_args_mode', type=str, help='search algorithm args mode', required=False,
choices=["dft", "exp", "cus"], default="dft")
arg_parser.add_argument('--algo_name', type=str, help='algorithm', required=False,
choices=["bs", "optuna", "cfo", "rs"], default="bs")
arg_parser.add_argument('--pruner', type=str, help='pruner', required=False,
choices=["asha", "None"], default="None")
arg_parser.add_argument('--pretrained_model_size', type=pretrained_model_size_format_check,
help='pretrained model', required=False, default=None)
arg_parser.add_argument('--sample_num', type=int, help='sample num', required=False, default=None)
arg_parser.add_argument('--time_budget', type=int, help='time budget', required=False, default=None)
arg_parser.add_argument('--time_as_grid', type=int, help='time as grid search', required=False, default=None)
arg_parser.add_argument('--rep_id', type=int, help='rep id', required=False, default=0)
arg_parser.add_argument('--azure_key', type=str, help='azure key', required=False, default=None)
arg_parser.add_argument('--resplit_mode', type=str, help='resplit mode', required=False,
choices=["rspt", "ori"], default="ori")
arg_parser.add_argument('--ds_config', type=str, help='deep speed config file path',
required=False, default=None)
arg_parser.add_argument('--yml_file', type=str, help='yml file path', required=False, default="test.yml")
arg_parser.add_argument('--key_path', type=str, help='path for key.json', required=False, default=None)
arg_parser.add_argument('--root_log_path', type=str, help='root path for log', required=False, default="logs_azure")
arg_parser.add_argument('--round_idx', type=int, help='round idx for acl experiments', required=False, default=0)
arg_parser.add_argument('--seed_data', type=int, help='seed of data shuffling', required=False, default=43)
arg_parser.add_argument('--seed_transformers', type=int, help='seed of transformers', required=False, default=42)
args, unknown = arg_parser.parse_known_args()
for each_key in custom_data_args.keys():
if args.__contains__(each_key):
try:
check_key_format_func = globals()[each_key + "_format_check"]
check_key_format_func(custom_data_args[each_key])
except KeyError:
pass
setattr(args, each_key, custom_data_args[each_key])
return args
def get_wandb_azure_key(key_path):
key_json = json.load(open(os.path.join(key_path, "key.json"), "r"))
wandb_key = key_json["wandb_key"]
azure_key = key_json["azure_key"]
azure_container_name = key_json["container_name"]
return wandb_key, azure_key, azure_container_name
def merge_dicts(dict1, dict2):
for key2 in dict2.keys():
if key2 in dict1:
dict1_vals = set(dict1[key2])
dict2_vals = set(dict2[key2])
dict1[key2] = list(dict1_vals.union(dict2_vals))
else:
dict1[key2] = dict2[key2]
return dict1
def _check_dict_keys_overlaps(dict1: dict, dict2: dict):
dict1_keys = set(dict1.keys())
dict2_keys = set(dict2.keys())
return len(dict1_keys.intersection(dict2_keys)) > 0
def _variable_override_default_alternative(logger, obj_ref, var_name, default_value, all_values, overriding_value=None):
"""
Setting the value of var. If overriding_value is specified, var is set to overriding_value;
If overriding_value is not specified, var is set to default_value meanwhile showing all_values
"""
assert isinstance(all_values, list)
if overriding_value:
setattr(obj_ref, var_name, overriding_value)
logger.warning("The value for {} is specified as {}".format(var_name, overriding_value))
else:
setattr(obj_ref, var_name, default_value)
logger.warning("The value for {} is not specified, setting it to the default value {}. "
"Alternatively, you can set it to {}".format(var_name, default_value, ",".join(all_values)))
@dataclass
class PathUtils:
hpo_ckpt_path: str = field(metadata={"help": "the directory for hpo output"})
hpo_result_path: str = field(metadata={"help": "the directory for hpo result"})
hpo_log_path: str = field(metadata={"help": "the directory for log"})
hpo_config_path: str = field(metadata={"help": "the directory for the hpo config"})
log_dir_per_run: str = field(metadata={"help": "log directory for each run."})
result_dir_per_run: str = field(metadata={"help": "result directory for each run."})
ckpt_dir_per_run: str = field(metadata={"help": "checkpoint directory for each run."})
ckpt_dir_per_trial: str = field(metadata={"help": "checkpoint directory for each trial."})
def __init__(self,
jobid_config,
hpo_data_root_path,
):
self.jobid_config = jobid_config
self.hpo_data_root_path = hpo_data_root_path
self.hpo_ckpt_path = os.path.join(hpo_data_root_path, "checkpoint")
self.hpo_result_path = os.path.join(hpo_data_root_path, "result")
self.hpo_log_path = self.hpo_result_path
@staticmethod
def init_and_make_one_dir(dir_path):
assert dir_path
if not os.path.exists(dir_path):
pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True)
def make_dir_per_run(self):
jobid_str = self.jobid_config.to_jobid_string()
self.ckpt_dir_per_run = os.path.join(self.hpo_ckpt_path, jobid_str)
PathUtils.init_and_make_one_dir(self.ckpt_dir_per_run)
self.result_dir_per_run = os.path.join(self.hpo_result_path, jobid_str)
PathUtils.init_and_make_one_dir(self.result_dir_per_run)
self.log_dir_per_run = os.path.join(self.hpo_log_path, jobid_str)
PathUtils.init_and_make_one_dir(self.log_dir_per_run)
def make_dir_per_trial(self, trial_id):
jobid_str = self.jobid_config.to_jobid_string()
ckpt_dir_per_run = os.path.join(self.hpo_ckpt_path, jobid_str)
self.ckpt_dir_per_trial = os.path.join(ckpt_dir_per_run, jobid_str, trial_id)
PathUtils.init_and_make_one_dir(self.ckpt_dir_per_trial)

View File

@@ -0,0 +1,43 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"1. Electra Example"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -51,6 +51,11 @@ setuptools.setup(
"optuna==2.3.0",
"vowpalwabbit",
"openml",
"transformers==4.4.1",
"wandb==0.10.26",
"torch==1.8.1",
"datasets==1.4.1",
"azure-storage-blob"
],
"blendsearch": [
"optuna==2.3.0"

75
test/hf/run_analysis.py Normal file
View File

@@ -0,0 +1,75 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
import argparse
from flaml.nlp.result_analysis.azure_utils import JobID
def create_partial_config_bestnn():
jobid_config = JobID()
# funnel xlarge
# jobid_config.mod = "bestnn"
jobid_config.spa = "uni"
# jobid_config.arg = "cus"
# jobid_config.alg = "cfo"
jobid_config.pre = "funnel"
jobid_config.presz = "xlarge"
# funnel small
# jobid_config.mod = "list"
# jobid_config.pre = "funnel"
# jobid_config.presz = "small"
# jobid_config.rep = 0
# # deberta large
# jobid_config.mod = "bestnn"
# jobid_config.spa = "uni"
# jobid_config.arg = "cus"
# jobid_config.alg = "cfo"
# jobid_config.pre = "deberta"
# jobid_config.presz = "large"
# # deberta base
# jobid_config.mod = "hpo"
# jobid_config.pre = "deberta"
# jobid_config.presz = "base"
# jobid_config.rep = 0
# # deberta large
# jobid_config.mod = "hpo"
# jobid_config.pre = "deberta"
# jobid_config.presz = "large"
return jobid_config
def create_partial_config_list():
jobid_config = JobID()
jobid_config.mod = "list"
jobid_config.spa = "uni"
jobid_config.presz = "xlarge"
return jobid_config
def create_partial_config_hpo():
jobid_config = JobID()
jobid_config.mod = "hpo"
jobid_config.spa = "uni"
return jobid_config
if __name__ == "__main__":
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../")
arg_parser.add_argument('--azure_root_log_path', type=str,
help='root log path of blob storage', required=True, default="logs_azure/")
args = arg_parser.parse_args()
partial_config_large = create_partial_config_bestnn()
from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \
print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs
# get_result(args, partial_config_large)
# check_conflict(args, [partial_config_large])
download_validation(args, "/data/xliu127/projects/hyperopt/data/result/")
# extract_roberta_overfitting_configs(args)

285
test/hf/run_autohf.py Normal file
View File

@@ -0,0 +1,285 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
import os
import shutil
from flaml.nlp import AutoTransformers
from flaml.nlp import AzureUtils, JobID
from flaml.nlp.utils import load_console_args
global azure_log_path
global azure_key
def get_resplit_portion(jobid_config):
if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}:
return {"source": ["train", "validation_matched"], "train": [0, 0.8], "validation": [0.8, 0.9],
"test": [0.9, 1.0]}
else:
return {"source": ["train", "validation"], "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]}
def get_preparedata_setting(args, jobid_config):
preparedata_setting = {
"server_name": args.server_name,
"data_root_path": args.data_root_dir,
"max_seq_length": 128,
"jobid_config": jobid_config,
"is_wandb_on": True
}
if jobid_config.spt == 'rspt':
preparedata_setting["resplit_portion"] = get_resplit_portion(jobid_config)
if ("albert" == jobid_config.pre and jobid_config.dat == ["squad"]) or \
("funnel" in jobid_config.pre and jobid_config.dat[0] in {"imdb", "yelp_review_full", "yelp_polarity",
"amazon_polarity", "amazon_review_multi"}):
preparedata_setting["max_seq_length"] = 512
if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli":
preparedata_setting["fold_name"] = ['train', 'validation_matched', 'test_matched']
return preparedata_setting
def get_autohf_settings(args, **custom_args):
autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1},
"num_samples": args.sample_num,
"time_budget": args.time_budget,
"ckpt_per_epoch": 1,
}
for other_attr in ["ds_config", "rep_id"]:
if hasattr(args, other_attr):
autohf_settings[other_attr] = getattr(args, other_attr)
else:
autohf_settings[other_attr] = None
if len(custom_args) > 0:
autohf_settings.update(custom_args)
return autohf_settings
def rm_home_result():
from os.path import expanduser
home = expanduser("~")
if os.path.exists(home + "/ray_results/"):
shutil.rmtree(home + "/ray_results/")
def get_best_base_config(args, jobid_config, autohf):
import copy
import re
args_small = copy.deepcopy(args)
args_small.algo_name = "optuna"
args_small.search_alg_args_mode = "dft"
args_small.algo_mode = "hpo"
args_small.space_mode = "uni"
args_small.pruner = "None"
if "funnel" not in args_small.pretrained_model_size:
args_small.algo_mode = "hpo"
else:
args_small.algo_mode = "list"
args_small.sample_num = 10000
args_small.time_budget = 3600
args_small.rep_id = 0
jobid_config_small = JobID(args_small)
if jobid_config_small.pre == "deberta":
jobid_config_small.presz = "base"
else:
jobid_config_small.presz = "small"
jobid_config_small.pre_full = re.sub("(xlarge|large|intermediate)", jobid_config_small.presz,
jobid_config_small.pre_full)
azure_utils_small = AzureUtils(
console_args=args_small,
jobid=jobid_config_small,
autohf=autohf)
preparedata_setting = get_preparedata_setting(args, jobid_config)
autohf.prepare_data(**preparedata_setting)
autohf.set_metric()
best_config = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)[0]
return best_config
def search_base_and_search_lower_lr(args, jobid_config, autohf):
best_config = get_best_base_config(args, jobid_config, autohf)
import copy
args_large = copy.deepcopy(args)
args_large.time_budget = args.time_budget - 3600
args_large.sample_num = 100000
args_large.algo_name = args.algo_name
args_large.search_alg_args_mode = "cus"
args_large.space_mode = "buni"
args_large.pruner = "None"
jobid_config_large = JobID(args_large)
jobid_config_large.presz = jobid_config.presz
jobid_config_large.pre_full = jobid_config.pre_full
azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)
_test_hpo(args_large,
jobid_config_large,
autohf,
azure_utils_large,
autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config],
"bound": {"learning_rate": {
"u": best_config["learning_rate"]}}}))
def search_base_and_search_around_best(args, jobid_config, autohf):
args.algo_name = "bs"
args.search_alg_args_mode = "dft"
args.spa = "uni"
args.pru = "None"
best_config = get_best_base_config(args, jobid_config, autohf)
import copy
args_large = copy.deepcopy(args)
args_large.time_budget = args.time_budget - 3600
args_large.sample_num = 100000
args_large.algo_name = "cfo"
args_large.search_alg_args_mode = "cus"
args_large.space_mode = "uni"
jobid_config_large = JobID(args_large)
jobid_config_large.presz = jobid_config.presz
jobid_config_large.pre_full = jobid_config.pre_full
azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)
_test_hpo(args_large,
jobid_config_large,
autohf,
azure_utils_large,
autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config]}))
def evaluate_configs(autohf, args, ranked_all_configs):
import copy
this_args = copy.deepcopy(args)
this_args.time_budget = 100000
this_args.sample_num = int(len(ranked_all_configs))
this_args.search_alg_args_mode = "cus"
jobid_config = JobID(this_args)
azure_utils_large = AzureUtils(console_args=this_args, jobid=jobid_config, autohf=autohf)
_test_hpo(this_args,
jobid_config,
autohf,
azure_utils_large,
autohf_settings=get_autohf_settings(this_args, **{"points_to_evaluate": ranked_all_configs}))
def convert_config_to_different_size(origin_config, mode):
import re
import copy
if mode == "small":
new_config = copy.deepcopy(origin_config)
if new_config.pre == "funnel":
new_config.mod = "list"
else:
new_config.mod = "hpo"
if new_config.pre == "funnel":
new_config.presz = "small"
else:
new_config.presz = "base"
new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full)
elif mode == "large":
new_config = copy.deepcopy(origin_config)
new_config.mod = "hpo"
if new_config.pre == "funnel":
new_config.presz = "xlarge"
new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full)
else:
new_config.presz = "large"
new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full)
return new_config
def evaluate_small_best_configs_on_large(large_args, autohf):
jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small")
jobid_config_small.rep = 0
azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf)
ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
evaluate_configs(autohf, large_args, ranked_all_small_configs[:int(len(ranked_all_small_configs) / 2)])
def add_dict_item_to_list(this_list, this_dict):
is_exist = len([x for x in this_list if x == this_dict]) > 0
if not is_exist:
this_list.append(this_dict)
return this_list
def evaluate_large_best_configs_on_small(small_args, autohf):
jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large")
autohf.jobid_config = jobid_config_large
autohf.set_metric()
all_configs_from_large = []
for rep_id in range(3):
jobid_config_large.rep = rep_id
azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf)
ranked_all_large_configs = azure_utils_large.get_ranked_configs(autohf.metric_mode_name)
for each_config in ranked_all_large_configs:
all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config)
jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small")
jobid_config_small.rep = 0
azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf)
ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
for each_config in ranked_all_small_configs:
all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config)
evaluate_configs(autohf, small_args, list(all_configs_from_large))
def _test_hpo(args,
jobid_config,
autohf,
azure_utils=None,
autohf_settings=None,
):
try:
if not azure_utils:
azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf)
preparedata_setting = get_preparedata_setting(args, jobid_config)
autohf.prepare_data(**preparedata_setting)
analysis = validation_metric = test_metric = None
if not autohf_settings:
autohf_settings = get_autohf_settings(args)
if args.algo_mode != "hfhpo":
validation_metric, analysis = autohf.fit(**autohf_settings, )
else:
autohf.fit_hf(**autohf_settings)
if jobid_config.spt == "ori":
predictions, test_metric = autohf.predict()
if validation_metric:
test_metric.update({"validation": validation_metric})
else:
predictions = None
if test_metric:
validation_metric.update({"test": test_metric})
if analysis is not None:
json_log = azure_utils.extract_log_from_analysis(analysis)
else:
json_log = None
azure_utils.write_autohf_output(json_log=json_log,
valid_metric=validation_metric,
predictions=predictions,
duration=autohf.last_run_duration)
except AssertionError:
azure_utils.write_exception()
rm_home_result()
if __name__ == "__main__":
autohf = AutoTransformers()
args = load_console_args()
jobid_config = JobID(args)
if args.algo_mode in ("hpo", "hfhpo", "grid", "gridbert"):
_test_hpo(args, jobid_config, autohf)
elif args.algo_mode == "bestnn":
search_base_and_search_lower_lr(args, jobid_config, autohf)
elif args.algo_mode == "list":
evaluate_small_best_configs_on_large(args, autohf)
elif args.algo_mode == "list_s":
evaluate_large_best_configs_on_small(args, autohf)

View File

@@ -0,0 +1,62 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
global azure_log_path
global azure_key
def get_preparedata_setting(jobid_config):
preparedata_setting = {
"server_name": "tmdev",
"data_root_path": "data/",
"max_seq_length": 128,
"jobid_config": jobid_config,
"resplit_portion": {"source": ["train", "validation"],
"train": [0, 0.8],
"validation": [0.8, 0.9],
"test": [0.9, 1.0]}
}
return preparedata_setting
def get_autohf_settings():
autohf_settings = {"resources_per_trial": {"cpu": 1},
"num_samples": 1,
"time_budget": 100000,
"ckpt_per_epoch": 1,
"fp16": False,
}
return autohf_settings
def test_hpo():
try:
import ray
except ImportError:
return
from flaml.nlp import AutoTransformers
from flaml.nlp import JobID
jobid_config = JobID()
jobid_config.set_unittest_config()
autohf = AutoTransformers()
try:
preparedata_setting = get_preparedata_setting(jobid_config)
autohf.prepare_data(**preparedata_setting)
autohf_settings = get_autohf_settings()
validation_metric, analysis = autohf.fit(**autohf_settings, )
predictions, test_metric = autohf.predict()
if test_metric:
validation_metric.update({"test": test_metric})
except AssertionError:
pass
if __name__ == "__main__":
test_hpo()