.gitignore (vendored)
@@ -153,3 +153,5 @@ notebook/.azureml
mlruns
logs
automl.pkl

.idea/*
@@ -38,6 +38,13 @@ Tune
    :members:


NLP
------

.. autoclass:: flaml.nlp.AutoTransformers
    :members:


.. Indices and tables
.. ==================
flaml/nlp/README.md (new file)
@@ -0,0 +1,32 @@
How to use AutoTransformers:

```python
from flaml.nlp.autotransformers import AutoTransformers

autohf = AutoTransformers()
preparedata_setting = {
    "dataset_subdataset_name": "glue:rte",
    "pretrained_model_size": "electra-base-discriminator:base",
    "data_root_path": "data/",
    "max_seq_length": 128,
}
autohf.prepare_data(**preparedata_setting)
autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1},
                   "num_samples": -1,  # unlimited sample size
                   "time_budget": 3600,
                   "ckpt_per_epoch": 1,
                   "fp16": False,
                   }
validation_metric, analysis = autohf.fit(**autohf_settings)
```

The currently supported use cases are:

1. A simplified version of fine-tuning on the GLUE datasets using HuggingFace;
2. Selecting a better search space for fine-tuning on the GLUE datasets;
3. Using the search algorithms in flaml for more efficient fine-tuning of HuggingFace models.

Use cases that can be supported in the future:

1. HPO fine-tuning for text generation;
2. HPO fine-tuning for question answering.
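
After fitting, predictions on the test set can be generated and, for the original GLUE split, packaged for submission. Below is a minimal sketch based on the `predict()` and `output_prediction()` methods defined in `autotransformers.py`; the path and file-name arguments are illustrative placeholders:

```python
predictions, test_metric = autohf.predict()
local_archive_path = autohf.output_prediction(predictions,
                                              output_prediction_path="data/result/",
                                              output_zip_file_name="submission")
```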
flaml/nlp/__init__.py (new file)
@@ -0,0 +1,2 @@
from flaml.nlp.autotransformers import AutoTransformers
from flaml.nlp.result_analysis.azure_utils import AzureUtils, JobID
flaml/nlp/autotransformers.py (new file)
@@ -0,0 +1,852 @@
import json
import os

import torch
import transformers
import wandb

from .dataset.dataprocess_auto import AutoEncodeText
import numpy as np

from ray.tune import CLIReporter

import time
import ray
import datasets
from datasets import load_dataset
from transformers.trainer_utils import IntervalStrategy, HPSearchBackend

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, TrainingArguments

from .dataset.metric_auto import get_default_and_alternative_metric
from .dataset.submission_auto import auto_output_prediction
from .dataset.task_auto import get_default_task
from .hpo.grid_searchspace_auto import AutoGridSearchSpace
from .hpo.hpo_searchspace import AutoHPOSearchSpace
from .huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
from .utils import PathUtils, _variable_override_default_alternative
from .hpo.searchalgo_auto import AutoSearchAlgorithm
from .hpo.scheduler_auto import AutoScheduler
from .result_analysis.wandb_utils import WandbUtils
from .result_analysis.azure_utils import JobID
from .utils import load_console_args

from .huggingface.trainer import TrainerForAutoTransformers

import logging

transformers.logging.set_verbosity_error()
logger = logging.getLogger(__name__)
logger_formatter = logging.Formatter(
    '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
    '%m-%d %H:%M:%S')

task_list = [
    "seq-classification",
    "regression",
    "question-answering"
]


class AutoTransformers:
    '''The AutoTransformers class

    Example:

        .. code-block:: python

            autohf = AutoTransformers()
            autohf_settings = {"resources_per_trial": {"cpu": 1},
                               "num_samples": -1,
                               "time_budget": 100000,
                               "ckpt_per_epoch": 1,
                               "fp16": False,
                               }

            validation_metric, analysis = autohf.fit(**autohf_settings)

    '''
    @staticmethod
    def _convert_dict_to_ray_tune_space(config_json, mode="grid"):
        search_space = {}

        if mode == "grid":
            for each_hp in config_json.keys():
                this_config = config_json[each_hp]
                assert isinstance(this_config, dict) or isinstance(this_config, list), \
                    "config of " + each_hp + " must be dict or list"
                search_space[each_hp] = ray.tune.grid_search(this_config)
        else:
            for each_hp in config_json.keys():
                this_config = config_json[each_hp]
                assert isinstance(this_config, dict) or isinstance(this_config, list), \
                    "config of " + each_hp + " must be dict or list"
                if isinstance(this_config, dict):
                    lower = this_config["l"]
                    upper = this_config["u"]
                    space = this_config["space"]
                    if space == "log":
                        search_space[each_hp] = ray.tune.loguniform(lower, upper)
                    elif space == "linear":
                        search_space[each_hp] = ray.tune.uniform(lower, upper)
                    elif space == "quniform":
                        search_space[each_hp] = ray.tune.quniform(lower, upper, this_config["interval"])
                else:
                    search_space[each_hp] = ray.tune.choice(this_config)

        return search_space
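    # Illustrative example (not part of the original file): in "hpo" mode, a config such as
    #     {"learning_rate": {"l": 1e-6, "u": 1e-4, "space": "log"}, "seed": [20, 21, 22]}
    # is converted by the method above to
    #     {"learning_rate": ray.tune.loguniform(1e-6, 1e-4), "seed": ray.tune.choice([20, 21, 22])}
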
    def _set_search_space(self,
                          **custom_hpo_args):
        search_space_dict_hpo = search_space_dict_grid = None
        if self.jobid_config.mod == "grid":
            search_space_grid_json = AutoGridSearchSpace.from_model_and_dataset_name(self.jobid_config.pre,
                                                                                     self.jobid_config.presz,
                                                                                     self.get_full_data_name(),
                                                                                     self.jobid_config.subdat, "grid")
            search_space_dict_grid \
                = AutoTransformers._convert_dict_to_ray_tune_space(search_space_grid_json, mode="grid")
            search_space_dict_hpo = search_space_dict_grid
        if self.jobid_config.mod != "grid" and self.jobid_config.mod != "gridbert":
            search_space_hpo_json \
                = AutoHPOSearchSpace.from_model_and_dataset_name(logger,
                                                                 self.jobid_config.spa,
                                                                 self.jobid_config.pre,
                                                                 self.jobid_config.presz,
                                                                 self.get_full_data_name(),
                                                                 self.jobid_config.subdat,
                                                                 **custom_hpo_args)
            search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="hpo")
        elif self.jobid_config.mod == "gridbert":
            search_space_hpo_json = AutoGridSearchSpace.from_model_and_dataset_name(
                "bert",
                "base",
                self.get_full_data_name(),
                self.jobid_config.subdat, "grid")
            search_space_dict_hpo = AutoTransformers._convert_dict_to_ray_tune_space(search_space_hpo_json, mode="grid")

        """
        resolve the conflict in search_space_dict_hpo: only one of "max_steps" and "num_train_epochs" can exist
        in the search space. If both exist, num_train_epochs is removed. Similarly, if "warmup_steps" and
        "warmup_ratio" both exist, warmup_ratio is removed
        """
        search_space_dict_hpo = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_hpo)
        self._search_space_hpo = search_space_dict_hpo
        if self.jobid_config.mod == "grid":
            search_space_dict_grid = TrainerForAutoTransformers.resolve_hp_conflict(search_space_dict_grid)
            self._search_space_grid = search_space_dict_grid
        else:
            self._search_space_grid = None

        try:
            self.ds_config = custom_hpo_args["ds_config"]
        except KeyError:
            self.ds_config = None

    def _wrapper(self, func, *args):  # unpack the positional arguments when calling func
        return func(*args)
    def _get_split_name(self, data_raw, fold_name=None):
        if fold_name:
            return fold_name
        fold_keys = data_raw.keys()
        if fold_keys == {"train", "validation", "test"}:
            return "train", "validation", "test"
        for each_key in fold_keys:
            for each_split_name in {"train", "validation", "test"}:
                assert not (each_key.startswith(each_split_name) and each_key != each_split_name), \
                    "Dataset splits must be within {}; otherwise they must be explicitly specified in " \
                    "dataset_config, e.g., 'fold_name': ['train', 'validation_matched', 'test_matched']. Please " \
                    "refer to the example in the documentation of AutoTransformers.prepare_data()".format(
                        ",".join(fold_keys))
        return "train", "validation", "test"
    def prepare_data(self,
                     data_root_path,
                     jobid_config=None,
                     is_wandb_on=False,
                     server_name=None,
                     max_seq_length=128,
                     fold_name=None,
                     resplit_portion=None,
                     **custom_data_args):
        '''Prepare data

        An example:

            preparedata_setting = {
                "server_name": "tmdev",
                "data_root_path": "data/",
                "max_seq_length": 128,
                "jobid_config": jobid_config,
                "is_wandb_on": False,
                "resplit_portion": {"source": ["train", "validation"],
                                    "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]}
            }
            autohf.prepare_data(**preparedata_setting)

        Args:
            server_name:
                a string variable, which can be tmdev or azureml
            data_root_path:
                the root path for storing the checkpoints and output results, e.g., "data/"
            jobid_config:
                a JobID object describing the profile of the job
            is_wandb_on:
                a boolean, whether wandb logging is turned on
            max_seq_length (optional):
                the max sequence length for the huggingface model; this hyperparameter must be specified
                at the data processing step
            fold_name (optional):
                the names of the train/validation/test folds,
                e.g., ['train', 'validation_matched', 'test_matched']
            resplit_portion:
                the proportion for resplitting the train and dev data when split_mode="resplit".
                If jobid_config.spt is "rspt", resplit_portion is required
        '''
        console_args = load_console_args(**custom_data_args)
        self._max_seq_length = max_seq_length
        self._server_name = server_name if server_name is not None else "tmdev"
        self.jobid_config = jobid_config if jobid_config is not None else JobID(console_args)
        self.wandb_utils = WandbUtils(is_wandb_on=is_wandb_on,
                                      console_args=console_args,
                                      jobid_config=self.jobid_config)
        self.wandb_utils.set_wandb_per_run()

        self.path_utils = PathUtils(self.jobid_config, hpo_data_root_path=data_root_path)

        if self.jobid_config.spt == "rspt":
            assert resplit_portion, "If split mode is 'rspt', the resplit_portion must be provided. Please " \
                                    "refer to the example in the documentation of AutoTransformers.prepare_data()"
        if self.jobid_config.subdat:
            data_raw = load_dataset(self.get_full_data_name(), self.jobid_config.subdat)
        else:
            data_raw = self._wrapper(load_dataset, *self.jobid_config.dat)

        self._train_name, self._dev_name, self._test_name = self._get_split_name(data_raw, fold_name=fold_name)
        auto_tokentoids_config = {"max_seq_length": self._max_seq_length}
        self._tokenizer = AutoTokenizer.from_pretrained(self.jobid_config.pre_full, use_fast=True)

        def autoencodetext_from_model_and_dataset_name():
            return AutoEncodeText.from_model_and_dataset_name(
                data_raw,
                self.jobid_config.pre_full,
                self.get_full_data_name(),
                self.jobid_config.subdat,
                **auto_tokentoids_config)

        data_encoded = autoencodetext_from_model_and_dataset_name()
        self._max_seq_length = 0
        """
        Tighten max_seq_length to the longest actual sequence length in the encoded data
        (which is at most the user-defined max_seq_length, because the first pass truncates),
        rounded up to a multiple of 16, then re-encode with the tightened length
        """
        for each_fold in data_encoded.keys():
            self._max_seq_length = max(self._max_seq_length,
                                       max([sum(data_encoded[each_fold][x]['attention_mask']) for x in
                                            range(len(data_encoded[each_fold]))]))
        self._max_seq_length = int((self._max_seq_length + 15) / 16) * 16
        data_encoded = autoencodetext_from_model_and_dataset_name()

        if self.jobid_config.spt == "rspt":
            all_folds_from_source = []
            assert "source" in resplit_portion.keys(), "Must specify the source for resplitting the dataset in " \
                "resplit_portion, which is a list of fold names, e.g., resplit_portion = {'source': ['train']}"

            source_fold_names = resplit_portion['source']
            for each_fold_name in source_fold_names:
                this_fold_dataset = data_encoded[each_fold_name]
                all_folds_from_source.append(this_fold_dataset)

            merged_folds_from_source = datasets.concatenate_datasets(all_folds_from_source)
            merged_folds_from_source = merged_folds_from_source.shuffle(seed=self.jobid_config.sddt)

            assert "train" in resplit_portion.keys() and "validation" in resplit_portion.keys() \
                   and "test" in resplit_portion.keys(), "train, validation, test must exist in resplit_portion"

            for key in ["train", "validation", "test"]:
                target_fold_start, target_fold_end = \
                    int(resplit_portion[key][0] * len(merged_folds_from_source)), \
                    int(resplit_portion[key][1] * len(merged_folds_from_source))
                subfold_dataset = merged_folds_from_source.select(
                    [x for x in range(target_fold_start, target_fold_end)]).flatten_indices()
                if key == "train":
                    self.train_dataset = subfold_dataset
                elif key == "validation":
                    self.eval_dataset = subfold_dataset
                else:
                    self.test_dataset = subfold_dataset
        else:
            self.train_dataset, self.eval_dataset, self.test_dataset \
                = data_encoded[self._train_name], data_encoded[self._dev_name], data_encoded[self._test_name]
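    # Illustrative example (not part of the original file): with 10,000 merged source
    # examples and resplit_portion = {"train": [0, 0.8], "validation": [0.8, 0.9],
    # "test": [0.9, 1.0]}, the select() calls above take indices [0, 8000),
    # [8000, 9000) and [9000, 10000) of the shuffled dataset.
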
    def _load_model(self,
                    checkpoint_path=None,
                    per_model_config=None):

        this_task = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
        if this_task == "seq-classification":
            self._num_labels = len(self.train_dataset.features["label"].names)
        elif this_task == "regression":
            self._num_labels = 1

        if not checkpoint_path:
            checkpoint_path = self.jobid_config.pre_full

        def get_this_model():
            return AutoModelForSequenceClassification.from_pretrained(checkpoint_path, config=model_config)

        def is_pretrained_model_in_classification_head_list():
            return self.jobid_config.pre in MODEL_CLASSIFICATION_HEAD_MAPPING.keys()

        def _set_model_config():
            if per_model_config and len(per_model_config) > 0:
                model_config = AutoConfig.from_pretrained(
                    checkpoint_path,
                    num_labels=model_config_num_labels,
                    **per_model_config)
            else:
                model_config = AutoConfig.from_pretrained(
                    checkpoint_path,
                    num_labels=model_config_num_labels)
            return model_config

        if this_task == "seq-classification":
            num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
            if is_pretrained_model_in_classification_head_list():
                model_config_num_labels = num_labels_old
            else:
                model_config_num_labels = self._num_labels
            model_config = _set_model_config()

            if is_pretrained_model_in_classification_head_list():
                if self._num_labels != num_labels_old:
                    this_model = get_this_model()
                    model_config.num_labels = self._num_labels
                    this_model.num_labels = self._num_labels
                    this_model.classifier = AutoSeqClassificationHead \
                        .from_model_type_and_config(self.jobid_config.pre,
                                                    model_config)
                else:
                    this_model = get_this_model()
            else:
                this_model = get_this_model()

            this_model.resize_token_embeddings(len(self._tokenizer))
            return this_model
        elif this_task == "regression":
            # a regression head has a single output label
            model_config_num_labels = 1
            model_config = _set_model_config()
            this_model = get_this_model()
            return this_model
    def _get_metric_func(self):
        if self.get_full_data_name() in ("glue", "super_glue"):
            metric = datasets.load.load_metric(self.get_full_data_name(), self.jobid_config.subdat)
        elif self.get_full_data_name() in ("squad", "squad_v2"):
            metric = datasets.load.load_metric(self.get_full_data_name())
        else:
            metric = datasets.load.load_metric(self.metric_name)
        return metric

    def _compute_metrics_by_dataset_name(self,
                                         eval_pred):
        predictions, labels = eval_pred
        predictions = np.squeeze(predictions) \
            if self.task_name == "regression" else np.argmax(predictions, axis=1)
        metric_func = self._get_metric_func()
        return metric_func.compute(predictions=predictions, references=labels)

    def _compute_checkpoint_freq(self,
                                 num_train_epochs,
                                 batch_size):
        if "gpu" in self._resources_per_trial:
            ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
                                 / self._resources_per_trial["gpu"] / self.ckpt_per_epoch) + 1
        else:
            ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
                                 / self._resources_per_trial["cpu"] / self.ckpt_per_epoch) + 1

        return ckpt_step_freq
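    # Illustrative example (not part of the original file): with num_train_epochs=3
    # (capped at 1 by the min() above), 8,000 training examples, batch_size=32,
    # one GPU and ckpt_per_epoch=1, checkpoints are saved every
    # int(1 * 8000 / 32 / 1 / 1) + 1 = 251 steps.
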
    @staticmethod
    def _separate_config(config):
        training_args_config = {}
        per_model_config = {}

        for key in config.keys():
            if key in TrainingArguments.__dict__.keys():
                training_args_config[key] = config[key]
            else:
                per_model_config[key] = config[key]

        return training_args_config, per_model_config

    def _objective(self, config, reporter, checkpoint_dir=None):
        def model_init():
            return self._load_model()

        from transformers.trainer_utils import set_seed
        set_seed(config["seed"])

        training_args_config, per_model_config = AutoTransformers._separate_config(config)
        this_model = self._load_model(per_model_config=per_model_config)

        trial_id = reporter.trial_id
        self.path_utils.make_dir_per_trial(trial_id)

        ckpt_freq = self._compute_checkpoint_freq(
            num_train_epochs=config["num_train_epochs"],
            batch_size=config["per_device_train_batch_size"])

        assert self.path_utils.ckpt_dir_per_trial
        training_args = TrainingArguments(
            output_dir=self.path_utils.ckpt_dir_per_trial,
            do_eval=False,
            per_device_eval_batch_size=32,
            eval_steps=ckpt_freq,
            evaluation_strategy=IntervalStrategy.STEPS,
            save_steps=ckpt_freq,
            save_total_limit=0,
            fp16=self._fp16,
            deepspeed=self.ds_config,
            **training_args_config,
        )

        trainer = TrainerForAutoTransformers(
            this_model,
            training_args,
            model_init=model_init,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            tokenizer=self._tokenizer,
            compute_metrics=self._compute_metrics_by_dataset_name,
        )
        trainer.logger = logger
        trainer.trial_id = reporter.trial_id

        """
        create a wandb run. If os.environ["WANDB_MODE"] == "offline", run = None
        """
        run = self.wandb_utils.set_wandb_per_trial()
        if os.environ["WANDB_MODE"] == "online":
            for each_hp in config:
                wandb.log({each_hp: config[each_hp]})
        trainer.train()
        trainer.evaluate(self.eval_dataset)
        """
        If a wandb run was created, close the run after train and evaluate finish
        """
        if run:
            run.finish()
    def _verify_init_config(self,
                            **custom_hpo_args):
        for key in custom_hpo_args.keys():
            if key == "points_to_evaluate":
                for each_init_config in custom_hpo_args[key]:
                    for each_hp in each_init_config.keys():
                        assert each_hp in self._search_space_hpo.keys(), \
                            "points_to_evaluate hp must be within the search space"

                        assert isinstance(each_init_config[each_hp], int) or \
                            isinstance(each_init_config[each_hp], float) or \
                            isinstance(each_init_config[each_hp], str) or \
                            isinstance(each_init_config[each_hp], bool), "points_to_evaluate must be a scalar"

                        assert isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Categorical) or \
                            isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Float) or \
                            isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Integer), \
                            "Every hp space must either be categorical, integer or float"

                        if isinstance(self._search_space_hpo[each_hp], ray.tune.sample.Categorical):
                            assert each_init_config[each_hp] in self._search_space_hpo[each_hp].categories, \
                                "points_to_evaluate {} value must be within the search space".format(each_hp)
                        else:
                            assert self._search_space_hpo[each_hp].lower <= each_init_config[each_hp] <= \
                                self._search_space_hpo[each_hp].upper, \
                                "points_to_evaluate {} value must be within the search space".format(each_hp)
    def _get_search_algo(self,
                         search_algo_name,
                         search_algo_args_mode,
                         **custom_hpo_args):
        if search_algo_name in ("bs", "cfo"):
            self._verify_init_config(**custom_hpo_args)
        search_algo = AutoSearchAlgorithm.from_method_name(
            search_algo_name,
            search_algo_args_mode,
            self._search_space_hpo,
            **custom_hpo_args)
        return search_algo

    @staticmethod
    def _recover_checkpoint(tune_checkpoint_dir):
        assert tune_checkpoint_dir
        # Get subdirectory used for Huggingface.
        subdirs = [
            os.path.join(tune_checkpoint_dir, name)
            for name in os.listdir(tune_checkpoint_dir)
            if os.path.isdir(os.path.join(tune_checkpoint_dir, name))
        ]
        # There should only be 1 subdir.
        assert len(subdirs) == 1, subdirs
        return subdirs[0]

    def get_full_data_name(self):
        return JobID.dataset_list_to_str(self.jobid_config.dat, "dat")
    def _save_ckpt_json(self,
                        best_ckpt):
        json.dump({"best_ckpt": best_ckpt},
                  open(os.path.join(self.path_utils.result_dir_per_run,
                                    "save_ckpt_" + self.jobid_config.to_jobid_string() + ".json"), "w"))

    def _save_output_metric(self,
                            output_metrics):
        json.dump(output_metrics, open(
            os.path.join(self.path_utils.result_dir_per_run,
                         "output_metric_" + self.jobid_config.to_jobid_string() + ".json"), "w"))

    def _load_ckpt_json(self,
                        ckpt_dir=None,
                        **kwargs):
        if not ckpt_dir:
            ckpt_dir = os.path.join(self.path_utils.result_dir_per_run,
                                    "save_ckpt_" + self.jobid_config.to_jobid_string() + ".json")
        try:
            ckpt_json = json.load(open(ckpt_dir))
            return ckpt_json["best_ckpt"]
        except FileNotFoundError as err:
            logger.error("Saved checkpoint not found. Please make sure checkpoint is stored under {}".format(ckpt_dir))
            raise err
    def _set_metric(self, custom_metric_name=None, custom_metric_mode_name=None):
        default_metric, default_mode, all_metrics, all_modes = get_default_and_alternative_metric(
            self.get_full_data_name(),
            subdataset_name=self.jobid_config.subdat,
            custom_metric_name=custom_metric_name,
            custom_metric_mode_name=custom_metric_mode_name)
        _variable_override_default_alternative(logger,
                                               self,
                                               "metric_name",
                                               default_metric,
                                               all_metrics,
                                               custom_metric_name)
        _variable_override_default_alternative(logger,
                                               self,
                                               "metric_mode_name",
                                               default_mode,
                                               all_modes,
                                               custom_metric_mode_name)
        self._all_metrics = all_metrics
        self._all_modes = all_modes

    def _set_task(self):
        self.task_name = get_default_task(self.get_full_data_name(), self.jobid_config.subdat)
    def fit_hf(self,
               resources_per_trial,
               num_samples,
               time_budget,
               custom_metric_name=None,
               custom_metric_mode_name=None,
               _fp16=True,
               **custom_hpo_args
               ):
        '''Fine-tune the model using HuggingFace's Transformers.hyperparameter_search API (for comparative purposes).
        Transformers.hyperparameter_search has the following disadvantages:
        (1) it does not return a ray.tune.analysis.Analysis object, which is needed for analyzing the results;
        (2) it is inconvenient to develop on top of Transformers.hyperparameter_search, whose trainable function,
        search space, etc. are defined inside of Transformers.hyperparameter_search.

        An example:
            autohf_settings = {"resources_per_trial": {"cpu": 1},
                               "num_samples": 1,
                               "time_budget": 100000,
                               "ckpt_per_epoch": 1,
                               "fp16": False,
                               }
            validation_metric = autohf.fit_hf(**autohf_settings)

        Args:
            resources_per_trial:
                A dict showing the resources used by each trial,
                e.g., {"gpu": 4, "cpu": 4}
            num_samples:
                An int variable of the maximum number of trials
            time_budget:
                An int variable of the maximum time budget
            custom_metric_name:
                A string of the metric name or a function,
                e.g., 'accuracy', 'f1', 'loss'
            custom_metric_mode_name:
                A string of the mode name,
                e.g., "max", "min", "last", "all"
            _fp16:
                boolean, default = True | whether to use fp16
            custom_hpo_args:
                The additional keyword arguments, e.g.,
                custom_hpo_args = {"points_to_evaluate": [{
                                   "num_train_epochs": 1,
                                   "per_device_train_batch_size": 128, }]}

        Returns:
            validation_metric:
                a dict storing the validation score
        '''

        def model_init():
            return self._load_model()

        def ray_hp_space(trial):
            return {
                "learning_rate": ray.tune.loguniform(1e-6, 1e-4),
                "num_train_epochs": ray.tune.choice(list(range(1, 6))),
                "seed": ray.tune.quniform(1, 41, 1),
                "per_device_train_batch_size": ray.tune.choice([4, 8, 16, 32, 64]),
            }

        self._set_metric(custom_metric_name, custom_metric_mode_name)
        self._set_task()

        training_args = TrainingArguments(
            output_dir=self.path_utils.hpo_ckpt_path,
            fp16=_fp16,
        )
        this_model = self._load_model()

        trainer = TrainerForAutoTransformers(
            this_model,
            training_args,
            model_init=model_init,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            tokenizer=self._tokenizer,
            compute_metrics=self._compute_metrics_by_dataset_name,
        )
        self.path_utils.make_dir_per_run()

        start_time = time.time()
        best_run = trainer.hyperparameter_search(
            n_trials=num_samples,
            time_budget_s=time_budget,
            hp_space=ray_hp_space,
            backend=HPSearchBackend.RAY,
            resources_per_trial=resources_per_trial)
        duration = time.time() - start_time
        self.last_run_duration = duration

        hp_dict = best_run.hyperparameters
        hp_dict["seed"] = int(hp_dict["seed"])

        best_training_args = TrainingArguments(
            output_dir=self.path_utils.hpo_ckpt_path,
            fp16=_fp16,
            **hp_dict,
        )

        best_trainer = TrainerForAutoTransformers(
            this_model,
            best_training_args,
            model_init=model_init,
            train_dataset=self.train_dataset,
            eval_dataset=self.eval_dataset,
            tokenizer=self._tokenizer,
            compute_metrics=self._compute_metrics_by_dataset_name,
        )

        best_model_checkpoint_path = os.path.join(self.path_utils.hpo_ckpt_path, "hpo_hf")
        if not os.path.exists(best_model_checkpoint_path):
            os.mkdir(best_model_checkpoint_path)
        best_trainer.train()
        best_trainer.save_model(best_model_checkpoint_path)
        self._save_ckpt_json(best_model_checkpoint_path)
        validation_metric = best_trainer.evaluate()

        return validation_metric
    def fit(self,
            num_samples,
            time_budget,
            custom_metric_name=None,
            custom_metric_mode_name=None,
            ckpt_per_epoch=1,
            fp16=True,
            verbose=1,
            resources_per_trial={"gpu": 1, "cpu": 1},
            **custom_hpo_args):
        '''Fine-tune the model using the HPO setting

        An example:
            autohf_settings = {"resources_per_trial": {"cpu": 1},
                               "num_samples": 1,
                               "time_budget": 100000,
                               "ckpt_per_epoch": 1,
                               "fp16": False,
                               }
            validation_metric, analysis = autohf.fit(**autohf_settings)

        Args:
            resources_per_trial:
                A dict showing the resources used by each trial,
                e.g., {"gpu": 4, "cpu": 4}
            num_samples:
                An int variable of the maximum number of trials
            time_budget:
                An int variable of the maximum time budget
            custom_metric_name:
                A string of the metric name or a function,
                e.g., 'accuracy', 'f1', 'loss'
            custom_metric_mode_name:
                A string of the mode name,
                e.g., "max", "min", "last", "all"
            ckpt_per_epoch:
                An integer value of number of checkpoints per epoch, default = 1
            verbose:
                int, default=1 | Controls the verbosity, higher means more
                messages
            fp16:
                boolean, default = True | whether to use fp16
            custom_hpo_args:
                The additional keyword arguments, e.g.,
                custom_hpo_args = {"points_to_evaluate": [{
                                   "num_train_epochs": 1,
                                   "per_device_train_batch_size": 128, }]}

        Returns:
            validation_metric:
                a dict storing the validation score
            analysis:
                a ray.tune.analysis.Analysis object storing the analysis results from tune.run

        '''
        self._resources_per_trial = resources_per_trial
        self._set_metric(custom_metric_name, custom_metric_mode_name)
        self._set_task()
        self._fp16 = fp16
        ray.init(local_mode=True)

        self._set_search_space(**custom_hpo_args)
        search_algo = self._get_search_algo(self.jobid_config.alg, self.jobid_config.arg, **custom_hpo_args)
        scheduler = AutoScheduler.from_scheduler_name(self.jobid_config.pru)
        self.ckpt_per_epoch = ckpt_per_epoch
        self.path_utils.make_dir_per_run()

        logger.addHandler(logging.FileHandler(os.path.join(self.path_utils.log_dir_per_run, 'tune.log')))
        old_level = logger.getEffectiveLevel()
        self._verbose = verbose
        if verbose == 0:
            logger.setLevel(logging.WARNING)

        assert self.path_utils.ckpt_dir_per_run
        start_time = time.time()

        tune_config = self._search_space_hpo
        tune_config["seed"] = self.jobid_config.sdhf

        analysis = ray.tune.run(
            self._objective,
            metric=self.metric_name,
            mode=self.metric_mode_name,
            name="ray_result",
            resources_per_trial=resources_per_trial,
            config=tune_config,
            verbose=verbose,
            local_dir=self.path_utils.ckpt_dir_per_run,
            num_samples=num_samples,
            time_budget_s=time_budget,
            keep_checkpoints_num=1,
            scheduler=scheduler,
            search_alg=search_algo,
        )
        duration = time.time() - start_time
        self.last_run_duration = duration
        logger.info("Total running time: {} seconds".format(duration))

        ray.shutdown()

        best_trial = analysis.get_best_trial(scope="all", metric=self.metric_name, mode=self.metric_mode_name)
        validation_metric = {
            "eval_" + self.metric_name: best_trial.metric_analysis[self.metric_name][self.metric_mode_name]}
        for x in range(len(self._all_metrics)):
            validation_metric["eval_" + self._all_metrics[x]] \
                = best_trial.metric_analysis[self._all_metrics[x]][self._all_modes[x]]

        get_best_ckpt = analysis.get_best_checkpoint(best_trial, metric=self.metric_name, mode=self.metric_mode_name)
        best_ckpt = AutoTransformers._recover_checkpoint(get_best_ckpt)

        self._save_ckpt_json(best_ckpt)

        if verbose == 0:
            logger.setLevel(old_level)

        return validation_metric, analysis
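    # Illustrative example (not part of the original file): for GLUE/MRPC, fit()
    # returns a validation_metric dict shaped like
    #     {"eval_accuracy": ..., "eval_f1": ..., "eval_loss": ...}
    # (one entry per metric in self._all_metrics) together with the Analysis object.
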
    def predict(self,
                ckpt_json_dir=None,
                **kwargs):
        '''Predict labels for the test data.

        An example:
            predictions, test_metric = autohf.predict()

        Args:
            ckpt_json_dir:
                the checkpoint for the fine-tuned model if you wish to override
                the saved checkpoint from the training stage under self.path_utils._result_dir_per_run

        Returns:
            A numpy array of shape n * 1 - each element is a predicted class
            label for an instance.
        '''
        best_checkpoint = self._load_ckpt_json(ckpt_json_dir, **kwargs)
        best_model = self._load_model(checkpoint_path=best_checkpoint)
        training_args = TrainingArguments(per_device_eval_batch_size=1,
                                          output_dir=self.path_utils.result_dir_per_run)
        test_trainer = TrainerForAutoTransformers(best_model, training_args)

        if self.jobid_config.spt == "ori":
            try:
                self.test_dataset.remove_columns_("label")
            except ValueError:
                pass

        test_dataloader = test_trainer.get_test_dataloader(self.test_dataset)
        predictions, labels, _ = test_trainer.prediction_loop(test_dataloader, description="Prediction")
        predictions = np.squeeze(predictions) \
            if get_default_task(self.get_full_data_name(), self.jobid_config.subdat) == "regression" \
            else np.argmax(predictions, axis=1)
        torch.cuda.empty_cache()

        if self.jobid_config.spt == "rspt":
            assert labels is not None
            metric = self._get_metric_func()
            output_metric = metric.compute(predictions=predictions, references=labels)
            self._save_output_metric(output_metric)
            return predictions, output_metric
        else:
            return predictions, None

    def output_prediction(self,
                          predictions=None,
                          output_prediction_path=None,
                          output_zip_file_name=None):
        """
        When using the original GLUE split, output the predictions on the test data
        and prepare the .zip file for submission

        Example:
            local_archive_path = self.autohf.output_prediction(predictions,
                                 output_prediction_path=self.console_args.data_root_dir + "result/",
                                 output_zip_file_name=azure_save_file_name)

        Args:
            predictions:
                a list of predictions, which is the output of AutoTransformers.predict()
            output_prediction_path:
                the output path for the predictions
            output_zip_file_name:
                a string, which is the name of the output zip file

        Returns:
            the path of the output .zip file
        """
        return auto_output_prediction(self.get_full_data_name(), output_prediction_path,
                                      output_zip_file_name, predictions, self.train_dataset,
                                      self._dev_name, self.jobid_config.subdat)
flaml/nlp/dataset/__init__.py (new empty file)

flaml/nlp/dataset/dataprocess_auto.py (new file)
@@ -0,0 +1,225 @@
from collections import OrderedDict
from functools import partial

from transformers import AutoTokenizer
from .sentence_keys_auto import get_sentence_keys


def inserting_sepp(sent, start, end, this_tokenizer):
    # insert a [SEP] token before and after the span [start, end) of the sentence
    return \
        sent[:start].rstrip() + " " + this_tokenizer.sep_token + " " + sent[start:end] \
        + " " + this_tokenizer.sep_token + " " + sent[end:].lstrip()


def tokenize_superglue_copa(this_example,
                            this_tokenizer,
                            dataset_name,
                            subdataset_name=None,
                            **kwargs):
    # placeholder, not implemented yet
    return None


def tokenize_superglue_wic_gpt2(this_example,
                                this_tokenizer,
                                dataset_name,
                                subdataset_name=None,
                                **kwargs):
    # placeholder, not implemented yet
    return None
def tokenize_superglue_wic(this_example,
                           this_tokenizer,
                           dataset_name,
                           subdataset_name=None,
                           **kwargs
                           ):
    """
    tokenize the data from the wic task (word-in-context dataset),
    e.g., sentence 1: "There's a lot of trash on the bed of the river"
          sentence 2: "I keep a glass of water next to my bed when I sleep",
          label = False (different word senses)
    In the superglue data, the positions of the word in sentences 1 and 2 are provided.
    What this function does is to update the span positions after tokenization, based on each LM's own tokenizer.
    The key is to insert a [SEP] before and after the original word, then feed the sentence into the LM's tokenizer.
    There are two challenges:
    (1) Each LM's tokenization is different, e.g., in XLNet's tokenizer, the paddings are on the left
    (2) Some LMs' tokenization adds an underline symbol before each word, e.g., "There's a lot"
        -> [_There, _', _s, _a, _lot]
    When the underline meets a special char such as '"' or "'", the tokenized sequence after adding [SEP] needs to be
    aligned with the sequence tokenized without [SEP]. We use a two-pointer algorithm for the alignment
    """
    sent1, sent2 = this_example["sentence1"], this_example["sentence2"]
    start1, end1 = this_example["start1"], this_example["end1"]
    start2, end2 = this_example["start2"], this_example["end2"]
    """
    Add [SEP] to the sentence
    """
    altered_sent1 = inserting_sepp(sent1, start1, end1, this_tokenizer)
    altered_sent2 = inserting_sepp(sent2, start2, end2, this_tokenizer)
    input_ids_sepp = this_tokenizer(*(altered_sent1, altered_sent2),
                                    padding="max_length",
                                    max_length=1024,
                                    truncation=True)["input_ids"]
    data_pair = (sent1, sent2)
    assert "max_seq_length" in kwargs, "max_seq_length must be provided for glue"
    this_data = this_tokenizer(*data_pair, padding="max_length", max_length=kwargs["max_seq_length"], truncation=True)
    input_ids = this_data["input_ids"]
    which_sepp = 0

    """
    span_start_end: a 2x2 array:
    * (span_start_end[0][0], span_start_end[0][1]) is the span of the position of the word in the first sentence
    * (span_start_end[1][0], span_start_end[1][1]) is the span of the position of the word in the second sentence
    """
    span_start_end = [[-1, -1], [-1, -1]]

    ptr_sepp = ptr_nosepp = 0
    try:
        padding_direction = this_tokenizer.padding_side
        if padding_direction == "left":
            padding_id = input_ids_sepp[0]
            while input_ids_sepp[ptr_sepp] == padding_id:
                ptr_sepp += 1
            while input_ids[ptr_nosepp] == padding_id:
                ptr_nosepp += 1
    except KeyError:
        pass
    sep_id = this_tokenizer.convert_tokens_to_ids([this_tokenizer.sep_token])[0]
    """
    use two pointers to align the tokenized sequences before and after adding [SEP];
    ptr_sepp: the pointer after adding; ptr_nosepp: the pointer without adding
    """
    while ptr_sepp < len(input_ids_sepp) and ptr_nosepp < len(input_ids) and \
            input_ids_sepp[ptr_sepp] != 0 and input_ids[ptr_nosepp] != 0:
        if input_ids_sepp[ptr_sepp] == input_ids[ptr_nosepp]:
            ptr_sepp += 1
            ptr_nosepp += 1
        else:
            if not (input_ids_sepp[ptr_sepp] == sep_id
                    or this_tokenizer.convert_ids_to_tokens([input_ids_sepp[ptr_sepp]])[0] in ('▁', '_')):
                break
            if input_ids_sepp[ptr_sepp] == sep_id:
                span_start_end[int(which_sepp / 2)][which_sepp % 2] = ptr_nosepp
                which_sepp += 1
                ptr_sepp += 1
            else:
                ptr_sepp += 1
    """
    max_word_span is the maximum number of tokens of the word
    It is set to 16 following deberta:
    https://github.com/microsoft/DeBERTa/blob/master/DeBERTa/apps/tasks/superglue_tasks.py#L1054
    """
    max_word_span = 16
    word_indices = []
    for idx1 in range(2):
        if span_start_end[idx1][1] < kwargs["max_seq_length"]:
            first_span = [x for x in range(span_start_end[idx1][0], span_start_end[idx1][1])
                          if x < kwargs["max_seq_length"]] + [0] * (max_word_span - span_start_end[idx1][1]
                                                                    + span_start_end[idx1][0])
            word_indices.append(first_span)
    this_data["word_spans"] = word_indices
    return this_data

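# Illustrative example (not part of the original file): for sentence1 = "There's a
# lot of trash on the bed of the river" with (start1, end1) spanning "bed",
# inserting_sepp() yields "There's a lot of trash on the [SEP] bed [SEP] of the river";
# the two-pointer scan then maps each [SEP] position back to a token index in the
# un-altered sequence and records it in span_start_end.
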
def tokenize_glue(this_example,
                  this_tokenizer,
                  dataset_name,
                  subdataset_name=None,
                  **kwargs):
    sentence_keys = get_sentence_keys(dataset_name, subdataset_name)

    if len(sentence_keys) > 1:
        sentence1_key, sentence2_key = sentence_keys[0], sentence_keys[1]
    else:
        sentence1_key = sentence_keys[0]
        sentence2_key = None

    data_pair = (
        (this_example[sentence1_key],) if sentence2_key is None else (
            this_example[sentence1_key], this_example[sentence2_key])
    )
    assert "max_seq_length" in kwargs, "max_seq_length must be provided for glue"
    return this_tokenizer(*data_pair, padding="max_length", max_length=kwargs["max_seq_length"], truncation=True)


TOKENIZER_MAPPING = OrderedDict(
    [
        (("glue", "rte"), tokenize_glue),
        (("glue", "mrpc"), tokenize_glue),
        (("glue", "cola"), tokenize_glue),
        (("glue", "wnli"), tokenize_glue),
        (("glue", "stsb"), tokenize_glue),
        (("glue", "sst2"), tokenize_glue),
        (("glue", "mnli"), tokenize_glue),
        (("glue", "qqp"), tokenize_glue),
        (("glue", "qnli"), tokenize_glue),
        (("super_glue", "wic"), tokenize_superglue_wic),
    ]
)

class AutoEncodeText:
    """
    This is a generic input text tokenization class that will be instantiated as one of the
    tokenization classes of the library when created with the
    `~flaml.nlp.dataset.AutoEncodeText.from_model_and_dataset_name` class method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoEncodeText is designed to be instantiated "
            "using the `AutoEncodeText.from_model_and_dataset_name(cls,"
            "data_raw,model_checkpoint_path,dataset_name,subdataset_name = None,**kwargs)` method."
        )

    @classmethod
    def from_model_and_dataset_name(cls,
                                    data_raw,
                                    model_checkpoint_path,
                                    dataset_name,
                                    subdataset_name=None,
                                    **kwargs):
        """
        Instantiate one of the input text tokenization classes from the raw data, model checkpoint path, dataset name
        and sub dataset name. The raw data is used for creating a mapping function from the raw tokens to the
        tokenized token ids.

        Args:
            data_raw:
                The raw data (a datasets.Dataset object)

            model_checkpoint_path:
                A string variable which specifies the model path, e.g., "google/electra-base-discriminator"

            dataset_name:
                A string variable which is the dataset name, e.g., "glue"

            subdataset_name:
                A string variable which is the sub dataset name, e.g., "rte"

            kwargs:
                The values in kwargs of any keys will be used for the mapping function

        Examples:
            >>> from datasets import load_dataset
            >>> data_raw = load_dataset("glue", "rte")
            >>> AutoEncodeText.from_model_and_dataset_name(data_raw, "google/electra-base-discriminator", "glue", "rte")

        """
        if (dataset_name, subdataset_name) in TOKENIZER_MAPPING.keys():
            this_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path, use_fast=True)
            token_func = TOKENIZER_MAPPING[(dataset_name, subdataset_name)]
            return data_raw.map(
                partial(token_func,
                        this_tokenizer=this_tokenizer,
                        dataset_name=dataset_name,
                        subdataset_name=subdataset_name,
                        **kwargs), batched=False)
        raise ValueError(
            "Unrecognized dataset {},{} for this kind of AutoEncodeText.\n"
            "The (dataset, subdataset) pair should be one of {}.".format(
                dataset_name, subdataset_name, ", ".join(str(c) for c in TOKENIZER_MAPPING.keys())
            )
        )
flaml/nlp/dataset/metric_auto.py (new file)
@@ -0,0 +1,70 @@
# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py
from collections import OrderedDict

metric_mode_mapping_glue = {
    "cola": [("matthews_correlation", "max")],
    "mnli": [("accuracy", "max")],
    "mrpc": [("accuracy", "max"), ("f1", "max")],
    "qnli": [("accuracy", "max")],
    "qqp": [("accuracy", "max"), ("f1", "max")],
    "rte": [("accuracy", "max")],
    "sst2": [("accuracy", "max")],
    "stsb": [("pearson", "max"), ("spearmanr", "max")],
    "wnli": [("accuracy", "max")]
}

metric_mode_mapping_squad = [("exact_match", "max"), ("f1", "max")]

metric_mode_mapping_super_glue = {
    "axb": [("matthews_correlation", "max")],
    "cb": [("accuracy", "max"), ("f1", "max")],
    "copa": [("accuracy", "max")],
    "rte": [("accuracy", "max")],
    "wic": [("accuracy", "max")],
    "wsc": [("accuracy", "max")],
    "wsc.fixed": [("accuracy", "max")],
    "boolq": [("accuracy", "max")],
    "axg": [("accuracy", "max")]
}

metric_mode_mapping_imdb = [("accuracy", "max")]

metric_mode_mapping_yelp = [("accuracy", "max")]

METRIC_MAPPING = OrderedDict(
    [
        ("squad", metric_mode_mapping_squad),
        ("glue", metric_mode_mapping_glue),
        ("super_glue", metric_mode_mapping_super_glue),
        ("imdb", metric_mode_mapping_imdb),
        ("yelp_review_full", metric_mode_mapping_yelp)
    ]
)


def get_default_and_alternative_metric(dataset_name,
                                       subdataset_name=None,
                                       custom_metric_name=None,
                                       custom_metric_mode_name=None):
    if dataset_name not in METRIC_MAPPING.keys():
        assert custom_metric_name and custom_metric_mode_name, \
            "The dataset is not in {}; you must explicitly specify " \
            "the custom_metric_name and custom_metric_mode_name".format(",".join(METRIC_MAPPING.keys()))
    eval_name_mapping = METRIC_MAPPING[dataset_name]
    if isinstance(eval_name_mapping, dict):
        assert subdataset_name and subdataset_name in eval_name_mapping, \
            "dataset_name and subdataset_name not correctly specified"
        default_metric, default_mode = eval_name_mapping[subdataset_name][0]
        all_metrics, all_modes \
            = [x[0] for x in eval_name_mapping[subdataset_name]] \
            + ["loss"], [x[1] for x in eval_name_mapping[subdataset_name]] + ["min"]

        return default_metric, default_mode, all_metrics, all_modes
    else:
        assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"

        default_metric, default_mode = eval_name_mapping[0]
        all_metrics, all_modes = [x[0] for x in eval_name_mapping] + ["loss"], \
                                 [x[1] for x in eval_name_mapping] + ["min"]

        return default_metric, default_mode, all_metrics, all_modes
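# Illustrative usage (not part of the original file): for GLUE/MRPC the lookup above returns
#     default_metric="accuracy", default_mode="max",
#     all_metrics=["accuracy", "f1", "loss"], all_modes=["max", "max", "min"]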
flaml/nlp/dataset/sentence_keys_auto.py (new file)
@@ -0,0 +1,28 @@
sentence_keys_glue = {
    "cola": ["sentence"],
    "mnli": ["premise", "hypothesis"],
    "mrpc": ["sentence1", "sentence2"],
    "qnli": ["sentence", "question"],
    "qqp": ["question1", "question2"],
    "rte": ["sentence1", "sentence2"],
    "sst2": ["sentence"],
    "stsb": ["sentence1", "sentence2"],
    "wnli": ["sentence1", "sentence2"]
}

sentence_keys_super_glue = {
    "rte": ["hypothesis", "premise"],
    "wic": ["sentence1", "sentence2"],
    "wsc": ["text"]
}


def get_sentence_keys(dataset_name, subdataset_name=None):
    eval_name_mapping = globals()["sentence_keys_" + dataset_name]
    if isinstance(eval_name_mapping, dict):
        assert subdataset_name and subdataset_name in eval_name_mapping, \
            "dataset_name and subdataset_name not correctly specified"
        sentence_keys = eval_name_mapping[subdataset_name]
    else:
        sentence_keys = eval_name_mapping
    return sentence_keys
flaml/nlp/dataset/submission_auto.py (new file)
@@ -0,0 +1,126 @@
import os
import shutil
from collections import OrderedDict

file_name_mapping_glue = {
    "ax": ["AX.tsv"],
    "cola": ["CoLA.tsv"],
    "mnli": ["MNLI-m.tsv", "MNLI-mm.tsv"],
    "mrpc": ["MRPC.tsv"],
    "qnli": ["QNLI.tsv"],
    "qqp": ["QQP.tsv"],
    "rte": ["RTE.tsv"],
    "sst2": ["SST-2.tsv"],
    "stsb": ["STS-B.tsv"],
    "wnli": ["WNLI.tsv"]
}

default_prediction_glue = {
    "ax": ["entailment"],
    "cola": ["0"],
    "mnli": ["neutral", "neutral"],
    "mrpc": ["0"],
    "qnli": ["not_entailment"],
    "qqp": ["0"],
    "rte": ["not_entailment"],
    "sst2": ["0"],
    "stsb": ["0.0"],
    "wnli": ["0"]
}

test_size_glue = {
    "ax": [1104],
    "cola": [1064],
    "mnli": [9796, 9847],
    "mrpc": [1725],
    "qnli": [5463],
    "qqp": [390965],
    "rte": [3000],
    "sst2": [1821],
    "stsb": [1379],
    "wnli": [146]
}


def output_prediction_glue(output_path, output_dir_name, predictions, train_data, dev_name, subdataset_name):
    output_dir = os.path.join(output_path, output_dir_name)
    if os.path.exists(output_dir):
        assert os.path.isdir(output_dir)
    else:
        os.mkdir(output_dir)
    if subdataset_name != "stsb":
        label_list = train_data.features["label"].names

    output_blank_tsv(output_dir)
    for each_subdataset_name in file_name_mapping_glue.keys():
        for idx in range(len(file_name_mapping_glue[each_subdataset_name])):
            each_file = file_name_mapping_glue[each_subdataset_name][idx]
            if subdataset_name != "mnli":
                is_match = subdataset_name == each_subdataset_name
            else:
                if dev_name == "validation_matched":
                    is_match = each_file == "MNLI-m.tsv"
                else:
                    is_match = each_file == "MNLI-mm.tsv"
            if is_match:
                with open(os.path.join(output_dir, each_file), "w") as writer:
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if subdataset_name == "stsb":
                            if item > 5.0:
                                item = 5.0
                            writer.write(f"{index}\t{item:3.3f}\n")
                        else:
                            if subdataset_name in ("rte", "qnli", "mnli"):
                                item = label_list[item]
                                writer.write(f"{index}\t{item}\n")
                            else:
                                if int(item) == item:
                                    item = int(item)
                                    writer.write(f"{index}\t{item}\n")
                                else:
                                    writer.write(f"{index}\t{item:3.3f}\n")

    shutil.make_archive(os.path.join(output_path, output_dir_name), 'zip', output_dir)
    return os.path.join(output_path, output_dir_name + ".zip")


OUTPUT_PREDICTION_MAPPING = OrderedDict(
    [
        ("glue", output_prediction_glue),
    ]
)


def auto_output_prediction(dataset_name,
                           output_path,
                           output_dir_name,
                           predictions,
                           train_data,
                           dev_name,
                           subset_name):
    if dataset_name in OUTPUT_PREDICTION_MAPPING.keys():
        return OUTPUT_PREDICTION_MAPPING[dataset_name](output_path,
                                                       output_dir_name,
                                                       predictions,
                                                       train_data,
                                                       dev_name,
                                                       subset_name)
    else:
        raise ValueError(
            "Unrecognized dataset {}. \n"
            "Should be one of {}.".format(dataset_name, ", ".join(OUTPUT_PREDICTION_MAPPING.keys()))
        )


def output_blank_tsv(output_dir):
    for each_subdataset_name in file_name_mapping_glue.keys():
        for idx in range(len(file_name_mapping_glue[each_subdataset_name])):
            each_file = file_name_mapping_glue[each_subdataset_name][idx]
            default_prediction = default_prediction_glue[each_subdataset_name][idx]
            test_size = test_size_glue[each_subdataset_name][idx]
            with open(os.path.join(output_dir, each_file), "w") as writer:
                writer.write("index\tprediction\n")
                for index in range(test_size):
                    writer.write(f"{index}\t{default_prediction}\n")
flaml/nlp/dataset/task_auto.py (new file)
@@ -0,0 +1,45 @@
# https://github.com/huggingface/datasets/blob/master/metrics/glue/glue.py

from collections import OrderedDict

task_mapping_glue = {
    "cola": "seq-classification",
    "mnli": "seq-classification",
    "mrpc": "seq-classification",
    "qnli": "seq-classification",
    "qqp": "seq-classification",
    "rte": "seq-classification",
    "sst2": "seq-classification",
    "stsb": "regression",
    "wnli": "seq-classification"
}

task_mapping_squad = "question-answering"

task_mapping_super_glue = {
    "wic": "seq-classification",
    "rte": "seq-classification"
}

TASK_MAPPING = OrderedDict(
    [
        ("squad", task_mapping_squad),
        ("glue", task_mapping_glue),
        ("super_glue", task_mapping_super_glue),
    ]
)


def get_default_task(dataset_name, subdataset_name=None):
    assert dataset_name in TASK_MAPPING.keys(), \
        "The dataset is not in {}; its default task cannot be inferred automatically".format(
            ",".join(TASK_MAPPING.keys()))
    eval_name_mapping = TASK_MAPPING[dataset_name]
    if isinstance(eval_name_mapping, dict):
        assert subdataset_name and subdataset_name in eval_name_mapping, \
            "dataset_name and subdataset_name not correctly specified"
        default_task = eval_name_mapping[subdataset_name]
    else:
        assert isinstance(eval_name_mapping, str), "dataset_name and subdataset_name not correctly specified"
        default_task = eval_name_mapping
    return default_task
flaml/nlp/hpo/__init__.py (new empty file)

flaml/nlp/hpo/get_grid_search_space.py (new file)
@@ -0,0 +1,456 @@
# lookup table for the grid configs recommended by each pre-trained language model for different tasks
import copy


def get_space_union_and_unique(search_space_common, search_space_unique, this_case_tags: list):
    """
    get the recommended search config of a pre-trained language model

    Args:
        search_space_common:
            the union of configs recommended by the LM for all cases;
        search_space_unique:
            the recommended config by the LM for a specific condition, e.g., small model
        this_case_tags:
            a list, which contains the tags describing the specific condition, e.g., ["small"]
    """
    search_space_union = search_space_common.copy()
    this_search_space = search_space_common.copy()
    # enumerate over each case where the search space is different;
    # this difference can be the dataset or model size, etc.
    is_included = False
    from ..utils import merge_dicts
    for each_case in search_space_unique.keys():
        from ..utils import _check_dict_keys_overlaps
        if each_case in this_case_tags:
            is_included = True
            assert not _check_dict_keys_overlaps(this_search_space, search_space_unique[each_case]), \
                "the hyperparameters of common and unique search spaces should not have overlaps"
            this_search_space.update(search_space_unique[each_case])
        search_space_union = merge_dicts(search_space_union, search_space_unique[each_case])
    if is_included:
        return this_search_space
    else:
        if "other" in search_space_unique.keys():
            search_space_union = merge_dicts(search_space_union, search_space_unique["other"])
        return search_space_union


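# A minimal sketch (hedged) of how get_space_union_and_unique behaves, with
# toy dictionaries rather than real recommended spaces:
#
#     common = {"warmup_ratio": [0.1]}
#     unique = {"small": {"learning_rate": [1e-4]},
#               "base": {"learning_rate": [3e-5]}}
#     get_space_union_and_unique(common, unique, ["small"])
#     # -> {"warmup_ratio": [0.1], "learning_rate": [1e-4]}
#     get_space_union_and_unique(common, unique, ["xlarge"])
#     # -> the union over all cases, since "xlarge" matches no unique tag

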
def get_deberta_space(model_size_type=None,
                      dataset_name=None,
                      subdataset_name=None,
                      algo_mode=None):
    """
    DEBERTA: DECODING-ENHANCED BERT WITH DISENTANGLED ATTENTION: Table 9
    https://arxiv.org/abs/2006.03654
    """
    search_space_common = {
        "cls_dropout": [0, 0.1, 0.15],
        "warmup_steps": [50, 100, 500, 1000],
        "per_device_train_batch_size": [16, 32, 48, 64],
        "num_train_epochs": [10],
        "adam_epsilon": [1e-6],
    }
    search_space_unique = {
        "large": {
            "learning_rate": [5e-6, 8e-6, 9e-6, 1e-5],
            "weight_decay": [0.01],
        },
        "base": {
            "learning_rate": [1.5e-5, 2e-5, 3e-5, 4e-5],
        }
    }
    return get_space_union_and_unique(search_space_common, search_space_unique, [model_size_type])


def get_longformer_space(model_size_type=None,
                         dataset_name=None,
                         subdataset_name=None,
                         algo_mode=None):
    """
    TODO: Longformer: The Long-Document Transformer
    """
    if dataset_name == "glue":
        return


def get_funnel_space(model_size_type=None,
                     dataset_name=None,
                     subdataset_name=None,
                     algo_mode=None):
    """
    Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing
    https://arxiv.org/abs/2006.03236
    """
    search_space_common = {"learning_rate": [1e-5, 2e-5, 3e-5],
                           "hidden_dropout": [0.1],
                           "activation_dropout": [0.0],
                           "attention_dropout": [0.1],
                           "weight_decay": [0.01],
                           "warmup_ratio": [0.1],
                           "adam_epsilon": [1e-6],
                           }
    search_space_unique = {
        "imdb": {
            "per_device_train_batch_size": [32],
            "num_train_epochs": [5]
        },
        "ag_news": {
            "per_device_train_batch_size": [32],
            "num_train_epochs": [3]
        },
        "dbpedia_14": {
            "per_device_train_batch_size": [64],
            "num_train_epochs": [3]
        },
        "yelp_polarity": {
            "per_device_train_batch_size": [128],
            "num_train_epochs": [3]
        },
        "yelp_review_full": {
            "per_device_train_batch_size": [128],
            "num_train_epochs": [3]
        },
        "amazon_polarity": {
            "per_device_train_batch_size": [128],
            "num_train_epochs": [3]
        },
        "amazon_review_multi": {
            "per_device_train_batch_size": [128],
            "num_train_epochs": [3]
        },
        "glue_rte": {
            "per_device_train_batch_size": [16],
            "num_train_epochs": [10]
        },
        "glue_mrpc": {
            "per_device_train_batch_size": [16],
            "num_train_epochs": [10]
        },
        "glue_stsb": {
            "per_device_train_batch_size": [16],
            "num_train_epochs": [10]
        },
        "glue_cola": {
            "per_device_train_batch_size": [16],
            "num_train_epochs": [10]
        },
        "glue_sst2": {
            "per_device_train_batch_size": [32],
            "num_train_epochs": [5]
        },
        "glue_qnli": {
            "per_device_train_batch_size": [32],
            "num_train_epochs": [3]
        },
        "glue_mnli": {
            "per_device_train_batch_size": [64],
            "num_train_epochs": [3]
        },
        "glue_qqp": {
            "per_device_train_batch_size": [64],
            "num_train_epochs": [5]
        }
    }
    from ..result_analysis.azure_utils import JobID
    return get_space_union_and_unique(search_space_common, search_space_unique,
                                      [JobID.get_full_data_name(dataset_name, subdataset_name)])


def get_bert_space(model_size_type=None,
                   dataset_name=None,
                   subdataset_name=None,
                   algo_mode=None):
    """
    BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
    https://arxiv.org/pdf/1810.04805.pdf
    """
    search_space_common = {}
    search_space_unique = {
        # Section 4.1: We use a batch size of 32 and fine-tune for 3 epochs over the data for all GLUE tasks. For each
        # task, we selected the best fine-tuning learning rate (among 5e-5, 4e-5, 3e-5, and 2e-5) on the Dev set
        "glue": {
            "learning_rate": [5e-5, 4e-5, 3e-5, 2e-5],
            "per_device_train_batch_size": [32],
            "num_train_epochs": [3],
        },
        # Section 4.2: We fine-tune for 3 epochs with a learning rate of 5e-5 and a batch size of 32
        "squad": {
            "learning_rate": [5e-5],
            "per_device_train_batch_size": [32],
            "num_train_epochs": [3],
        },
        # Section 4.3: We fine-tuned for 2 epochs with a learning rate of 5e-5 and a batch size of 48.
        "squad_v2": {
            "learning_rate": [5e-5],
            "per_device_train_batch_size": [48],
            "num_train_epochs": [2],
        },
        # Section 4.4: We fine-tune the model for 3 epochs with a learning rate of 2e-5 and a batch size of 16.
        "swag": {
            "learning_rate": [2e-5],
            "per_device_train_batch_size": [16],
            "num_train_epochs": [3],
        },
        # Appendix A. The optimal hyperparameter values are task-specific, but we found the following
        # range of possible values to work well across all tasks:
        # - Batch size: 16, 32
        # - Learning rate (Adam): 5e-5, 3e-5, 2e-5
        # - Number of epochs: 2, 3, 4
        "other": {
            "learning_rate": [5e-5, 3e-5, 2e-5],
            "per_device_train_batch_size": [16, 32],
            "num_train_epochs": [2, 3, 4],
        }
    }
    return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name])


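# A minimal sketch (hedged) of the result for GLUE: since search_space_common
# is empty, get_bert_space(dataset_name="glue") returns the "glue" unique
# space above, i.e.
#
#     {"learning_rate": [5e-05, 4e-05, 3e-05, 2e-05],
#      "per_device_train_batch_size": [32],
#      "num_train_epochs": [3]}

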
def get_roberta_space(model_size_type=None,
                      dataset_name=None,
                      subdataset_name=None,
                      algo_mode=None):
    # RoBERTa: A Robustly Optimized BERT Pretraining Approach
    # https://arxiv.org/pdf/1907.11692.pdf
    search_space_common = {
        "warmup_ratio": [0.06],
    }
    search_space_unique = {
        # Table 10: Hyperparameters for finetuning RoBERTa-LARGE on RACE, SQuAD and GLUE.
        # We consider a limited hyperparameter sweep for each task,
        # with batch sizes ∈ {16, 32} and learning rates ∈ {1e-5, 2e-5, 3e-5},
        # with a linear warmup for the first 6% of steps followed by a linear decay to 0.
        "glue": {
            "learning_rate": [1e-5, 2e-5, 3e-5],
            "per_device_train_batch_size": [16, 32],
            "weight_decay": [0.1],
            "num_train_epochs": [10],
        },
        "race": {
            "learning_rate": [1e-5],
            "per_device_train_batch_size": [16],
            "weight_decay": [0.1],
            "num_train_epochs": [4],
        },
        "squad": {
            "learning_rate": [1.5e-5],
            "per_device_train_batch_size": [48],
            "weight_decay": [0.01],
            "num_train_epochs": [2],
        }
    }
    return get_space_union_and_unique(search_space_common, search_space_unique, [dataset_name])


def get_electra_space(model_size_type=None,
                      dataset_name=None,
                      subdataset_name=None,
                      algo_mode=None):
    """
    ELECTRA: PRE-TRAINING TEXT ENCODERS AS DISCRIMINATORS RATHER THAN GENERATORS
    https://arxiv.org/pdf/2003.10555.pdf
    """
    assert model_size_type in ("small", "base", "large", "intermediate", "xlarge"), \
        "model_size_type must be one of small, base, large, intermediate and xlarge " \
        "(the Electra paper only provides hyperparameters for the small and base models)"
    search_space_common = {
        "learning_rate": [3e-5, 5e-5, 1e-4, 1.5e-4] if algo_mode == "grid"
        else [3e-5, 5e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-3],
        "weight_decay": [0.0],
        "adam_epsilon": [1e-6],
        "warmup_ratio": [0.1],
        "per_device_train_batch_size": [32],
        "hidden_dropout_prob": [0.1],
        "attention_probs_dropout_prob": [0.1],
    }
    search_space_unique = {
        # Appendix B: For base-sized models we searched for a learning rate
        "squad": {
            "num_train_epochs": [2]
        },
        "squad_v2": {
            "num_train_epochs": [2]
        },
        "glue_stsb": {
            "num_train_epochs": [10],
        },
        "glue_rte": {
            "num_train_epochs": [10],
        },
        "glue_wnli": {
            "num_train_epochs": [3],
        },
        "glue_mrpc": {
            "num_train_epochs": [3],
        },
        "glue_cola": {
            "num_train_epochs": [3],
        },
        "glue_sst2": {
            "num_train_epochs": [3],
        },
        "glue_qnli": {
            "num_train_epochs": [3],
        },
        "glue_mnli": {
            "num_train_epochs": [3],
        },
        "glue_qqp": {
            "num_train_epochs": [3],
        }
    }
    from ..result_analysis.azure_utils import JobID
    return get_space_union_and_unique(search_space_common, search_space_unique,
                                      [JobID.get_full_data_name(dataset_name, subdataset_name), model_size_type])


def get_mobilebert_space(model_size_type=None,
                         dataset_name=None,
                         subdataset_name=None,
                         algo_mode=None):
    """
    MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices
    https://arxiv.org/pdf/2004.02984.pdf
    """
    # To finetune the pre-trained models, we search the optimization hyperparameters
    # in a search space including different batch sizes (16/32/48), learning
    # rates ((1-10) * e-5), and the number of epochs (2-10)
    search_space_common = {
        "learning_rate": [x * 1e-5 for x in range(1, 11)],
        "per_device_train_batch_size": [4, 8, 16, 32, 48],
        "num_train_epochs": [x for x in range(2, 11)],
    }
    search_space_unique = {}
    return get_space_union_and_unique(search_space_common, search_space_unique, [])


def get_albert_space(model_size_type=None,
                     dataset_name=None,
                     subdataset_name=None,
                     algo_mode=None):
    """
    Hyperparameters for downstream tasks are shown in Table 14. We adapt these hyperparameters
    from Liu et al. (2019), Devlin et al. (2019), and Yang et al. (2019).

                LR        BSZ  ALBERT DR  Classifier DR  TS     WS    MSL
    CoLA        1.00E-05  16   0          0.1            5336   320   512
    STS         2.00E-05  16   0          0.1            3598   214   512
    SST-2       1.00E-05  32   0          0.1            20935  1256  512
    MNLI        3.00E-05  128  0          0.1            10000  1000  512
    QNLI        1.00E-05  32   0          0.1            33112  1986  512
    QQP         5.00E-05  128  0.1        0.1            14000  1000  512
    RTE         3.00E-05  32   0.1        0.1            800    200   512
    MRPC        2.00E-05  32   0          0.1            800    200   512
    WNLI        2.00E-05  16   0.1        0.1            2000   250   512
    SQuAD v1.1  5.00E-05  48   0          0.1            3649   365   384
    SQuAD v2.0  3.00E-05  48   0          0.1            8144   814   512
    RACE        2.00E-05  32   0.1        0.1            12000  1000  512
    """
    search_space_common = {
    }
    search_space_unique = {
        "glue_cola": {
            "learning_rate": [1e-5],
            "per_device_train_batch_size": [16],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [5336],
            "warmup_steps": [320],
        },
        "glue_stsb": {
            "learning_rate": [2e-5],
            "per_device_train_batch_size": [16],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [3598],
            "warmup_steps": [214],
        },
        "glue_sst2": {
            "learning_rate": [1e-5],
            "per_device_train_batch_size": [32],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [20935],
            "warmup_steps": [1256],
        },
        "glue_mnli": {
            "learning_rate": [3e-5],
            "per_device_train_batch_size": [128],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [10000],
            "warmup_steps": [1000],
        },
        "glue_qnli": {
            "learning_rate": [1e-5],
            "per_device_train_batch_size": [32],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [33112],
            "warmup_steps": [1986],
        },
        "glue_qqp": {
            "learning_rate": [5e-5],
            "per_device_train_batch_size": [128],
            "attention_probs_dropout_prob": [0.1],
            "classifier_dropout_prob": [0.1],
            "max_steps": [14000],
            "warmup_steps": [1000],
        },
        "glue_rte": {
            "learning_rate": [3e-5],
            "per_device_train_batch_size": [32],
            "attention_probs_dropout_prob": [0.1],
            "classifier_dropout_prob": [0.1],
            "max_steps": [800],
            "warmup_steps": [200],
        },
        "glue_mrpc": {
            "learning_rate": [2e-5],
            "per_device_train_batch_size": [32],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [800],
            "warmup_steps": [200],
        },
        "glue_wnli": {
            "learning_rate": [2e-5],
            "per_device_train_batch_size": [16],
            "attention_probs_dropout_prob": [0.1],
            "classifier_dropout_prob": [0.1],
            "max_steps": [2000],
            "warmup_steps": [250],
        },
        "squad": {
            "learning_rate": [5e-5],
            "per_device_train_batch_size": [48],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [3649],
            "warmup_steps": [365],
        },
        "squad_v2": {
            "learning_rate": [3e-5],
            "per_device_train_batch_size": [48],
            "attention_probs_dropout_prob": [0],
            "classifier_dropout_prob": [0.1],
            "max_steps": [8144],
            "warmup_steps": [814],
        },
        "race": {
            "learning_rate": [2e-5],
            "per_device_train_batch_size": [32],
            "attention_probs_dropout_prob": [0.1],
            "classifier_dropout_prob": [0.1],
            "max_steps": [12000],
            "warmup_steps": [1000],
        },
    }

    from ..result_analysis.azure_utils import JobID
    return get_space_union_and_unique(search_space_common, search_space_unique,
                                      [JobID.get_full_data_name(dataset_name, subdataset_name)])
93
flaml/nlp/hpo/grid_searchspace_auto.py
Normal file
@@ -0,0 +1,93 @@
from collections import OrderedDict

from .get_grid_search_space import \
    (get_electra_space,
     get_bert_space,
     get_roberta_space,
     get_funnel_space,
     get_deberta_space,
     get_albert_space
     )

GRID_SEARCH_SPACE_MAPPING = OrderedDict(
    [
        ("electra", get_electra_space),
        ("bert", get_bert_space),
        ("roberta", get_roberta_space),
        ("funnel", get_funnel_space),
        ("deberta", get_deberta_space),
        ("albert", get_albert_space),
    ]
)

HF_MODEL_LIST = [
    "bert",
    "roberta",
    "electra",
    "xlnet",
    "albert",
    "distilbert",
    "deberta",
    "mobilebert",
    "funnel"
]


class AutoGridSearchSpace:
    """
    This is a class for getting the recommended grid search space of a pre-trained LM. It is
    instantiated as one of the search spaces of the library when created with the
    `~flaml.nlp.hpo.AutoGridSearchSpace.from_model_and_dataset_name` method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoGridSearchSpace is designed to be instantiated "
            "using the `AutoGridSearchSpace.from_model_and_dataset_name(cls, model_type, model_size_type, "
            "dataset_name, subdataset_name=None, algo_mode=None)` method."
        )

    @classmethod
    def from_model_and_dataset_name(cls,
                                    model_type,
                                    model_size_type,
                                    dataset_name,
                                    subdataset_name=None,
                                    algo_mode=None):
        """
        Instantiate one of the classes for getting the recommended grid search space of a pre-trained LM from
        the model type, model size type, dataset name, sub dataset name and algorithm mode

        Args:
            model_type:
                A string variable which is the model type, e.g., "electra"

            model_size_type:
                A string variable which is the size of the model, e.g., "small"

            dataset_name:
                A string variable which is the dataset name, e.g., "glue"

            subdataset_name:
                A string variable which is the sub dataset name, e.g., "rte"

            algo_mode:
                A string variable which is the algorithm mode for grid search, e.g., "gridbert"

        Example:
            >>> AutoGridSearchSpace.from_model_and_dataset_name("electra", "small", "glue", "rte", "grid")

        """

        if model_type in GRID_SEARCH_SPACE_MAPPING.keys():
            this_model_recommended_space = GRID_SEARCH_SPACE_MAPPING[model_type](
                model_size_type, dataset_name, subdataset_name, algo_mode)
            return this_model_recommended_space
        raise ValueError(
            "Unrecognized method {},{} for this kind of AutoGridSearchSpace: {}.\n"
            "Method name should be one of {}.".format(
                model_type, dataset_name, cls.__name__, ", ".join(GRID_SEARCH_SPACE_MAPPING.keys())
            )
        )
242
flaml/nlp/hpo/hpo_searchspace.py
Normal file
@@ -0,0 +1,242 @@
from collections import OrderedDict

from ..huggingface.trainer import TrainerForAutoTransformers
from ray import tune
from transformers import TrainingArguments

from .grid_searchspace_auto import AutoGridSearchSpace


def hpo_space_custom(**custom_hpo_args):
    assert "hpo_space" in custom_hpo_args
    custom_search_space = custom_hpo_args["hpo_space"]
    return custom_search_space


def bounded_gridunion(logger=None,
                      model_type=None,
                      model_size_type=None,
                      dataset_name=None,
                      subdataset_name=None,
                      **custom_hpo_args):
    assert "bound" in custom_hpo_args
    gridunion_space = HPO_SEARCH_SPACE_MAPPING["uni"](logger,
                                                      model_type,
                                                      model_size_type,
                                                      dataset_name,
                                                      subdataset_name,
                                                      **custom_hpo_args)
    for each_key in custom_hpo_args["bound"].keys():
        if "u" in custom_hpo_args["bound"][each_key]:
            upper = custom_hpo_args["bound"][each_key]["u"]
        else:
            upper = 100000
        if "l" in custom_hpo_args["bound"][each_key]:
            lower = custom_hpo_args["bound"][each_key]["l"]
        else:
            lower = -100000
        original_space = sorted(gridunion_space[each_key])
        upper_id = len(original_space)
        for x in range(len(original_space)):
            if original_space[x] > upper:
                upper_id = x
                break
        lower_id = 0
        for x in range(len(original_space) - 1, -1, -1):
            if original_space[x] < lower:
                # original_space[x] is below the bound, so the slice must
                # start at the next index
                lower_id = x + 1
                break
        gridunion_space[each_key] = original_space[lower_id:upper_id]
    return gridunion_space


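# A minimal sketch (hedged) of the expected "bound" format, assuming the
# grid-union space contains learning_rate values [1e-5, 3e-5, 5e-5, 1e-4]:
#
#     bounded_gridunion(model_type="electra", model_size_type="small",
#                       dataset_name="glue", subdataset_name="rte",
#                       bound={"learning_rate": {"l": 3e-5, "u": 5e-5}})
#     # keeps only the grid values within [3e-5, 5e-5], i.e., [3e-5, 5e-5]

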
def hpo_space_gridunion(logger=None,
                        model_type=None,
                        model_size_type=None,
                        dataset_name=None,
                        subdataset_name=None,
                        **custom_hpo_args):
    output_config = {}
    for each_model_type in {"electra", "roberta", "bert"}:
        # if each_model_type == model_type: continue
        this_config = AutoGridSearchSpace.from_model_and_dataset_name(
            each_model_type, model_size_type, dataset_name, subdataset_name, "hpo")
        from ..utils import merge_dicts
        output_config = merge_dicts(output_config, this_config)
    default_values = {}
    # adding the default configuration from transformers/training_args.py into the hpo space
    training_args = TrainingArguments(output_dir=".")
    for each_hp in output_config.keys():
        try:
            default_values[each_hp] = [getattr(training_args, each_hp)]
        except AttributeError:
            pass

    output_config = merge_dicts(output_config, default_values)

    return output_config


def hpo_space_gridunion_smoke_test(
        logger=None,
        model_type=None,
        model_size_type=None,
        dataset_name=None,
        subdataset_name=None,
        **custom_hpo_args):
    return {'learning_rate': [1e-5],
            'weight_decay': [0.0],
            'adam_epsilon': [1e-08],
            'warmup_ratio': [0.1],
            'per_device_train_batch_size': [2],
            'hidden_dropout_prob': [0.1],
            'attention_probs_dropout_prob': [0.1],
            'num_train_epochs': [0.1]}


def hpo_space_generic(logger=None,
                      model_type=None,
                      model_size_type=None,
                      dataset_name=None,
                      subdataset_name=None,
                      **custom_hpo_args):
    output_config = {
        "learning_rate": {"l": 1e-6, "u": 1e-3, "space": "log"},
        "num_train_epochs": {"l": 1.0, "u": 10.0, "space": "log"},
        "per_device_train_batch_size": [4, 8, 16, 32, 48],
        "warmup_ratio": {"l": 0.0, "u": 0.3, "space": "linear"},
        "weight_decay": {"l": 0.0, "u": 0.3, "space": "linear"}
    }
    return output_config


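# The {"l": ..., "u": ..., "space": ...} dictionaries above encode continuous
# ranges, while plain lists encode categorical grids. A minimal sketch (hedged)
# of how such an entry could be turned into a ray.tune domain; the helper name
# to_ray_tune_domain is hypothetical, not part of this library:
#
#     from ray import tune
#
#     def to_ray_tune_domain(entry):
#         if isinstance(entry, list):
#             return tune.choice(entry)           # categorical grid
#         if entry["space"] == "log":
#             return tune.loguniform(entry["l"], entry["u"])
#         return tune.uniform(entry["l"], entry["u"])

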
def hpo_space_generic_grid(logger=None,
                           model_type=None,
                           model_size_type=None,
                           dataset_name=None,
                           subdataset_name=None,
                           **custom_hpo_args):
    output_config = {
        "learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
        "num_train_epochs": [3, 10],
        "per_device_train_batch_size": [16, 32],
        "warmup_ratio": [0, 0.06, 0.1],
        "weight_decay": [0, 0.1]
    }
    return output_config


def hpo_space_small(logger=None,
                    model_type=None,
                    model_size_type=None,
                    dataset_name=None,
                    subdataset_name=None,
                    **custom_hpo_args):
    config_json = AutoGridSearchSpace.from_model_and_dataset_name(
        model_type, model_size_type, dataset_name, subdataset_name, "hpo")
    output_config = {}

    for each_hp in config_json.keys():
        if each_hp == "learning_rate":
            if len(config_json[each_hp]) > 1:
                output_config[each_hp] = {"l": 3e-5, "u": 1.5e-4, "space": "log"}
            else:
                output_config[each_hp] = config_json[each_hp]
        elif each_hp == "num_train_epochs":
            output_config[each_hp] = {"l": 2.0, "u": 4.0, "space": "linear"}
        elif each_hp == "per_device_train_batch_size":
            output_config[each_hp] = [16, 32, 64]
        elif each_hp == "warmup_ratio":
            output_config[each_hp] = {"l": 0.0, "u": 0.2, "space": "linear"}
        elif each_hp == "weight_decay":
            output_config[each_hp] = {"l": 0.0, "u": 0.3, "space": "linear"}
        else:
            output_config[each_hp] = config_json[each_hp]

    return output_config


HPO_SEARCH_SPACE_MAPPING = OrderedDict(
    [
        ("uni", hpo_space_gridunion),
        ("gnr", hpo_space_generic),
        ("uni_test", hpo_space_gridunion_smoke_test),
        ("cus", hpo_space_custom),
        ("buni", bounded_gridunion)
    ]
)


class AutoHPOSearchSpace:
    """
    This is a class for getting the hpo search space based on the search space mode
    (a string variable). It is instantiated as one of the HPO search spaces of the library when
    created with the `~flaml.nlp.hpo.AutoHPOSearchSpace.from_model_and_dataset_name` method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoHPOSearchSpace is designed to be instantiated "
            "using the `AutoHPOSearchSpace.from_model_and_dataset_name(cls, logger, hpo_searchspace_mode, "
            "model_type, model_size_type, dataset_name, subdataset_name=None, **custom_hpo_args)` method."
        )

    @classmethod
    def from_model_and_dataset_name(cls,
                                    logger,
                                    hpo_searchspace_mode,
                                    model_type,
                                    model_size_type,
                                    dataset_name,
                                    subdataset_name=None,
                                    **custom_hpo_args):
        """
        Instantiate one of the classes for getting the hpo search space from the search space name, model type,
        model size type, dataset name and sub dataset name

        Args:
            logger:
                Reference to the logger

            hpo_searchspace_mode:
                A string variable which is the name of the hpo search space, e.g., "uni"

            model_type:
                A string variable which is the type of the model, e.g., "electra"

            model_size_type:
                A string variable which is the type of the model size, e.g., "small"

            dataset_name:
                A string variable which is the dataset name, e.g., "glue"

            subdataset_name:
                A string variable which is the sub dataset name, e.g., "rte"

            custom_hpo_args:
                Any additional keyword argument to be used by the function for the HPO search space

        Example:
            >>> AutoHPOSearchSpace.from_model_and_dataset_name(logger, "uni", "electra", "small", "glue", "rte")
        """

        if hpo_searchspace_mode in HPO_SEARCH_SPACE_MAPPING.keys():
            hpo_space = HPO_SEARCH_SPACE_MAPPING[hpo_searchspace_mode](
                logger,
                model_type,
                model_size_type,
                dataset_name,
                subdataset_name,
                **custom_hpo_args)
            return hpo_space
        raise ValueError(
            "Unrecognized method {},{} for this kind of AutoHPOSearchSpace: {}.\n"
            "Method name should be one of {}.".format(
                hpo_searchspace_mode, dataset_name, cls.__name__,
                ", ".join(HPO_SEARCH_SPACE_MAPPING.keys())
            )
        )
51
flaml/nlp/hpo/scheduler_auto.py
Normal file
@@ -0,0 +1,51 @@
from collections import OrderedDict
from ray.tune.schedulers import ASHAScheduler, HyperBandScheduler, MedianStoppingRule

SCHEDULER_MAPPING = OrderedDict(
    [
        ("None", None),
        ("asha", ASHAScheduler),
        ("hb", HyperBandScheduler),
    ]
)


class AutoScheduler:
    """
    This is a class for getting the scheduler based on the scheduler name
    (a string variable). It is instantiated as one of the schedulers of the library when
    created with the `~flaml.nlp.hpo.AutoScheduler.from_scheduler_name` method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoScheduler is designed to be instantiated "
            "using the `AutoScheduler.from_scheduler_name(cls, scheduler_name, **kwargs)` method."
        )

    @classmethod
    def from_scheduler_name(cls, scheduler_name, **kwargs):
        """
        Instantiate one of the schedulers using the scheduler name

        Args:
            scheduler_name:
                A string variable for the scheduler name

        Example:
            >>> AutoScheduler.from_scheduler_name("asha")
        """

        if scheduler_name in SCHEDULER_MAPPING.keys():
            try:
                return SCHEDULER_MAPPING[scheduler_name](**kwargs)
            except TypeError:
                # "None" maps to None, which is not callable; return no scheduler
                return None
        raise ValueError(
            "Unrecognized scheduler {} for this kind of AutoScheduler: {}.\n"
            "Scheduler name should be one of {}.".format(
                scheduler_name, cls.__name__, ", ".join(SCHEDULER_MAPPING.keys())
            )
        )
182
flaml/nlp/hpo/searchalgo_auto.py
Normal file
@@ -0,0 +1,182 @@
import itertools
from collections import OrderedDict

import ray
from ray.tune.suggest.optuna import OptunaSearch
from flaml import CFO, BlendSearch

SEARCH_ALGO_MAPPING = OrderedDict(
    [
        ("optuna", OptunaSearch),
        ("cfo", CFO),
        ("bs", BlendSearch),
        ("grid", None),
        ("gridbert", None),
        ("rs", None)
    ]
)


class AutoSearchAlgorithm:
    """
    This is a class for getting the search algorithm based on the search algorithm name
    (a string variable). It is instantiated as one of the algorithms of the library when
    created with the `~flaml.nlp.hpo.AutoSearchAlgorithm.from_method_name` method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoSearchAlgorithm is designed to be instantiated "
            "using the `AutoSearchAlgorithm.from_method_name(cls, search_algo_name, search_algo_args_mode,"
            " hpo_search_space, **custom_hpo_args)` method."
        )

    @classmethod
    def from_method_name(cls, search_algo_name, search_algo_args_mode, hpo_search_space, **custom_hpo_args):
        """
        Instantiate one of the search algorithm classes based on the search algorithm name, search algorithm
        argument mode, hpo search space and other keyword args

        Args:
            search_algo_name:
                A string variable that specifies the search algorithm name, e.g., "bs"

            search_algo_args_mode:
                A string variable that specifies the mode for the search algorithm args, e.g., "dft" means
                initializing using the default mode

            hpo_search_space:
                The hpo search space

            custom_hpo_args:
                The customized arguments for the search algorithm (specified by the user)

        Example:
            >>> from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace
            >>> search_space_hpo = AutoHPOSearchSpace.from_model_and_dataset_name(
            ...     logger, "uni", "electra", "small", "glue", "rte")
            >>> search_algo = AutoSearchAlgorithm.from_method_name(
            ...     "bs", "cus", search_space_hpo,
            ...     points_to_evaluate=[{"learning_rate": 1e-5, "num_train_epochs": 10}])
        """

        assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
        if not search_algo_name:
            search_algo_name = "grid"
        if search_algo_name in SEARCH_ALGO_MAPPING.keys():
            if SEARCH_ALGO_MAPPING[search_algo_name] is None:
                # "grid", "gridbert" and "rs" map to None: no search algorithm
                # object is built for grid and random search
                return None
            try:
                # filter the customized args for hpo from custom_hpo_args: keep those
                # which are in the argument list of the constructor of the algorithm,
                # remove those which do not appear among the constructor's arguments
                allowed_arguments = SEARCH_ALGO_MAPPING[search_algo_name].__init__.__code__.co_varnames
                allowed_custom_args = {key: custom_hpo_args[key] for key in custom_hpo_args.keys()
                                       if key in allowed_arguments}

                # If search_algo_args_mode is "dft", set the args to the default args, e.g., the default args for
                # BlendSearch are "low_cost_partial_config": {"num_train_epochs": min_epoch,
                # "per_device_train_batch_size": max(hpo_search_space["per_device_train_batch_size"].categories)}
                this_search_algo_kwargs = None
                if search_algo_args_mode == "dft":
                    this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
                        "dft", hpo_search_space=hpo_search_space, **allowed_custom_args)
                elif search_algo_args_mode == "cus":
                    this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
                        "cus", hpo_search_space=hpo_search_space, **allowed_custom_args)

                # return the hpo algorithm with the arguments
                return SEARCH_ALGO_MAPPING[search_algo_name](**this_search_algo_kwargs)
            except KeyError:
                return None
        raise ValueError(
            "Unrecognized method {} for this kind of AutoSearchAlgorithm: {}.\n"
            "Method name should be one of {}.".format(
                search_algo_name, cls.__name__, ", ".join(SEARCH_ALGO_MAPPING.keys())
            )
        )

    @staticmethod
    def grid2list(grid_config):
        key_val_list = [[(key, each_val) for each_val in val_list['grid_search']]
                        for (key, val_list) in grid_config.items()]
        config_list = [dict(x) for x in itertools.product(*key_val_list)]
        return config_list


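# A minimal sketch (hedged) of grid2list: it expands a ray.tune-style grid
# config into the list of all concrete configurations:
#
#     AutoSearchAlgorithm.grid2list({
#         "learning_rate": {"grid_search": [1e-5, 2e-5]},
#         "num_train_epochs": {"grid_search": [3]},
#     })
#     # -> [{"learning_rate": 1e-05, "num_train_epochs": 3},
#     #     {"learning_rate": 2e-05, "num_train_epochs": 3}]

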
def get_search_algo_args_optuna(search_args_mode, hpo_search_space=None, **custom_hpo_args):
    return {}


def default_search_algo_args_bs(search_args_mode, hpo_search_space=None, **custom_hpo_args):
    assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
    if "num_train_epochs" in hpo_search_space and \
            isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
        min_epoch = min(hpo_search_space["num_train_epochs"].categories)
    else:
        assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
        min_epoch = hpo_search_space["num_train_epochs"].lower
    default_search_algo_args = {
        "low_cost_partial_config": {
            "num_train_epochs": min_epoch,
            "per_device_train_batch_size": max(hpo_search_space["per_device_train_batch_size"].categories),
        },
    }
    if search_args_mode == "cus":
        default_search_algo_args.update(custom_hpo_args)
    return default_search_algo_args


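# A minimal sketch (hedged) of what default_search_algo_args_bs computes for
# BlendSearch/CFO, assuming a search space with categorical epochs and batch sizes:
#
#     space = {"num_train_epochs": tune.choice([3, 10]),
#              "per_device_train_batch_size": tune.choice([16, 32])}
#     default_search_algo_args_bs("dft", hpo_search_space=space)
#     # -> {"low_cost_partial_config": {"num_train_epochs": 3,
#     #                                 "per_device_train_batch_size": 32}}

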
def experiment_search_algo_args_bs(hpo_search_space=None):
    if "num_train_epochs" in hpo_search_space and \
            isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
        min_epoch = min(hpo_search_space["num_train_epochs"].categories)
    else:
        assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
        min_epoch = hpo_search_space["num_train_epochs"].lower
    default_search_algo_args = {
        "low_cost_partial_config": {
            "num_train_epochs": min_epoch,
        },
    }
    return default_search_algo_args


def default_search_algo_args_skopt(hpo_search_space=None):
    return {}


def default_search_algo_args_dragonfly(hpo_search_space=None):
    return {}


def default_search_algo_args_nevergrad(hpo_search_space=None):
    return {}


def default_search_algo_args_hyperopt(hpo_search_space=None):
    return {}


def default_search_algo_args_grid_search(search_args_mode, hpo_search_space=None, **custom_hpo_args):
    return {}


def default_search_algo_args_random_search(search_args_mode, hpo_search_space=None, **custom_hpo_args):
    return {}


DEFAULT_SEARCH_ALGO_ARGS_MAPPING = OrderedDict(
    [
        ("optuna", get_search_algo_args_optuna),
        ("cfo", default_search_algo_args_bs),
        ("bs", default_search_algo_args_bs),
        ("grid", default_search_algo_args_grid_search),
        ("gridbert", default_search_algo_args_random_search)
    ]
)
52
flaml/nlp/huggingface/switch_head_auto.py
Normal file
@@ -0,0 +1,52 @@
from collections import OrderedDict

from transformers.models.electra.modeling_electra import ElectraClassificationHead
from transformers.models.roberta.modeling_roberta import RobertaClassificationHead

MODEL_CLASSIFICATION_HEAD_MAPPING = OrderedDict(
    [
        ("electra", ElectraClassificationHead),
        ("roberta", RobertaClassificationHead),
    ]
)


class AutoSeqClassificationHead:
    """
    This is a class for getting the classification head class based on the name of the LM.
    It is instantiated as one of the ClassificationHead classes of the library when
    created with the `~flaml.nlp.huggingface.AutoSeqClassificationHead.from_model_type_and_config` method.

    This class cannot be instantiated directly using ``__init__()`` (throws an error).
    """

    def __init__(self):
        raise EnvironmentError(
            "AutoSeqClassificationHead is designed to be instantiated "
            "using the `AutoSeqClassificationHead.from_model_type_and_config(cls, model_type, config)` method."
        )

    @classmethod
    def from_model_type_and_config(cls, model_type, config):
        """
        Instantiate one of the classification head classes from the model_type and model configuration.

        Args:
            model_type:
                A string, which describes the model type, e.g., "electra"
            config (:class:`~transformers.PretrainedConfig`):
                The huggingface class of the model's configuration

        Examples::
            >>> from transformers import AutoConfig
            >>> model_config = AutoConfig.from_pretrained("google/electra-base-discriminator")
            >>> AutoSeqClassificationHead.from_model_type_and_config("electra", model_config)
        """
        if model_type in MODEL_CLASSIFICATION_HEAD_MAPPING.keys():
            return MODEL_CLASSIFICATION_HEAD_MAPPING[model_type](config)
        raise ValueError(
            "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
            "Model type should be one of {}.".format(
                config.__class__, cls.__name__, ", ".join(MODEL_CLASSIFICATION_HEAD_MAPPING.keys())
            )
        )
121
flaml/nlp/huggingface/trainer.py
Normal file
@@ -0,0 +1,121 @@
import copy
import os

import transformers

from ray import tune
import torch
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR

transformers.logging.set_verbosity_error()


class TrainerForAutoTransformers(transformers.Trainer):
    """
    Overriding transformers.Trainer.

    Args:
        huggingface (:class:`~transformers.PreTrainedModel` or :obj:`torch.nn.Module`, `optional`):
            the model to be trained, evaluated or used for prediction
    """

    def get_optimizers(
            self, num_training_steps
    ):
        self.current_optimizer, self.current_scheduler = super().get_optimizers(num_training_steps)
        return (self.current_optimizer, self.current_scheduler)

    def evaluate(self,
                 eval_dataset=None):
        """
        Overriding transformers.Trainer.evaluate by saving state with save_state

        Args:
            eval_dataset:
                the dataset to be evaluated
        """
        eval_dataloader = self.get_eval_dataloader(eval_dataset)
        output = self.prediction_loop(
            eval_dataloader, description="Evaluation")
        self.log(output.metrics)

        self.save_state()

        # strip the "eval_" prefix so the metric names match those reported to ray.tune
        for key in list(output.metrics.keys()):
            if key.startswith("eval_"):
                output.metrics[key[5:]] = output.metrics[key]
        tune.report(**output.metrics)

        return output.metrics

    def save_state(self):
        """
        Overriding transformers.Trainer.save_state. Only by saving the state
        can best_trial.get_best_checkpoint return a non-empty value.
        """
        with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir:
            self.args.output_dir = checkpoint_dir
            # This is the directory name that Huggingface requires.
            output_dir = os.path.join(
                self.args.output_dir,
                f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
            self.save_model(output_dir)
            torch.save(self.optimizer.state_dict(),
                       os.path.join(output_dir, "optimizer.pt"))
            torch.save(self.lr_scheduler.state_dict(),
                       os.path.join(output_dir, "scheduler.pt"))

    @staticmethod
    def convert_num_train_epochs_to_max_steps(
            num_train_epochs: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        return int(num_train_epochs * num_train_examples / per_device_train_batch_size / device_count)

    @staticmethod
    def convert_max_steps_to_num_train_epochs(
            max_steps: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        return float(max_steps * per_device_train_batch_size * device_count) / num_train_examples

    @staticmethod
    def convert_warmup_ratio_to_warmup_steps(
            warmup_ratio,
            max_steps=None,
            num_train_epochs=None,
            num_train_examples=None,
            per_device_train_batch_size=None,
            device_count=None):
        if max_steps:
            return int(warmup_ratio * max_steps)
        max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
            num_train_epochs,
            num_train_examples,
            per_device_train_batch_size,
            device_count)
        return int(warmup_ratio * max_steps)

    @staticmethod
    def convert_warmup_steps_to_warmup_ratio(
            warmup_steps: int,
            num_train_epochs: int,
            num_train_examples: int,
            per_device_train_batch_size: int,
            device_count: int):
        max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
            num_train_epochs,
            num_train_examples,
            per_device_train_batch_size,
            device_count)
        return float(warmup_steps / max_steps)

    @staticmethod
    def resolve_hp_conflict(search_space_dict):
        if "max_steps" in search_space_dict and "num_train_epochs" in search_space_dict:
            del search_space_dict["num_train_epochs"]
        if "warmup_ratio" in search_space_dict and "warmup_steps" in search_space_dict:
            del search_space_dict["warmup_ratio"]
        return search_space_dict
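
# A worked example (hedged) of the epoch/step conversions above, assuming
# 2,000 training examples, a per-device batch size of 32 and a single device:
#
#     TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
#         num_train_epochs=3, num_train_examples=2000,
#         per_device_train_batch_size=32, device_count=1)
#     # -> int(3 * 2000 / 32 / 1) = 187
#
#     TrainerForAutoTransformers.convert_warmup_ratio_to_warmup_steps(
#         0.1, max_steps=187)
#     # -> int(0.1 * 187) = 18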
0
flaml/nlp/result_analysis/__init__.py
Normal file
677
flaml/nlp/result_analysis/azure_utils.py
Normal file
@@ -0,0 +1,677 @@
import re
import pathlib
import os
from azure.storage.blob import BlobServiceClient, ContainerClient
from transformers import AutoConfig

from ..utils import get_wandb_azure_key
from datetime import datetime
from dataclasses import dataclass, field
from ..hpo.grid_searchspace_auto import HF_MODEL_LIST
import json


@dataclass
class JobID:
    dat: list = field(default=None)
    subdat: str = field(default=None)
    mod: str = field(default=None)
    spa: str = field(default=None)
    arg: str = field(default=None)
    alg: str = field(default=None)
    pru: str = field(default=None)
    pre_full: str = field(default=None)
    pre: str = field(default=None)
    presz: str = field(default=None)
    spt: str = field(default=None)
    rep: int = field(default=0)
    sddt: int = field(default=None)
    sdhf: int = field(default=None)

    def __init__(self,
                 console_args=None):
        if console_args:
            self.set_jobid_from_console_args(console_args)

    def set_unittest_config(self):
        """
        set the JobID config for unit test
        """
        self.dat = ["glue"]
        self.subdat = "mrpc"
        self.mod = "hpo"
        self.spa = "uni_test"
        self.arg = "dft"
        self.alg = "bs"
        self.pru = "None"
        self.pre_full = "google/mobilebert-uncased"
        self.pre = "mobilebert"
        self.presz = "small"
        self.spt = "rspt"
        self.rep = 0
        self.sddt = 43
        self.sdhf = 42

    def is_match(self, partial_jobid):
        """
        return a boolean variable of whether the current object matches the partial jobid defined
        in partial_jobid. For example,
        self = JobID(dat = ['glue'],
                     subdat = 'cola',
                     mod = 'bestnn',
                     spa = 'buni',
                     arg = 'cus',
                     alg = 'bs',
                     pru = 'None',
                     pre = 'funnel',
                     presz = 'xlarge',
                     spt = 'rspt',
                     rep = 0,
                     sddt = 43,
                     sdhf = 42)
        partial_jobid1 = JobID(dat = ['glue'],
                               subdat = 'cola',
                               mod = 'hpo')
        partial_jobid2 = JobID(dat = ['glue'],
                               subdat = 'cola',
                               mod = 'bestnn')
        returns False for partial_jobid1 and True for partial_jobid2
        """
        is_not_match = False
        for key, val in partial_jobid.__dict__.items():
            if val is None:
                continue
            if getattr(self, key) != val:
                is_not_match = True
        return not is_not_match

    def to_wandb_string(self):
        """
        prepare the job ID string for wandb
        """
        field_dict = self.__dict__
        keytoval_str = "_".join([JobID.dataset_list_to_str(field_dict[key], key)
                                 if type(field_dict[key]) == list
                                 else str(field_dict[key])
                                 for key in field_dict.keys() if not key.endswith("_full")])
        return keytoval_str

    def to_jobid_string(self):
        """
        convert the current JobID into a blob name string which contains all the fields
        """
        list_keys = list(JobID.__dataclass_fields__.keys())
        field_dict = self.__dict__
        keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key)
                                 if type(field_dict[key]) == list
                                 else key + "=" + str(field_dict[key])
                                 for key in list_keys if not key.endswith("_full")])
        return keytoval_str

    def to_partial_jobid_string(self):
        """
        convert the current JobID into a blob name string which only contains the fields whose values are not "None"
        """
        list_keys = list(JobID.__dataclass_fields__.keys())
        field_dict = self.__dict__  # field_dict contains only the fields that have been assigned
        keytoval_str = "_".join([key + "=" + JobID.dataset_list_to_str(field_dict[key], key)
                                 if type(field_dict[key]) == list
                                 else key + "=" + str(field_dict[key])
                                 for key in list_keys if key in field_dict.keys()])
        return keytoval_str

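    # A minimal sketch (hedged) of the jobid string produced by to_jobid_string,
    # using the unit-test configuration above:
    #
    #     jobid = JobID()
    #     jobid.set_unittest_config()
    #     jobid.to_jobid_string()
    #     # -> "dat=glue_subdat=mrpc_mod=hpo_spa=uni_test_arg=dft_alg=bs_pru=None_"
    #     #    "pre=mobilebert_presz=small_spt=rspt_rep=0_sddt=43_sdhf=42"
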
    @staticmethod
    def blobname_to_jobid_dict(keytoval_str):
        """
        converting an azure blobname to a JobID config,
        e.g., blobname = "dat=glue_subdat=cola_mod=bestnn_spa=buni_arg=cus_
        alg=bs_pru=None_pre=funnel_presz=xlarge_spt=rspt_rep=0.json"
        the converted jobid dict = {dat = ['glue'], subdat = 'cola', mod = 'bestnn',
                                    spa = 'buni', arg = 'cus', alg = 'bs', pru = 'None',
                                    pre = 'funnel', presz = 'xlarge', spt = 'rspt',
                                    rep = 0, sddt = 43, sdhf = 42)
        """
        field_keys = [key for key in
                      list(JobID.__dataclass_fields__.keys()) if not key.endswith("_full")]
        regex_expression = ".*" + "_".join([key + "=(?P<" + key + ">.*)" for key in field_keys]) + r"\.(json|zip)"
        result = re.search(regex_expression, keytoval_str)
        if result:
            result_dict = {}
            for key in field_keys:
                if key == "dat":
                    result_dict[key] = [result.group(key)]
                elif key == "rep":
                    try:
                        result_dict[key] = int(result.group(key))
                    except IndexError:
                        result_dict[key] = -1
                else:
                    result_dict[key] = result.group(key)
            return result_dict
        else:
            return None

    @staticmethod
    def dataset_list_to_str(dataset_name, key):
        if key == "dat":
            assert isinstance(dataset_name, list)
            return "-".join(dataset_name)
        else:
            return dataset_name

    def set_jobid_from_arg_list(self,
                                **jobid_list
                                ):
        """
        set the jobid from a dict object
        """

        for key in jobid_list.keys():
            assert key in JobID.__dataclass_fields__.keys()
            setattr(self, key, jobid_list[key])

    @staticmethod
    def convert_blobname_to_jobid(blobname):
        """
        converting a blobname string to a JobID object
        """
        jobconfig_dict = JobID.blobname_to_jobid_dict(blobname)
        if jobconfig_dict:
            jobconfig = JobID()
            jobconfig.set_jobid_from_arg_list(**jobconfig_dict)
            return jobconfig
        else:
            return None

    @staticmethod
    def get_full_data_name(dataset_name, subdataset_name=None):
        """
        convert a dataset name and sub dataset name to a full dataset name
        """
        full_dataset_name = dataset_name
        if subdataset_name:
            full_dataset_name = full_dataset_name + "_" + subdataset_name
        return full_dataset_name

    def get_jobid_full_data_name(self):
        """
        get the full dataset name of the current JobID object
        """
        return JobID.get_full_data_name(JobID.dataset_list_to_str(self.dat, "dat"), self.subdat)

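    # A minimal sketch (hedged): JobID.get_full_data_name("glue", "rte") -> "glue_rte";
    # get_jobid_full_data_name applies the same conversion to the object's own
    # dat and subdat fields.
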
    @staticmethod
    def _extract_model_type_with_keywords_match(pre_full):
        matched_model_type = []
        for each_model_type in HF_MODEL_LIST:
            if each_model_type in pre_full:
                matched_model_type.append(each_model_type)
        assert len(matched_model_type) > 0
        return max(enumerate(matched_model_type), key=lambda x: len(x[1]))[1]

    @staticmethod
    def extract_model_type(full_model_name):
        model_config = AutoConfig.from_pretrained(full_model_name)
        config_json_file = model_config.get_config_dict(full_model_name)[0]
        try:
            model_type = config_json_file["model_type"]
        except KeyError:
            # fall back to matching the model name against known model types
            model_type = JobID._extract_model_type_with_keywords_match(full_model_name)
        return model_type

    def set_jobid_from_console_args(self, console_args):
        self.dat = console_args.dataset_subdataset_name.split(":")[0].split(",")
        self.subdat = console_args.dataset_subdataset_name.split(":")[1]
        self.mod = console_args.algo_mode
        self.spa = console_args.space_mode
        self.arg = console_args.search_alg_args_mode
        self.alg = console_args.algo_name
        self.pru = console_args.pruner
        self.pre_full = console_args.pretrained_model_size.split(":")[0]
        self.pre = JobID.extract_model_type(self.pre_full)
        self.presz = console_args.pretrained_model_size.split(":")[1]
        self.spt = console_args.resplit_mode
        self.rep = console_args.rep_id
        self.sddt = console_args.seed_data
        self.sdhf = console_args.seed_transformers

    def legacy_old_blobname_to_new_blobname(self,
                                            old_blobname):
        spa_id2val = {
            0: "gnr",
            1: "uni"
        }
        alg_id2val = {
            0: "bs",
            1: "optuna",
            2: "cfo"
        }
        pre_id2val = {
            0: "xlnet-base-cased",
            1: "albert-large-v1",
            2: "distilbert-base-uncased",
            3: "microsoft/deberta-base",
            4: "funnel-transformer/small-base",
            5: "microsoft/deberta-large",
            6: "funnel-transformer/large-base",
            7: "funnel-transformer/intermediate-base",
            8: "funnel-transformer/xlarge-base"
        }
        presz_id2val = {
            0: "base",
            1: "small",
            2: "base",
            3: "base",
            4: "base",
            5: "large",
            6: "large",
            7: "intermediate",
            8: "xlarge"
        }
        spt_id2val = {
            0: "rspt",
            1: "ori"
        }
        result_grid = re.search(r".*_mod(el)?(?P<model_id>\d+)_None_None(_spt(?P<split_id>\d+))?_rep(?P<rep_id>\d+).log",
                                old_blobname)
        result = re.search(
            r".*_mod(el)?(?P<model_id>\d+)_(alg)?(?P<algo_id>\d+)_(spa)?"
            r"(?P<space_id>\d+)(_spt(?P<split_id>\d+))?_rep(?P<rep_id>\d+).log",
            old_blobname)
        if result_grid:
            dat = [old_blobname.split("/")[1].split("_")[0]]
            subdat = old_blobname.split("/")[1].split("_")[1]
            mod = "hpo"
            spa = None
            arg = None
            alg = None
            pru = None
            pre = pre_id2val[int(result_grid.group("model_id"))]
            presz = presz_id2val[int(result_grid.group("model_id"))]
            try:
                spt = spt_id2val[int(result_grid.group("split_id"))]
            except KeyError:
                spt = spt_id2val[0]
            rep = None
            self.set_jobid_from_arg_list(dat=dat, subdat=subdat, mod=mod, spa=spa, arg=arg, alg=alg,
                                         pru=pru, pre=pre, presz=presz, spt=spt, rep=rep)
            return self.to_jobid_string()
        if result:
            dat = [old_blobname.split("/")[1].split("_")[0]]
            subdat = old_blobname.split("/")[1].split("_")[1]
            mod = "hpo"
            spa = spa_id2val[int(result.group("space_id"))]
            arg = "dft"
            alg = alg_id2val[int(result.group("algo_id"))]
            pru = "None"
            pre = pre_id2val[int(result.group("model_id"))]
            presz = presz_id2val[int(result.group("model_id"))]
            try:
                spt = spt_id2val[int(result.group("split_id"))]
            except KeyError:
                spt = spt_id2val[0]
            rep = int(result.group("rep_id"))
            self.set_jobid_from_arg_list(dat=dat, subdat=subdat, mod=mod, spa=spa, arg=arg, alg=alg,
                                         pru=pru, pre=pre, presz=presz, spt=spt, rep=rep)
            return self.to_jobid_string()
        return None


class AzureUtils:

    def __init__(self,
                 root_log_path=None,
                 console_args=None,
                 jobid=None,
                 autohf=None):
        if root_log_path:
            self.root_log_path = root_log_path
        else:
            self.root_log_path = "logs_azure"
        self.jobid = jobid
        self.console_args = console_args
        self.autohf = autohf
        if console_args:
            wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path)
            self._container_name = container_name
            self._azure_key = azure_key

    def _get_complete_connection_string(self):
        return "DefaultEndpointsProtocol=https;AccountName=docws5141197765;AccountKey=" \
               + self._azure_key + ";EndpointSuffix=core.windows.net"

    def _init_azure_clients(self):
        connection_string = self._get_complete_connection_string()
        container_client = ContainerClient.from_connection_string(conn_str=connection_string,
                                                                  container_name=self._container_name)
        return container_client

    def _init_blob_client(self,
                          local_file_path):
        connection_string = self._get_complete_connection_string()
        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        blob_client = blob_service_client.get_blob_client(container=self._container_name, blob=local_file_path)
        return blob_client

    def upload_local_file_to_azure(self, local_file_path):
        blob_client = self._init_blob_client(local_file_path)
        with open(local_file_path, "rb") as fin:
            blob_client.upload_blob(fin, overwrite=True)

    def download_azure_blob(self, blobname):
        blob_client = self._init_blob_client(blobname)
        pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(
            parents=True, exist_ok=True)
        with open(blobname, "wb") as fout:
            fout.write(blob_client.download_blob().readall())

    def write_exception(self):
        result_json = {
            # datetime objects are not JSON serializable, so store the timestamp as a string
            "timestamp": str(datetime.now()),
        }
        local_file_path = self.generate_local_json_path()
        self.create_local_json_and_upload(result_json, local_file_path)

    def extract_log_from_analysis(self,
                                  analysis):
        """
        Extracting a json object for storing the key information returned from tune.run
        """
        json_log = []
        for each_trial in analysis.trials:
            trial_id = each_trial.trial_id
            start_time = each_trial.start_time
            last_update_time = each_trial.last_update_time
            config = each_trial.config
            try:
                metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric]
                time_stamp = each_trial.metric_analysis['timestamp']
                json_log.append({"trial_id": trial_id,
                                 "start_time": start_time,
                                 "last_update_time": last_update_time,
                                 "config": config,
                                 "metric_score": metric_score,
                                 "time_stamp": time_stamp})
            except KeyError:
                pass
        return json_log

    def write_autohf_output(self,
                            json_log=None,
                            valid_metric=None,
                            predictions=None,
                            duration=None):
        """
        write the key info from a job and upload to azure blob storage
        """
        local_file_path = self.generate_local_json_path()
        output_json = {}
        if json_log:
            output_json["val_log"] = json_log
        if valid_metric:
            output_json["valid_metric"] = valid_metric
        if duration:
            output_json["duration"] = duration
        if len(output_json) > 0:
            self.create_local_json_and_upload(output_json, local_file_path)
        if predictions is not None:
            self.create_local_prediction_and_upload(local_file_path, predictions)

    def generate_local_json_path(self):
        """
        return a path string for storing the json file locally
        """
        full_dataset_name = self.jobid.get_jobid_full_data_name()
        jobid_str = self.jobid.to_jobid_string()
        local_file_path = os.path.join(self.root_log_path, full_dataset_name, jobid_str + ".json")
        pathlib.Path(os.path.join(self.root_log_path, full_dataset_name)).mkdir(parents=True, exist_ok=True)
        return local_file_path

    def create_local_json_and_upload(self, result_json, local_file_path):
        with open(local_file_path, "w") as fout:
            fout.write(json.dumps(result_json))
            fout.flush()
        self.upload_local_file_to_azure(local_file_path)

def legacy_to_json(self):
|
||||
container_client = self._init_azure_clients()
|
||||
for old_blob in container_client.list_blobs():
|
||||
new_jobid_str = self.jobid.legacy_old_blobname_to_new_blobname(old_blob.name)
|
||||
if new_jobid_str:
|
||||
self.download_azure_blob(old_blob.name)
|
||||
with open(old_blob.name, "r") as fin:
|
||||
alllines = fin.readlines()
|
||||
wandb_group_name = alllines[0].rstrip("\n:")
|
||||
timestamp = re.search(
|
||||
r"timestamp:(?P<timestamp>.*):",
|
||||
alllines[1].strip("\n")).group("timestamp")
|
||||
duration = re.search(
|
||||
r"duration:(?P<duration>.*)$",
|
||||
alllines[3].strip("\n")).group("duration")
|
||||
sample_num = int(re.search(
|
||||
r"sample_num: (?P<sample_num>\d+)$",
|
||||
alllines[4].strip("\n")).group("sample_num"))
|
||||
validation = {"accuracy": float(re.search(
|
||||
"validation accuracy: (?P<validation>.*)$",
|
||||
alllines[2].strip("\n")).group("validation"))}
|
||||
test = None
|
||||
if len(alllines) > 6:
|
||||
result_test = re.search("test accuracy:(?P<test>.*)$", alllines[6].strip("\n"))
|
||||
if result_test:
|
||||
test = json.loads(result_test.group("test"))
|
||||
yml_file = None
|
||||
if len(alllines) > 8:
|
||||
if alllines[8].startswith("aml"):
|
||||
yml_file = alllines[8].strip("\n")
|
||||
new_json = {"wandb_group_name": wandb_group_name,
|
||||
"validation": validation,
|
||||
"test": test,
|
||||
"timestamp": timestamp,
|
||||
"duration": duration,
|
||||
"sample_num": sample_num,
|
||||
"yml_file": yml_file}
|
||||
full_dataset_name = self.jobid.get_jobid_full_data_name()
|
||||
new_blobname = os.path.join("logs_azure/", full_dataset_name, new_jobid_str + ".json")
|
||||
self.create_local_json_and_upload(new_json, new_blobname)
|
||||
|
||||
def create_local_prediction_and_upload(self,
|
||||
local_json_file,
|
||||
predictions):
|
||||
"""
|
||||
store predictions (a .zip file) locally and upload
|
||||
"""
|
||||
azure_save_file_name = local_json_file.split("/")[-1][:-5]
|
||||
local_archive_path = self.autohf.output_prediction(predictions,
|
||||
output_prediction_path=self.console_args.data_root_dir + "result/",
|
||||
output_zip_file_name=azure_save_file_name)
|
||||
self.upload_local_file_to_azure(local_archive_path)
|
||||
|
||||
def get_ranked_configs(self, metric_mode):
|
||||
"""
|
||||
extract the configs (ranked in descebding order by the score) for the azure file of the current object
|
||||
(defined by self.jobid_config)
|
||||
"""
|
||||
azure_file_path = self.generate_local_json_path()
|
||||
self.download_azure_blob(azure_file_path)
|
||||
|
||||
json_log = json.load(open(azure_file_path, "r"))
|
||||
assert "val_log" in json_log
|
||||
|
||||
trialid_to_score = {}
|
||||
trialid_to_config = {}
|
||||
|
||||
for each_entry in json_log["val_log"]:
|
||||
trial_id = each_entry["trial_id"]
|
||||
config = each_entry["config"]
|
||||
this_score = each_entry["metric_score"][metric_mode]
|
||||
trialid_to_config[trial_id] = config
|
||||
trialid_to_score[trial_id] = this_score
|
||||
|
||||
sorted_trialid_to_score = sorted(trialid_to_score.items(), key=lambda x: x[1], reverse=True)
|
||||
return [trialid_to_config[entry[0]] for entry in sorted_trialid_to_score]
|
||||
|
||||
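The ranking logic in `get_ranked_configs` is easiest to see on a toy `val_log`. A minimal sketch with made-up trial entries (the trial ids, configs, and scores below are illustrative only):

```python
# toy version of the "val_log" entries written by write_autohf_output
val_log = [
    {"trial_id": "a1", "config": {"learning_rate": 3e-5}, "metric_score": {"max": 0.71}},
    {"trial_id": "b2", "config": {"learning_rate": 1e-5}, "metric_score": {"max": 0.74}},
    {"trial_id": "c3", "config": {"learning_rate": 5e-5}, "metric_score": {"max": 0.69}},
]
metric_mode = "max"
trialid_to_score = {e["trial_id"]: e["metric_score"][metric_mode] for e in val_log}
trialid_to_config = {e["trial_id"]: e["config"] for e in val_log}
ranked = [trialid_to_config[tid] for tid, _ in
          sorted(trialid_to_score.items(), key=lambda x: x[1], reverse=True)]
print(ranked[0])  # {'learning_rate': 1e-05} -- the best trial's config comes first
```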
    @staticmethod
    def is_after_earliest_time(this_blob, earliest_time):
        import pytz
        utc = pytz.UTC
        if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])):
            return True
        return False

    def get_blob_list_matching_partial_jobid(self, root_log_path, partial_jobid, earliest_time=None):
        """
        Get all blobs whose jobid configs match the partial_jobid
        """
        blob_list = []
        container_client = self._init_azure_clients()
        jobid_config = JobID()
        for each_blob in container_client.list_blobs():
            if each_blob.name.startswith(root_log_path):
                each_jobconfig = jobid_config.convert_blobname_to_jobid(each_blob.name)
                is_append = False
                if each_jobconfig:
                    if each_jobconfig.is_match(partial_jobid):
                        is_append = True
                    if earliest_time and not AzureUtils.is_after_earliest_time(each_blob, earliest_time):
                        is_append = False
                    if is_append:
                        blob_list.append((each_jobconfig, each_blob))
        return blob_list

    @staticmethod
    def extract_config_and_score(blobname):
        data_json = json.load(open(blobname, "r"))
        return [(x['config'], x['metric_score']["max"], x['start_time']) for x in data_json['val_log']]

    def get_config_and_score_from_partial_jobid(self,
                                                root_log_path,
                                                partial_jobid,
                                                group_attrs,
                                                method,
                                                earliest_time=None):
        """
        Get the best config and best score for each job matching the partial_jobid
        """
        matched_blob_list = self.get_blob_list_matching_partial_jobid(
            root_log_path,
            partial_jobid,
            earliest_time=earliest_time)
        group_dict = {}
        for (each_jobconfig, each_blob) in matched_blob_list:
            self.download_azure_blob(each_blob.name)
            config_and_score = AzureUtils.extract_config_and_score(each_blob.name)
            if method == "unsorted":
                sorted_config_and_score = config_and_score
            elif method == "sort_time":
                sorted_config_and_score = sorted(config_and_score, key=lambda x: x[2], reverse=False)
            else:
                sorted_config_and_score = sorted(config_and_score, key=lambda x: x[1], reverse=True)
            group_attr_list = []
            for each_attr in group_attrs:
                group_val = getattr(each_jobconfig, each_attr)
                if isinstance(group_val, list):
                    group_attr_list.append(JobID.dataset_list_to_str(group_val, each_attr))
                else:
                    group_attr_list.append(group_val)
            group_attr_tuple = tuple(group_attr_list)
            group_dict.setdefault(group_attr_tuple, [])
            group_dict[group_attr_tuple].append([(config, score, each_blob.name)
                                                 for (config, score, ts) in sorted_config_and_score])
        return group_dict

    def get_validation_perf(self, console_args=None, partial_jobid_config=None):
        """
        Get the validation score for all blobs matching the partial_jobid_config
        """
        if partial_jobid_config.pre == "electra":
            dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2", "qnli", "mnli"]
        else:
            dataset_namelist = ["wnli", "rte", "mrpc", "cola", "stsb", "sst2"]
        dataset_vallist1 = [0] * len(dataset_namelist)
        dataset_vallist2 = [0] * len(dataset_namelist)

        matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path,
                                                                      partial_jobid_config)
        for (each_jobconfig, each_blob) in matched_blob_list:
            subdat_name = each_jobconfig.subdat
            self.download_azure_blob(each_blob.name)
            data_json = json.load(open(each_blob.name, "r"))
            print(len(data_json["val_log"]))
            validation_metric = data_json['valid_metric']
            try:
                dataset_idx = dataset_namelist.index(subdat_name)
                dataset_vallist1[dataset_idx], dataset_vallist2[dataset_idx] \
                    = self.get_validation_metricstr(validation_metric)
            except ValueError:
                pass
        # print(" & ".join(dataset_vallist1))
        # print(", ,".join(dataset_vallist2))

    def get_validation_metricstr(self, validation_metric):
        """
        Get a string representation of the validation scores for pasting into a Google spreadsheet
        """
        validation_str1 = validation_str2 = ""
        is_first = True
        for key in ["f1", "accuracy", "pearson", "spearmanr", "matthews_correlation"]:
            if "eval_" + key in validation_metric.keys():
                if is_first:
                    validation_str1 += str("%.1f" % (validation_metric["eval_" + key] * 100))
                    validation_str2 += str(validation_metric["eval_" + key] * 100)
                    is_first = False
                else:
                    validation_str1 += "/" + str("%.1f" % (validation_metric["eval_" + key] * 100))
                    validation_str2 += "," + str(validation_metric["eval_" + key] * 100)
        return validation_str1, validation_str2

    def get_test_perf(self, partial_jobid_config=None, result_root_dir=None):
        """
        Get the test scores for all blobs matching the partial_jobid_config
        """
        import shutil
        from flaml.nlp.dataset.submission_auto import file_name_mapping_glue, output_blank_tsv
        matched_blob_list = self.get_blob_list_matching_partial_jobid("data/", partial_jobid_config)
        partial_jobid_str = partial_jobid_config.to_partial_jobid_string()
        output_dir = os.path.join(result_root_dir, partial_jobid_str)
        if os.path.exists(output_dir):
            assert os.path.isdir(output_dir)
        else:
            os.mkdir(output_dir)
        output_blank_tsv(output_dir)

        for (each_jobconfig, each_blob) in matched_blob_list:
            subdat_name = each_jobconfig.subdat
            self.download_azure_blob(each_blob.name)
            import zipfile
            if os.path.exists(each_blob.name[:-4]):
                assert os.path.isdir(each_blob.name[:-4])
            else:
                os.mkdir(each_blob.name[:-4])
            with zipfile.ZipFile(each_blob.name, 'r') as zip_ref:
                zip_ref.extractall(each_blob.name[:-4])
            src = os.path.join(each_blob.name[:-4], file_name_mapping_glue[subdat_name][0])
            dst = os.path.join(output_dir, file_name_mapping_glue[subdat_name][0])
            shutil.copy(src, dst)
        shutil.make_archive(os.path.join(output_dir), 'zip', output_dir)

    def get_best_perf_config(self, console_args, jobid_config):
        """
        Get the config of the best-performing trial
        """
        matched_blob_list = self.get_blob_list_matching_partial_jobid(console_args.azure_root_log_path, jobid_config)
        assert len(matched_blob_list) == 1, "expecting exactly one blob to match the jobid"

        each_jobconfig, each_blob = matched_blob_list[0]
        self.download_azure_blob(each_blob.name)
        data_json = json.load(open(each_blob.name, "r"))

        sorted_entries = sorted(data_json['val_log'], key=lambda x: x['metric_score']['max'], reverse=True)
        best_config = sorted_entries[0]['config']
        if jobid_config.subdat != "mrpc":
            best_score = sorted_entries[0]['metric_score']['max']
        else:
            best_score = (data_json["valid_metric"]["eval_f1"], data_json["valid_metric"]["eval_accuracy"])
        return best_config, best_score

357
flaml/nlp/result_analysis/generate_result_summary.py
Normal file
@@ -0,0 +1,357 @@
def extract_ranked_config_score(console_args, partial_config_dict):
    from .azure_utils import AzureUtils
    azure_utils = AzureUtils(console_args=console_args)

    for method, each_partial_config in partial_config_dict.items():
        dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(each_partial_config,
                                                                                       ["dat", "subdat"], method)
        for each_dataset, configscorelist in dataset2configscorelist.items():
            for config_idx in range(len(configscorelist)):
                avg_scores = configscorelist[config_idx][0][1]
                top_config = configscorelist[config_idx][0][0]
                print(avg_scores)
                print(top_config)
                # print(method + "," + str(each_dataset) + ",rep=" + str(config_idx))
                # print("avg score :" + str(avg_scores))
                # print(''.join(['{0}={1}\n'.format(key, top_config[key]) for key in sorted(top_config.keys())]))


def extract_sorted_config_list(dataset2configscorelist, topk):
    dataset2topkconfigs = {}
    for dataset, configscorelist in dataset2configscorelist.items():
        all_configscorelist = []
        for scorelist in configscorelist:
            for item in scorelist:
                if item[0] not in [x[0] for x in all_configscorelist]:
                    all_configscorelist.append(item)
        sorted_all_configscorelist = sorted(all_configscorelist, key=lambda x: x[1], reverse=True)
        topk_configs = []

        for each_hp in ("learning_rate", "num_train_epochs", "per_device_train_batch_size", "warmup_ratio",
                        "weight_decay", "adam_epsilon"):
            topk_configs.append((each_hp, [sorted_all_configscorelist[x][0][each_hp] for x in range(topk)]))
        topk_configs.append(("perf", [sorted_all_configscorelist[x][1] for x in range(topk)]))

        dataset2topkconfigs[dataset] = topk_configs
    return dataset2topkconfigs


def dict2tuple(this_dict):
    tuple_list = []
    for key in sorted(this_dict.keys()):
        tuple_list.append(this_dict[key])
    return tuple(tuple_list)

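`dict2tuple` makes configs hashable by flattening them into a tuple of values in key-sorted order, which is what lets the functions below use configs as dict keys. A small example (the hyperparameter values are illustrative):

```python
from flaml.nlp.result_analysis.generate_result_summary import dict2tuple

config = {"learning_rate": 3e-5, "num_train_epochs": 3, "adam_epsilon": 1e-8}
# values are taken in sorted-key order: adam_epsilon, learning_rate, num_train_epochs
assert dict2tuple(config) == (1e-8, 3e-5, 3)
```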
def merge_configscore_list(small_dataset2configscorelist):
    dataset2merged_configscorelist = {}
    for (dataset, each_configscore_list) in small_dataset2configscorelist.items():
        merged_configscore_list = {}
        for rep_id in range(len(each_configscore_list)):
            for each_configscore_entry in each_configscore_list[rep_id]:
                is_exist = False
                for configscore in merged_configscore_list.keys():
                    if configscore[0] == each_configscore_entry[0]:
                        is_exist = True
                        break
                if is_exist is False:
                    merged_configscore_list[dict2tuple(each_configscore_entry[0])] = each_configscore_entry[1]
        dataset2merged_configscorelist[dataset] = merged_configscore_list
    return dataset2merged_configscorelist


def get_result(console_args, partial_jobid_config):
    from .azure_utils import AzureUtils, JobID
    azure_utils = AzureUtils(console_args=console_args)
    dataset2configscorelist = \
        azure_utils.get_config_and_score_from_partial_config(
            console_args.azure_root_log_path,
            partial_jobid_config,
            ["dat", "subdat"],
            "hpo")
    for dataset, configscore_list in dataset2configscorelist.items():
        for rep_id in range(len(configscore_list)):
            config_dict = configscore_list[rep_id][0][0]
            score = configscore_list[rep_id][0][1]
            print(dataset, rep_id)
            print_config(config_dict)
            print(score)
            print()


def print_config(config_dict):
    for key in sorted(config_dict.keys()):
        if key in ("attention_probs_dropout_prob", "hidden_dropout_prob", "seed"):
            continue
        if key == "per_device_train_batch_size":
            short_key = "batch_size"
        elif key == "num_train_epochs":
            short_key = "epochs"
        else:
            short_key = key
        print(short_key, config_dict[key])


def compare_small_vs_large(console_args):
    from .azure_utils import AzureUtils, JobID
    azure_utils = AzureUtils(console_args=console_args)

    partial_jobid_config = JobID()
    partial_jobid_config.pre = "deberta"
    partial_jobid_config.mod = "hpo"
    partial_jobid_config.spa = "uni"
    partial_jobid_config.presz = "base"

    small_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config,
                                                                                         ["dat", "subdat"], "list")

    small_mergedconfiglist = merge_configscore_list(small_dataset2configscorelist)

    partial_jobid_config = JobID()
    partial_jobid_config.pre = "deberta"
    partial_jobid_config.mod = "hpo"
    partial_jobid_config.spa = "uni"
    partial_jobid_config.presz = "large"

    large_dataset2configscorelist = azure_utils.get_config_and_score_from_partial_config(partial_jobid_config,
                                                                                         ["dat", "subdat"], "hpo")

    large_mergedconfiglist = merge_configscore_list(large_dataset2configscorelist)

    for (each_dataset, merged_small_configlist) in small_mergedconfiglist.items():
        merged_large_configlist = large_mergedconfiglist[each_dataset]
        print(each_dataset)
        print()
        for (each_tuple, large_score) in sorted(merged_large_configlist.items(), key=lambda x: x[1], reverse=True):
            # small_score = merged_small_configlist[each_tuple]
            is_in_onlysmall = each_tuple in small_mergedconfiglist[each_dataset]
            for each_val in each_tuple:
                print(each_val, end=", ")
            print(large_score, is_in_onlysmall, sep=",")
        print()
        for (each_tuple, small_score) in \
                sorted(small_mergedconfiglist[each_dataset].items(), key=lambda x: x[1], reverse=True):
            is_in_large = each_tuple in large_mergedconfiglist[each_dataset]
            for each_val in each_tuple:
                print(each_val, end=", ")
            print(small_score, is_in_large, sep=",")


def check_conflict(console_args, partial_jobid_config_list):
    from .azure_utils import AzureUtils, JobID
    azure_utils = AzureUtils(console_args=console_args)
    for each_partial_config in partial_jobid_config_list:
        dataset2configscorelist = \
            azure_utils.get_config_and_score_from_partial_config(
                console_args.azure_root_log_path,
                each_partial_config,
                ["dat", "subdat"],
                "unsorted")
        for (dataset, configscorelists) in dataset2configscorelist.items():
            config2score = {}
            for each_configscorelist in configscorelists:
                for (config, score, blobname) in each_configscorelist:
                    config_dict = dict2tuple(config)
                    try:
                        config2score[config_dict].append((score, blobname))
                    except KeyError:
                        config2score.setdefault(config_dict, [])
                        config2score[config_dict].append((score, blobname))
            dup_keys = [config for config in config2score.keys() if len(config2score[config]) > 1]
            dupkey_count = [len(set([y[0] for y in config2score[x]])) for x in dup_keys]
            print(dataset)
            print(len(config2score))
            print(len(dupkey_count))
            print(dupkey_count)


def print_cfo(console_args):
    from .azure_utils import JobID, AzureUtils
    jobid_config = JobID()
    jobid_config.mod = "bestnn"
    jobid_config.spa = "buni"
    jobid_config.alg = "bs"
    jobid_config.pre = "funnel"
    jobid_config.presz = "xlarge"

    for each_rep in range(3):
        jobid_config.rep = each_rep
        azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)

        dataset2configscorelist = \
            azure_utils.get_config_and_score_from_partial_config(
                console_args.azure_root_log_path,
                jobid_config,
                ["dat", "subdat"],
                "sort_time")
        dataset = ('glue', 'mrpc')
        configscorelist = dataset2configscorelist[dataset]
        count = 0
        print(dataset)
        for (config, score, blobname) in sorted(configscorelist[0], key=lambda x: x[1], reverse=True)[0:1]:
            print(count)
            print(score)
            print_config(config)
            print()
            count += 1


def download_validation(console_args, result_root_dir):
    from .azure_utils import JobID, AzureUtils
    partial_jobid_config = JobID()
    partial_jobid_config.mod = "grid"
    partial_jobid_config.pre = "roberta"
    partial_jobid_config.presz = "base"
    # partial_jobid_config.alg = "optuna"
    # partial_jobid_config.pru = "asha"
    partial_jobid_config.rep = 0

    azure_utils = AzureUtils(console_args=console_args, jobid=partial_jobid_config)
    azure_utils.get_validation_perf(console_args=console_args, partial_jobid_config=partial_jobid_config)
    azure_utils.get_test_perf(partial_jobid_config, result_root_dir)


def get_result_str(jobid_config, val_score, test_score, best_config, subdat2config=None, mode="grid"):
    result_str = jobid_config.subdat.upper() + ","
    if jobid_config.alg:
        result_str += jobid_config.alg.upper().replace("OPTUNA", "Optuna")
        if jobid_config.pru is not None and jobid_config.pru != "None":
            result_str += "+" + jobid_config.pru.upper()
    if jobid_config.subdat != "mrpc":
        result_str += ",rep " + str(jobid_config.rep) + " & " + str(
            "%.1f" % (val_score * 100)) + " & " + str(test_score)
    else:
        result_str += ",rep " + str(jobid_config.rep) + " & " + str(
            "%.1f" % (val_score[0] * 100)) + "/" + str(
            "%.1f" % (val_score[1] * 100)) + " & " + str(test_score)
    for hp in ["learning_rate", "warmup_ratio", "per_device_train_batch_size", "hidden_dropout", "attention_dropout",
               "weight_decay"]:
        if hp not in best_config:
            result_str += " & "
        else:
            if mode == "hpo":
                if best_config[hp] > 1.2 * subdat2config[jobid_config.subdat][hp]:
                    wrap_left = "\\cellcolor{green!85}{"
                elif best_config[hp] > subdat2config[jobid_config.subdat][hp]:
                    wrap_left = "\\cellcolor{green!15}{"
                elif best_config[hp] < subdat2config[jobid_config.subdat][hp] / 1.2:
                    wrap_left = "\\cellcolor{red!85}{"
                else:
                    wrap_left = "\\cellcolor{red!15}{"
                wrap_right = "}"
            else:
                wrap_left = wrap_right = ""
            if hp == "per_device_train_batch_size" or hp == "learning_rate":
                wrap_left = wrap_right = ""
            if hp == "learning_rate":
                result_str += " & " + wrap_left + "{:.1e}".format(best_config[hp]) + wrap_right
            elif hp == "per_device_train_batch_size":
                result_str += " & " + wrap_left + str(best_config[hp]) + wrap_right
            else:
                result_str += " & " + wrap_left + str("%.3f" % best_config[hp]) + wrap_right
    return result_str + "\\\\"

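In "grid" mode, `get_result_str` emits one LaTeX table row per task: the score columns first, then the hyperparameters. A small worked example; the `SimpleNamespace` stands in for a `JobID` and the config values are illustrative:

```python
from types import SimpleNamespace
from flaml.nlp.result_analysis.generate_result_summary import get_result_str

# illustrative stand-in for a JobID in grid mode (alg/pru unset)
jobid_config = SimpleNamespace(subdat="rte", alg=None, pru=None, rep=0)
best_config = {"learning_rate": 3e-05, "warmup_ratio": 0.1, "per_device_train_batch_size": 32,
               "hidden_dropout": 0.1, "attention_dropout": 0.1, "weight_decay": 0.01}
row = get_result_str(jobid_config, val_score=0.731, test_score="73.1", best_config=best_config)
print(row)
# RTE,,rep 0 & 73.1 & 73.1 & 3.0e-05 & 0.100 & 32 & 0.100 & 0.100 & 0.010\\
```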
def extract_grid(console_args, jobid_config, overfitting_subdat, test_scores):
    from .azure_utils import JobID, AzureUtils
    key2printstr = {}
    subdat2config = {}
    for idx in range(len(overfitting_subdat)):
        jobid_config.subdat = overfitting_subdat[idx]
        jobid_config.mod = "grid"
        jobid_config.rep = 0
        azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
        best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config)
        best_config["hidden_dropout"] = 0.1
        best_config["attention_dropout"] = 0.1
        test_score = test_scores[idx]
        key2printstr[jobid_config.subdat.upper() + ", grid"] = get_result_str(jobid_config, val_score,
                                                                              test_score, best_config)
        subdat2config[jobid_config.subdat] = best_config
    print()
    for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]):
        print(printstr)
    return subdat2config


def extract_hpo(
        console_args,
        jobid_config,
        overfitting_subdat,
        overfitting_alg,
        overfitting_pru,
        overfitting_rep,
        subdat2config,
        test_scores):
    from .azure_utils import AzureUtils
    key2printstr = {}
    for idx in range(len(overfitting_subdat)):
        jobid_config.subdat = overfitting_subdat[idx]
        jobid_config.alg = overfitting_alg[idx]
        jobid_config.pru = overfitting_pru[idx]
        jobid_config.rep = overfitting_rep[idx]
        azure_utils = AzureUtils(console_args=console_args, jobid=jobid_config)
        best_config, val_score = azure_utils.get_best_perf_config(console_args, jobid_config)
        test_score = test_scores[idx]
        key2printstr[jobid_config.subdat.upper() + "," + jobid_config.alg.upper() + ","
                     + jobid_config.pru + ",rep " + str(jobid_config.rep)] \
            = get_result_str(jobid_config, val_score, test_score, best_config, subdat2config, mode="hpo")

    for key, printstr in sorted(key2printstr.items(), key=lambda x: x[0]):
        print(printstr)


def extract_roberta_overfitting_configs(console_args):
    from .azure_utils import JobID, AzureUtils
    jobid_config = JobID()
    jobid_config.pre = "roberta"
    jobid_config.presz = "base"

    overfitting_subdat = ["rte", "mrpc", "cola", "sst2", "stsb"]
    test_scores = ["73.1", "91.4/88.5", "61.4", "96", "89.5/88.7"]
    subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores)

    jobid_config = JobID()
    jobid_config.pre = "roberta"
    jobid_config.presz = "base"

    overfitting_subdat = ["rte", "rte", "rte", "mrpc", "mrpc", "mrpc", "sst2",
                          "rte", "mrpc", "mrpc", "stsb", "sst2", "sst2",
                          "rte", "rte", "mrpc", "mrpc", "sst2", "sst2"]
    overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "rs", "rs",
                       "rs", "rs", "rs", "rs", "rs", "rs",
                       "optuna", "optuna", "optuna", "optuna", "optuna", "optuna"]
    overfitting_pru = ["None", "None", "None", "None", "None", "None", "None",
                       "asha", "asha", "asha", "asha", "asha", "asha",
                       "asha", "asha", "asha", "asha", "asha", "asha"]
    overfitting_rep = [0, 1, 2, 0, 1, 2, 0,
                       1, 0, 2, 2, 1, 2,
                       1, 2, 0, 1, 1, 2]
    test_scores = ["71.5", "72.3", "72.2", "90.5/87.1", "90.5/87.4", "90.5/87.2", "95.6",
                   "72.4", "90.7/87.4", "91.0/87.9", "89.4/88.8", "95.2", "95.7",
                   "72.4", "72.4", "90.8/87.4", "90.3/86.5", "95.1", "95.8"]
    extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep,
                subdat2config, test_scores)


def extract_electra_overfitting_configs(console_args):
    from .azure_utils import JobID, AzureUtils
    jobid_config = JobID()
    jobid_config.pre = "electra"
    jobid_config.presz = "base"

    overfitting_subdat = ["rte", "qnli", "cola"]
    test_scores = ["74.4", "93.2", "64.8"]
    subdat2config = extract_grid(console_args, jobid_config, overfitting_subdat, test_scores)

    jobid_config = JobID()
    jobid_config.pre = "electra"
    jobid_config.presz = "base"

    overfitting_subdat = ["rte", "rte", "qnli", "cola", "qnli", "cola"]
    overfitting_alg = ["rs", "rs", "rs", "rs", "rs", "optuna"]
    overfitting_pru = ["None", "None", "None", "asha", "asha", "asha"]
    overfitting_rep = [0, 1, 0, 2, 0, 0]
    test_scores = ["73.8", "74.3", "92.8", "64.7", "92.9", "63.6"]
    extract_hpo(console_args, jobid_config, overfitting_subdat, overfitting_alg, overfitting_pru, overfitting_rep,
                subdat2config, test_scores)

71
flaml/nlp/result_analysis/wandb_utils.py
Normal file
@@ -0,0 +1,71 @@
import os
from ..utils import get_wandb_azure_key
import subprocess
import wandb
import hashlib
from time import time


class WandbUtils:

    # Documentation on the wandb setting:
    # There are two ways to initialize wandb in tune.run:
    # (1) using WandbLoggerCallback, by adding the following argument to tune.run:
    #     callbacks=[WandbLoggerCallback(
    #         project="hpo",
    #         api_key=os.environ["WANDB_API_KEY"],
    #         group=os.environ["WANDB_RUN_GROUP"],
    #         log_config=True)]
    # (2) using the wandb_mixin decorator (the current implementation)
    # The current implementation uses (2) because (1) has the following bug:
    # in Ray 1.2, when using WandbLoggerCallback and setting a time limit via the time_budget_s argument,
    # the previous run does not clear its cache after tune.run returns. After the later run has started,
    # some zombie trials from the previous run remain in memory and never stop.
    # This bug can be reproduced by switching to (1), i.e., adding the above callbacks argument
    # and removing the wandb_mixin decorator.
    # https://docs.ray.io/en/master/tune/tutorials/tune-wandb.html

    def __init__(self,
                 is_wandb_on=None,
                 console_args=None,
                 jobid_config=None):
        if is_wandb_on:
            wandb_key, azure_key, container_name = get_wandb_azure_key(console_args.key_path)
            subprocess.run(["wandb", "login", "--relogin", wandb_key])
            os.environ["WANDB_API_KEY"] = wandb_key
            os.environ["WANDB_MODE"] = "online"
        else:
            os.environ["WANDB_MODE"] = "disabled"
        self.jobid_config = jobid_config

    def set_wandb_per_trial(self):
        print("before wandb.init\n\n\n")
        if os.environ["WANDB_MODE"] == "online":
            os.environ["WANDB_SILENT"] = "false"
            return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
                              group=self.wandb_group_name,
                              name=str(WandbUtils._get_next_trial_ids()),
                              settings=wandb.Settings(
                                  _disable_stats=True),
                              reinit=False)
        else:
            return None

    @staticmethod
    def _get_next_trial_ids():
        hash = hashlib.sha1()
        hash.update(str(time()).encode('utf-8'))
        return "trial_" + hash.hexdigest()[:3]

    def set_wandb_per_run(self):
        os.environ["WANDB_RUN_GROUP"] = self.jobid_config.to_wandb_string() + wandb.util.generate_id()
        self.wandb_group_name = os.environ["WANDB_RUN_GROUP"]
        if os.environ["WANDB_MODE"] == "online":
            os.environ["WANDB_SILENT"] = "false"
            return wandb.init(project=self.jobid_config.get_jobid_full_data_name(),
                              group=os.environ["WANDB_RUN_GROUP"],
                              settings=wandb.Settings(
                                  _disable_stats=True),
                              reinit=False)
        else:
            return None

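For reference, option (2) from the comment block above looks roughly as follows inside a Tune trainable. A minimal sketch, assuming Ray 1.2's `ray.tune.integration.wandb.wandb_mixin`; the trainable, search space, and logged metric are illustrative:

```python
from ray import tune
from ray.tune.integration.wandb import wandb_mixin
import wandb


@wandb_mixin
def train_fn(config):
    # the mixin opens a wandb run per trial; wandb.log reports into it
    loss = (config["lr"] - 3e-5) ** 2
    wandb.log({"loss": loss})
    tune.report(loss=loss)


analysis = tune.run(
    train_fn,
    config={
        "lr": tune.loguniform(1e-6, 1e-4),
        # the mixin reads its settings from this reserved "wandb" key
        "wandb": {"project": "hpo", "api_key_file": "~/.wandb_key"},
    },
    num_samples=4)
```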
155
flaml/nlp/utils.py
Normal file
@@ -0,0 +1,155 @@
import argparse
import json
import os
import pathlib
import re
from dataclasses import dataclass, field


def dataset_subdataset_name_format_check(val_str):
    regex = re.compile(r"^[^:]*:[^:]*$")
    if not regex.match(val_str):
        raise argparse.ArgumentTypeError("dataset_subdataset_name must be in the format {data_name}:{subdata_name}")
    return val_str


def pretrained_model_size_format_check(val_str):
    regex = re.compile(r"^[^:]*:(small|base|large|xlarge)")
    if not regex.match(val_str):
        raise argparse.ArgumentTypeError("pretrained_model_size must be in the format {model_name}:{model_size}, "
                                         "where {model_name} is the name from huggingface.co/models and {model_size} "
                                         "is chosen from small, base, large, xlarge")
    return val_str


def load_console_args(**custom_data_args):
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--server_name', type=str, help='server name', required=False,
                            choices=["tmdev", "dgx", "azureml"], default="tmdev")
    arg_parser.add_argument('--algo_mode', type=str, help='hpo or grid search', required=False,
                            choices=["grid", "gridbert", "hpo", "hfhpo", "list_s", "list", "bestnn"], default="hpo")
    arg_parser.add_argument('--data_root_dir', type=str, help='data dir', required=False, default="data/")
    arg_parser.add_argument('--dataset_subdataset_name', type=dataset_subdataset_name_format_check,
                            help='dataset and subdataset name', required=False, default=None)
    arg_parser.add_argument('--space_mode', type=str, help='space mode', required=False,
                            choices=["gnr", "uni", "uni_test", "cus", "buni"], default="uni")
    arg_parser.add_argument('--search_alg_args_mode', type=str, help='search algorithm args mode', required=False,
                            choices=["dft", "exp", "cus"], default="dft")
    arg_parser.add_argument('--algo_name', type=str, help='algorithm', required=False,
                            choices=["bs", "optuna", "cfo", "rs"], default="bs")
    arg_parser.add_argument('--pruner', type=str, help='pruner', required=False,
                            choices=["asha", "None"], default="None")
    arg_parser.add_argument('--pretrained_model_size', type=pretrained_model_size_format_check,
                            help='pretrained model', required=False, default=None)
    arg_parser.add_argument('--sample_num', type=int, help='sample num', required=False, default=None)
    arg_parser.add_argument('--time_budget', type=int, help='time budget', required=False, default=None)
    arg_parser.add_argument('--time_as_grid', type=int, help='time as grid search', required=False, default=None)
    arg_parser.add_argument('--rep_id', type=int, help='rep id', required=False, default=0)
    arg_parser.add_argument('--azure_key', type=str, help='azure key', required=False, default=None)
    arg_parser.add_argument('--resplit_mode', type=str, help='resplit mode', required=False,
                            choices=["rspt", "ori"], default="ori")
    arg_parser.add_argument('--ds_config', type=str, help='deep speed config file path',
                            required=False, default=None)
    arg_parser.add_argument('--yml_file', type=str, help='yml file path', required=False, default="test.yml")
    arg_parser.add_argument('--key_path', type=str, help='path for key.json', required=False, default=None)
    arg_parser.add_argument('--root_log_path', type=str, help='root path for log', required=False,
                            default="logs_azure")
    arg_parser.add_argument('--round_idx', type=int, help='round idx for acl experiments', required=False, default=0)
    arg_parser.add_argument('--seed_data', type=int, help='seed of data shuffling', required=False, default=43)
    arg_parser.add_argument('--seed_transformers', type=int, help='seed of transformers', required=False, default=42)
    args, unknown = arg_parser.parse_known_args()

    for each_key in custom_data_args.keys():
        if args.__contains__(each_key):
            try:
                check_key_format_func = globals()[each_key + "_format_check"]
                check_key_format_func(custom_data_args[each_key])
            except KeyError:
                pass
            setattr(args, each_key, custom_data_args[each_key])
    return args

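`load_console_args` lets programmatic callers override any parsed flag through keyword arguments; when a matching `*_format_check` function exists in this module, the value is validated first. A minimal sketch (the model and dataset choices are just examples):

```python
from flaml.nlp.utils import load_console_args

# override two flags programmatically; both values pass their format checks
args = load_console_args(dataset_subdataset_name="glue:rte",
                         pretrained_model_size="google/electra-base-discriminator:base")
print(args.dataset_subdataset_name)  # glue:rte
```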
def get_wandb_azure_key(key_path):
    key_json = json.load(open(os.path.join(key_path, "key.json"), "r"))
    wandb_key = key_json["wandb_key"]
    azure_key = key_json["azure_key"]
    azure_container_name = key_json["container_name"]
    return wandb_key, azure_key, azure_container_name


def merge_dicts(dict1, dict2):
    for key2 in dict2.keys():
        if key2 in dict1:
            dict1_vals = set(dict1[key2])
            dict2_vals = set(dict2[key2])
            dict1[key2] = list(dict1_vals.union(dict2_vals))
        else:
            dict1[key2] = dict2[key2]
    return dict1

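`merge_dicts` takes the union of the value lists under each shared key; element order is not preserved because the union goes through a set. For example:

```python
from flaml.nlp.utils import merge_dicts

d1 = {"glue": ["rte", "mrpc"]}
d2 = {"glue": ["mrpc", "cola"], "squad": ["v1"]}
merged = merge_dicts(d1, d2)
assert sorted(merged["glue"]) == ["cola", "mrpc", "rte"]
assert merged["squad"] == ["v1"]
```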
def _check_dict_keys_overlaps(dict1: dict, dict2: dict):
    dict1_keys = set(dict1.keys())
    dict2_keys = set(dict2.keys())
    return len(dict1_keys.intersection(dict2_keys)) > 0


def _variable_override_default_alternative(logger, obj_ref, var_name, default_value, all_values, overriding_value=None):
    """
    Set the value of the attribute var_name on obj_ref. If overriding_value is specified, the attribute is set
    to overriding_value; otherwise it is set to default_value, and the alternatives in all_values are shown
    in a warning.
    """
    assert isinstance(all_values, list)
    if overriding_value:
        setattr(obj_ref, var_name, overriding_value)
        logger.warning("The value for {} is specified as {}".format(var_name, overriding_value))
    else:
        setattr(obj_ref, var_name, default_value)
        logger.warning("The value for {} is not specified, setting it to the default value {}. "
                       "Alternatively, you can set it to {}".format(var_name, default_value, ",".join(all_values)))


@dataclass
class PathUtils:
    hpo_ckpt_path: str = field(metadata={"help": "the directory for hpo output"})
    hpo_result_path: str = field(metadata={"help": "the directory for hpo result"})
    hpo_log_path: str = field(metadata={"help": "the directory for log"})
    hpo_config_path: str = field(metadata={"help": "the directory for config"})

    log_dir_per_run: str = field(metadata={"help": "log directory for each run."})
    result_dir_per_run: str = field(metadata={"help": "result directory for each run."})
    ckpt_dir_per_run: str = field(metadata={"help": "checkpoint directory for each run."})
    ckpt_dir_per_trial: str = field(metadata={"help": "checkpoint directory for each trial."})

    def __init__(self,
                 jobid_config,
                 hpo_data_root_path,
                 ):
        self.jobid_config = jobid_config
        self.hpo_data_root_path = hpo_data_root_path
        self.hpo_ckpt_path = os.path.join(hpo_data_root_path, "checkpoint")
        self.hpo_result_path = os.path.join(hpo_data_root_path, "result")
        self.hpo_log_path = self.hpo_result_path

    @staticmethod
    def init_and_make_one_dir(dir_path):
        assert dir_path
        if not os.path.exists(dir_path):
            pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True)

    def make_dir_per_run(self):
        jobid_str = self.jobid_config.to_jobid_string()
        self.ckpt_dir_per_run = os.path.join(self.hpo_ckpt_path, jobid_str)
        PathUtils.init_and_make_one_dir(self.ckpt_dir_per_run)

        self.result_dir_per_run = os.path.join(self.hpo_result_path, jobid_str)
        PathUtils.init_and_make_one_dir(self.result_dir_per_run)

        self.log_dir_per_run = os.path.join(self.hpo_log_path, jobid_str)
        PathUtils.init_and_make_one_dir(self.log_dir_per_run)

    def make_dir_per_trial(self, trial_id):
        jobid_str = self.jobid_config.to_jobid_string()
        ckpt_dir_per_run = os.path.join(self.hpo_ckpt_path, jobid_str)
        self.ckpt_dir_per_trial = os.path.join(ckpt_dir_per_run, jobid_str, trial_id)
        PathUtils.init_and_make_one_dir(self.ckpt_dir_per_trial)

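`PathUtils` lays out checkpoints, results, and logs per run under the data root. A minimal sketch of the resulting layout, where `<jobid>` is a placeholder for whatever `to_jobid_string()` returns:

```python
from flaml.nlp import JobID
from flaml.nlp.utils import PathUtils

jobid_config = JobID()
jobid_config.set_unittest_config()
path_utils = PathUtils(jobid_config, hpo_data_root_path="data/")
path_utils.make_dir_per_run()
# creates, with <jobid> = jobid_config.to_jobid_string():
#   data/checkpoint/<jobid>/   (path_utils.ckpt_dir_per_run)
#   data/result/<jobid>/       (path_utils.result_dir_per_run and log_dir_per_run,
#                               since hpo_log_path aliases hpo_result_path)
```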
43
notebook/flaml_autohf.ipynb
Normal file
@@ -0,0 +1,43 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "1. Electra Example"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

5
setup.py
@@ -51,6 +51,11 @@ setuptools.setup(
"optuna==2.3.0",
|
||||
"vowpalwabbit",
|
||||
"openml",
|
||||
"transformers==4.4.1",
|
||||
"wandb==0.10.26",
|
||||
"torch==1.8.1",
|
||||
"datasets==1.4.1",
|
||||
"azure-storage-blob"
|
||||
],
|
||||
"blendsearch": [
|
||||
"optuna==2.3.0"
|
||||
|
||||
75
test/hf/run_analysis.py
Normal file
@@ -0,0 +1,75 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
import argparse
from flaml.nlp.result_analysis.azure_utils import JobID


def create_partial_config_bestnn():
    jobid_config = JobID()
    # funnel xlarge
    # jobid_config.mod = "bestnn"
    jobid_config.spa = "uni"
    # jobid_config.arg = "cus"
    # jobid_config.alg = "cfo"
    jobid_config.pre = "funnel"
    jobid_config.presz = "xlarge"
    # funnel small
    # jobid_config.mod = "list"
    # jobid_config.pre = "funnel"
    # jobid_config.presz = "small"
    # jobid_config.rep = 0

    # deberta large
    # jobid_config.mod = "bestnn"
    # jobid_config.spa = "uni"
    # jobid_config.arg = "cus"
    # jobid_config.alg = "cfo"
    # jobid_config.pre = "deberta"
    # jobid_config.presz = "large"

    # deberta base
    # jobid_config.mod = "hpo"
    # jobid_config.pre = "deberta"
    # jobid_config.presz = "base"
    # jobid_config.rep = 0

    # deberta large
    # jobid_config.mod = "hpo"
    # jobid_config.pre = "deberta"
    # jobid_config.presz = "large"

    return jobid_config


def create_partial_config_list():
    jobid_config = JobID()
    jobid_config.mod = "list"
    jobid_config.spa = "uni"
    jobid_config.presz = "xlarge"
    return jobid_config


def create_partial_config_hpo():
    jobid_config = JobID()
    jobid_config.mod = "hpo"
    jobid_config.spa = "uni"
    return jobid_config


if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../")
    arg_parser.add_argument('--azure_root_log_path', type=str,
                            help='root log path of blob storage', required=True, default="logs_azure/")
    args = arg_parser.parse_args()

    partial_config_large = create_partial_config_bestnn()
    from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \
        print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs

    # get_result(args, partial_config_large)
    # check_conflict(args, [partial_config_large])
    download_validation(args, "/data/xliu127/projects/hyperopt/data/result/")

    # extract_roberta_overfitting_configs(args)

285
test/hf/run_autohf.py
Normal file
@@ -0,0 +1,285 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
import os
import shutil

from flaml.nlp import AutoTransformers
from flaml.nlp import AzureUtils, JobID
from flaml.nlp.utils import load_console_args

global azure_log_path
global azure_key


def get_resplit_portion(jobid_config):
    if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}:
        return {"source": ["train", "validation_matched"], "train": [0, 0.8], "validation": [0.8, 0.9],
                "test": [0.9, 1.0]}
    else:
        return {"source": ["train", "validation"], "train": [0, 0.8], "validation": [0.8, 0.9], "test": [0.9, 1.0]}


def get_preparedata_setting(args, jobid_config):
    preparedata_setting = {
        "server_name": args.server_name,
        "data_root_path": args.data_root_dir,
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "is_wandb_on": True
    }
    if jobid_config.spt == 'rspt':
        preparedata_setting["resplit_portion"] = get_resplit_portion(jobid_config)
    if ("albert" == jobid_config.pre and jobid_config.dat == ["squad"]) or \
            ("funnel" in jobid_config.pre and jobid_config.dat[0] in {"imdb", "yelp_review_full", "yelp_polarity",
                                                                      "amazon_polarity", "amazon_review_multi"}):
        preparedata_setting["max_seq_length"] = 512
    if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli":
        preparedata_setting["fold_name"] = ['train', 'validation_matched', 'test_matched']
    return preparedata_setting


def get_autohf_settings(args, **custom_args):
    autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1},
                       "num_samples": args.sample_num,
                       "time_budget": args.time_budget,
                       "ckpt_per_epoch": 1,
                       }
    for other_attr in ["ds_config", "rep_id"]:
        if hasattr(args, other_attr):
            autohf_settings[other_attr] = getattr(args, other_attr)
        else:
            autohf_settings[other_attr] = None
    if len(custom_args) > 0:
        autohf_settings.update(custom_args)
    return autohf_settings


def rm_home_result():
    from os.path import expanduser
    home = expanduser("~")
    if os.path.exists(home + "/ray_results/"):
        shutil.rmtree(home + "/ray_results/")


def get_best_base_config(args, jobid_config, autohf):
    import copy
    import re
    args_small = copy.deepcopy(args)
    args_small.algo_name = "optuna"
    args_small.search_alg_args_mode = "dft"
    args_small.algo_mode = "hpo"
    args_small.space_mode = "uni"
    args_small.pruner = "None"

    if "funnel" not in args_small.pretrained_model_size:
        args_small.algo_mode = "hpo"
    else:
        args_small.algo_mode = "list"
    args_small.sample_num = 10000
    args_small.time_budget = 3600
    args_small.rep_id = 0
    jobid_config_small = JobID(args_small)
    if jobid_config_small.pre == "deberta":
        jobid_config_small.presz = "base"
    else:
        jobid_config_small.presz = "small"
    jobid_config_small.pre_full = re.sub("(xlarge|large|intermediate)", jobid_config_small.presz,
                                         jobid_config_small.pre_full)
    azure_utils_small = AzureUtils(
        console_args=args_small,
        jobid=jobid_config_small,
        autohf=autohf)
    preparedata_setting = get_preparedata_setting(args, jobid_config)
    autohf.prepare_data(**preparedata_setting)
    autohf.set_metric()

    best_config = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)[0]
    return best_config


def search_base_and_search_lower_lr(args, jobid_config, autohf):
    best_config = get_best_base_config(args, jobid_config, autohf)

    import copy
    args_large = copy.deepcopy(args)
    args_large.time_budget = args.time_budget - 3600
    args_large.sample_num = 100000
    args_large.algo_name = args.algo_name
    args_large.search_alg_args_mode = "cus"
    args_large.space_mode = "buni"
    args_large.pruner = "None"
    jobid_config_large = JobID(args_large)
    jobid_config_large.presz = jobid_config.presz
    jobid_config_large.pre_full = jobid_config.pre_full
    azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)

    _test_hpo(args_large,
              jobid_config_large,
              autohf,
              azure_utils_large,
              autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config],
                                                                 "bound": {"learning_rate": {
                                                                     "u": best_config["learning_rate"]}}}))


def search_base_and_search_around_best(args, jobid_config, autohf):
    args.algo_name = "bs"
    args.search_alg_args_mode = "dft"
    args.spa = "uni"
    args.pru = "None"
    best_config = get_best_base_config(args, jobid_config, autohf)

    import copy
    args_large = copy.deepcopy(args)
    args_large.time_budget = args.time_budget - 3600
    args_large.sample_num = 100000
    args_large.algo_name = "cfo"
    args_large.search_alg_args_mode = "cus"
    args_large.space_mode = "uni"
    jobid_config_large = JobID(args_large)
    jobid_config_large.presz = jobid_config.presz
    jobid_config_large.pre_full = jobid_config.pre_full
    azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)

    _test_hpo(args_large,
              jobid_config_large,
              autohf,
              azure_utils_large,
              autohf_settings=get_autohf_settings(args_large, **{"points_to_evaluate": [best_config]}))


def evaluate_configs(autohf, args, ranked_all_configs):
    import copy
    this_args = copy.deepcopy(args)
    this_args.time_budget = 100000
    this_args.sample_num = int(len(ranked_all_configs))
    this_args.search_alg_args_mode = "cus"
    jobid_config = JobID(this_args)
    azure_utils_large = AzureUtils(console_args=this_args, jobid=jobid_config, autohf=autohf)
    _test_hpo(this_args,
              jobid_config,
              autohf,
              azure_utils_large,
              autohf_settings=get_autohf_settings(this_args, **{"points_to_evaluate": ranked_all_configs}))


def convert_config_to_different_size(origin_config, mode):
    import re
    import copy
    if mode == "small":
        new_config = copy.deepcopy(origin_config)
        if new_config.pre == "funnel":
            new_config.mod = "list"
        else:
            new_config.mod = "hpo"
        if new_config.pre == "funnel":
            new_config.presz = "small"
        else:
            new_config.presz = "base"
        new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full)
    elif mode == "large":
        new_config = copy.deepcopy(origin_config)
        new_config.mod = "hpo"
        if new_config.pre == "funnel":
            new_config.presz = "xlarge"
            new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full)
        else:
            new_config.presz = "large"
            new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full)

    return new_config

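`convert_config_to_different_size` rewrites both the size tag and the full model name, so a large jobid maps to its small-budget counterpart. A quick sketch (the `pre_full` value is an illustrative model name, and this is meant to run in the context of this script):

```python
from flaml.nlp import JobID

origin = JobID()
origin.pre = "funnel"
origin.presz = "xlarge"
origin.pre_full = "funnel-transformer/xlarge"  # illustrative full model name
small = convert_config_to_different_size(origin, mode="small")
print(small.mod, small.presz, small.pre_full)
# list small funnel-transformer/small
```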
def evaluate_small_best_configs_on_large(large_args, autohf):
    jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf)
    ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
    # pass autohf first to match the signature of evaluate_configs
    evaluate_configs(autohf, large_args, ranked_all_small_configs[:int(len(ranked_all_small_configs) / 2)])


def add_dict_item_to_list(this_list, this_dict):
    is_exist = len([x for x in this_list if x == this_dict]) > 0
    if not is_exist:
        this_list.append(this_dict)
    return this_list


def evaluate_large_best_configs_on_small(small_args, autohf):
    jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large")
    autohf.jobid_config = jobid_config_large
    autohf.set_metric()
    all_configs_from_large = []
    for rep_id in range(3):
        jobid_config_large.rep = rep_id
        azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf)
        ranked_all_large_configs = azure_utils_large.get_ranked_configs(autohf.metric_mode_name)
        for each_config in ranked_all_large_configs:
            all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config)
    jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf)
    ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
    for each_config in ranked_all_small_configs:
        all_configs_from_large = add_dict_item_to_list(all_configs_from_large, each_config)

    evaluate_configs(autohf, small_args, list(all_configs_from_large))


def _test_hpo(args,
              jobid_config,
              autohf,
              azure_utils=None,
              autohf_settings=None,
              ):
    try:
        if not azure_utils:
            azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf)
        preparedata_setting = get_preparedata_setting(args, jobid_config)
        autohf.prepare_data(**preparedata_setting)

        analysis = validation_metric = test_metric = None
        if not autohf_settings:
            autohf_settings = get_autohf_settings(args)
        if args.algo_mode != "hfhpo":
            validation_metric, analysis = autohf.fit(**autohf_settings, )
        else:
            autohf.fit_hf(**autohf_settings)

        if jobid_config.spt == "ori":
            predictions, test_metric = autohf.predict()
            if validation_metric:
                test_metric.update({"validation": validation_metric})
        else:
            predictions = None
            if test_metric:
                validation_metric.update({"test": test_metric})

        if analysis is not None:
            json_log = azure_utils.extract_log_from_analysis(analysis)
        else:
            json_log = None
        azure_utils.write_autohf_output(json_log=json_log,
                                        valid_metric=validation_metric,
                                        predictions=predictions,
                                        duration=autohf.last_run_duration)

    except AssertionError:
        azure_utils.write_exception()
    rm_home_result()


if __name__ == "__main__":
    autohf = AutoTransformers()
    args = load_console_args()
    jobid_config = JobID(args)

    if args.algo_mode in ("hpo", "hfhpo", "grid", "gridbert"):
        _test_hpo(args, jobid_config, autohf)
    elif args.algo_mode == "bestnn":
        search_base_and_search_lower_lr(args, jobid_config, autohf)
    elif args.algo_mode == "list":
        evaluate_small_best_configs_on_large(args, autohf)
    elif args.algo_mode == "list_s":
        evaluate_large_best_configs_on_small(args, autohf)

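The dispatch above is driven entirely by the console flags defined in flaml/nlp/utils.py, so the script can also be exercised programmatically instead of from the command line. A minimal sketch, run in this script's context (the dataset, model, and budgets are just examples, and a valid key.json is still required for the azure/wandb setup):

```python
from flaml.nlp import AutoTransformers, JobID
from flaml.nlp.utils import load_console_args

# override the defaults instead of passing command-line flags
args = load_console_args(dataset_subdataset_name="glue:rte",
                         pretrained_model_size="google/electra-base-discriminator:base",
                         algo_mode="hpo", sample_num=16, time_budget=3600)
autohf = AutoTransformers()
jobid_config = JobID(args)
_test_hpo(args, jobid_config, autohf)  # _test_hpo is defined in this script
```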
62
test/hf/test_mobilebert.py
Normal file
@@ -0,0 +1,62 @@
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
'''
global azure_log_path
global azure_key


def get_preparedata_setting(jobid_config):
    preparedata_setting = {
        "server_name": "tmdev",
        "data_root_path": "data/",
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "resplit_portion": {"source": ["train", "validation"],
                            "train": [0, 0.8],
                            "validation": [0.8, 0.9],
                            "test": [0.9, 1.0]}
    }
    return preparedata_setting


def get_autohf_settings():
    autohf_settings = {"resources_per_trial": {"cpu": 1},
                       "num_samples": 1,
                       "time_budget": 100000,
                       "ckpt_per_epoch": 1,
                       "fp16": False,
                       }
    return autohf_settings


def test_hpo():
    try:
        import ray
    except ImportError:
        return

    from flaml.nlp import AutoTransformers
    from flaml.nlp import JobID

    jobid_config = JobID()
    jobid_config.set_unittest_config()
    autohf = AutoTransformers()

    try:
        preparedata_setting = get_preparedata_setting(jobid_config)
        autohf.prepare_data(**preparedata_setting)

        autohf_settings = get_autohf_settings()
        validation_metric, analysis = autohf.fit(**autohf_settings, )

        predictions, test_metric = autohf.predict()
        if test_metric:
            validation_metric.update({"test": test_metric})

    except AssertionError:
        pass


if __name__ == "__main__":
    test_hpo()