mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
exception, coverage for autohf (#106)
* increase coverage * fixing exception messages * fixing import
This commit is contained in:
@@ -1,75 +0,0 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
# SECURITY: a GitHub personal-access token was committed on this line; it has been redacted — revoke the token.
|
||||
import argparse
|
||||
from flaml.nlp.result_analysis.azure_utils import JobID
|
||||
|
||||
|
||||
def create_partial_config_bestnn():
    """Build a partial JobID describing the "bestnn" funnel-xlarge experiment.

    Alternative presets for other model/size combinations are kept below as
    commented-out assignments for quick switching during experimentation.
    """
    partial_jobid = JobID()
    # active preset: funnel xlarge
    # partial_jobid.mod = "bestnn"
    partial_jobid.spa = "uni"
    # partial_jobid.arg = "cus"
    # partial_jobid.alg = "cfo"
    partial_jobid.pre = "funnel"
    partial_jobid.presz = "xlarge"

    # preset: funnel small
    # partial_jobid.mod = "list"
    # partial_jobid.pre = "funnel"
    # partial_jobid.presz = "small"
    # partial_jobid.rep = 0

    # preset: deberta large (bestnn)
    # partial_jobid.mod = "bestnn"
    # partial_jobid.spa = "uni"
    # partial_jobid.arg = "cus"
    # partial_jobid.alg = "cfo"
    # partial_jobid.pre = "deberta"
    # partial_jobid.presz = "large"

    # preset: deberta base (hpo)
    # partial_jobid.mod = "hpo"
    # partial_jobid.pre = "deberta"
    # partial_jobid.presz = "base"
    # partial_jobid.rep = 0

    # preset: deberta large (hpo)
    # partial_jobid.mod = "hpo"
    # partial_jobid.pre = "deberta"
    # partial_jobid.presz = "large"
    return partial_jobid
|
||||
|
||||
|
||||
def create_partial_config_list():
    """Build a partial JobID for "list" mode on the xlarge model size."""
    partial_jobid = JobID()
    partial_jobid.mod = "list"
    partial_jobid.spa = "uni"
    partial_jobid.presz = "xlarge"
    return partial_jobid
|
||||
|
||||
|
||||
def create_partial_config_hpo():
    """Build a partial JobID for plain "hpo" mode with the uniform space."""
    partial_jobid = JobID()
    partial_jobid.mod = "hpo"
    partial_jobid.spa = "uni"
    return partial_jobid
|
||||
|
||||
|
||||
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../")
    # NOTE: the original also passed default="logs_azure/" here, but argparse
    # never applies a default to a required argument, so it was dead code.
    arg_parser.add_argument('--azure_root_log_path', type=str,
                            help='root log path of blob storage', required=True)
    args = arg_parser.parse_args()

    partial_config_large = create_partial_config_bestnn()
    # imported lazily so the helpers above stay usable without the full package;
    # the unused names are kept for the commented-out analysis calls below
    from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \
        print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs

    # get_result(args, partial_config_large)
    # check_conflict(args, [partial_config_large])
    download_validation(args, "/data/xliu127/projects/hyperopt/data/result/")

    # extract_roberta_overfitting_configs(args)
|
||||
@@ -1,285 +0,0 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
# SECURITY: a GitHub personal-access token was committed on this line; it has been redacted — revoke the token.
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from flaml.nlp import AutoTransformers
|
||||
from flaml.nlp import AzureUtils, JobID
|
||||
from flaml.nlp.utils import load_console_args
|
||||
|
||||
global azure_log_path
|
||||
global azure_key
|
||||
|
||||
|
||||
def get_resplit_portion(jobid_config):
    """Return the 80/10/10 resplit fractions for the configured dataset.

    glue/mnli draws its source folds from ``validation_matched``; every other
    dataset uses the plain ``validation`` fold.
    """
    if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}:
        source_folds = ["train", "validation_matched"]
    else:
        source_folds = ["train", "validation"]
    return {"source": source_folds,
            "train": [0, 0.8],
            "validation": [0.8, 0.9],
            "test": [0.9, 1.0]}
|
||||
|
||||
|
||||
def get_preparedata_setting(args, jobid_config):
    """Assemble the kwargs for AutoTransformers.prepare_data from the console
    args and the job configuration."""
    setting = {
        "server_name": args.server_name,
        "data_root_path": args.data_root_dir,
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "is_wandb_on": True
    }
    if jobid_config.spt == 'rspt':
        setting["resplit_portion"] = get_resplit_portion(jobid_config)
    # long-document datasets get the 512-token budget
    is_albert_squad = "albert" == jobid_config.pre and jobid_config.dat == ["squad"]
    is_funnel_longtext = "funnel" in jobid_config.pre and jobid_config.dat[0] in {
        "imdb", "yelp_review_full", "yelp_polarity", "amazon_polarity", "amazon_review_multi"}
    if is_albert_squad or is_funnel_longtext:
        setting["max_seq_length"] = 512
    if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli":
        setting["fold_name"] = ['train', 'validation_matched', 'test_matched']
    return setting
|
||||
|
||||
|
||||
def get_autohf_settings(args, **custom_args):
    """Build the kwargs dict for AutoTransformers.fit.

    Args:
        args: console-args namespace providing ``sample_num`` and
            ``time_budget``; ``ds_config`` and ``rep_id`` are copied when
            present and default to None otherwise.
        **custom_args: extra fit kwargs merged on top (e.g. points_to_evaluate).

    Returns:
        dict of fit settings.
    """
    autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1},
                       "num_samples": args.sample_num,
                       "time_budget": args.time_budget,
                       "ckpt_per_epoch": 1,
                       }
    # getattr with a default replaces the original hasattr/else branching
    for other_attr in ("ds_config", "rep_id"):
        autohf_settings[other_attr] = getattr(args, other_attr, None)
    # dict.update on an empty mapping is a no-op, so no length guard is needed
    autohf_settings.update(custom_args)
    return autohf_settings
|
||||
|
||||
|
||||
def rm_home_result():
    """Delete ``~/ray_results/`` (ray tune's default output dir) if it exists."""
    from os.path import expanduser
    ray_results_dir = expanduser("~") + "/ray_results/"
    if os.path.exists(ray_results_dir):
        shutil.rmtree(ray_results_dir)
|
||||
|
||||
|
||||
def get_best_base_config(args, jobid_config, autohf):
    """Look up the best config found for the small/base variant of the model.

    Clones the console args into a fixed small-model HPO setup, derives the
    matching small JobID, and returns the top-ranked config recorded in azure.
    """
    import copy
    import re
    small_args = copy.deepcopy(args)
    small_args.algo_name = "optuna"
    small_args.search_alg_args_mode = "dft"
    small_args.algo_mode = "hpo"
    small_args.space_mode = "uni"
    small_args.pruner = "None"

    # funnel models use the pre-searched "list" mode; everything else plain hpo
    if "funnel" not in small_args.pretrained_model_size:
        small_args.algo_mode = "hpo"
    else:
        small_args.algo_mode = "list"
    small_args.sample_num = 10000
    small_args.time_budget = 3600
    small_args.rep_id = 0
    small_jobid = JobID(small_args)
    small_jobid.presz = "base" if small_jobid.pre == "deberta" else "small"
    small_jobid.pre_full = re.sub("(xlarge|large|intermediate)", small_jobid.presz,
                                  small_jobid.pre_full)
    small_azure_utils = AzureUtils(
        console_args=small_args,
        jobid=small_jobid,
        autohf=autohf)
    preparedata_setting = get_preparedata_setting(args, jobid_config)
    autohf.prepare_data(**preparedata_setting)
    autohf.set_metric()

    return small_azure_utils.get_ranked_configs(autohf.metric_mode_name)[0]
|
||||
|
||||
|
||||
def search_base_and_search_lower_lr(args, jobid_config, autohf):
    """Search the base model first, then search the large model with the
    learning rate upper-bounded by the base model's best learning rate."""
    best_config = get_best_base_config(args, jobid_config, autohf)

    import copy
    large_args = copy.deepcopy(args)
    large_args.time_budget = args.time_budget - 3600  # base search used 3600s
    large_args.sample_num = 100000
    large_args.algo_name = args.algo_name
    large_args.search_alg_args_mode = "cus"
    large_args.space_mode = "buni"
    large_args.pruner = "None"
    large_jobid = JobID(large_args)
    large_jobid.presz = jobid_config.presz
    large_jobid.pre_full = jobid_config.pre_full
    large_azure_utils = AzureUtils(console_args=large_args, jobid=large_jobid, autohf=autohf)

    bounded_args = {"points_to_evaluate": [best_config],
                    "bound": {"learning_rate": {"u": best_config["learning_rate"]}}}
    _test_hpo(large_args,
              large_jobid,
              autohf,
              large_azure_utils,
              autohf_settings=get_autohf_settings(large_args, **bounded_args))
|
||||
|
||||
|
||||
def search_base_and_search_around_best(args, jobid_config, autohf):
    """Find the best base-model config with BlendSearch defaults, then run CFO
    on the large model seeded at that config."""
    args.algo_name = "bs"
    args.search_alg_args_mode = "dft"
    args.spa = "uni"
    args.pru = "None"
    best_config = get_best_base_config(args, jobid_config, autohf)

    import copy
    large_args = copy.deepcopy(args)
    large_args.time_budget = args.time_budget - 3600  # base search used 3600s
    large_args.sample_num = 100000
    large_args.algo_name = "cfo"
    large_args.search_alg_args_mode = "cus"
    large_args.space_mode = "uni"
    large_jobid = JobID(large_args)
    large_jobid.presz = jobid_config.presz
    large_jobid.pre_full = jobid_config.pre_full
    large_azure_utils = AzureUtils(console_args=large_args, jobid=large_jobid, autohf=autohf)

    _test_hpo(large_args,
              large_jobid,
              autohf,
              large_azure_utils,
              autohf_settings=get_autohf_settings(large_args, points_to_evaluate=[best_config]))
|
||||
|
||||
|
||||
def evaluate_configs(autohf, args, ranked_all_configs):
    """Re-run a fixed list of configs by passing them as points_to_evaluate
    with the sample budget set to exactly the number of configs."""
    import copy
    eval_args = copy.deepcopy(args)
    eval_args.time_budget = 100000
    eval_args.sample_num = len(ranked_all_configs)
    eval_args.search_alg_args_mode = "cus"
    jobid_config = JobID(eval_args)
    azure_utils = AzureUtils(console_args=eval_args, jobid=jobid_config, autohf=autohf)
    _test_hpo(eval_args,
              jobid_config,
              autohf,
              azure_utils,
              autohf_settings=get_autohf_settings(eval_args, points_to_evaluate=ranked_all_configs))
|
||||
|
||||
|
||||
def convert_config_to_different_size(origin_config, mode):
    """Return a copy of *origin_config* re-targeted at another checkpoint size.

    mode="small": funnel -> presz "small" / mod "list"; others -> "base" / "hpo".
    mode="large": mod "hpo"; funnel -> presz "xlarge", others -> "large".
    Any other mode leaves the result unbound (UnboundLocalError), matching the
    original behavior.
    """
    import re
    import copy
    if mode == "small":
        new_config = copy.deepcopy(origin_config)
        is_funnel = new_config.pre == "funnel"
        new_config.mod = "list" if is_funnel else "hpo"
        new_config.presz = "small" if is_funnel else "base"
        new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full)
    elif mode == "large":
        new_config = copy.deepcopy(origin_config)
        new_config.mod = "hpo"
        if new_config.pre == "funnel":
            new_config.presz = "xlarge"
            new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full)
        else:
            new_config.presz = "large"
            new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full)

    return new_config
|
||||
|
||||
|
||||
def evaluate_small_best_configs_on_large(large_args, autohf):
    """Evaluate the top half of the small model's ranked configs on the large model.

    Fix: the original called ``evaluate_configs(large_args, configs)``, but
    evaluate_configs takes ``(autohf, args, ranked_all_configs)`` (see the
    correct 3-argument call in evaluate_large_best_configs_on_small), so the
    call raised TypeError at runtime.
    """
    jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf)
    ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
    evaluate_configs(autohf, large_args,
                     ranked_all_small_configs[:len(ranked_all_small_configs) // 2])
|
||||
|
||||
|
||||
def add_dict_item_to_list(this_list, this_dict):
    """Append *this_dict* to *this_list* unless an equal item is already there.

    Returns the (mutated) list so call sites can rebind it. Uses the ``in``
    operator instead of the original ``len([...]) > 0`` scan, which built a
    throwaway list and never short-circuited.
    """
    if this_dict not in this_list:
        this_list.append(this_dict)
    return this_list
|
||||
|
||||
|
||||
def evaluate_large_best_configs_on_small(small_args, autohf):
    """Merge the ranked configs of the large model (3 repetitions) with the
    small model's ranked configs, de-duplicate, and evaluate them all."""
    jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large")
    autohf.jobid_config = jobid_config_large
    autohf.set_metric()
    merged_configs = []
    for rep_id in range(3):
        jobid_config_large.rep = rep_id
        azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf)
        for config in azure_utils_large.get_ranked_configs(autohf.metric_mode_name):
            merged_configs = add_dict_item_to_list(merged_configs, config)
    jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf)
    for config in azure_utils_small.get_ranked_configs(autohf.metric_mode_name):
        merged_configs = add_dict_item_to_list(merged_configs, config)

    evaluate_configs(autohf, small_args, list(merged_configs))
|
||||
|
||||
|
||||
def _test_hpo(args,
              jobid_config,
              autohf,
              azure_utils=None,
              autohf_settings=None,
              ):
    """Run one HPO job end-to-end and upload its output to azure.

    AssertionErrors raised anywhere in the pipeline are recorded via
    azure_utils.write_exception() instead of propagating; the local
    ~/ray_results directory is removed in every case.
    """
    try:
        if not azure_utils:
            azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf)
        autohf.prepare_data(**get_preparedata_setting(args, jobid_config))

        analysis = validation_metric = test_metric = None
        if not autohf_settings:
            autohf_settings = get_autohf_settings(args)
        if args.algo_mode != "hfhpo":
            validation_metric, analysis = autohf.fit(**autohf_settings)
        else:
            autohf.fit_hf(**autohf_settings)

        if jobid_config.spt == "ori":
            predictions, test_metric = autohf.predict()
            if validation_metric:
                test_metric.update({"validation": validation_metric})
        else:
            predictions = None
            # NOTE(review): test_metric is always None on this branch, so this
            # update never fires; kept as-is to preserve behavior.
            if test_metric:
                validation_metric.update({"test": test_metric})

        json_log = azure_utils.extract_log_from_analysis(analysis) if analysis is not None else None
        azure_utils.write_autohf_output(json_log=json_log,
                                        valid_metric=validation_metric,
                                        predictions=predictions,
                                        duration=autohf.last_run_duration)

    except AssertionError:
        azure_utils.write_exception()
    rm_home_result()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    autohf = AutoTransformers()
    args = load_console_args()
    jobid_config = JobID(args)

    mode = args.algo_mode
    if mode in ("hpo", "hfhpo", "grid", "gridbert"):
        _test_hpo(args, jobid_config, autohf)
    elif mode == "bestnn":
        search_base_and_search_lower_lr(args, jobid_config, autohf)
    elif mode == "list":
        evaluate_small_best_configs_on_large(args, autohf)
    elif mode == "list_s":
        evaluate_large_best_configs_on_small(args, autohf)
|
||||
126
test/hf/test_cover_azure.py
Normal file
126
test/hf/test_cover_azure.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
test suites for covering azure_utils.py
|
||||
"""
|
||||
|
||||
|
||||
def get_preparedata_setting(jobid_config):
    """Build the kwargs dict for AutoTransformers.prepare_data in unit tests."""
    resplit_portion = {"source": ["train", "validation"],
                       "train": [0, 0.8],
                       "validation": [0.8, 0.9],
                       "test": [0.9, 1.0]}
    return {
        "server_name": "tmdev",
        "data_root_path": "data/",
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "resplit_portion": resplit_portion,
    }
|
||||
|
||||
|
||||
def get_console_args():
    """Return default console args preconfigured for a glue:mrpc test run."""
    from flaml.nlp.utils import load_dft_args
    console_args = load_dft_args()
    console_args.dataset_subdataset_name = "glue:mrpc"
    console_args.algo_mode = "hpo"
    console_args.space_mode = "uni"
    console_args.search_alg_args_mode = "dft"
    console_args.algo_name = "bs"
    console_args.pruner = "None"
    console_args.pretrained_model_size = "google/electra-base-discriminator:base"
    console_args.resplit_mode = "rspt"
    console_args.rep_id = 0
    console_args.seed_data = 43
    console_args.seed_transformers = 42
    return console_args
|
||||
|
||||
|
||||
def test_get_configblob_from_partial_jobid():
    """Cover JobID blob-name parsing and partial-jobid matching."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.result_analysis.azure_utils import JobID
    blob_name = ("dat=glue_subdat=cola_mod=grid_spa=cus_arg=dft_alg=grid"
                 "_pru=None_pre=deberta_presz=large_spt=rspt_rep=0_sddt=43"
                 "_sdhf=42_var1=1e-05_var2=0.0.json")
    partial_jobid = JobID()
    partial_jobid.pre = "deberta"
    partial_jobid.mod = "grid"
    partial_jobid.spa = "cus"
    partial_jobid.presz = "large"

    JobID.convert_blobname_to_jobid(blob_name).is_match(partial_jobid)

    partial_jobid = JobID()
    partial_jobid.pre = "deberta"
    partial_jobid.mod = "hpo"
    partial_jobid.spa = "cus"
    partial_jobid.presz = "large"
    partial_jobid.sddt = None

    JobID.convert_blobname_to_jobid(blob_name).is_match(partial_jobid)
|
||||
|
||||
|
||||
def test_jobid():
    """Cover JobID string conversions and classmethod helpers."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.result_analysis.azure_utils import JobID
    console_args = get_console_args()

    jobid_config = JobID(console_args)
    jobid_config.to_partial_jobid_string()
    JobID.convert_blobname_to_jobid("test")
    JobID.dataset_list_to_str("glue")
    JobID.get_full_data_name(["glue"], "mrpc")
    JobID._extract_model_type_with_keywords_match("google/electra-base-discriminator:base")

    jobid_config.to_wandb_string()
|
||||
|
||||
|
||||
def test_azureutils():
    """Cover AzureUtils output writing and config/score bookkeeping."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.result_analysis.azure_utils import AzureUtils, ConfigScore, JobID, ConfigScoreList
    from flaml.nlp import AutoTransformers

    console_args = get_console_args()
    console_args.key_path = "."
    jobid_config = JobID(console_args)
    autohf = AutoTransformers()
    autohf.jobid_config = jobid_config

    autohf.prepare_data(**get_preparedata_setting(jobid_config))

    each_configscore = ConfigScore(trial_id="test", start_time=0.0, last_update_time=0.0,
                                   config={}, metric_score={"max": 0.0}, time_stamp=0.0)
    configscore_list = ConfigScoreList([each_configscore])
    for sort_method in ("unsorted", "sort_time", "sort_accuracy"):
        configscore_list.sorted(sort_method)
    configscore_list.get_best_config()

    azureutils = AzureUtils(console_args=console_args, autohf=autohf)
    azureutils.autohf = autohf
    azureutils.root_log_path = "logs_azure/"

    azureutils.write_autohf_output(configscore_list=[each_configscore],
                                   valid_metric={},
                                   predictions=[],
                                   duration=0)

    azureutils.get_config_and_score_from_partial_jobid(root_log_path="data/", partial_jobid=jobid_config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # same order as the original explicit calls
    for _test in (test_get_configblob_from_partial_jobid, test_jobid, test_azureutils):
        _test()
|
||||
214
test/hf/test_cover_other.py
Normal file
214
test/hf/test_cover_other.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
test suites for covering other functions
|
||||
"""
|
||||
|
||||
from transformers import AutoConfig
|
||||
from flaml.nlp.huggingface.trainer import TrainerForAutoTransformers
|
||||
|
||||
|
||||
def get_console_args():
    """Return default console args preconfigured for a glue:mrpc test run."""
    from flaml.nlp.utils import load_dft_args
    console_args = load_dft_args()
    console_args.dataset_subdataset_name = "glue:mrpc"
    console_args.algo_mode = "hpo"
    console_args.space_mode = "uni"
    console_args.search_alg_args_mode = "dft"
    console_args.algo_name = "bs"
    console_args.pruner = "None"
    console_args.pretrained_model_size = "google/electra-base-discriminator:base"
    console_args.resplit_mode = "rspt"
    console_args.rep_id = 0
    console_args.seed_data = 43
    console_args.seed_transformers = 42
    return console_args
|
||||
|
||||
|
||||
def model_init():
    """Load the model for TrainerForAutoTransformers using the unit-test JobID."""
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()
    from flaml.nlp import AutoTransformers
    autohf = AutoTransformers()

    autohf.prepare_data(**get_preparedata_setting(jobid_config))
    return autohf._load_model()
|
||||
|
||||
|
||||
def get_preparedata_setting(jobid_config):
    """Build the kwargs dict for AutoTransformers.prepare_data in unit tests."""
    resplit_portion = {"source": ["train", "validation"],
                       "train": [0, 0.8],
                       "validation": [0.8, 0.9],
                       "test": [0.9, 1.0]}
    return {
        "server_name": "tmdev",
        "data_root_path": "data/",
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "resplit_portion": resplit_portion,
    }
|
||||
|
||||
|
||||
def test_dataprocess():
    """Increase coverage of flaml.nlp.dataset.dataprocess_auto by preparing
    data once for every tokenizer function that differs from the default."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp import AutoTransformers
    from flaml.nlp import JobID
    from flaml.nlp.dataset.dataprocess_auto import TOKENIZER_MAPPING

    jobid_config = JobID()
    jobid_config.set_unittest_config()
    autohf = AutoTransformers()

    dataset_name = JobID.dataset_list_to_str(jobid_config.dat)
    default_func = TOKENIZER_MAPPING[(dataset_name, jobid_config.subdat)]

    # set comprehension over items(): drops the list->set round trip and the
    # second mapping lookup per key of the original set([...]) form
    funcs_to_eval = {(dat, subdat)
                     for (dat, subdat), func in TOKENIZER_MAPPING.items()
                     if func != default_func}

    for (dat, subdat) in funcs_to_eval:
        print("loading dataset for {}, {}".format(dat, subdat))
        jobid_config.dat = dat.split(",")
        jobid_config.subdat = subdat

        preparedata_setting = get_preparedata_setting(jobid_config)
        autohf.prepare_data(**preparedata_setting)
|
||||
|
||||
|
||||
def test_gridsearch_space():
    """Cover AutoGridSearchSpace for every registered model type."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.hpo.grid_searchspace_auto import GRID_SEARCH_SPACE_MAPPING, AutoGridSearchSpace
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()

    for model_type in GRID_SEARCH_SPACE_MAPPING.keys():
        AutoGridSearchSpace.from_model_and_dataset_name(
            model_type, "base", jobid_config.dat, jobid_config.subdat, "hpo")
|
||||
|
||||
|
||||
def test_hpo_space():
    """Cover AutoHPOSearchSpace for every registered space mode, including the
    custom ("cus") and bounded-uniform ("buni") argument shapes."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace, HPO_SEARCH_SPACE_MAPPING
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()

    for space_mode in HPO_SEARCH_SPACE_MAPPING.keys():
        jobid_config.spa = space_mode
        if space_mode == "cus":
            custom_hpo_args = {"hpo_space": {"learning_rate": [1e-5]}}
        elif space_mode == "buni":
            best_config = {"learning_rate": 1e-5}
            custom_hpo_args = {"points_to_evaluate": [best_config],
                               "bound": {"learning_rate": {"u": best_config["learning_rate"]}}}
        else:
            custom_hpo_args = {}

        AutoHPOSearchSpace.from_model_and_dataset_name(jobid_config.spa, jobid_config.pre,
                                                       jobid_config.presz, jobid_config.dat,
                                                       jobid_config.subdat, **custom_hpo_args)
|
||||
|
||||
|
||||
def test_trainer():
    """Cover TrainerForAutoTransformers step/epoch/warmup conversions."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    num_train_epochs = 3
    num_train_examples = 100
    per_device_train_batch_size = 32
    device_count = 1
    max_steps = 1000
    warmup_steps = 100
    warmup_ratio = 0.1
    trainer = TrainerForAutoTransformers(model_init=model_init)
    trainer.convert_num_train_epochs_to_max_steps(
        num_train_epochs, num_train_examples, per_device_train_batch_size, device_count)
    trainer.convert_max_steps_to_num_train_epochs(
        max_steps, num_train_examples, per_device_train_batch_size, device_count)
    trainer.convert_warmup_ratio_to_warmup_steps(
        warmup_ratio,
        max_steps=max_steps,
        num_train_epochs=num_train_epochs,
        num_train_examples=num_train_examples,
        per_device_train_batch_size=per_device_train_batch_size,
        device_count=device_count)
    trainer.convert_warmup_steps_to_warmup_ratio(
        warmup_steps, num_train_epochs, num_train_examples,
        per_device_train_batch_size, device_count)
|
||||
|
||||
|
||||
def test_switch_head():
    """Cover AutoSeqClassificationHead for every registered model type."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()
    checkpoint_path = jobid_config.pre_full

    model_config = AutoConfig.from_pretrained(
        checkpoint_path,
        num_labels=AutoConfig.from_pretrained(checkpoint_path).num_labels)

    for model_type in list(MODEL_CLASSIFICATION_HEAD_MAPPING.keys()):
        jobid_config.pre = model_type
        AutoSeqClassificationHead.from_model_type_and_config(jobid_config.pre, model_config)
|
||||
|
||||
|
||||
def test_wandb_utils():
    """Cover WandbUtils run setup and trial-id generation."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    import os
    from flaml.nlp.result_analysis.wandb_utils import WandbUtils
    from flaml.nlp.result_analysis.azure_utils import JobID

    console_args = get_console_args()
    console_args.key_path = "."
    jobid_config = JobID(console_args)

    wandb_utils = WandbUtils(is_wandb_on=True, console_args=console_args, jobid_config=jobid_config)
    os.environ["WANDB_MODE"] = "online"
    wandb_utils.wandb_group_name = "test"
    wandb_utils._get_next_trial_ids()
    wandb_utils.set_wandb_per_run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # same order as the original explicit calls
    for _test in (test_wandb_utils, test_dataprocess, test_gridsearch_space,
                  test_hpo_space, test_trainer, test_switch_head):
        _test()
|
||||
@@ -1,7 +1,5 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
# SECURITY: a GitHub personal-access token was committed on this line; it has been redacted — revoke the token.
|
||||
|
||||
global azure_log_path
|
||||
global azure_key
|
||||
|
||||
@@ -26,6 +24,7 @@ def get_autohf_settings():
|
||||
"time_budget": 100000,
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
"ray_local_mode": True
|
||||
}
|
||||
return autohf_settings
|
||||
|
||||
@@ -38,24 +37,38 @@ def test_hpo():
|
||||
|
||||
from flaml.nlp import AutoTransformers
|
||||
from flaml.nlp import JobID
|
||||
from flaml.nlp import AzureUtils
|
||||
|
||||
jobid_config = JobID()
|
||||
jobid_config.set_unittest_config()
|
||||
autohf = AutoTransformers()
|
||||
|
||||
try:
|
||||
preparedata_setting = get_preparedata_setting(jobid_config)
|
||||
autohf.prepare_data(**preparedata_setting)
|
||||
preparedata_setting = get_preparedata_setting(jobid_config)
|
||||
autohf.prepare_data(**preparedata_setting)
|
||||
|
||||
autohf_settings = get_autohf_settings()
|
||||
validation_metric, analysis = autohf.fit(**autohf_settings, )
|
||||
autohf_settings = get_autohf_settings()
|
||||
autohf_settings["points_to_evaluate"] = [{"learning_rate": 2e-5}]
|
||||
validation_metric, analysis = autohf.fit(**autohf_settings)
|
||||
|
||||
predictions, test_metric = autohf.predict()
|
||||
if test_metric:
|
||||
validation_metric.update({"test": test_metric})
|
||||
predictions, test_metric = autohf.predict()
|
||||
if test_metric:
|
||||
validation_metric.update({"test": test_metric})
|
||||
|
||||
except AssertionError:
|
||||
pass
|
||||
azure_utils = AzureUtils(root_log_path="logs_test/", autohf=autohf)
|
||||
azure_utils._azure_key = "test"
|
||||
azure_utils._container_name = "test"
|
||||
|
||||
configscore_list = azure_utils.extract_configscore_list_from_analysis(analysis)
|
||||
azure_utils.write_autohf_output(configscore_list=configscore_list,
|
||||
valid_metric=validation_metric,
|
||||
predictions=predictions,
|
||||
duration=autohf.last_run_duration)
|
||||
|
||||
jobid_config.mod = "grid"
|
||||
autohf = AutoTransformers()
|
||||
|
||||
preparedata_setting = get_preparedata_setting(jobid_config)
|
||||
autohf.prepare_data(**preparedata_setting)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user