mirror of
https://github.com/microsoft/autogen.git
synced 2026-02-04 08:45:09 -05:00
75
test/hf/run_analysis.py
Normal file
75
test/hf/run_analysis.py
Normal file
@@ -0,0 +1,75 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
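# [REDACTED] A hard-coded string that appears to be a GitHub access token was removed from this comment; revoke it and supply credentials via the environment instead.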
|
||||
import argparse
|
||||
from flaml.nlp.result_analysis.azure_utils import JobID
|
||||
|
||||
|
||||
def create_partial_config_bestnn():
    """Build a partially filled ``JobID`` for the funnel / xlarge runs.

    Only ``spa``, ``pre`` and ``presz`` are assigned here; all other fields
    keep whatever ``JobID()`` gives them.

    Returns:
        The ``JobID`` instance with the three fields above set.
    """
    partial = JobID()
    for field, value in (("spa", "uni"), ("pre", "funnel"), ("presz", "xlarge")):
        setattr(partial, field, value)

    # Presets that were kept as disabled code alongside the active one:
    #   funnel xlarge extras: mod="bestnn", arg="cus", alg="cfo"
    #   funnel small:         mod="list", pre="funnel", presz="small", rep=0
    #   deberta large:        mod="bestnn", spa="uni", arg="cus", alg="cfo",
    #                         pre="deberta", presz="large"
    #   deberta base (hpo):   mod="hpo", pre="deberta", presz="base", rep=0
    #   deberta large (hpo):  mod="hpo", pre="deberta", presz="large"
    return partial
|
||||
|
||||
|
||||
def create_partial_config_list():
    """Build a partially filled ``JobID`` with mod "list", spa "uni", presz "xlarge".

    Returns:
        The ``JobID`` instance with those three fields set.
    """
    partial = JobID()
    partial.mod, partial.spa, partial.presz = "list", "uni", "xlarge"
    return partial
|
||||
|
||||
|
||||
def create_partial_config_hpo():
    """Build a partially filled ``JobID`` with mod "hpo" and spa "uni".

    Returns:
        The ``JobID`` instance with those two fields set.
    """
    partial = JobID()
    partial.mod, partial.spa = "hpo", "uni"
    return partial
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: parse the key / log path options, build the partial
    # JobID filter and run the currently selected analysis routine.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../")
    # This option used to be declared required=True, which made its default
    # unreachable (argparse rejects the command line before falling back to
    # it). It is optional now so "logs_azure/" is really used when the flag
    # is omitted; passing the flag explicitly behaves exactly as before.
    arg_parser.add_argument('--azure_root_log_path', type=str,
                            help='root log path of blob storage', required=False, default="logs_azure/")
    args = arg_parser.parse_args()

    partial_config_large = create_partial_config_bestnn()
    # Imported here (not at file top) as in the original script; the extra
    # names are the ones used by the disabled calls below.
    from flaml.nlp.result_analysis.generate_result_summary import (
        compare_small_vs_large, get_result, check_conflict,
        print_cfo, download_validation, extract_roberta_overfitting_configs,
        extract_electra_overfitting_configs,
    )

    # Alternative analyses, enabled by uncommenting:
    # get_result(args, partial_config_large)
    # check_conflict(args, [partial_config_large])
    download_validation(args, "/data/xliu127/projects/hyperopt/data/result/")

    # extract_roberta_overfitting_configs(args)
|
||||
285
test/hf/run_autohf.py
Normal file
285
test/hf/run_autohf.py
Normal file
@@ -0,0 +1,285 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
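# [REDACTED] A hard-coded string that appears to be a GitHub access token was removed from this comment; revoke it and supply credentials via the environment instead.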
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from flaml.nlp import AutoTransformers
|
||||
from flaml.nlp import AzureUtils, JobID
|
||||
from flaml.nlp.utils import load_console_args
|
||||
|
||||
# NOTE: a ``global`` statement at module scope does not create or change any
# binding; these names are only declared here, never assigned in this file.
global azure_log_path, azure_key
|
||||
|
||||
|
||||
def get_resplit_portion(jobid_config):
    """Return the resplit specification for the dataset named by ``jobid_config``.

    Args:
        jobid_config: object exposing ``dat`` and (when ``dat == ["glue"]``)
            ``subdat``.

    Returns:
        A dict with keys "source", "train", "validation" and "test". The
        second source fold is "validation_matched" for glue/mnli and
        "validation" otherwise; the three ranges are always 0-0.8, 0.8-0.9
        and 0.9-1.0.
    """
    # ``subdat`` is only consulted when the dataset is glue (short-circuit).
    is_mnli = jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}
    second_fold = "validation_matched" if is_mnli else "validation"
    return {
        "source": ["train", second_fold],
        "train": [0, 0.8],
        "validation": [0.8, 0.9],
        "test": [0.9, 1.0],
    }
|
||||
|
||||
|
||||
def get_preparedata_setting(args, jobid_config):
    """Assemble the keyword arguments later passed to ``autohf.prepare_data``.

    Args:
        args: console arguments; ``server_name`` and ``data_root_dir`` are read.
        jobid_config: job description; ``spt``, ``pre``, ``dat`` and ``subdat``
            are read.

    Returns:
        A dict with "server_name", "data_root_path", "max_seq_length",
        "jobid_config" and "is_wandb_on", plus "resplit_portion" when
        ``spt == 'rspt'`` and "fold_name" for glue/mnli.
    """
    setting = {
        "server_name": args.server_name,
        "data_root_path": args.data_root_dir,
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "is_wandb_on": True,
    }

    if jobid_config.spt == 'rspt':
        setting["resplit_portion"] = get_resplit_portion(jobid_config)

    # Datasets for which a funnel model gets the longer sequence length.
    funnel_long_datasets = {"imdb", "yelp_review_full", "yelp_polarity",
                            "amazon_polarity", "amazon_review_multi"}
    wants_long_sequences = (
        ("albert" == jobid_config.pre and jobid_config.dat == ["squad"])
        or ("funnel" in jobid_config.pre and jobid_config.dat[0] in funnel_long_datasets)
    )
    if wants_long_sequences:
        setting["max_seq_length"] = 512

    if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli":
        setting["fold_name"] = ['train', 'validation_matched', 'test_matched']

    return setting
|
||||
|
||||
|
||||
def get_autohf_settings(args, **custom_args):
    """Assemble the keyword arguments later passed to ``autohf.fit`` / ``fit_hf``.

    Args:
        args: console arguments; ``sample_num`` and ``time_budget`` are
            required, ``ds_config`` and ``rep_id`` are optional.
        **custom_args: extra entries merged last, so they override any
            setting built here.

    Returns:
        The settings dict. "ds_config" and "rep_id" are always present and
        are ``None`` when ``args`` lacks the attribute.
    """
    settings = {
        "resources_per_trial": {"gpu": 1, "cpu": 1},
        "num_samples": args.sample_num,
        "time_budget": args.time_budget,
        "ckpt_per_epoch": 1,
    }
    for optional_name in ("ds_config", "rep_id"):
        settings[optional_name] = getattr(args, optional_name, None)
    settings.update(custom_args)
    return settings
|
||||
|
||||
|
||||
def rm_home_result():
    """Delete the ``ray_results/`` directory under the user's home, if present."""
    ray_results_dir = os.path.expanduser("~") + "/ray_results/"
    if not os.path.exists(ray_results_dir):
        return
    shutil.rmtree(ray_results_dir)
|
||||
|
||||
|
||||
def get_best_base_config(args, jobid_config, autohf):
    """Return the top-ranked config recorded for the smaller variant of the model.

    A deep copy of ``args`` is rewritten to describe the small-model run
    (optuna, "dft" search args, "uni" space, no pruner, 10000 samples,
    3600 time budget, rep 0), a ``JobID`` / ``AzureUtils`` pair is built from
    it, the data is prepared with the caller's own ``args`` / ``jobid_config``
    and the first entry of ``get_ranked_configs`` is returned.

    Args:
        args: console arguments of the current run (left unmodified).
        jobid_config: job description of the current run.
        autohf: the AutoTransformers-like object; ``prepare_data`` and
            ``set_metric`` are called on it.

    Returns:
        The first element of ``azure_utils.get_ranked_configs(...)``.
    """
    from copy import deepcopy
    from re import sub

    small_args = deepcopy(args)
    small_args.algo_name = "optuna"
    small_args.search_alg_args_mode = "dft"
    small_args.space_mode = "uni"
    small_args.pruner = "None"
    # Funnel models use the "list" mode, everything else "hpo".
    small_args.algo_mode = "list" if "funnel" in small_args.pretrained_model_size else "hpo"
    small_args.sample_num = 10000
    small_args.time_budget = 3600
    small_args.rep_id = 0

    small_jobid = JobID(small_args)
    small_jobid.presz = "base" if small_jobid.pre == "deberta" else "small"
    # Swap the size token inside the full model name for the small size.
    small_jobid.pre_full = sub("(xlarge|large|intermediate)", small_jobid.presz, small_jobid.pre_full)

    small_azure_utils = AzureUtils(console_args=small_args, jobid=small_jobid, autohf=autohf)

    autohf.prepare_data(**get_preparedata_setting(args, jobid_config))
    autohf.set_metric()

    ranked = small_azure_utils.get_ranked_configs(autohf.metric_mode_name)
    return ranked[0]
|
||||
|
||||
|
||||
def search_base_and_search_lower_lr(args, jobid_config, autohf):
    """Fetch the best base config, then run ``_test_hpo`` seeded and bounded by it.

    The second run uses a deep copy of ``args`` with the time budget reduced
    by 3600, 100000 samples, "cus" search args, the "buni" space and no
    pruner. The best config is passed as the only point to evaluate and its
    learning rate as the "u" bound (presumably the upper bound - confirm
    against the search-space code).

    Args:
        args: console arguments of the current run.
        jobid_config: job description; ``presz`` and ``pre_full`` are copied
            onto the new ``JobID``.
        autohf: the AutoTransformers-like object handed through to the run.
    """
    from copy import deepcopy

    best_config = get_best_base_config(args, jobid_config, autohf)

    large_args = deepcopy(args)
    large_args.time_budget = args.time_budget - 3600
    large_args.sample_num = 100000
    large_args.algo_name = args.algo_name
    large_args.search_alg_args_mode = "cus"
    large_args.space_mode = "buni"
    large_args.pruner = "None"

    large_jobid = JobID(large_args)
    large_jobid.presz = jobid_config.presz
    large_jobid.pre_full = jobid_config.pre_full
    large_azure_utils = AzureUtils(console_args=large_args, jobid=large_jobid, autohf=autohf)

    large_settings = get_autohf_settings(
        large_args,
        points_to_evaluate=[best_config],
        bound={"learning_rate": {"u": best_config["learning_rate"]}},
    )
    _test_hpo(large_args, large_jobid, autohf, large_azure_utils, autohf_settings=large_settings)
|
||||
|
||||
|
||||
def search_base_and_search_around_best(args, jobid_config, autohf):
    """Fetch the best base config, then run a "cfo" search seeded with it.

    ``args`` is modified in place (algo name "bs", "dft" search args, "uni"
    space, no pruner) before the base config is looked up. The second run
    uses a deep copy of ``args`` with the time budget reduced by 3600,
    100000 samples, the "cfo" algorithm, "cus" search args and the "uni"
    space, with the best config as the only point to evaluate.

    Args:
        args: console arguments of the current run (mutated, see above).
        jobid_config: job description; ``presz`` and ``pre_full`` are copied
            onto the new ``JobID``.
        autohf: the AutoTransformers-like object handed through to the run.
    """
    import copy

    args.algo_name = "bs"
    args.search_alg_args_mode = "dft"
    # Every other function in this file configures the search space and the
    # pruner through ``space_mode`` / ``pruner``; the original only set
    # ``spa`` / ``pru`` here, which nothing else in the file reads from the
    # console arguments. Set the consistent names and keep the legacy ones
    # in case external code relies on them.
    args.space_mode = "uni"
    args.pruner = "None"
    args.spa = "uni"
    args.pru = "None"
    best_config = get_best_base_config(args, jobid_config, autohf)

    args_large = copy.deepcopy(args)
    args_large.time_budget = args.time_budget - 3600
    args_large.sample_num = 100000
    args_large.algo_name = "cfo"
    args_large.search_alg_args_mode = "cus"
    args_large.space_mode = "uni"
    jobid_config_large = JobID(args_large)
    jobid_config_large.presz = jobid_config.presz
    jobid_config_large.pre_full = jobid_config.pre_full
    azure_utils_large = AzureUtils(console_args=args_large, jobid=jobid_config_large, autohf=autohf)

    _test_hpo(args_large,
              jobid_config_large,
              autohf,
              azure_utils_large,
              autohf_settings=get_autohf_settings(args_large, points_to_evaluate=[best_config]))
|
||||
|
||||
|
||||
def evaluate_configs(autohf, args, ranked_all_configs):
    """Run ``_test_hpo`` over an explicit list of configurations.

    A deep copy of ``args`` is given a 100000 time budget, one sample per
    supplied config and "cus" search args; the configs are passed as
    ``points_to_evaluate``.

    Args:
        autohf: the AutoTransformers-like object handed through to the run.
        args: console arguments of the current run (left unmodified).
        ranked_all_configs: sized collection of configs to evaluate.
    """
    from copy import deepcopy

    run_args = deepcopy(args)
    run_args.time_budget = 100000
    run_args.sample_num = len(ranked_all_configs)
    run_args.search_alg_args_mode = "cus"

    run_jobid = JobID(run_args)
    run_azure_utils = AzureUtils(console_args=run_args, jobid=run_jobid, autohf=autohf)
    run_settings = get_autohf_settings(run_args, points_to_evaluate=ranked_all_configs)
    _test_hpo(run_args, run_jobid, autohf, run_azure_utils, autohf_settings=run_settings)
|
||||
|
||||
|
||||
def convert_config_to_different_size(origin_config, mode):
    """Return a deep copy of ``origin_config`` retargeted at another model size.

    Args:
        origin_config: job description exposing ``pre`` and ``pre_full``; it
            is never modified.
        mode: "small" or "large".

            * "small": ``mod`` becomes "list" (funnel) or "hpo" (others),
              ``presz`` becomes "small" (funnel) or "base" (others), and the
              first-matching "xlarge" / "large" / "intermediate" tokens in
              ``pre_full`` are replaced by the new ``presz``.
            * "large": ``mod`` becomes "hpo", ``presz`` becomes "xlarge"
              (funnel) or "large" (others), and "small" in ``pre_full`` is
              replaced by the new ``presz``.

    Returns:
        The converted copy.

    Raises:
        ValueError: if ``mode`` is neither "small" nor "large". (Previously
            this fell through to an UnboundLocalError on ``return``.)
    """
    import re
    import copy

    if mode not in ("small", "large"):
        raise ValueError("mode must be 'small' or 'large', got {!r}".format(mode))

    new_config = copy.deepcopy(origin_config)
    is_funnel = new_config.pre == "funnel"

    if mode == "small":
        new_config.mod = "list" if is_funnel else "hpo"
        new_config.presz = "small" if is_funnel else "base"
        new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full)
    else:
        new_config.mod = "hpo"
        new_config.presz = "xlarge" if is_funnel else "large"
        # NOTE(review): only "small" is substituted here, so a non-funnel
        # name containing "base" keeps it even though presz becomes "large".
        # Left unchanged; confirm whether "base" should be replaced as well.
        new_config.pre_full = re.sub("(small)", new_config.presz, origin_config.pre_full)

    return new_config
|
||||
|
||||
|
||||
def evaluate_small_best_configs_on_large(large_args, autohf):
    """Evaluate the better half of the small-model configs with ``large_args``.

    The ``JobID`` built from ``large_args`` is converted to its "small"
    counterpart (rep 0), the ranked configs stored for it are fetched, and
    the first half of that ranking is handed to ``evaluate_configs``.

    Args:
        large_args: console arguments describing the large-model run.
        autohf: the AutoTransformers-like object; ``metric_mode_name`` is
            read from it and it is passed on to ``evaluate_configs``.
    """
    jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf)
    ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
    top_half = ranked_all_small_configs[:len(ranked_all_small_configs) // 2]
    # evaluate_configs is declared as (autohf, args, ranked_all_configs); the
    # original call omitted ``autohf`` and therefore raised a TypeError.
    evaluate_configs(autohf, large_args, top_half)
|
||||
|
||||
|
||||
def add_dict_item_to_list(this_list, this_dict):
    """Append ``this_dict`` to ``this_list`` unless an equal item is already there.

    Args:
        this_list: list that is extended in place.
        this_dict: item to add; compared to existing items with ``==``.

    Returns:
        The same ``this_list`` object.
    """
    for existing in this_list:
        if existing == this_dict:
            break
    else:
        # No equal entry was found.
        this_list.append(this_dict)
    return this_list
|
||||
|
||||
|
||||
def evaluate_large_best_configs_on_small(small_args, autohf):
    """Evaluate the union of large-model and small-model configs with ``small_args``.

    Ranked configs are collected for the "large" counterpart of the job
    (reps 0, 1 and 2) and then for the "small" counterpart (rep 0),
    de-duplicated in encounter order, and passed to ``evaluate_configs``.

    Args:
        small_args: console arguments describing the small-model run.
        autohf: the AutoTransformers-like object; its ``jobid_config`` is
            set to the large job and ``set_metric`` is called on it.
    """
    large_jobid = convert_config_to_different_size(JobID(small_args), mode="large")
    autohf.jobid_config = large_jobid
    autohf.set_metric()

    collected = []

    def _collect_ranked(jobid):
        # Fetch the ranked configs stored for ``jobid`` and merge the new ones.
        nonlocal collected
        azure_utils = AzureUtils(console_args=small_args, jobid=jobid, autohf=autohf)
        for config in azure_utils.get_ranked_configs(autohf.metric_mode_name):
            collected = add_dict_item_to_list(collected, config)

    for rep in range(3):
        large_jobid.rep = rep
        _collect_ranked(large_jobid)

    small_jobid = convert_config_to_different_size(JobID(small_args), mode="small")
    small_jobid.rep = 0
    _collect_ranked(small_jobid)

    evaluate_configs(autohf, small_args, list(collected))
|
||||
|
||||
|
||||
def _test_hpo(args,
              jobid_config,
              autohf,
              azure_utils=None,
              autohf_settings=None,
              ):
    """Prepare data, run one fit, optionally predict, and write the outputs.

    Args:
        args: console arguments; ``algo_mode`` selects ``fit_hf`` ("hfhpo")
            versus ``fit`` (anything else).
        jobid_config: job description; prediction only runs when
            ``spt == "ori"``.
        autohf: the AutoTransformers-like object that does the work.
        azure_utils: output writer; built from the other arguments when falsy.
        autohf_settings: keyword arguments for the fit call; built with
            ``get_autohf_settings(args)`` when falsy.

    An ``AssertionError`` raised anywhere in the run is reported through
    ``azure_utils.write_exception()``. ``rm_home_result()`` is called at the
    end in either case.
    """
    try:
        if not azure_utils:
            azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf)
        autohf.prepare_data(**get_preparedata_setting(args, jobid_config))

        analysis = validation_metric = test_metric = None
        if not autohf_settings:
            autohf_settings = get_autohf_settings(args)

        if args.algo_mode == "hfhpo":
            autohf.fit_hf(**autohf_settings)
        else:
            validation_metric, analysis = autohf.fit(**autohf_settings)

        if jobid_config.spt == "ori":
            predictions, test_metric = autohf.predict()
            if validation_metric:
                test_metric.update({"validation": validation_metric})
        else:
            predictions = None
            # test_metric is still None on this path; check kept as in the original.
            if test_metric:
                validation_metric.update({"test": test_metric})

        json_log = None if analysis is None else azure_utils.extract_log_from_analysis(analysis)
        azure_utils.write_autohf_output(json_log=json_log,
                                        valid_metric=validation_metric,
                                        predictions=predictions,
                                        duration=autohf.last_run_duration)

    except AssertionError:
        azure_utils.write_exception()
    rm_home_result()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the shared objects, then dispatch on the
    # requested algo_mode. Unrecognised modes do nothing.
    autohf = AutoTransformers()
    args = load_console_args()
    jobid_config = JobID(args)

    selected_mode = args.algo_mode
    if selected_mode == "bestnn":
        search_base_and_search_lower_lr(args, jobid_config, autohf)
    elif selected_mode == "list":
        evaluate_small_best_configs_on_large(args, autohf)
    elif selected_mode == "list_s":
        evaluate_large_best_configs_on_small(args, autohf)
    elif selected_mode in ("hpo", "hfhpo", "grid", "gridbert"):
        _test_hpo(args, jobid_config, autohf)
|
||||
62
test/hf/test_mobilebert.py
Normal file
62
test/hf/test_mobilebert.py
Normal file
@@ -0,0 +1,62 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
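# [REDACTED] A hard-coded string that appears to be a GitHub access token was removed from this comment; revoke it and supply credentials via the environment instead.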
|
||||
|
||||
# NOTE: a ``global`` statement at module scope does not create or change any
# binding; these names are only declared here, never assigned in this file.
global azure_log_path, azure_key
|
||||
|
||||
|
||||
def get_preparedata_setting(jobid_config):
    """Return the fixed ``prepare_data`` keyword arguments used by this test.

    Args:
        jobid_config: stored unchanged under the "jobid_config" key.

    Returns:
        A dict with "server_name", "data_root_path", "max_seq_length",
        "jobid_config" and "resplit_portion".
    """
    resplit_portion = {
        "source": ["train", "validation"],
        "train": [0, 0.8],
        "validation": [0.8, 0.9],
        "test": [0.9, 1.0],
    }
    return {
        "server_name": "tmdev",
        "data_root_path": "data/",
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "resplit_portion": resplit_portion,
    }
|
||||
|
||||
|
||||
def get_autohf_settings():
    """Return the fixed ``fit`` keyword arguments used by this test.

    Returns:
        A new dict on every call: one CPU per trial, a single sample, a
        100000 time budget, one checkpoint per epoch and fp16 disabled.
    """
    return dict(
        resources_per_trial={"cpu": 1},
        num_samples=1,
        time_budget=100000,
        ckpt_per_epoch=1,
        fp16=False,
    )
|
||||
|
||||
|
||||
def test_hpo():
    """Smoke-test one prepare / fit / predict cycle with the unit-test job config.

    Returns immediately when ``ray`` cannot be imported. An
    ``AssertionError`` raised during the cycle is ignored.
    """
    try:
        import ray  # noqa: F401  (availability check only)
    except ImportError:
        return

    from flaml.nlp import AutoTransformers, JobID

    unittest_jobid = JobID()
    unittest_jobid.set_unittest_config()
    autohf = AutoTransformers()

    try:
        autohf.prepare_data(**get_preparedata_setting(unittest_jobid))

        validation_metric, _analysis = autohf.fit(**get_autohf_settings())

        _predictions, test_metric = autohf.predict()
        if test_metric:
            validation_metric.update({"test": test_metric})
    except AssertionError:
        # Deliberately tolerated, as in the original test.
        pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running the smoke test directly as a script.
    test_hpo()
|
||||
Reference in New Issue
Block a user