mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
exception, coverage for autohf (#106)
* increase coverage * fixing exception messages * fixing import
This commit is contained in:
@@ -1,75 +0,0 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
# SECURITY: a GitHub personal-access token was committed on this line; it has been redacted — revoke the token.
|
||||
import argparse
|
||||
from flaml.nlp.result_analysis.azure_utils import JobID
|
||||
|
||||
|
||||
def create_partial_config_bestnn():
    """Build a partial JobID describing the "bestnn" funnel-xlarge experiment.

    Alternative presets for other model/size combinations are kept below as
    commented-out assignments for quick switching during experimentation.
    """
    partial_jobid = JobID()
    # active preset: funnel xlarge
    # partial_jobid.mod = "bestnn"
    partial_jobid.spa = "uni"
    # partial_jobid.arg = "cus"
    # partial_jobid.alg = "cfo"
    partial_jobid.pre = "funnel"
    partial_jobid.presz = "xlarge"

    # preset: funnel small
    # partial_jobid.mod = "list"
    # partial_jobid.pre = "funnel"
    # partial_jobid.presz = "small"
    # partial_jobid.rep = 0

    # preset: deberta large (bestnn)
    # partial_jobid.mod = "bestnn"
    # partial_jobid.spa = "uni"
    # partial_jobid.arg = "cus"
    # partial_jobid.alg = "cfo"
    # partial_jobid.pre = "deberta"
    # partial_jobid.presz = "large"

    # preset: deberta base (hpo)
    # partial_jobid.mod = "hpo"
    # partial_jobid.pre = "deberta"
    # partial_jobid.presz = "base"
    # partial_jobid.rep = 0

    # preset: deberta large (hpo)
    # partial_jobid.mod = "hpo"
    # partial_jobid.pre = "deberta"
    # partial_jobid.presz = "large"
    return partial_jobid
|
||||
|
||||
|
||||
def create_partial_config_list():
    """Build a partial JobID for "list" mode on the xlarge model size."""
    partial_jobid = JobID()
    partial_jobid.mod = "list"
    partial_jobid.spa = "uni"
    partial_jobid.presz = "xlarge"
    return partial_jobid
|
||||
|
||||
|
||||
def create_partial_config_hpo():
    """Build a partial JobID for plain "hpo" mode with the uniform space."""
    partial_jobid = JobID()
    partial_jobid.mod = "hpo"
    partial_jobid.spa = "uni"
    return partial_jobid
|
||||
|
||||
|
||||
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--key_path', type=str, help='key path', required=False, default="../../")
    # NOTE: the original also passed default="logs_azure/" here, but argparse
    # never applies a default to a required argument, so it was dead code.
    arg_parser.add_argument('--azure_root_log_path', type=str,
                            help='root log path of blob storage', required=True)
    args = arg_parser.parse_args()

    partial_config_large = create_partial_config_bestnn()
    # imported lazily so the helpers above stay usable without the full package;
    # the unused names are kept for the commented-out analysis calls below
    from flaml.nlp.result_analysis.generate_result_summary import compare_small_vs_large, get_result, check_conflict, \
        print_cfo, download_validation, extract_roberta_overfitting_configs, extract_electra_overfitting_configs

    # get_result(args, partial_config_large)
    # check_conflict(args, [partial_config_large])
    download_validation(args, "/data/xliu127/projects/hyperopt/data/result/")

    # extract_roberta_overfitting_configs(args)
|
||||
@@ -1,285 +0,0 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
# SECURITY: a GitHub personal-access token was committed on this line; it has been redacted — revoke the token.
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from flaml.nlp import AutoTransformers
|
||||
from flaml.nlp import AzureUtils, JobID
|
||||
from flaml.nlp.utils import load_console_args
|
||||
|
||||
global azure_log_path
|
||||
global azure_key
|
||||
|
||||
|
||||
def get_resplit_portion(jobid_config):
    """Return the 80/10/10 resplit fractions for the configured dataset.

    glue/mnli draws its source folds from ``validation_matched``; every other
    dataset uses the plain ``validation`` fold.
    """
    if jobid_config.dat == ["glue"] and jobid_config.subdat in {"mnli"}:
        source_folds = ["train", "validation_matched"]
    else:
        source_folds = ["train", "validation"]
    return {"source": source_folds,
            "train": [0, 0.8],
            "validation": [0.8, 0.9],
            "test": [0.9, 1.0]}
|
||||
|
||||
|
||||
def get_preparedata_setting(args, jobid_config):
    """Assemble the kwargs for AutoTransformers.prepare_data from the console
    args and the job configuration."""
    setting = {
        "server_name": args.server_name,
        "data_root_path": args.data_root_dir,
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "is_wandb_on": True
    }
    if jobid_config.spt == 'rspt':
        setting["resplit_portion"] = get_resplit_portion(jobid_config)
    # long-document datasets get the 512-token budget
    is_albert_squad = "albert" == jobid_config.pre and jobid_config.dat == ["squad"]
    is_funnel_longtext = "funnel" in jobid_config.pre and jobid_config.dat[0] in {
        "imdb", "yelp_review_full", "yelp_polarity", "amazon_polarity", "amazon_review_multi"}
    if is_albert_squad or is_funnel_longtext:
        setting["max_seq_length"] = 512
    if jobid_config.dat[0] == "glue" and jobid_config.subdat == "mnli":
        setting["fold_name"] = ['train', 'validation_matched', 'test_matched']
    return setting
|
||||
|
||||
|
||||
def get_autohf_settings(args, **custom_args):
    """Build the kwargs dict for AutoTransformers.fit.

    Args:
        args: console-args namespace providing ``sample_num`` and
            ``time_budget``; ``ds_config`` and ``rep_id`` are copied when
            present and default to None otherwise.
        **custom_args: extra fit kwargs merged on top (e.g. points_to_evaluate).

    Returns:
        dict of fit settings.
    """
    autohf_settings = {"resources_per_trial": {"gpu": 1, "cpu": 1},
                       "num_samples": args.sample_num,
                       "time_budget": args.time_budget,
                       "ckpt_per_epoch": 1,
                       }
    # getattr with a default replaces the original hasattr/else branching
    for other_attr in ("ds_config", "rep_id"):
        autohf_settings[other_attr] = getattr(args, other_attr, None)
    # dict.update on an empty mapping is a no-op, so no length guard is needed
    autohf_settings.update(custom_args)
    return autohf_settings
|
||||
|
||||
|
||||
def rm_home_result():
    """Delete ``~/ray_results/`` (ray tune's default output dir) if it exists."""
    from os.path import expanduser
    ray_results_dir = expanduser("~") + "/ray_results/"
    if os.path.exists(ray_results_dir):
        shutil.rmtree(ray_results_dir)
|
||||
|
||||
|
||||
def get_best_base_config(args, jobid_config, autohf):
    """Look up the best config found for the small/base variant of the model.

    Clones the console args into a fixed small-model HPO setup, derives the
    matching small JobID, and returns the top-ranked config recorded in azure.
    """
    import copy
    import re
    small_args = copy.deepcopy(args)
    small_args.algo_name = "optuna"
    small_args.search_alg_args_mode = "dft"
    small_args.algo_mode = "hpo"
    small_args.space_mode = "uni"
    small_args.pruner = "None"

    # funnel models use the pre-searched "list" mode; everything else plain hpo
    if "funnel" not in small_args.pretrained_model_size:
        small_args.algo_mode = "hpo"
    else:
        small_args.algo_mode = "list"
    small_args.sample_num = 10000
    small_args.time_budget = 3600
    small_args.rep_id = 0
    small_jobid = JobID(small_args)
    small_jobid.presz = "base" if small_jobid.pre == "deberta" else "small"
    small_jobid.pre_full = re.sub("(xlarge|large|intermediate)", small_jobid.presz,
                                  small_jobid.pre_full)
    small_azure_utils = AzureUtils(
        console_args=small_args,
        jobid=small_jobid,
        autohf=autohf)
    preparedata_setting = get_preparedata_setting(args, jobid_config)
    autohf.prepare_data(**preparedata_setting)
    autohf.set_metric()

    return small_azure_utils.get_ranked_configs(autohf.metric_mode_name)[0]
|
||||
|
||||
|
||||
def search_base_and_search_lower_lr(args, jobid_config, autohf):
    """Search the base model first, then search the large model with the
    learning rate upper-bounded by the base model's best learning rate."""
    best_config = get_best_base_config(args, jobid_config, autohf)

    import copy
    large_args = copy.deepcopy(args)
    large_args.time_budget = args.time_budget - 3600  # base search used 3600s
    large_args.sample_num = 100000
    large_args.algo_name = args.algo_name
    large_args.search_alg_args_mode = "cus"
    large_args.space_mode = "buni"
    large_args.pruner = "None"
    large_jobid = JobID(large_args)
    large_jobid.presz = jobid_config.presz
    large_jobid.pre_full = jobid_config.pre_full
    large_azure_utils = AzureUtils(console_args=large_args, jobid=large_jobid, autohf=autohf)

    bounded_args = {"points_to_evaluate": [best_config],
                    "bound": {"learning_rate": {"u": best_config["learning_rate"]}}}
    _test_hpo(large_args,
              large_jobid,
              autohf,
              large_azure_utils,
              autohf_settings=get_autohf_settings(large_args, **bounded_args))
|
||||
|
||||
|
||||
def search_base_and_search_around_best(args, jobid_config, autohf):
    """Find the best base-model config with BlendSearch defaults, then run CFO
    on the large model seeded at that config."""
    args.algo_name = "bs"
    args.search_alg_args_mode = "dft"
    args.spa = "uni"
    args.pru = "None"
    best_config = get_best_base_config(args, jobid_config, autohf)

    import copy
    large_args = copy.deepcopy(args)
    large_args.time_budget = args.time_budget - 3600  # base search used 3600s
    large_args.sample_num = 100000
    large_args.algo_name = "cfo"
    large_args.search_alg_args_mode = "cus"
    large_args.space_mode = "uni"
    large_jobid = JobID(large_args)
    large_jobid.presz = jobid_config.presz
    large_jobid.pre_full = jobid_config.pre_full
    large_azure_utils = AzureUtils(console_args=large_args, jobid=large_jobid, autohf=autohf)

    _test_hpo(large_args,
              large_jobid,
              autohf,
              large_azure_utils,
              autohf_settings=get_autohf_settings(large_args, points_to_evaluate=[best_config]))
|
||||
|
||||
|
||||
def evaluate_configs(autohf, args, ranked_all_configs):
    """Re-run a fixed list of configs by passing them as points_to_evaluate
    with the sample budget set to exactly the number of configs."""
    import copy
    eval_args = copy.deepcopy(args)
    eval_args.time_budget = 100000
    eval_args.sample_num = len(ranked_all_configs)
    eval_args.search_alg_args_mode = "cus"
    jobid_config = JobID(eval_args)
    azure_utils = AzureUtils(console_args=eval_args, jobid=jobid_config, autohf=autohf)
    _test_hpo(eval_args,
              jobid_config,
              autohf,
              azure_utils,
              autohf_settings=get_autohf_settings(eval_args, points_to_evaluate=ranked_all_configs))
|
||||
|
||||
|
||||
def convert_config_to_different_size(origin_config, mode):
    """Return a copy of *origin_config* re-targeted at another checkpoint size.

    mode="small": funnel -> presz "small" / mod "list"; others -> "base" / "hpo".
    mode="large": mod "hpo"; funnel -> presz "xlarge", others -> "large".
    Any other mode leaves the result unbound (UnboundLocalError), matching the
    original behavior.
    """
    import re
    import copy
    if mode == "small":
        new_config = copy.deepcopy(origin_config)
        is_funnel = new_config.pre == "funnel"
        new_config.mod = "list" if is_funnel else "hpo"
        new_config.presz = "small" if is_funnel else "base"
        new_config.pre_full = re.sub("(xlarge|large|intermediate)", new_config.presz, origin_config.pre_full)
    elif mode == "large":
        new_config = copy.deepcopy(origin_config)
        new_config.mod = "hpo"
        if new_config.pre == "funnel":
            new_config.presz = "xlarge"
            new_config.pre_full = re.sub("(small)", "xlarge", origin_config.pre_full)
        else:
            new_config.presz = "large"
            new_config.pre_full = re.sub("(small)", "large", origin_config.pre_full)

    return new_config
|
||||
|
||||
|
||||
def evaluate_small_best_configs_on_large(large_args, autohf):
    """Evaluate the top half of the small model's ranked configs on the large model.

    Fix: the original called ``evaluate_configs(large_args, configs)``, but
    evaluate_configs takes ``(autohf, args, ranked_all_configs)`` (see the
    correct 3-argument call in evaluate_large_best_configs_on_small), so the
    call raised TypeError at runtime.
    """
    jobid_config_small = convert_config_to_different_size(JobID(large_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=None, jobid=jobid_config_small, autohf=autohf)
    ranked_all_small_configs = azure_utils_small.get_ranked_configs(autohf.metric_mode_name)
    evaluate_configs(autohf, large_args,
                     ranked_all_small_configs[:len(ranked_all_small_configs) // 2])
|
||||
|
||||
|
||||
def add_dict_item_to_list(this_list, this_dict):
    """Append *this_dict* to *this_list* unless an equal item is already there.

    Returns the (mutated) list so call sites can rebind it. Uses the ``in``
    operator instead of the original ``len([...]) > 0`` scan, which built a
    throwaway list and never short-circuited.
    """
    if this_dict not in this_list:
        this_list.append(this_dict)
    return this_list
|
||||
|
||||
|
||||
def evaluate_large_best_configs_on_small(small_args, autohf):
    """Merge the ranked configs of the large model (3 repetitions) with the
    small model's ranked configs, de-duplicate, and evaluate them all."""
    jobid_config_large = convert_config_to_different_size(JobID(small_args), mode="large")
    autohf.jobid_config = jobid_config_large
    autohf.set_metric()
    merged_configs = []
    for rep_id in range(3):
        jobid_config_large.rep = rep_id
        azure_utils_large = AzureUtils(console_args=small_args, jobid=jobid_config_large, autohf=autohf)
        for config in azure_utils_large.get_ranked_configs(autohf.metric_mode_name):
            merged_configs = add_dict_item_to_list(merged_configs, config)
    jobid_config_small = convert_config_to_different_size(JobID(small_args), mode="small")
    jobid_config_small.rep = 0
    azure_utils_small = AzureUtils(console_args=small_args, jobid=jobid_config_small, autohf=autohf)
    for config in azure_utils_small.get_ranked_configs(autohf.metric_mode_name):
        merged_configs = add_dict_item_to_list(merged_configs, config)

    evaluate_configs(autohf, small_args, list(merged_configs))
|
||||
|
||||
|
||||
def _test_hpo(args,
              jobid_config,
              autohf,
              azure_utils=None,
              autohf_settings=None,
              ):
    """Run one HPO job end-to-end and upload its output to azure.

    AssertionErrors raised anywhere in the pipeline are recorded via
    azure_utils.write_exception() instead of propagating; the local
    ~/ray_results directory is removed in every case.
    """
    try:
        if not azure_utils:
            azure_utils = AzureUtils(console_args=args, jobid=jobid_config, autohf=autohf)
        autohf.prepare_data(**get_preparedata_setting(args, jobid_config))

        analysis = validation_metric = test_metric = None
        if not autohf_settings:
            autohf_settings = get_autohf_settings(args)
        if args.algo_mode != "hfhpo":
            validation_metric, analysis = autohf.fit(**autohf_settings)
        else:
            autohf.fit_hf(**autohf_settings)

        if jobid_config.spt == "ori":
            predictions, test_metric = autohf.predict()
            if validation_metric:
                test_metric.update({"validation": validation_metric})
        else:
            predictions = None
            # NOTE(review): test_metric is always None on this branch, so this
            # update never fires; kept as-is to preserve behavior.
            if test_metric:
                validation_metric.update({"test": test_metric})

        json_log = azure_utils.extract_log_from_analysis(analysis) if analysis is not None else None
        azure_utils.write_autohf_output(json_log=json_log,
                                        valid_metric=validation_metric,
                                        predictions=predictions,
                                        duration=autohf.last_run_duration)

    except AssertionError:
        azure_utils.write_exception()
    rm_home_result()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    autohf = AutoTransformers()
    args = load_console_args()
    jobid_config = JobID(args)

    mode = args.algo_mode
    if mode in ("hpo", "hfhpo", "grid", "gridbert"):
        _test_hpo(args, jobid_config, autohf)
    elif mode == "bestnn":
        search_base_and_search_lower_lr(args, jobid_config, autohf)
    elif mode == "list":
        evaluate_small_best_configs_on_large(args, autohf)
    elif mode == "list_s":
        evaluate_large_best_configs_on_small(args, autohf)
|
||||
126
test/hf/test_cover_azure.py
Normal file
126
test/hf/test_cover_azure.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
test suites for covering azure_utils.py
|
||||
"""
|
||||
|
||||
|
||||
def get_preparedata_setting(jobid_config):
    """Build the kwargs dict for AutoTransformers.prepare_data in unit tests."""
    resplit_portion = {"source": ["train", "validation"],
                       "train": [0, 0.8],
                       "validation": [0.8, 0.9],
                       "test": [0.9, 1.0]}
    return {
        "server_name": "tmdev",
        "data_root_path": "data/",
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "resplit_portion": resplit_portion,
    }
|
||||
|
||||
|
||||
def get_console_args():
    """Return default console args preconfigured for a glue:mrpc test run."""
    from flaml.nlp.utils import load_dft_args
    console_args = load_dft_args()
    console_args.dataset_subdataset_name = "glue:mrpc"
    console_args.algo_mode = "hpo"
    console_args.space_mode = "uni"
    console_args.search_alg_args_mode = "dft"
    console_args.algo_name = "bs"
    console_args.pruner = "None"
    console_args.pretrained_model_size = "google/electra-base-discriminator:base"
    console_args.resplit_mode = "rspt"
    console_args.rep_id = 0
    console_args.seed_data = 43
    console_args.seed_transformers = 42
    return console_args
|
||||
|
||||
|
||||
def test_get_configblob_from_partial_jobid():
    """Cover JobID blob-name parsing and partial-jobid matching."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.result_analysis.azure_utils import JobID
    blob_name = ("dat=glue_subdat=cola_mod=grid_spa=cus_arg=dft_alg=grid"
                 "_pru=None_pre=deberta_presz=large_spt=rspt_rep=0_sddt=43"
                 "_sdhf=42_var1=1e-05_var2=0.0.json")
    partial_jobid = JobID()
    partial_jobid.pre = "deberta"
    partial_jobid.mod = "grid"
    partial_jobid.spa = "cus"
    partial_jobid.presz = "large"

    JobID.convert_blobname_to_jobid(blob_name).is_match(partial_jobid)

    partial_jobid = JobID()
    partial_jobid.pre = "deberta"
    partial_jobid.mod = "hpo"
    partial_jobid.spa = "cus"
    partial_jobid.presz = "large"
    partial_jobid.sddt = None

    JobID.convert_blobname_to_jobid(blob_name).is_match(partial_jobid)
|
||||
|
||||
|
||||
def test_jobid():
    """Cover JobID string conversions and classmethod helpers."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.result_analysis.azure_utils import JobID
    console_args = get_console_args()

    jobid_config = JobID(console_args)
    jobid_config.to_partial_jobid_string()
    JobID.convert_blobname_to_jobid("test")
    JobID.dataset_list_to_str("glue")
    JobID.get_full_data_name(["glue"], "mrpc")
    JobID._extract_model_type_with_keywords_match("google/electra-base-discriminator:base")

    jobid_config.to_wandb_string()
|
||||
|
||||
|
||||
def test_azureutils():
    """Cover AzureUtils output writing and config/score bookkeeping."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.result_analysis.azure_utils import AzureUtils, ConfigScore, JobID, ConfigScoreList
    from flaml.nlp import AutoTransformers

    console_args = get_console_args()
    console_args.key_path = "."
    jobid_config = JobID(console_args)
    autohf = AutoTransformers()
    autohf.jobid_config = jobid_config

    autohf.prepare_data(**get_preparedata_setting(jobid_config))

    each_configscore = ConfigScore(trial_id="test", start_time=0.0, last_update_time=0.0,
                                   config={}, metric_score={"max": 0.0}, time_stamp=0.0)
    configscore_list = ConfigScoreList([each_configscore])
    for sort_method in ("unsorted", "sort_time", "sort_accuracy"):
        configscore_list.sorted(sort_method)
    configscore_list.get_best_config()

    azureutils = AzureUtils(console_args=console_args, autohf=autohf)
    azureutils.autohf = autohf
    azureutils.root_log_path = "logs_azure/"

    azureutils.write_autohf_output(configscore_list=[each_configscore],
                                   valid_metric={},
                                   predictions=[],
                                   duration=0)

    azureutils.get_config_and_score_from_partial_jobid(root_log_path="data/", partial_jobid=jobid_config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # same order as the original explicit calls
    for _test in (test_get_configblob_from_partial_jobid, test_jobid, test_azureutils):
        _test()
|
||||
214
test/hf/test_cover_other.py
Normal file
214
test/hf/test_cover_other.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
test suites for covering other functions
|
||||
"""
|
||||
|
||||
from transformers import AutoConfig
|
||||
from flaml.nlp.huggingface.trainer import TrainerForAutoTransformers
|
||||
|
||||
|
||||
def get_console_args():
    """Return default console args preconfigured for a glue:mrpc test run."""
    from flaml.nlp.utils import load_dft_args
    console_args = load_dft_args()
    console_args.dataset_subdataset_name = "glue:mrpc"
    console_args.algo_mode = "hpo"
    console_args.space_mode = "uni"
    console_args.search_alg_args_mode = "dft"
    console_args.algo_name = "bs"
    console_args.pruner = "None"
    console_args.pretrained_model_size = "google/electra-base-discriminator:base"
    console_args.resplit_mode = "rspt"
    console_args.rep_id = 0
    console_args.seed_data = 43
    console_args.seed_transformers = 42
    return console_args
|
||||
|
||||
|
||||
def model_init():
    """Load the model for TrainerForAutoTransformers using the unit-test JobID."""
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()
    from flaml.nlp import AutoTransformers
    autohf = AutoTransformers()

    autohf.prepare_data(**get_preparedata_setting(jobid_config))
    return autohf._load_model()
|
||||
|
||||
|
||||
def get_preparedata_setting(jobid_config):
    """Build the kwargs dict for AutoTransformers.prepare_data in unit tests."""
    resplit_portion = {"source": ["train", "validation"],
                       "train": [0, 0.8],
                       "validation": [0.8, 0.9],
                       "test": [0.9, 1.0]}
    return {
        "server_name": "tmdev",
        "data_root_path": "data/",
        "max_seq_length": 128,
        "jobid_config": jobid_config,
        "resplit_portion": resplit_portion,
    }
|
||||
|
||||
|
||||
def test_dataprocess():
    """Increase coverage of flaml.nlp.dataset.dataprocess_auto by preparing
    data once for every tokenizer function that differs from the default."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp import AutoTransformers
    from flaml.nlp import JobID
    from flaml.nlp.dataset.dataprocess_auto import TOKENIZER_MAPPING

    jobid_config = JobID()
    jobid_config.set_unittest_config()
    autohf = AutoTransformers()

    dataset_name = JobID.dataset_list_to_str(jobid_config.dat)
    default_func = TOKENIZER_MAPPING[(dataset_name, jobid_config.subdat)]

    # set comprehension over items(): drops the list->set round trip and the
    # second mapping lookup per key of the original set([...]) form
    funcs_to_eval = {(dat, subdat)
                     for (dat, subdat), func in TOKENIZER_MAPPING.items()
                     if func != default_func}

    for (dat, subdat) in funcs_to_eval:
        print("loading dataset for {}, {}".format(dat, subdat))
        jobid_config.dat = dat.split(",")
        jobid_config.subdat = subdat

        preparedata_setting = get_preparedata_setting(jobid_config)
        autohf.prepare_data(**preparedata_setting)
|
||||
|
||||
|
||||
def test_gridsearch_space():
    """Cover AutoGridSearchSpace for every registered model type."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.hpo.grid_searchspace_auto import GRID_SEARCH_SPACE_MAPPING, AutoGridSearchSpace
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()

    for model_type in GRID_SEARCH_SPACE_MAPPING.keys():
        AutoGridSearchSpace.from_model_and_dataset_name(
            model_type, "base", jobid_config.dat, jobid_config.subdat, "hpo")
|
||||
|
||||
|
||||
def test_hpo_space():
    """Cover AutoHPOSearchSpace for every registered space mode, including the
    custom ("cus") and bounded-uniform ("buni") argument shapes."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.hpo.hpo_searchspace import AutoHPOSearchSpace, HPO_SEARCH_SPACE_MAPPING
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()

    for space_mode in HPO_SEARCH_SPACE_MAPPING.keys():
        jobid_config.spa = space_mode
        if space_mode == "cus":
            custom_hpo_args = {"hpo_space": {"learning_rate": [1e-5]}}
        elif space_mode == "buni":
            best_config = {"learning_rate": 1e-5}
            custom_hpo_args = {"points_to_evaluate": [best_config],
                               "bound": {"learning_rate": {"u": best_config["learning_rate"]}}}
        else:
            custom_hpo_args = {}

        AutoHPOSearchSpace.from_model_and_dataset_name(jobid_config.spa, jobid_config.pre,
                                                       jobid_config.presz, jobid_config.dat,
                                                       jobid_config.subdat, **custom_hpo_args)
|
||||
|
||||
|
||||
def test_trainer():
    """Cover TrainerForAutoTransformers step/epoch/warmup conversions."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    num_train_epochs = 3
    num_train_examples = 100
    per_device_train_batch_size = 32
    device_count = 1
    max_steps = 1000
    warmup_steps = 100
    warmup_ratio = 0.1
    trainer = TrainerForAutoTransformers(model_init=model_init)
    trainer.convert_num_train_epochs_to_max_steps(
        num_train_epochs, num_train_examples, per_device_train_batch_size, device_count)
    trainer.convert_max_steps_to_num_train_epochs(
        max_steps, num_train_examples, per_device_train_batch_size, device_count)
    trainer.convert_warmup_ratio_to_warmup_steps(
        warmup_ratio,
        max_steps=max_steps,
        num_train_epochs=num_train_epochs,
        num_train_examples=num_train_examples,
        per_device_train_batch_size=per_device_train_batch_size,
        device_count=device_count)
    trainer.convert_warmup_steps_to_warmup_ratio(
        warmup_steps, num_train_epochs, num_train_examples,
        per_device_train_batch_size, device_count)
|
||||
|
||||
|
||||
def test_switch_head():
    """Cover AutoSeqClassificationHead for every registered model type."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    from flaml.nlp.huggingface.switch_head_auto import AutoSeqClassificationHead, MODEL_CLASSIFICATION_HEAD_MAPPING
    from flaml.nlp.result_analysis.azure_utils import JobID
    jobid_config = JobID()
    jobid_config.set_unittest_config()
    checkpoint_path = jobid_config.pre_full

    model_config = AutoConfig.from_pretrained(
        checkpoint_path,
        num_labels=AutoConfig.from_pretrained(checkpoint_path).num_labels)

    for model_type in list(MODEL_CLASSIFICATION_HEAD_MAPPING.keys()):
        jobid_config.pre = model_type
        AutoSeqClassificationHead.from_model_type_and_config(jobid_config.pre, model_config)
|
||||
|
||||
|
||||
def test_wandb_utils():
    """Cover WandbUtils run setup and trial-id generation."""
    try:
        import ray  # noqa: F401
    except ImportError:
        return

    import os
    from flaml.nlp.result_analysis.wandb_utils import WandbUtils
    from flaml.nlp.result_analysis.azure_utils import JobID

    console_args = get_console_args()
    console_args.key_path = "."
    jobid_config = JobID(console_args)

    wandb_utils = WandbUtils(is_wandb_on=True, console_args=console_args, jobid_config=jobid_config)
    os.environ["WANDB_MODE"] = "online"
    wandb_utils.wandb_group_name = "test"
    wandb_utils._get_next_trial_ids()
    wandb_utils.set_wandb_per_run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # same order as the original explicit calls
    for _test in (test_wandb_utils, test_dataprocess, test_gridsearch_space,
                  test_hpo_space, test_trainer, test_switch_head):
        _test()
|
||||
@@ -1,7 +1,5 @@
|
||||
'''Require: pip install torch transformers datasets wandb flaml[blendsearch,ray]
|
||||
'''
|
||||
# SECURITY: a GitHub personal-access token was committed on this line; it has been redacted — revoke the token.
|
||||
|
||||
global azure_log_path
|
||||
global azure_key
|
||||
|
||||
@@ -26,6 +24,7 @@ def get_autohf_settings():
|
||||
"time_budget": 100000,
|
||||
"ckpt_per_epoch": 1,
|
||||
"fp16": False,
|
||||
"ray_local_mode": True
|
||||
}
|
||||
return autohf_settings
|
||||
|
||||
@@ -38,24 +37,38 @@ def test_hpo():
|
||||
|
||||
from flaml.nlp import AutoTransformers
|
||||
from flaml.nlp import JobID
|
||||
from flaml.nlp import AzureUtils
|
||||
|
||||
jobid_config = JobID()
|
||||
jobid_config.set_unittest_config()
|
||||
autohf = AutoTransformers()
|
||||
|
||||
try:
|
||||
preparedata_setting = get_preparedata_setting(jobid_config)
|
||||
autohf.prepare_data(**preparedata_setting)
|
||||
preparedata_setting = get_preparedata_setting(jobid_config)
|
||||
autohf.prepare_data(**preparedata_setting)
|
||||
|
||||
autohf_settings = get_autohf_settings()
|
||||
validation_metric, analysis = autohf.fit(**autohf_settings, )
|
||||
autohf_settings = get_autohf_settings()
|
||||
autohf_settings["points_to_evaluate"] = [{"learning_rate": 2e-5}]
|
||||
validation_metric, analysis = autohf.fit(**autohf_settings)
|
||||
|
||||
predictions, test_metric = autohf.predict()
|
||||
if test_metric:
|
||||
validation_metric.update({"test": test_metric})
|
||||
predictions, test_metric = autohf.predict()
|
||||
if test_metric:
|
||||
validation_metric.update({"test": test_metric})
|
||||
|
||||
except AssertionError:
|
||||
pass
|
||||
azure_utils = AzureUtils(root_log_path="logs_test/", autohf=autohf)
|
||||
azure_utils._azure_key = "test"
|
||||
azure_utils._container_name = "test"
|
||||
|
||||
configscore_list = azure_utils.extract_configscore_list_from_analysis(analysis)
|
||||
azure_utils.write_autohf_output(configscore_list=configscore_list,
|
||||
valid_metric=validation_metric,
|
||||
predictions=predictions,
|
||||
duration=autohf.last_run_duration)
|
||||
|
||||
jobid_config.mod = "grid"
|
||||
autohf = AutoTransformers()
|
||||
|
||||
preparedata_setting = get_preparedata_setting(jobid_config)
|
||||
autohf.prepare_data(**preparedata_setting)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user