Merge pull request #2 from microsoft/main

update
Z.sk
2022-01-14 23:07:06 +08:00
committed by GitHub
9 changed files with 151 additions and 67 deletions

View File

@@ -470,7 +470,6 @@ class AutoML(BaseEstimator):
'classification', 'regression', 'ts_forecast', 'rank',
'seq-classification', 'seq-regression', 'summarization'.
n_jobs: An integer of the number of threads for training.
gpu_per_trial: A float of the number of gpus per trial, only used by TransformersEstimator.
log_file_name: A string of the log file name. To disable logging,
set it to be an empty string "".
estimator_list: A list of strings for estimator names, or 'auto'
@@ -576,7 +575,6 @@ class AutoML(BaseEstimator):
settings["time_budget"] = settings.get("time_budget", 60)
settings["task"] = settings.get("task", "classification")
settings["n_jobs"] = settings.get("n_jobs", -1)
settings["gpu_per_trial"] = settings.get("gpu_per_trial", 0)
settings["eval_method"] = settings.get("eval_method", "auto")
settings["split_ratio"] = settings.get("split_ratio", SPLIT_RATIO)
settings["n_splits"] = settings.get("n_splits", N_SPLITS)
@@ -1306,7 +1304,7 @@ class AutoML(BaseEstimator):
split_type=None,
groups=None,
n_jobs=-1,
gpu_per_trial=0,
# gpu_per_trial=0,
train_best=True,
train_full=False,
record_id=-1,
@@ -1358,7 +1356,6 @@ class AutoML(BaseEstimator):
for training data.
n_jobs: An integer of the number of threads for training. Use all
available resources when n_jobs == -1.
gpu_per_trial: A float of the number of gpus per trial. Only used by TransformersEstimator.
train_best: A boolean of whether to train the best config in the
time budget; if false, train the last config in the budget.
train_full: A boolean of whether to train on the full data. If true,
@@ -1451,9 +1448,9 @@ class AutoML(BaseEstimator):
import os
self._state.resources_per_trial = (
{"cpu": os.cpu_count(), "gpu": gpu_per_trial}
{"cpu": os.cpu_count(), "gpu": fit_kwargs.get("gpu_per_trial", 0)}
if self._state.n_jobs < 0
else {"cpu": self._state.n_jobs, "gpu": gpu_per_trial}
else {"cpu": self._state.n_jobs, "gpu": fit_kwargs.get("gpu_per_trial", 0)}
)
self._trained_estimator = self._state._train_with_config(
best_estimator,
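
For context, the logic of this hunk in standalone form: the ray resources dict now pulls the GPU count out of fit_kwargs instead of a dedicated parameter, and n_jobs < 0 still means "use every CPU". A small self-contained sketch:

import os

def resources_per_trial(n_jobs, fit_kwargs):
    # gpu_per_trial defaults to 0 when the caller did not pass it
    gpu = fit_kwargs.get("gpu_per_trial", 0)
    return (
        {"cpu": os.cpu_count(), "gpu": gpu}
        if n_jobs < 0
        else {"cpu": n_jobs, "gpu": gpu}
    )

print(resources_per_trial(-1, {"gpu_per_trial": 1}))  # e.g. {'cpu': 8, 'gpu': 1}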
@@ -1711,7 +1708,7 @@ class AutoML(BaseEstimator):
metric=None,
task=None,
n_jobs=None,
gpu_per_trial=0,
# gpu_per_trial=0,
log_file_name=None,
estimator_list=None,
time_budget=None,
@@ -1807,7 +1804,6 @@ class AutoML(BaseEstimator):
'classification', 'regression', 'ts_forecast', 'rank',
'seq-classification', 'seq-regression', 'summarization'
n_jobs: An integer of the number of threads for training.
gpu_per_trial: A float of the number of gpus per trial, only used by TransformersEstimator.
log_file_name: A string of the log file name. To disable logging,
set it to be an empty string "".
estimator_list: A list of strings for estimator names, or 'auto'
@@ -1918,8 +1914,10 @@ class AutoML(BaseEstimator):
datasets, but will incur more overhead in time. Only use it if
you run into OOM failures.
**fit_kwargs: Other key word arguments to pass to fit() function of
the searched learners, such as sample_weight. Include period as
a key word argument for 'ts_forecast' task.
the searched learners, such as sample_weight. Include:
period: int | forecast horizon for 'ts_forecast' task.
gpu_per_trial: float, default = 0 | A float of the number of gpus per trial,
only used by TransformersEstimator.
"""
self._state._start_time_flag = self._start_time_flag = time.time()
@@ -1927,11 +1925,7 @@ class AutoML(BaseEstimator):
self._estimator_type = "classifier" if task in CLASSIFICATION else "regressor"
time_budget = time_budget or self._settings.get("time_budget")
n_jobs = n_jobs or self._settings.get("n_jobs")
gpu_per_trial = (
self._settings.get("gpu_per_trial")
if gpu_per_trial is None
else gpu_per_trial
)
gpu_per_trial = fit_kwargs.get("gpu_per_trial", 0)
eval_method = eval_method or self._settings.get("eval_method")
split_ratio = split_ratio or self._settings.get("split_ratio")
n_splits = n_splits or self._settings.get("n_splits")
@@ -2081,6 +2075,7 @@ class AutoML(BaseEstimator):
if _is_nlp_task(self._state.task):
self._state.fit_kwargs["metric"] = metric
self._state.fit_kwargs["use_ray"] = self._use_ray
self._state.fit_kwargs["gpu_per_trial"] = gpu_per_trial
self._state.metric = metric
@@ -2146,7 +2141,8 @@ class AutoML(BaseEstimator):
]
if TS_FORECAST == self._state.task:
# catboost is removed because it has a `name` parameter, making it incompatible with hcrystalball
estimator_list.remove("catboost")
if "catboost" in estimator_list:
estimator_list.remove("catboost")
try:
import prophet
@@ -2365,7 +2361,17 @@ class AutoML(BaseEstimator):
if mlflow is not None and mlflow.active_run():
with mlflow.start_run(nested=True):
mlflow.log_metric("iter_counter", self._iter_per_learner[estimator])
mlflow.log_param("metric_for_logging", search_state.metric_for_logging)
if "intermediate_results" in search_state.metric_for_logging:
for each_entry in search_state.metric_for_logging[
"intermediate_results"
]:
with mlflow.start_run(nested=True):
mlflow.log_metrics(each_entry)
mlflow.log_metric(
"iter_counter", self._iter_per_learner[estimator]
)
del search_state.metric_for_logging["intermediate_results"]
mlflow.log_metrics(search_state.metric_for_logging)
mlflow.log_metric("trial_time", search_state.trial_time)
mlflow.log_metric("wall_clock_time", self._state.time_from_start)
mlflow.log_metric("validation_loss", search_state.val_loss)

View File

@@ -94,11 +94,19 @@ huggingface_submetric_to_metric = {"rouge1": "rouge", "rouge2": "rouge"}
def get_estimator_class(task, estimator_name):
# when adding a new learner, need to add an elif branch
if "xgboost" == estimator_name:
estimator_class = XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
estimator_class = (
XGBoost_TS_Regressor if TS_FORECAST == task else XGBoostSklearnEstimator
)
elif "xgb_limitdepth" == estimator_name:
estimator_class = XGBoostLimitDepth_TS_Regressor if TS_FORECAST == task else XGBoostLimitDepthEstimator
estimator_class = (
XGBoostLimitDepth_TS_Regressor
if TS_FORECAST == task
else XGBoostLimitDepthEstimator
)
elif "rf" == estimator_name:
estimator_class = RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
estimator_class = (
RF_TS_Regressor if TS_FORECAST == task else RandomForestEstimator
)
elif "lgbm" == estimator_name:
estimator_class = LGBM_TS_Regressor if TS_FORECAST == task else LGBMEstimator
elif "lrl1" == estimator_name:
@@ -108,7 +116,9 @@ def get_estimator_class(task, estimator_name):
elif "catboost" == estimator_name:
estimator_class = CatBoostEstimator
elif "extra_tree" == estimator_name:
estimator_class = ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
estimator_class = (
ExtraTrees_TS_Regressor if TS_FORECAST == task else ExtraTreesEstimator
)
elif "kneighbor" == estimator_name:
estimator_class = KNeighborsEstimator
elif "prophet" in estimator_name:
@@ -207,8 +217,10 @@ def metric_loss_score(
+ ", ".join(huggingface_metric_to_mode.keys())
+ ". Please pass a customized metric function to AutoML.fit(metric=func)"
)
multiplier = -1 if metric_mode == "max" else 1
return score * multiplier
if metric_mode == "max":
return 1 - score
else:
return score
def is_in_sklearn_metric_name_set(metric_name):
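
The new branch returns 1 - score for "max" metrics rather than negating them, so a bounded higher-is-better score such as accuracy maps onto a nonnegative loss. A worked check of that convention (a standalone sketch, not the library function itself):

def to_loss(score, metric_mode):
    # "max" metrics (e.g. accuracy) are inverted into a loss;
    # "min" metrics pass through unchanged.
    return 1 - score if metric_mode == "max" else score

assert to_loss(0.75, "max") == 0.25  # accuracy 0.75 -> loss 0.25
assert to_loss(0.5, "min") == 0.5    # an error-style metric stays as-is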
@@ -409,6 +421,8 @@ def get_val_loss(
log_training_metric,
fit_kwargs,
)
if hasattr(estimator, "intermediate_results"):
metric_for_logging["intermediate_results"] = estimator.intermediate_results
train_time = time.time() - start
return val_loss, metric_for_logging, train_time, pred_time
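
This is the other half of the intermediate-results plumbing: any estimator exposing an intermediate_results attribute gets it copied into the logging dict that later feeds the nested MLflow runs. A sketch with a stand-in estimator:

class StubEstimator:
    # Stand-in for TransformersEstimator, which appends one metric
    # dict per evaluation (values here are made up).
    intermediate_results = [{"eval_loss": 0.9}, {"eval_loss": 0.7}]

metric_for_logging = {}
estimator = StubEstimator()
if hasattr(estimator, "intermediate_results"):
    metric_for_logging["intermediate_results"] = estimator.intermediate_results
print(metric_for_logging)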

View File

@@ -325,6 +325,7 @@ class TransformersEstimator(BaseEstimator):
},
"num_train_epochs": {
"domain": tune.loguniform(lower=0.1, upper=10.0),
"init_value": 3,
},
"per_device_train_batch_size": {
"domain": tune.choice([4, 8, 16, 32]),
@@ -395,12 +396,14 @@ class TransformersEstimator(BaseEstimator):
)
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
import transformers
transformers.logging.set_verbosity_error()
from transformers import EarlyStoppingCallback
from transformers.trainer_utils import set_seed
from transformers import AutoTokenizer
from transformers.data import DataCollatorWithPadding
import transformers
from datasets import Dataset
from .nlp.utils import (
get_num_labels,
@@ -536,8 +539,8 @@ class TransformersEstimator(BaseEstimator):
evaluate_during_training=True,
save_steps=ckpt_freq,
save_total_limit=0,
metric_for_best_model="loss",
fp16=self.custom_hpo_args.fp16,
load_best_model_at_end=True,
**training_args_config,
)
else:
@@ -553,8 +556,8 @@ class TransformersEstimator(BaseEstimator):
evaluation_strategy=IntervalStrategy.STEPS,
save_steps=ckpt_freq,
save_total_limit=0,
metric_for_best_model="loss",
fp16=self.custom_hpo_args.fp16,
load_best_model_at_end=True,
**training_args_config,
)
@@ -577,6 +580,8 @@ class TransformersEstimator(BaseEstimator):
setattr(self._trainer, "_use_ray", self.use_ray)
if self._task in NLG_TASKS:
setattr(self._trainer, "_is_seq2seq", True)
if kwargs.get("gpu_per_trial"):
self._trainer.args._n_gpu = kwargs.get("gpu_per_trial")
self._trainer.train()
self.params[self.ITER_HP] = self._trainer.state.global_step
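
Here the per-trial GPU budget reaches the transformers Trainer by overwriting the private _n_gpu field of its TrainingArguments. A hedged sketch of just that override; _n_gpu is internal API whose behavior may differ across transformers versions, and "tmp_output" is a placeholder directory:

from transformers import TrainingArguments

args = TrainingArguments(output_dir="tmp_output")
gpu_per_trial = 1  # the value that would arrive via **kwargs
if gpu_per_trial:
    # Bypasses the automatic GPU-count detection for this trial.
    args._n_gpu = gpu_per_trial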
@@ -672,6 +677,9 @@ class TransformersEstimator(BaseEstimator):
X_train=self._X_train,
y_train=self._y_train,
)
if not hasattr(self, "intermediate_results"):
self.intermediate_results = []
self.intermediate_results.append(metric_dict)
return metric_dict
def _init_model_for_predict(self, X_test):
@@ -699,6 +707,7 @@ class TransformersEstimator(BaseEstimator):
)
if self._task == MULTICHOICECLASSIFICATION
else None,
compute_metrics=self._compute_metrics_by_dataset_name,
)
return test_dataset, training_args

View File

@@ -86,28 +86,3 @@ class TrainerForAuto(Seq2SeqTrainer):
self.ckpt_to_global_step = {ckpt_dir: self.state.global_step}
self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {}
return metrics
# TODO: if your task is SUMMARIZATION, you need a different
# class Seq2SeqTrainerForAuto, uncomment the code below
# Note: I have implemented it here,
# but I don't know whether it's correct, you need to debug
# Seq2SeqTrainerForAuto to make sure it's correct
# class Seq2SeqTrainerForAuto(TrainerForAuto):
# def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
# """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
# self._is_seq2seq = True
# TrainerForAuto.evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
# # super(TrainerForAuto, self).evaluate(
# # eval_dataset, ignore_keys, metric_key_prefix
# # )
# TODO: if your task is QUESTIONANSWERING, uncomment the code below
# by adapting the code in https://github.com/huggingface/transformers/blob/master/examples/pytorch/question-answering/trainer_qa.py#L28
# class QATrainerForAuto(TrainerForAuto):
# pass
# TODO: if your task is QUESTIONANSWERING, do the post processing here

View File

@@ -18,7 +18,7 @@ def load_default_huggingface_metric_for_task(task):
if task == SEQCLASSIFICATION:
return "accuracy"
elif task == SEQREGRESSION:
return "rmse"
return "r2"
elif task == SUMMARIZATION:
return "rouge"
elif task == MULTICHOICECLASSIFICATION:
@@ -400,6 +400,10 @@ def get_trial_fold_name(local_dir, trial_config, trial_id):
def load_model(checkpoint_path, task, num_labels, per_model_config=None):
import transformers
transformers.logging.set_verbosity_error()
from transformers import AutoConfig
from .huggingface.switch_head_auto import (
AutoSeqClassificationHead,
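
The default-metric switch above (rmse to r2 for sequence regression) can be exercised directly. A hedged sketch, assuming the FLAML task constants are the plain strings used elsewhere in this diff:

from flaml.nlp.utils import load_default_huggingface_metric_for_task

# Assumes SEQREGRESSION == "seq-regression" and
# SEQCLASSIFICATION == "seq-classification".
assert load_default_huggingface_metric_for_task("seq-regression") == "r2"
assert load_default_huggingface_metric_for_task("seq-classification") == "accuracy"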

test/nlp/run_gpu.py (new file, 82 lines)
View File

@@ -0,0 +1,82 @@
import sys
import pytest
import pickle
import shutil
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def _test_hf_data():
from flaml import AutoML
import requests
from datasets import load_dataset
try:
train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
dev_dataset = load_dataset("glue", "mrpc", split="validation[:1%]").to_pandas()
test_dataset = load_dataset("glue", "mrpc", split="test[:1%]").to_pandas()
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"
X_train = train_dataset[custom_sent_keys]
y_train = train_dataset[label_key]
X_val = dev_dataset[custom_sent_keys]
y_val = dev_dataset[label_key]
X_test = test_dataset[custom_sent_keys]
automl = AutoML()
automl_settings = {
"gpu_per_trial": 1,
"max_iter": 2,
"time_budget": 5000,
"task": "seq-classification",
"metric": "accuracy",
"log_file_name": "seqclass.log",
"use_ray": True,
}
automl_settings["custom_hpo_args"] = {
"model_path": "facebook/muppet-roberta-base",
"output_dir": "test/data/output/",
"ckpt_per_epoch": 5,
"fp16": True,
}
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
automl = AutoML()
automl.retrain_from_log(
X_train=X_train,
y_train=y_train,
train_full=True,
record_id=0,
**automl_settings
)
with open("automl.pkl", "wb") as f:
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
with open("automl.pkl", "rb") as f:
automl = pickle.load(f)
shutil.rmtree("test/data/output/")
automl.predict(X_test)
automl.predict(["test test", "test test"])
automl.predict(
[
["test test", "test test"],
["test test", "test test"],
["test test", "test test"],
]
)
automl.predict_proba(X_test)
print(automl.classes_)
if __name__ == "__main__":
_test_hf_data()

View File

@@ -17,6 +17,7 @@ def custom_metric(
):
from datasets import Dataset
from flaml.model import TransformersEstimator
from flaml.nlp.utils import load_default_huggingface_metric_for_task
if estimator._trainer is None:
estimator._init_model_for_predict(X_test)
@@ -31,12 +32,13 @@ def custom_metric(
X_test, _ = estimator._preprocess(X_test)
eval_dataset = Dataset.from_pandas(X_test)
trainer_compute_metrics_cache = trainer.compute_metrics
trainer.compute_metrics = None
estimator_metric_cache = estimator._metric
estimator._metric = load_default_huggingface_metric_for_task(estimator._task)
metrics = trainer.evaluate(eval_dataset)
trainer.compute_metrics = trainer_compute_metrics_cache
return metrics["eval_loss"], metrics
estimator._metric = estimator_metric_cache
return metrics["eval_val_loss"], metrics
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")

View File

@@ -38,7 +38,7 @@ def test_regression():
"max_iter": 2,
"time_budget": 5,
"task": "seq-regression",
"metric": "rmse",
"metric": "pearsonr",
"starting_points": {"transformer": {"num_train_epochs": 1}},
"use_ray": True,
}

View File

@@ -6164,15 +6164,7 @@ postcss-zindex@^5.0.1:
resolved "https://registry.yarnpkg.com/postcss-zindex/-/postcss-zindex-5.0.1.tgz#c585724beb69d356af8c7e68847b28d6298ece03"
integrity sha512-nwgtJJys+XmmSGoYCcgkf/VczP8Mp/0OfSv3v0+fw0uABY4yxw+eFs0Xp9nAZHIKnS5j+e9ywQ+RD+ONyvl5pA==
"postcss@5 - 7":
version "7.0.39"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.39.tgz#9624375d965630e2e1f2c02a935c82a59cb48309"
integrity sha512-yioayjNbHn6z1/Bywyb2Y4s3yvDAeXGOyxqD+LnVOinq6Mdmd++SW2wUNVzavyyHxd6+DxzWGIuosg6P1Rj8uA==
dependencies:
picocolors "^0.2.1"
source-map "^0.6.1"
postcss@^8.2.15, postcss@^8.3.11, postcss@^8.3.5, postcss@^8.3.7:
postcss@^8.2.15, postcss@^8.3.11, postcss@^8.3.5, postcss@^8.3.7, "postcss@5 - 7":
version "8.4.4"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.4.tgz#d53d4ec6a75fd62557a66bb41978bf47ff0c2869"
integrity sha512-joU6fBsN6EIer28Lj6GDFoC/5yOZzLCfn0zHAn/MYXI7aPt4m4hK5KC5ovEZXy+lnCjmYIbQWngvju2ddyEr8Q==