From fb59bb992843aefd67a49f876eae4175ee3622d4 Mon Sep 17 00:00:00 2001 From: Xueqing Liu Date: Fri, 3 Dec 2021 12:45:16 -0500 Subject: [PATCH] adding TODOs for NLP module, so students can implement other tasks easier (#321) * fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * adding TODOs for NLP module --- flaml/data.py | 9 ++- flaml/model.py | 48 ++++++++++++++-- flaml/nlp/huggingface/trainer.py | 48 +++++++++++++++- flaml/nlp/utils.py | 66 ++++++++++++++++------ test/nlp/test_autohf.py | 38 +++++++------ test/nlp/test_autohf_classificationhead.py | 13 ++++- test/nlp/test_autohf_cv.py | 15 +++-- test/nlp/test_autohf_maxiter1.py | 15 +++-- test/nlp/test_autohf_regression.py | 25 +++++--- 9 files changed, 214 insertions(+), 63 deletions(-) diff --git a/flaml/data.py b/flaml/data.py index b21150a68..427414bdb 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -12,6 +12,7 @@ from .training_log import training_log_reader from datetime import datetime from typing import Dict, Union, List +# TODO: if your task is not specified in here, define your task as an all-capitalized word SEQCLASSIFICATION = "seq-classification" CLASSIFICATION = ("binary", "multi", "classification", SEQCLASSIFICATION) SEQREGRESSION = "seq-regression" @@ -20,10 +21,16 @@ TS_FORECAST = "ts_forecast" TS_TIMESTAMP_COL = "ds" TS_VALUE_COL = "y" FORECAST = "forecast" +SUMMARIZATION = "summarization" +NLG_TASKS = (SUMMARIZATION,) +NLU_TASKS = ( + SEQREGRESSION, + SEQCLASSIFICATION, +) def _is_nlp_task(task): - if task in [SEQCLASSIFICATION, SEQREGRESSION]: + if task in NLU_TASKS + NLG_TASKS: return True else: return False diff --git a/flaml/model.py b/flaml/model.py index bc37bd6a3..ad2261882 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -23,6 +23,8 @@ from .data import ( TS_FORECAST, TS_TIMESTAMP_COL, TS_VALUE_COL, + SEQCLASSIFICATION, + SEQREGRESSION, ) import pandas as pd @@ -303,8 +305,8 @@ class TransformersEstimator(BaseEstimator): return 
train_df @classmethod - def search_space(cls, **params): - return { + def search_space(cls, data_size, task, **params): + search_space_dict = { "learning_rate": { "domain": tune.loguniform(lower=1e-6, upper=1e-3), "init_value": 1e-5, @@ -331,6 +333,14 @@ class TransformersEstimator(BaseEstimator): "seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42}, "global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize}, } + # TODO: if self._task == SUMMARIZATION, uncomment the code below, SET the search space for + # "num_beams" in search_space_dict using + # search_space_dict["num_beams"] = {...} + + # if task in NLG_TASKS: + # search_space_dict["num_beams"] = {"domain": tune.choice(...)} + + return search_space_dict def _init_hpo_args(self, automl_fit_kwargs: dict = None): from .nlp.utils import HPOArgs @@ -356,7 +366,15 @@ class TransformersEstimator(BaseEstimator): def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs): from transformers import EarlyStoppingCallback from transformers.trainer_utils import set_seed - from transformers import AutoTokenizer, TrainingArguments + from transformers import AutoTokenizer + + # TODO: if self._task == SUMMARIZATION, uncomment the code below (add indentation before + # from transformers import TrainingArguments) + # if self._task in NLG_TASKS: + # from transformers import Seq2SeqTrainingArguments as TrainingArguments + # else: + from transformers import TrainingArguments + import transformers from datasets import Dataset from .nlp.utils import ( @@ -367,6 +385,13 @@ class TransformersEstimator(BaseEstimator): get_trial_fold_name, date_str, ) + + # TODO: if self._task == QUESTIONANSWERING, uncomment the code below (add indentation before + # from .nlp.huggingface.trainer import TrainerForAuto) + + # if self._task in NLG_TASKS: + # from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto + # else: from .nlp.huggingface.trainer import TrainerForAuto this_params = 
self.params @@ -414,6 +439,13 @@ class TransformersEstimator(BaseEstimator): X_train = self._preprocess(X_train, self._task, **kwargs) train_dataset = Dataset.from_pandas(self._join(X_train, y_train)) + + # TODO: set a breakpoint here, observe the resulting train_dataset, + # compare it with the output of the tokenized results in your transformer example + # for example, if your task is MULTIPLECHOICE, you need to compare train_dataset with + # the output of https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329 + # make sure they are the same + if X_val is not None: X_val = self._preprocess(X_val, self._task, **kwargs) eval_dataset = Dataset.from_pandas(self._join(X_val, y_val)) @@ -528,6 +560,7 @@ class TransformersEstimator(BaseEstimator): logger.warning("checkpoint {} not found".format(ckpt_location)) def cleanup(self): + super().cleanup() if hasattr(self, "_ckpt_remains"): for each_ckpt in self._ckpt_remains: self._delete_one_ckpt(each_ckpt) @@ -558,7 +591,6 @@ class TransformersEstimator(BaseEstimator): def _compute_metrics_by_dataset_name(self, eval_pred): from .ml import sklearn_metric_loss_score - from .data import SEQREGRESSION import datasets from .nlp.utils import load_default_huggingface_metric_for_task @@ -638,7 +670,13 @@ class TransformersEstimator(BaseEstimator): self._model = TrainerForAuto(model=best_model, args=training_args) predictions = self._model.predict(test_dataset) - return np.argmax(predictions.predictions, axis=1) + if self._task == SEQCLASSIFICATION: + return np.argmax(predictions.predictions, axis=1) + elif self._task == SEQREGRESSION: + return predictions.predictions + # TODO: elif self._task == your task, return the corresponding prediction + # e.g., if your task == QUESTIONANSWERING, you need to return the answer instead + # of the index def config2params(cls, config: dict) -> dict: params = config.copy() diff --git a/flaml/nlp/huggingface/trainer.py 
b/flaml/nlp/huggingface/trainer.py index 2eb3a4c5c..a4a005605 100644 --- a/flaml/nlp/huggingface/trainer.py +++ b/flaml/nlp/huggingface/trainer.py @@ -2,12 +2,19 @@ import os try: from transformers import Trainer as TFTrainer + from transformers import Seq2SeqTrainer except ImportError: TFTrainer = object class TrainerForAuto(TFTrainer): - def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"): + def evaluate( + self, + eval_dataset=None, + ignore_keys=None, + metric_key_prefix="eval", + is_seq2seq=False, + ): """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path""" from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR @@ -15,8 +22,21 @@ class TrainerForAuto(TFTrainer): self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}" ) eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset + + # TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below (add indentation before metrics = eval_dataset... 
+ + # if is_seq2seq: + # metrics = eval_dataset and super().evaluate( + # eval_dataset, + # ignore_keys, + # metric_key_prefix, + # num_beams=self.args.num_beams, + # ) + # else: metrics = eval_dataset and super().evaluate( - eval_dataset, ignore_keys, metric_key_prefix + eval_dataset, + ignore_keys, + metric_key_prefix, ) if metrics: for key in list(metrics.keys()): @@ -29,3 +49,27 @@ class TrainerForAuto(TFTrainer): else: self.ckpt_to_global_step = {ckpt_dir: self.state.global_step} self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {} + + +# TODO: if your task is SUMMARIZATION, you need a different +# class Seq2SeqTrainerForAuto, uncomment the code below +# Note: I have implemented it here, +# but I don't know whether it's correct, you need to debug +# Seq2SeqTrainerForAuto to make sure it's correct + + +# class Seq2SeqTrainerForAuto(Seq2SeqTrainer, TrainerForAuto): +# def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"): +# """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path""" +# super(TrainerForAuto).evaluate( +# eval_dataset, ignore_keys, metric_key_prefix, is_seq2seq=True +# ) + + +# TODO: if your task is QUESTIONANSWERING, uncomment the code below +# by adapting the code in https://github.com/huggingface/transformers/blob/master/examples/pytorch/question-answering/trainer_qa.py#L28 + + +# class QATrainerForAuto(TrainerForAuto): +# pass +# TODO: if your task is QUESTIONANSWERING, do the post processing here diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 8a02a4c43..97526081a 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -10,6 +10,14 @@ def load_default_huggingface_metric_for_task(task): return "accuracy", "max" elif task == SEQREGRESSION: return "rmse", "max" + # TODO: elif task == your task, return the default metric name for your task, + # e.g., if task == MULTIPLECHOICE, return "accuracy" + # notice this metric name has to be in ['accuracy', 'bertscore', 'bleu', 
'bleurt', + # 'cer', 'chrf', 'code_eval', 'comet', 'competition_math', 'coval', 'cuad', + # 'f1', 'gleu', 'glue', 'google_bleu', 'indic_glue', 'matthews_correlation', + # 'meteor', 'pearsonr', 'precision', 'recall', 'rouge', 'sacrebleu', 'sari', + # 'seqeval', 'spearmanr', 'squad', 'squad_v2', 'super_glue', 'ter', 'wer', + # 'wiki_split', 'xnli'] global tokenized_column_names @@ -20,6 +28,11 @@ def tokenize_text(X, task, custom_hpo_task): if task in (SEQCLASSIFICATION, SEQREGRESSION): return tokenize_text_seqclassification(X, custom_hpo_task) + # TODO: elif task == your task, return the tokenized result + # for example, if your task == MULTIPLE CHOICE, you should + # create a function named tokenize_text_multiplechoice(X, custom_hpo_args) + # and what it does is the same as preprocess_function at + # https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329 def tokenize_text_seqclassification(X, custom_hpo_args): @@ -79,6 +92,8 @@ def get_num_labels(task, y_train): return 1 elif task == SEQCLASSIFICATION: return len(set(y_train)) + else: + return None def _clean_value(value: Any) -> str: @@ -155,25 +170,43 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None): def get_this_model(): from transformers import AutoModelForSequenceClassification - return AutoModelForSequenceClassification.from_pretrained( - checkpoint_path, config=model_config - ) + if task in (SEQCLASSIFICATION, SEQREGRESSION): + return AutoModelForSequenceClassification.from_pretrained( + checkpoint_path, config=model_config + ) + # TODO: elif task == your task, fill in the line in your transformers example + # that loads the model, e.g., if task == MULTIPLE CHOICE, according to + # https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L298 + # you can return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config) def 
is_pretrained_model_in_classification_head_list(model_type): return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING def _set_model_config(checkpoint_path): - if per_model_config and len(per_model_config) > 0: - model_config = AutoConfig.from_pretrained( - checkpoint_path, - num_labels=model_config_num_labels, - **per_model_config, - ) - else: - model_config = AutoConfig.from_pretrained( - checkpoint_path, num_labels=model_config_num_labels - ) - return model_config + if task in (SEQCLASSIFICATION, SEQREGRESSION): + if per_model_config and len(per_model_config) > 0: + model_config = AutoConfig.from_pretrained( + checkpoint_path, + num_labels=model_config_num_labels, + **per_model_config, + ) + else: + model_config = AutoConfig.from_pretrained( + checkpoint_path, num_labels=model_config_num_labels + ) + return model_config + # TODO: elif task == your task, uncomment the code below: + # else: + # if per_model_config and len(per_model_config) > 0: + # model_config = AutoConfig.from_pretrained( + # checkpoint_path, + # **per_model_config, + # ) + # else: + # model_config = AutoConfig.from_pretrained( + # checkpoint_path + # ) + # return model_config if task == SEQCLASSIFICATION: num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels @@ -199,8 +232,9 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None): this_model = get_this_model() this_model.resize_token_embeddings(this_vocab_size) return this_model - elif task == SEQREGRESSION: - model_config_num_labels = 1 + else: + if task == SEQREGRESSION: + model_config_num_labels = 1 model_config = _set_model_config(checkpoint_path) this_model = get_this_model() return this_model diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py index e7436bcc2..6250df5ce 100644 --- a/test/nlp/test_autohf.py +++ b/test/nlp/test_autohf.py @@ -1,22 +1,25 @@ -import os +import sys import pytest -@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os") 
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os") def test_hf_data(): from flaml import AutoML - + import requests from datasets import load_dataset - train_dataset = ( - load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4] - ) - dev_dataset = ( - load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4] - ) - test_dataset = ( - load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4] - ) + try: + train_dataset = ( + load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4] + ) + dev_dataset = ( + load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4] + ) + test_dataset = ( + load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4] + ) + except requests.exceptions.ConnectionError: + return custom_sent_keys = ["sentence1", "sentence2"] label_key = "label" @@ -75,12 +78,15 @@ def test_hf_data(): def _test_custom_data(): from flaml import AutoML - + import requests import pandas as pd - train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3) - dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3) - test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3) + try: + train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3) + dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3) + test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3) + except requests.exceptions.ConnectionError: + pass custom_sent_keys = ["#1 String", "#2 String"] label_key = "Quality" diff --git a/test/nlp/test_autohf_classificationhead.py b/test/nlp/test_autohf_classificationhead.py index c81cd1069..a8740bda3 100644 --- a/test/nlp/test_autohf_classificationhead.py +++ b/test/nlp/test_autohf_classificationhead.py @@ -1,10 +1,17 @@ def test_classification_head(): from flaml import AutoML - + import requests from datasets import load_dataset - train_dataset = 
load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10] - dev_dataset = load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10] + try: + train_dataset = ( + load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10] + ) + dev_dataset = ( + load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10] + ) + except requests.exceptions.ConnectionError: + return custom_sent_keys = ["text"] label_key = "label" diff --git a/test/nlp/test_autohf_cv.py b/test/nlp/test_autohf_cv.py index 0e75a32ca..9d723e7b8 100644 --- a/test/nlp/test_autohf_cv.py +++ b/test/nlp/test_autohf_cv.py @@ -1,16 +1,19 @@ -import os +import sys import pytest -@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os") +@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os") def test_cv(): from flaml import AutoML - + import requests from datasets import load_dataset - train_dataset = ( - load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4] - ) + try: + train_dataset = ( + load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4] + ) + except requests.exceptions.ConnectionError: + return custom_sent_keys = ["sentence1", "sentence2"] label_key = "label" diff --git a/test/nlp/test_autohf_maxiter1.py b/test/nlp/test_autohf_maxiter1.py index 0fe72b189..c389b0df0 100644 --- a/test/nlp/test_autohf_maxiter1.py +++ b/test/nlp/test_autohf_maxiter1.py @@ -1,15 +1,20 @@ -import os +import sys import pytest -@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os") +@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os") def test_max_iter_1(): from flaml import AutoML - + import requests from datasets import load_dataset - train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4] - dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4] + try: + train_dataset = ( + load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4] 
+ ) + dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4] + except requests.exceptions.ConnectionError: + return custom_sent_keys = ["sentence1", "sentence2"] label_key = "label" diff --git a/test/nlp/test_autohf_regression.py b/test/nlp/test_autohf_regression.py index afd24a410..a4b4877db 100644 --- a/test/nlp/test_autohf_regression.py +++ b/test/nlp/test_autohf_regression.py @@ -1,23 +1,26 @@ -import os +import sys import pytest -@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os") +@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os") def test_regression(): try: import ray except ImportError: return from flaml import AutoML - + import requests from datasets import load_dataset - train_dataset = ( - load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20] - ) - dev_dataset = ( - load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20] - ) + try: + train_dataset = ( + load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20] + ) + dev_dataset = ( + load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20] + ) + except requests.exceptions.ConnectionError: + return custom_sent_keys = ["sentence1", "sentence2"] label_key = "label" @@ -50,3 +53,7 @@ def test_regression(): automl.fit( X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings ) + + +if __name__ == "__main__": + test_regression()