handling NLP divide by zero (#926)

* handling NLP divide by zero

* catching ZeroDivisionError

* catching ZeroDivisionError

* catching ZeroDivisionError

* addressing comments

* addressing comments

* updating test case

* update

* add blank to last line

* update nlp notebook

* rerun

* rerun

* sync with main

* add model selection for NLG

* addressing KeyError

* add raise exception

* update

* fix bug

* revert

* updating automl_nlp

* Update flaml/automl/model.py

Co-authored-by: Zvi Baratz <z.baratz@gmail.com>

* address comments

* address comments

---------

Co-authored-by: Li Jiang <lijiang1@microsoft.com>
Co-authored-by: Zvi Baratz <z.baratz@gmail.com>
This commit is contained in:
Susan Xueqing Liu
2023-04-09 12:53:30 -04:00
committed by GitHub
parent 82f0a4309d
commit ef5a17cd83
3 changed files with 18 additions and 10 deletions

View File

@@ -594,6 +594,7 @@ class AutoML(BaseEstimator):
return None
X = self._state.task.preprocess(X, self._transformer)
y_pred = estimator.predict(X, **pred_kwargs)
if (
isinstance(y_pred, np.ndarray)
and y_pred.ndim > 1

View File

@@ -1191,8 +1191,13 @@ class TransformersEstimator(BaseEstimator):
test_dataset = Dataset.from_pandas(X_test)
new_trainer = self._init_model_for_predict()
predictions = new_trainer.predict(test_dataset)
return predictions.predictions
try:
predictions = new_trainer.predict(test_dataset).predictions
except ZeroDivisionError:
logger.warning("Zero division error appeared in HuggingFace Transformers.")
predictions = np.array([-0.05] * len(test_dataset))
else:
return predictions
def score(self, X_val: DataFrame, y_val: Series, **kwargs):
import transformers
@@ -1222,13 +1227,13 @@ class TransformersEstimator(BaseEstimator):
new_trainer = self._init_model_for_predict()
if self._task not in NLG_TASKS:
predictions = new_trainer.predict(test_dataset)
else:
predictions = new_trainer.predict(
test_dataset,
metric_key_prefix="predict",
)
kwargs = {} if self._task not in NLG_TASKS else {"metric_key_prefix": "predict"}
try:
predictions = new_trainer.predict(test_dataset, **kwargs)
except ZeroDivisionError:
logger.warning("Zero division error appeared in HuggingFace Transformers.")
predictions = np.array([0] * len(test_dataset))
post_y_pred, _ = postprocess_prediction_and_true(
task=self._task,
y_pred=predictions.predictions,

View File

@@ -62,7 +62,9 @@ def test_hf_data():
**automl_settings
)
automl.predict(X_test, **{"per_device_eval_batch_size": 2})
automl.predict(["test test", "test test"])
automl.predict(["", ""])
automl.predict_proba(["", ""])
automl.predict(
[
["test test", "test test"],