handling nlp divide by zero (#926)

* handling nlp divide by zero * catching zerodivisionerror * catching zerodivisionerror * catching zerodivisionerror * addressing comments * addressing comments * updating test case * update * add blank to last line * update nlp notebook * rerun * rerun * sync with main * add model selection for nlg * addressing keyerror * add raise exception * update * fix bug * revert * updating automl_nlp * Update flaml/automl/model.py Co-authored-by: Zvi Baratz <z.baratz@gmail.com> * address comments * address comments --------- Co-authored-by: Li Jiang <lijiang1@microsoft.com> Co-authored-by: Zvi Baratz <z.baratz@gmail.com>
2026-01-24 15:18:01 -05:00 · 2023-04-09 12:53:30 -04:00
parent 82f0a4309d
commit ef5a17cd83
3 changed files with 18 additions and 10 deletions
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -594,6 +594,7 @@ class AutoML(BaseEstimator):
            return None
        X = self._state.task.preprocess(X, self._transformer)
        y_pred = estimator.predict(X, **pred_kwargs)
+
        if (
            isinstance(y_pred, np.ndarray)
            and y_pred.ndim > 1
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -1191,8 +1191,13 @@ class TransformersEstimator(BaseEstimator):
        test_dataset = Dataset.from_pandas(X_test)

        new_trainer = self._init_model_for_predict()
-        predictions = new_trainer.predict(test_dataset)
-        return predictions.predictions
+        try:
+            predictions = new_trainer.predict(test_dataset).predictions
+        except ZeroDivisionError:
+            logger.warning("Zero division error appeared in HuggingFace Transformers.")
+            predictions = np.array([-0.05] * len(test_dataset))
+        else:
+            return predictions

    def score(self, X_val: DataFrame, y_val: Series, **kwargs):
        import transformers
@@ -1222,13 +1227,13 @@ class TransformersEstimator(BaseEstimator):

        new_trainer = self._init_model_for_predict()

-        if self._task not in NLG_TASKS:
-            predictions = new_trainer.predict(test_dataset)
-        else:
-            predictions = new_trainer.predict(
-                test_dataset,
-                metric_key_prefix="predict",
-            )
+        kwargs = {} if self._task not in NLG_TASKS else {"metric_key_prefix": "predict"}
+        try:
+            predictions = new_trainer.predict(test_dataset, **kwargs)
+        except ZeroDivisionError:
+            logger.warning("Zero division error appeared in HuggingFace Transformers.")
+            predictions = np.array([0] * len(test_dataset))
+
        post_y_pred, _ = postprocess_prediction_and_true(
            task=self._task,
            y_pred=predictions.predictions,
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@@ -62,7 +62,9 @@ def test_hf_data():
        **automl_settings
    )
    automl.predict(X_test, **{"per_device_eval_batch_size": 2})
-    automl.predict(["test test", "test test"])
+    automl.predict(["", ""])
+    automl.predict_proba(["", ""])
+
    automl.predict(
        [
            ["test test", "test test"],