From 28511340528dfc9def29862f5076b4516eb7305f Mon Sep 17 00:00:00 2001
From: Qiaochu Song <qsong6@stevens.edu>
Date: Thu, 19 May 2022 11:43:34 -0400
Subject: [PATCH] Quick-fix (#539)

* Fix DataTransformer docstring (drop stale `y`/`task` entries); apply the label transformer to `y` in AutoML.score
---
 flaml/automl.py           |  2 ++
 flaml/data.py             |  4 ---
 test/automl/test_score.py | 58 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index 1e853f78e..348bf9cc4 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -848,6 +848,8 @@ class AutoML(BaseEstimator):
             )
             return None
         X = self._preprocess(X)
+        if self._label_transformer:
+            y = self._label_transformer.transform(y)
         return estimator.score(X, y, **kwargs)
 
     def predict(
diff --git a/flaml/data.py b/flaml/data.py
index be7a067e6..149cd8983 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -384,13 +384,9 @@ class DataTransformer:
 
         Args:
             X: A numpy array or a pandas dataframe of training data.
-            y: A numpy array or a pandas series of labels.
-            task: A string of the task type, e.g.,
-                'classification', 'regression', 'ts_forecast', 'rank'.
 
         Returns:
             X: Processed numpy array or pandas dataframe of training data.
-            y: Processed numpy array or pandas series of labels.
         """
         X = X.copy()
 
diff --git a/test/automl/test_score.py b/test/automl/test_score.py
index 2e62a833c..c9b879066 100644
--- a/test/automl/test_score.py
+++ b/test/automl/test_score.py
@@ -212,6 +212,64 @@ class TestScore:
             except NotImplementedError:
                 pass
 
+    def test_class(self):
+        # Test scoring on a classification task whose string labels need encoding.
+        X = pd.DataFrame(
+            {
+                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
+                "f2": [
+                    3.0,
+                    16.0,
+                    10.0,
+                    12.0,
+                    3.0,
+                    14.0,
+                    11.0,
+                    12.0,
+                    5.0,
+                    14.0,
+                    20.0,
+                    16.0,
+                    15.0,
+                    11.0,
+                ],
+            }
+        )
+        y = pd.Series(
+            [
+                "a",
+                "b",
+                "c",
+                "d",
+                "a",
+                "b",
+                "c",
+                "d",
+                "a",
+                "b",
+                "c",
+                "d",
+                "a",
+                "b",
+            ]
+        )
+
+        automl = AutoML()
+
+        automl_settings = {
+            "time_budget": 6,
+            "task": "classification",
+            "n_jobs": 1,
+            "estimator_list": ["xgboost"],
+            "metric": "accuracy",
+            "log_training_metric": True,
+        }
+
+        automl.fit(X, y, **automl_settings)
+        assert automl._label_transformer is not None
+        assert automl.score(X, y) > 0
+        automl.pickle("automl.pkl")
+
 
 if __name__ == "__main__":
     test = TestScore()