From a701cd82f871249923d3c92c74c557fbff64ffd1 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Mon, 10 Apr 2023 21:50:40 +0200
Subject: [PATCH] set black with 120 line length (#975)

* set black with 120 line length

* apply pre-commit

* apply black
---
 .pre-commit-config.yaml                       |   1 +
 flaml/autogen/code_utils.py                   |  16 +-
 flaml/autogen/oai/completion.py               | 126 ++----
 flaml/automl/automl.py                        | 368 +++++-------------
 flaml/automl/data.py                          |  43 +-
 flaml/automl/ml.py                            |  81 +---
 flaml/automl/model.py                         | 264 +++----------
 flaml/automl/nlp/huggingface/data_collator.py |  13 +-
 flaml/automl/nlp/huggingface/trainer.py       |  16 +-
 flaml/automl/nlp/huggingface/training_args.py |  36 +-
 flaml/automl/nlp/huggingface/utils.py         |  62 +--
 flaml/automl/nlp/utils.py                     |  16 +-
 flaml/automl/spark/metrics.py                 |  18 +-
 flaml/automl/spark/utils.py                   |  29 +-
 flaml/automl/state.py                         |  66 +---
 flaml/automl/task/generic_task.py             | 219 +++--------
 flaml/automl/task/task.py                     |   4 +-
 flaml/automl/training_log.py                  |   4 +-
 flaml/default/estimator.py                    |  33 +-
 flaml/default/greedy.py                       |  13 +-
 flaml/default/portfolio.py                    |  26 +-
 flaml/default/regret.py                       |  14 +-
 flaml/default/suggest.py                      |  37 +-
 flaml/onlineml/autovw.py                      |  29 +-
 flaml/onlineml/trial.py                       |  40 +-
 flaml/onlineml/trial_runner.py                |  72 +---
 flaml/tune/analysis.py                        |   9 +-
 flaml/tune/result.py                          |   4 +-
 flaml/tune/sample.py                          |  70 +---
 flaml/tune/scheduler/online_scheduler.py      |   9 +-
 flaml/tune/searcher/blendsearch.py            | 115 ++----
 flaml/tune/searcher/flow2.py                  | 144 ++-----
 flaml/tune/searcher/online_searcher.py        |  69 +---
 flaml/tune/searcher/search_thread.py          |  47 +--
 flaml/tune/searcher/suggestion.py             | 128 ++----
 flaml/tune/searcher/variant_generator.py      |  26 +-
 flaml/tune/space.py                           |  84 +---
 flaml/tune/spark/utils.py                     |  10 +-
 flaml/tune/trial.py                           |  16 +-
 flaml/tune/trial_runner.py                    |   8 +-
 flaml/tune/tune.py                            | 103 ++---
 flaml/tune/utils.py                           |   6 +-
 test/automl/test_classification.py            |  24 +-
 test/automl/test_constraints.py               |  22 +-
 test/automl/test_forecast.py                  |  45 +--
 test/automl/test_multiclass.py                |  76 +---
 test/automl/test_notebook_example.py          |  12 +-
 test/automl/test_python_log.py                |  10 +-
 test/automl/test_regression.py                |  33 +-
 test/automl/test_score.py                     |  11 +-
 test/automl/test_split.py                     |  20 +-
 test/automl/test_training_log.py              |  10 +-
 test/automl/test_warmstart.py                 |  38 +-
 test/automl/test_xgboost2d.py                 |   8 +-
 test/automl/test_xgboost2d_sample_size.py     |   4 +-
 test/default/test_defaults.py                 |  44 +--
 test/default_lgbm.py                          |   4 +-
 test/nlp/test_autohf.py                       |  16 +-
 test/nlp/test_autohf_classificationhead.py    |  44 +--
 test/nlp/test_autohf_custom_metric.py         |  12 +-
 test/nlp/test_autohf_cv.py                    |   4 +-
 test/nlp/test_autohf_loadargs.py              |   4 +-
 .../test_autohf_multichoice_classification.py |  12 +-
 test/nlp/test_autohf_regression.py            |   4 +-
 test/nlp/test_autohf_summarization.py         |  20 +-
 test/nlp/test_autohf_tokenclassification.py   |  24 +-
 test/nlp/test_default.py                      |  28 +-
 test/nlp/utils.py                             |   6 +-
 test/nni/mnist.py                             |  16 +-
 test/object_store.py                          |  18 +-
 test/openai/test_completion.py                |   8 +-
 .../submit_train_pipeline.py                  |   4 +-
 .../submit_tuner_pipeline.py                  |   8 +-
 test/ray/distribute_tune.py                   |   4 +-
 test/rep.py                                   |   4 +-
 test/run_distribute_automl.py                 |   8 +-
 test/run_distribute_tune.py                   |   8 +-
 test/spark/test_0sparkml.py                   |  20 +-
 test/spark/test_automl.py                     |  12 +-
 test/spark/test_exceptions.py                 |  12 +-
 test/spark/test_multiclass.py                 |  62 +--
 test/spark/test_notebook.py                   |   4 +-
 test/spark/test_overtime.py                   |  10 +-
 test/spark/test_performance.py                |  12 +-
 test/spark/test_tune.py                       |   8 +-
 test/spark/test_utils.py                      |  20 +-
 test/test_autovw.py                           |  59 +--
 test/test_gpu.py                              |  16 +-
 test/tune/test_flaml_raytune_consistency.py   |  28 +-
 test/tune/test_lexiflow.py                    |   8 +-
 test/tune/test_pytorch_cifar10.py             |  50 +--
 test/tune/test_reproducibility.py             |  12 +-
 test/tune/test_restore.py                     |   4 +-
 test/tune/test_scheduler.py                   |   8 +-
 test/tune/test_searcher.py                    |  24 +-
 test/tune/test_tune.py                        |  11 +-
 test/tune_example.py                          |  18 +-
 97 files changed, 809 insertions(+), 2696 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 076613ecf..1ad683e64 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,6 +11,7 @@ repos:
     rev: 23.1.0
     hooks:
     - id: black
+      args: ["--line-length=120"]
   - repo: https://github.com/pycqa/flake8
     rev: 6.0.0
     hooks:
diff --git a/flaml/autogen/code_utils.py b/flaml/autogen/code_utils.py
index abf9a1030..e7215100d 100644
--- a/flaml/autogen/code_utils.py
+++ b/flaml/autogen/code_utils.py
@@ -27,9 +27,7 @@ def execute_code(code: str, max_exec_time: Optional[int] = 3):
     return int(result.returncode == 0)
 
 
-def generate_assertions(
-    definition: str, model: Optional[str] = "gpt-3.5-turbo"
-) -> Tuple[str, float]:
+def generate_assertions(definition: str, model: Optional[str] = "gpt-3.5-turbo") -> Tuple[str, float]:
     """Generate assertions for a function.
 
     Args:
@@ -112,9 +110,7 @@ def eval_function_completions(
         for i in range(n):
             response = responses[i] = _remove_check(responses[i])
             code = (
-                f"{response}\n{assertions}"
-                if response.startswith("def")
-                else f"{definition}{response}\n{assertions}"
+                f"{response}\n{assertions}" if response.startswith("def") else f"{definition}{response}\n{assertions}"
             )
             succeed_assertions = execute_code(code)
             if succeed_assertions:
@@ -149,9 +145,7 @@ def eval_function_completions(
 def implement(
     definition: str,
     configs: List[Dict],
-    assertions: Optional[
-        Union[str, Callable[[str], Tuple[str, float]]]
-    ] = generate_assertions,
+    assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions,
 ) -> Tuple[str, float]:
     """Implement a function from a definition.
 
@@ -172,9 +166,7 @@ def implement(
         response = oai.Completion.create({"definition": definition}, **config)
         cost += oai.Completion.cost(config["model"], response)
         responses = oai.Completion.extract_text(response)
-        metrics = eval_function_completions(
-            responses, definition, assertions=assertions
-        )
+        metrics = eval_function_completions(responses, definition, assertions=assertions)
         assertions = metrics["assertions"]
         cost += metrics["gen_cost"]
         if metrics["succeed_assertions"] or i == len(configs) - 1:
diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py
index 513bcf8f2..962e2f4b7 100644
--- a/flaml/autogen/oai/completion.py
+++ b/flaml/autogen/oai/completion.py
@@ -21,9 +21,7 @@ try:
 
     ERROR = None
 except ImportError:
-    ERROR = ImportError(
-        "please install flaml[openai] option to use the flaml.oai subpackage."
-    )
+    ERROR = ImportError("please install flaml[openai] option to use the flaml.oai subpackage.")
 logger = logging.getLogger(__name__)
 if not logger.handlers:
     # Add the console handler.
@@ -142,17 +140,11 @@ class Completion:
             if response is not None and (response != -1 or not eval_only):
                 # print("using cached response")
                 return response
-        openai_completion = (
-            openai.ChatCompletion
-            if config["model"] in cls.chat_models
-            else openai.Completion
-        )
+        openai_completion = openai.ChatCompletion if config["model"] in cls.chat_models else openai.Completion
         start_time = time.time()
         while True:
             try:
-                response = openai_completion.create(
-                    request_timeout=cls.request_timeout, **config
-                )
+                response = openai_completion.create(request_timeout=cls.request_timeout, **config)
                 cls._cache.set(key, response)
                 return response
             except (
@@ -176,9 +168,7 @@ class Completion:
                 if "azure" == openai.api_type and "model" in config:
                     # azure api uses "engine" instead of "model"
                     config = config.copy()
-                    config["engine"] = config.pop("model").replace(
-                        "gpt-3.5-turbo", "gpt-35-turbo"
-                    )
+                    config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo")
                 else:
                     raise
         logger.warning(
@@ -193,11 +183,7 @@ class Completion:
         # find the max value in max_valid_n_per_max_tokens
         # whose key is equal or larger than max_tokens
         return max(
-            (
-                value
-                for k, value in cls._max_valid_n_per_max_tokens.get(key, {}).items()
-                if k >= max_tokens
-            ),
+            (value for k, value in cls._max_valid_n_per_max_tokens.get(key, {}).items() if k >= max_tokens),
             default=1,
         )
 
@@ -206,11 +192,7 @@ class Completion:
         # find the min value in min_invalid_n_per_max_tokens
         # whose key is equal or smaller than max_tokens
         return min(
-            (
-                value
-                for k, value in cls._min_invalid_n_per_max_tokens.get(key, {}).items()
-                if k <= max_tokens
-            ),
+            (value for k, value in cls._min_invalid_n_per_max_tokens.get(key, {}).items() if k <= max_tokens),
             default=None,
         )
 
@@ -227,12 +209,10 @@ class Completion:
     def _update_invalid_n(cls, prune, region_key, max_tokens, num_completions):
         if prune:
             # update invalid n and prune this config
-            cls._min_invalid_n_per_max_tokens[
-                region_key
-            ] = invalid_n = cls._min_invalid_n_per_max_tokens.get(region_key, {})
-            invalid_n[max_tokens] = min(
-                num_completions, invalid_n.get(max_tokens, np.inf)
+            cls._min_invalid_n_per_max_tokens[region_key] = invalid_n = cls._min_invalid_n_per_max_tokens.get(
+                region_key, {}
             )
+            invalid_n[max_tokens] = min(num_completions, invalid_n.get(max_tokens, np.inf))
 
     @classmethod
     def _pop_subspace(cls, config):
@@ -280,16 +260,12 @@ class Completion:
         model = config["model"]
         data_length = len(data)
         price = cls.price1K.get(model)
-        price_input, price_output = (
-            price if isinstance(price, tuple) else (price, price)
-        )
+        price_input, price_output = price if isinstance(price, tuple) else (price, price)
         inference_budget = getattr(cls, "inference_budget", None)
         prune_hp = getattr(cls, "_prune_hp", "n")
         metric = cls._metric
         config_n = config.get(prune_hp, 1)  # default value in OpenAI is 1
-        max_tokens = config.get(
-            "max_tokens", np.inf if model in cls.chat_models else 16
-        )
+        max_tokens = config.get("max_tokens", np.inf if model in cls.chat_models else 16)
         prompt, messages = cls._get_prompt_messages_from_config(model, config)
         stop = cls._stops and cls._stops[config["stop"]]
         target_output_tokens = None
@@ -300,9 +276,7 @@ class Completion:
             region_key = cls._get_region_key(config)
             max_valid_n = cls._get_max_valid_n(region_key, max_tokens)
             if cls.avg_input_tokens:
-                target_output_tokens = (
-                    inference_budget * 1000 - cls.avg_input_tokens * price_input
-                ) / price_output
+                target_output_tokens = (inference_budget * 1000 - cls.avg_input_tokens * price_input) / price_output
                 # max_tokens bounds the maximum tokens
                 # so using it we can calculate a valid n according to the avg # input tokens
                 max_valid_n = max(
@@ -340,16 +314,12 @@ class Completion:
             while True:  # data_limit <= data_length
                 # limit the number of data points to avoid rate limit
                 for i in range(prev_data_limit, data_limit):
-                    logger.debug(
-                        f"num_completions={num_completions}, data instance={i}"
-                    )
+                    logger.debug(f"num_completions={num_completions}, data instance={i}")
                     data_i = data[i]
                     params = cls._construct_params(data_i, params, prompt, messages)
                     response = cls._get_response(params, eval_only)
                     if response == -1:  # rate limit error, treat as invalid
-                        cls._update_invalid_n(
-                            prune, region_key, max_tokens, num_completions
-                        )
+                        cls._update_invalid_n(prune, region_key, max_tokens, num_completions)
                         result[metric] = 0
                         result["cost"] = cost
                         return result
@@ -361,16 +331,10 @@ class Completion:
                     if not cls.avg_input_tokens and not input_tokens[i]:
                         # store the # input tokens
                         input_tokens[i] = n_input_tokens
-                    query_cost = (
-                        price_input * n_input_tokens + price_output * n_output_tokens
-                    ) / 1000
+                    query_cost = (price_input * n_input_tokens + price_output * n_output_tokens) / 1000
                     cls._total_cost += query_cost
                     cost += query_cost
-                    if (
-                        cls.optimization_budget
-                        and cls._total_cost >= cls.optimization_budget
-                        and not eval_only
-                    ):
+                    if cls.optimization_budget and cls._total_cost >= cls.optimization_budget and not eval_only:
                         # limit the total tuning cost
                         return {
                             metric: 0,
@@ -393,14 +357,8 @@ class Completion:
                 )
                 # Hoeffding-Serfling bound
                 ratio = 0.1 * np.sqrt(rho / data_limit)
-                if (
-                    target_output_tokens
-                    and avg_n_tokens > target_output_tokens * (1 + ratio)
-                    and not eval_only
-                ):
-                    cls._update_invalid_n(
-                        prune, region_key, max_tokens, num_completions
-                    )
+                if target_output_tokens and avg_n_tokens > target_output_tokens * (1 + ratio) and not eval_only:
+                    cls._update_invalid_n(prune, region_key, max_tokens, num_completions)
                     result[metric] = 0
                     result["total_cost"] = cls._total_cost
                     result["cost"] = cost
@@ -409,19 +367,13 @@ class Completion:
                     prune
                     and target_output_tokens
                     and avg_n_tokens <= target_output_tokens * (1 - ratio)
-                    and (
-                        num_completions < config_n
-                        or num_completions == config_n
-                        and data_limit == data_length
-                    )
+                    and (num_completions < config_n or num_completions == config_n and data_limit == data_length)
                 ):
                     # update valid n
-                    cls._max_valid_n_per_max_tokens[
-                        region_key
-                    ] = valid_n = cls._max_valid_n_per_max_tokens.get(region_key, {})
-                    valid_n[max_tokens] = max(
-                        num_completions, valid_n.get(max_tokens, 0)
+                    cls._max_valid_n_per_max_tokens[region_key] = valid_n = cls._max_valid_n_per_max_tokens.get(
+                        region_key, {}
                     )
+                    valid_n[max_tokens] = max(num_completions, valid_n.get(max_tokens, 0))
                     if num_completions < config_n:
                         # valid already, skip the rest of the data
                         data_limit = data_length
@@ -455,9 +407,7 @@ class Completion:
                         target_output_tokens = (
                             inference_budget * 1000 - cls.avg_input_tokens * price_input
                         ) / price_output
-                result["inference_cost"] = (
-                    avg_n_tokens * price_output + cls.avg_input_tokens * price_input
-                ) / 1000
+                result["inference_cost"] = (avg_n_tokens * price_output + cls.avg_input_tokens * price_input) / 1000
                 break
             else:
                 if data_early_stop:
@@ -552,9 +502,7 @@ class Completion:
                 space.pop("temperature_or_top_p")
                 space["temperature"] = temperature
                 space["top_p"] = top_p
-                logger.warning(
-                    "temperature and top_p are not recommended to vary together."
-                )
+                logger.warning("temperature and top_p are not recommended to vary together.")
         cls._max_valid_n_per_max_tokens, cls._min_invalid_n_per_max_tokens = {}, {}
         cls.optimization_budget = optimization_budget
         cls.inference_budget = inference_budget
@@ -569,12 +517,8 @@ class Completion:
                 cls._messages = [cls._messages]
             space["messages"] = tune.choice(list(range(len(cls._messages))))
         else:
-            assert (
-                space.get("messages") is None
-            ), "messages and prompt cannot be provided at the same time."
-            assert isinstance(
-                cls._prompts, (str, list)
-            ), "prompt must be a string or a list of strings."
+            assert space.get("messages") is None, "messages and prompt cannot be provided at the same time."
+            assert isinstance(cls._prompts, (str, list)), "prompt must be a string or a list of strings."
             if isinstance(cls._prompts, str):
                 cls._prompts = [cls._prompts]
             space["prompt"] = tune.choice(list(range(len(cls._prompts))))
@@ -703,9 +647,7 @@ class Completion:
         # or "messages" should be in config (for tuning chat models only)
         if prompt is None and model in cls.chat_models:
             if messages is None:
-                raise ValueError(
-                    "Either prompt or messages should be in config for chat models."
-                )
+                raise ValueError("Either prompt or messages should be in config for chat models.")
         if prompt is None:
             params["messages"] = [
                 {
@@ -725,18 +667,12 @@ class Completion:
             params["messages"] = [
                 {
                     "role": "user",
-                    "content": prompt_msg
-                    if isinstance(prompt, str)
-                    else prompt(data_instance),
+                    "content": prompt_msg if isinstance(prompt, str) else prompt(data_instance),
                 },
             ]
             params.pop("prompt", None)
         else:
-            params["prompt"] = (
-                prompt.format(**data_instance)
-                if isinstance(prompt, str)
-                else prompt(data_instance)
-            )
+            params["prompt"] = prompt.format(**data_instance) if isinstance(prompt, str) else prompt(data_instance)
         return params
 
     @classmethod
@@ -855,9 +791,7 @@ class Completion:
         elif isinstance(agg_method, dict):
             for key in metric_keys:
                 metric_agg_method = agg_method[key]
-                assert callable(
-                    metric_agg_method
-                ), "please provide a callable for each metric"
+                assert callable(metric_agg_method), "please provide a callable for each metric"
                 result_agg[key] = metric_agg_method([r[key] for r in result_list])
         else:
             raise ValueError(
diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index 81ec42458..2bd7e5f8f 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -387,15 +387,11 @@ class AutoML(BaseEstimator):
         settings["free_mem_ratio"] = settings.get("free_mem_ratio", 0)
         settings["metric_constraints"] = settings.get("metric_constraints", [])
         settings["cv_score_agg_func"] = settings.get("cv_score_agg_func", None)
-        settings["fit_kwargs_by_estimator"] = settings.get(
-            "fit_kwargs_by_estimator", {}
-        )
+        settings["fit_kwargs_by_estimator"] = settings.get("fit_kwargs_by_estimator", {})
         settings["custom_hp"] = settings.get("custom_hp", {})
         settings["skip_transform"] = settings.get("skip_transform", False)
 
-        self._estimator_type = (
-            "classifier" if settings["task"] in CLASSIFICATION else "regressor"
-        )
+        self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor"
 
     def get_params(self, deep: bool = False) -> dict:
         return self._settings.copy()
@@ -452,18 +448,14 @@ class AutoML(BaseEstimator):
     def best_config_per_estimator(self):
         """A dictionary of all estimators' best configuration."""
         return {
-            e: e_search_state.best_config
-            and AutoMLState.sanitize(e_search_state.best_config)
+            e: e_search_state.best_config and AutoMLState.sanitize(e_search_state.best_config)
             for e, e_search_state in self._search_states.items()
         }
 
     @property
     def best_loss_per_estimator(self):
         """A dictionary of all estimators' best loss."""
-        return {
-            e: e_search_state.best_loss
-            for e, e_search_state in self._search_states.items()
-        }
+        return {e: e_search_state.best_loss for e, e_search_state in self._search_states.items()}
 
     @property
     def best_loss(self):
@@ -482,16 +474,12 @@ class AutoML(BaseEstimator):
         associated with the best config. These two objects correspond to the returned
         objects by the customized metric function for the config with the best loss."""
         state = self._search_states.get(self._best_estimator)
-        return self._state.best_loss, state and getattr(state, "best_result", {}).get(
-            "metric_for_logging"
-        )
+        return self._state.best_loss, state and getattr(state, "best_result", {}).get("metric_for_logging")
 
     @property
     def best_config_train_time(self):
         """A float of the seconds taken by training the best config."""
-        return getattr(
-            self._search_states[self._best_estimator], "best_config_train_time", None
-        )
+        return getattr(self._search_states[self._best_estimator], "best_config_train_time", None)
 
     def save_best_config(self, filename):
         best = {
@@ -544,9 +532,7 @@ class AutoML(BaseEstimator):
     ):
         estimator = getattr(self, "_trained_estimator", None)
         if estimator is None:
-            logger.warning(
-                "No estimator is trained. Please run fit with enough budget."
-            )
+            logger.warning("No estimator is trained. Please run fit with enough budget.")
             return None
         X = self._state.task.preprocess(X, self._transformer)
         if self._label_transformer:
@@ -588,23 +574,15 @@ class AutoML(BaseEstimator):
         """
         estimator = getattr(self, "_trained_estimator", None)
         if estimator is None:
-            logger.warning(
-                "No estimator is trained. Please run fit with enough budget."
-            )
+            logger.warning("No estimator is trained. Please run fit with enough budget.")
             return None
         X = self._state.task.preprocess(X, self._transformer)
         y_pred = estimator.predict(X, **pred_kwargs)
 
-        if (
-            isinstance(y_pred, np.ndarray)
-            and y_pred.ndim > 1
-            and isinstance(y_pred, np.ndarray)
-        ):
+        if isinstance(y_pred, np.ndarray) and y_pred.ndim > 1 and isinstance(y_pred, np.ndarray):
             y_pred = y_pred.flatten()
         if self._label_transformer:
-            return self._label_transformer.inverse_transform(
-                pd.Series(y_pred.astype(int))
-            )
+            return self._label_transformer.inverse_transform(pd.Series(y_pred.astype(int)))
         else:
             return y_pred
 
@@ -623,9 +601,7 @@ class AutoML(BaseEstimator):
         """
         estimator = getattr(self, "_trained_estimator", None)
         if estimator is None:
-            logger.warning(
-                "No estimator is trained. Please run fit with enough budget."
-            )
+            logger.warning("No estimator is trained. Please run fit with enough budget.")
             return None
         X = self._state.task.preprocess(X, self._transformer)
         proba = self._trained_estimator.predict_proba(X, **pred_kwargs)
@@ -640,9 +616,7 @@ class AutoML(BaseEstimator):
         """
         self._state.learner_classes[learner_name] = learner_class
 
-    def get_estimator_from_log(
-        self, log_file_name: str, record_id: int, task: Union[str, Task]
-    ):
+    def get_estimator_from_log(self, log_file_name: str, record_id: int, task: Union[str, Task]):
         """Get the estimator from log file.
 
         Args:
@@ -820,30 +794,18 @@ class AutoML(BaseEstimator):
         split_ratio = split_ratio or self._settings.get("split_ratio")
         n_splits = n_splits or self._settings.get("n_splits")
         split_type = split_type or self._settings.get("split_type")
-        auto_augment = (
-            self._settings.get("auto_augment") if auto_augment is None else auto_augment
-        )
+        auto_augment = self._settings.get("auto_augment") if auto_augment is None else auto_augment
         self._state.task = task
         self._estimator_type = "classifier" if task.is_classification() else "regressor"
 
         self._state.fit_kwargs = fit_kwargs
         self._state.custom_hp = custom_hp or self._settings.get("custom_hp")
-        self._skip_transform = (
-            self._settings.get("skip_transform")
-            if skip_transform is None
-            else skip_transform
-        )
-        self._state.fit_kwargs_by_estimator = (
-            fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
-        )
+        self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform
+        self._state.fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
         self.preserve_checkpoint = (
-            self._settings.get("preserve_checkpoint")
-            if preserve_checkpoint is None
-            else preserve_checkpoint
-        )
-        task.validate_data(
-            self, self._state, X_train, y_train, dataframe, label, groups=groups
+            self._settings.get("preserve_checkpoint") if preserve_checkpoint is None else preserve_checkpoint
         )
+        task.validate_data(self, self._state, X_train, y_train, dataframe, label, groups=groups)
 
         logger.info("log file name {}".format(log_file_name))
 
@@ -877,9 +839,7 @@ class AutoML(BaseEstimator):
                             best_val_loss = val_loss
                             sample_size = size
                 if not training_duration:
-                    logger.warning(
-                        f"No estimator found within time_budget={time_budget}"
-                    )
+                    logger.warning(f"No estimator found within time_budget={time_budget}")
                     from .model import BaseEstimator as Estimator
 
                     self._trained_estimator = Estimator()
@@ -901,9 +861,7 @@ class AutoML(BaseEstimator):
             self._state.fit_kwargs_by_estimator[best_estimator] = self._state.fit_kwargs
 
         logger.info(
-            "estimator = {}, config = {}, #training instances = {}".format(
-                best_estimator, best_config, sample_size
-            )
+            "estimator = {}, config = {}, #training instances = {}".format(best_estimator, best_config, sample_size)
         )
         # Partially copied from fit() function
         # Initilize some attributes required for retrain_from_log
@@ -944,9 +902,7 @@ class AutoML(BaseEstimator):
                 "auto",
                 "cv",
             ], "eval_method must be 'auto' or 'cv' for custom data splitter."
-            assert (
-                self._state.X_val is None
-            ), "custom splitter and custom validation data can't be used together."
+            assert self._state.X_val is None, "custom splitter and custom validation data can't be used together."
             return "cv"
         if self._state.X_val is not None:
             assert eval_method in [
@@ -1051,10 +1007,7 @@ class AutoML(BaseEstimator):
                 c = self._search_states[estimator].cat_hp_cost
                 configs.append(c)
             configs.append(
-                [
-                    self._state.learner_classes.get(estimator).cost_relative2lgbm()
-                    for estimator in self.estimator_list
-                ]
+                [self._state.learner_classes.get(estimator).cost_relative2lgbm() for estimator in self.estimator_list]
             )
             config = {"ml": configs}
         return config
@@ -1112,9 +1065,7 @@ class AutoML(BaseEstimator):
         for estimator in self.estimator_list:
             search_state = self._search_states[estimator]
             if hasattr(search_state, "training_function"):
-                estimator_to_training_function[
-                    estimator
-                ] = search_state.training_function
+                estimator_to_training_function[estimator] = search_state.training_function
                 del search_state.training_function
 
         with open(output_file_name, "wb") as f:
@@ -1577,74 +1528,44 @@ class AutoML(BaseEstimator):
         eval_method = eval_method or self._settings.get("eval_method")
         split_ratio = split_ratio or self._settings.get("split_ratio")
         n_splits = n_splits or self._settings.get("n_splits")
-        auto_augment = (
-            self._settings.get("auto_augment") if auto_augment is None else auto_augment
-        )
+        auto_augment = self._settings.get("auto_augment") if auto_augment is None else auto_augment
         metric = metric or self._settings.get("metric")
         estimator_list = estimator_list or self._settings.get("estimator_list")
-        log_file_name = (
-            self._settings.get("log_file_name")
-            if log_file_name is None
-            else log_file_name
-        )
+        log_file_name = self._settings.get("log_file_name") if log_file_name is None else log_file_name
         max_iter = self._settings.get("max_iter") if max_iter is None else max_iter
         sample_is_none = sample is None
         if sample_is_none:
             sample = self._settings.get("sample")
         ensemble = self._settings.get("ensemble") if ensemble is None else ensemble
         log_type = log_type or self._settings.get("log_type")
-        model_history = (
-            self._settings.get("model_history")
-            if model_history is None
-            else model_history
-        )
+        model_history = self._settings.get("model_history") if model_history is None else model_history
         log_training_metric = (
-            self._settings.get("log_training_metric")
-            if log_training_metric is None
-            else log_training_metric
+            self._settings.get("log_training_metric") if log_training_metric is None else log_training_metric
         )
         mem_thres = mem_thres or self._settings.get("mem_thres")
         pred_time_limit = pred_time_limit or self._settings.get("pred_time_limit")
         train_time_limit = train_time_limit or self._settings.get("train_time_limit")
-        self._metric_constraints = metric_constraints or self._settings.get(
-            "metric_constraints"
-        )
+        self._metric_constraints = metric_constraints or self._settings.get("metric_constraints")
         if np.isfinite(pred_time_limit):
             self._metric_constraints.append(("pred_time", "<=", pred_time_limit))
         verbose = self._settings.get("verbose") if verbose is None else verbose
-        retrain_full = (
-            self._settings.get("retrain_full") if retrain_full is None else retrain_full
-        )
+        retrain_full = self._settings.get("retrain_full") if retrain_full is None else retrain_full
         split_type = split_type or self._settings.get("split_type")
         hpo_method = hpo_method or self._settings.get("hpo_method")
         learner_selector = learner_selector or self._settings.get("learner_selector")
         no_starting_points = starting_points is None
         if no_starting_points:
             starting_points = self._settings.get("starting_points")
-        n_concurrent_trials = n_concurrent_trials or self._settings.get(
-            "n_concurrent_trials"
-        )
-        keep_search_state = (
-            self._settings.get("keep_search_state")
-            if keep_search_state is None
-            else keep_search_state
-        )
+        n_concurrent_trials = n_concurrent_trials or self._settings.get("n_concurrent_trials")
+        keep_search_state = self._settings.get("keep_search_state") if keep_search_state is None else keep_search_state
         self.preserve_checkpoint = (
-            self._settings.get("preserve_checkpoint")
-            if preserve_checkpoint is None
-            else preserve_checkpoint
-        )
-        early_stop = (
-            self._settings.get("early_stop") if early_stop is None else early_stop
-        )
-        force_cancel = (
-            self._settings.get("force_cancel") if force_cancel is None else force_cancel
+            self._settings.get("preserve_checkpoint") if preserve_checkpoint is None else preserve_checkpoint
         )
+        early_stop = self._settings.get("early_stop") if early_stop is None else early_stop
+        force_cancel = self._settings.get("force_cancel") if force_cancel is None else force_cancel
         # no search budget is provided?
         no_budget = time_budget < 0 and max_iter is None and not early_stop
-        append_log = (
-            self._settings.get("append_log") if append_log is None else append_log
-        )
+        append_log = self._settings.get("append_log") if append_log is None else append_log
         min_sample_size = min_sample_size or self._settings.get("min_sample_size")
         use_ray = self._settings.get("use_ray") if use_ray is None else use_ray
         use_spark = self._settings.get("use_spark") if use_spark is None else use_spark
@@ -1698,11 +1619,7 @@ class AutoML(BaseEstimator):
         if self._use_ray is not False:
             import ray
 
-            n_cpus = (
-                ray.is_initialized()
-                and ray.available_resources()["CPU"]
-                or os.cpu_count()
-            )
+            n_cpus = ray.is_initialized() and ray.available_resources()["CPU"] or os.cpu_count()
 
             self._state.resources_per_trial = (
                 # when using gpu, default cpu is 1 per job; otherwise, default cpu is n_cpus / n_concurrent_trials
@@ -1724,30 +1641,16 @@ class AutoML(BaseEstimator):
                 dataframe = ray.get(dataframe)
         else:
             # TODO: Integrate with Spark
-            self._state.resources_per_trial = (
-                {"cpu": n_jobs} if n_jobs > 0 else {"cpu": 1}
-            )
-        self._state.free_mem_ratio = (
-            self._settings.get("free_mem_ratio")
-            if free_mem_ratio is None
-            else free_mem_ratio
-        )
+            self._state.resources_per_trial = {"cpu": n_jobs} if n_jobs > 0 else {"cpu": 1}
+        self._state.free_mem_ratio = self._settings.get("free_mem_ratio") if free_mem_ratio is None else free_mem_ratio
         self._state.task = task
         self._state.log_training_metric = log_training_metric
 
         self._state.fit_kwargs = fit_kwargs
         custom_hp = custom_hp or self._settings.get("custom_hp")
-        self._skip_transform = (
-            self._settings.get("skip_transform")
-            if skip_transform is None
-            else skip_transform
-        )
-        fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get(
-            "fit_kwargs_by_estimator"
-        )
-        self._state.fit_kwargs_by_estimator = (
-            fit_kwargs_by_estimator.copy()
-        )  # shallow copy of fit_kwargs_by_estimator
+        self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform
+        fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
+        self._state.fit_kwargs_by_estimator = fit_kwargs_by_estimator.copy()  # shallow copy of fit_kwargs_by_estimator
         self._state.weight_val = sample_weight_val
 
         task.validate_data(
@@ -1777,13 +1680,9 @@ class AutoML(BaseEstimator):
         eval_method = self._decide_eval_method(eval_method, time_budget)
         self._state.eval_method = eval_method
         logger.info("Evaluation method: {}".format(eval_method))
-        self._state.cv_score_agg_func = cv_score_agg_func or self._settings.get(
-            "cv_score_agg_func"
-        )
+        self._state.cv_score_agg_func = cv_score_agg_func or self._settings.get("cv_score_agg_func")
 
-        self._retrain_in_budget = retrain_full == "budget" and (
-            eval_method == "holdout" and self._state.X_val is None
-        )
+        self._retrain_in_budget = retrain_full == "budget" and (eval_method == "holdout" and self._state.X_val is None)
         self._auto_augment = auto_augment
 
         _sample_size_from_starting_points = {}
@@ -1805,9 +1704,7 @@ class AutoML(BaseEstimator):
                         ]
                     )
                     if _sample_size_set:
-                        _sample_size_from_starting_points[_estimator] = min(
-                            _sample_size_set
-                        )
+                        _sample_size_from_starting_points[_estimator] = min(_sample_size_set)
                     if len(_sample_size_set) > 1:
                         logger.warning(
                             "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format(
@@ -1831,10 +1728,7 @@ class AutoML(BaseEstimator):
                     sample
                     and not task.is_rank()
                     and eval_method != "cv"
-                    and (
-                        self._min_sample_size[k] * SAMPLE_MULTIPLY_FACTOR
-                        < self._state.data_size[0]
-                    ),
+                    and (self._min_sample_size[k] * SAMPLE_MULTIPLY_FACTOR < self._state.data_size[0]),
                 )
                 for k in self._min_sample_size.keys()
             }
@@ -1843,10 +1737,7 @@ class AutoML(BaseEstimator):
                 sample
                 and not task.is_rank()
                 and eval_method != "cv"
-                and (
-                    self._min_sample_size * SAMPLE_MULTIPLY_FACTOR
-                    < self._state.data_size[0]
-                )
+                and (self._min_sample_size * SAMPLE_MULTIPLY_FACTOR < self._state.data_size[0])
             )
 
         metric = task.default_metric(metric)
@@ -1874,10 +1765,7 @@ class AutoML(BaseEstimator):
             if task.is_nlp():
                 from flaml.automl.ml import huggingface_metric_to_mode
 
-                if (
-                    metric in huggingface_metric_to_mode
-                    and huggingface_metric_to_mode[metric] == "max"
-                ):
+                if metric in huggingface_metric_to_mode and huggingface_metric_to_mode[metric] == "max":
                     return True, f"-{metric}"
             return False, None
 
@@ -1891,9 +1779,7 @@ class AutoML(BaseEstimator):
             error_metric = "customized metric"
         logger.info(f"Minimizing error metric: {error_metric}")
 
-        is_spark_dataframe = isinstance(X_train, psDataFrame) or isinstance(
-            dataframe, psDataFrame
-        )
+        is_spark_dataframe = isinstance(X_train, psDataFrame) or isinstance(dataframe, psDataFrame)
         estimator_list = task.default_estimator_list(estimator_list, is_spark_dataframe)
 
         if is_spark_dataframe and self._use_spark:
@@ -1954,9 +1840,7 @@ class AutoML(BaseEstimator):
                         location,
                         k=1,
                     )
-                    starting_points[estimator_name] = [
-                        x["hyperparameters"] for x in configs
-                    ]
+                    starting_points[estimator_name] = [x["hyperparameters"] for x in configs]
                 except FileNotFoundError:
                     pass
             try:
@@ -1978,9 +1862,7 @@ class AutoML(BaseEstimator):
         for estimator_name in estimator_list:
             estimator_class = self._state.learner_classes[estimator_name]
             estimator_class.init()
-            this_estimator_kwargs = self._state.fit_kwargs_by_estimator.get(
-                estimator_name
-            )
+            this_estimator_kwargs = self._state.fit_kwargs_by_estimator.get(estimator_name)
             if this_estimator_kwargs:
                 # make another shallow copy of the value (a dict obj), so user's fit_kwargs_by_estimator won't be updated
                 this_estimator_kwargs = this_estimator_kwargs.copy()
@@ -1991,9 +1873,7 @@ class AutoML(BaseEstimator):
                     estimator_name
                 ] = this_estimator_kwargs  # set self._state.fit_kwargs_by_estimator[estimator_name] to the update, so only self._state.fit_kwargs_by_estimator will be updated
             else:
-                self._state.fit_kwargs_by_estimator[
-                    estimator_name
-                ] = self._state.fit_kwargs
+                self._state.fit_kwargs_by_estimator[estimator_name] = self._state.fit_kwargs
 
             self._search_states[estimator_name] = SearchState(
                 learner_class=estimator_class,
@@ -2004,9 +1884,7 @@ class AutoML(BaseEstimator):
                     "period"
                 ),  # NOTE: this is after kwargs is updated to fit_kwargs_by_estimator
                 custom_hp=custom_hp and custom_hp.get(estimator_name),
-                max_iter=max_iter / len(estimator_list)
-                if self._learner_selector == "roundrobin"
-                else max_iter,
+                max_iter=max_iter / len(estimator_list) if self._learner_selector == "roundrobin" else max_iter,
                 budget=self._state.time_budget,
             )
         logger.info("List of ML learners in AutoML Run: {}".format(estimator_list))
@@ -2040,9 +1918,7 @@ class AutoML(BaseEstimator):
             self._search()
         if self._best_estimator:
             logger.info("fit succeeded")
-            logger.info(
-                f"Time taken to find the best model: {self._time_taken_best_iter}"
-            )
+            logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}")
             if (
                 self._hpo_method in ("cfo", "bs")
                 and self._state.time_budget > 0
@@ -2085,10 +1961,7 @@ class AutoML(BaseEstimator):
                     from ray.tune.search import ConcurrencyLimiter
                 import ray
             except (ImportError, AssertionError):
-                raise ImportError(
-                    "use_ray=True requires installation of ray. "
-                    "Please run pip install flaml[ray]"
-                )
+                raise ImportError("use_ray=True requires installation of ray. " "Please run pip install flaml[ray]")
         else:
             from flaml.tune.searcher.suggestion import ConcurrencyLimiter
 
@@ -2116,16 +1989,11 @@ class AutoML(BaseEstimator):
                 from flaml.tune.searcher.suggestion import OptunaSearch as SearchAlgo
         else:
             raise NotImplementedError(
-                f"hpo_method={self._hpo_method} is not recognized. "
-                "'auto', 'cfo' and 'bs' are supported."
+                f"hpo_method={self._hpo_method} is not recognized. " "'auto', 'cfo' and 'bs' are supported."
             )
         space = self.search_space
         self._state.time_from_start = time.time() - self._start_time_flag
-        time_budget_s = (
-            self._state.time_budget - self._state.time_from_start
-            if self._state.time_budget >= 0
-            else None
-        )
+        time_budget_s = self._state.time_budget - self._state.time_from_start if self._state.time_budget >= 0 else None
         if self._hpo_method != "optuna":
             min_resource = self.min_resource
             if isinstance(min_resource, dict):
@@ -2146,9 +2014,7 @@ class AutoML(BaseEstimator):
                 resource_attr=self.resource_attr,
                 min_resource=min_resource_all_estimator,
                 max_resource=self.max_resource,
-                config_constraints=[
-                    (partial(size, self._state.learner_classes), "<=", self._mem_thres)
-                ],
+                config_constraints=[(partial(size, self._state.learner_classes), "<=", self._mem_thres)],
                 metric_constraints=self.metric_constraints,
                 seed=self._seed,
                 time_budget_s=time_budget_s,
@@ -2171,9 +2037,7 @@ class AutoML(BaseEstimator):
             search_alg = SearchAlgo(
                 metric="val_loss",
                 mode="min",
-                points_to_evaluate=[
-                    p for p in new_points_to_evaluate if len(p) == len(converted_space)
-                ],
+                points_to_evaluate=[p for p in new_points_to_evaluate if len(p) == len(converted_space)],
             )
         search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
         resources_per_trial = self._state.resources_per_trial
@@ -2218,8 +2082,7 @@ class AutoML(BaseEstimator):
             (
                 trial
                 for trial in analysis.trials
-                if trial.last_result
-                and trial.last_result.get("wall_clock_time") is not None
+                if trial.last_result and trial.last_result.get("wall_clock_time") is not None
             ),
             key=lambda x: x.last_result["wall_clock_time"],
         )
@@ -2272,14 +2135,10 @@ class AutoML(BaseEstimator):
                 if (search_state.metric_for_logging is not None) and (
                     "intermediate_results" in search_state.metric_for_logging
                 ):
-                    for each_entry in search_state.metric_for_logging[
-                        "intermediate_results"
-                    ]:
+                    for each_entry in search_state.metric_for_logging["intermediate_results"]:
                         with mlflow.start_run(nested=True):
                             mlflow.log_metrics(each_entry)
-                            mlflow.log_metric(
-                                "iter_counter", self._iter_per_learner[estimator]
-                            )
+                            mlflow.log_metric("iter_counter", self._iter_per_learner[estimator])
                     del search_state.metric_for_logging["intermediate_results"]
                 if search_state.metric_for_logging:
                     mlflow.log_metrics(search_state.metric_for_logging)
@@ -2325,8 +2184,7 @@ class AutoML(BaseEstimator):
             from flaml.tune.searcher.cfo_cat import CFOCat as SearchAlgo
         else:
             raise NotImplementedError(
-                f"hpo_method={self._hpo_method} is not recognized. "
-                "'cfo' and 'bs' are supported."
+                f"hpo_method={self._hpo_method} is not recognized. " "'cfo' and 'bs' are supported."
             )
 
         est_retrain_time = next_trial_time = 0
@@ -2348,8 +2206,7 @@ class AutoML(BaseEstimator):
                 if not self._retrain_in_budget
                 or better
                 or (not self.best_estimator)
-                or self._search_states[self.best_estimator].sample_size
-                < self._state.data_size[0]
+                or self._search_states[self.best_estimator].sample_size < self._state.data_size[0]
                 else time_left - est_retrain_time
             )
             if not search_state.search_alg:
@@ -2363,8 +2220,7 @@ class AutoML(BaseEstimator):
                     resource_attr = "FLAML_sample_size"
                     min_resource = (
                         self._min_sample_size[estimator]
-                        if isinstance(self._min_sample_size, dict)
-                        and estimator in self._min_sample_size
+                        if isinstance(self._min_sample_size, dict) and estimator in self._min_sample_size
                         else self._min_sample_size_input
                     )
                     max_resource = self._state.data_size[0]
@@ -2391,9 +2247,7 @@ class AutoML(BaseEstimator):
 
                     low_cost_partial_config = search_state.low_cost_partial_config
                 time_budget_s = (
-                    min(budget_left, self._state.train_time_limit or np.inf)
-                    if self._state.time_budget >= 0
-                    else None
+                    min(budget_left, self._state.train_time_limit or np.inf) if self._state.time_budget >= 0 else None
                 )
                 if self._hpo_method in ("bs", "cfo", "grid", "cfocat", "random"):
                     algo = SearchAlgo(
@@ -2406,9 +2260,7 @@ class AutoML(BaseEstimator):
                         resource_attr=resource_attr,
                         min_resource=min_resource,
                         max_resource=max_resource,
-                        config_constraints=[
-                            (learner_class.size, "<=", self._mem_thres)
-                        ],
+                        config_constraints=[(learner_class.size, "<=", self._mem_thres)],
                         metric_constraints=self.metric_constraints,
                         seed=self._seed,
                         allow_empty_config=True,
@@ -2419,9 +2271,7 @@ class AutoML(BaseEstimator):
                     # if self._hpo_method is optuna, sometimes the search space and the initial config dimension do not match
                     # need to remove the extra keys from the search space to be consistent with the initial config
                     converted_space = SearchAlgo.convert_search_space(search_space)
-                    removed_keys = set(search_space.keys()).difference(
-                        converted_space.keys()
-                    )
+                    removed_keys = set(search_space.keys()).difference(converted_space.keys())
                     new_points_to_evaluate = []
                     for idx in range(len(points_to_evaluate)):
                         r = points_to_evaluate[idx].copy()
@@ -2434,9 +2284,7 @@ class AutoML(BaseEstimator):
                         metric="val_loss",
                         mode="min",
                         space=search_space,
-                        points_to_evaluate=[
-                            p for p in points_to_evaluate if len(p) == len(search_space)
-                        ],
+                        points_to_evaluate=[p for p in points_to_evaluate if len(p) == len(search_space)],
                     )
                 search_state.search_alg = ConcurrencyLimiter(algo, max_concurrent=1)
                 # search_state.search_alg = algo
@@ -2467,9 +2315,7 @@ class AutoML(BaseEstimator):
                     eci_base = search_state.init_eci
                     self._eci.append(search_state.estimated_cost4improvement)
                     for e in self.estimator_list[1:]:
-                        self._eci.append(
-                            self._search_states[e].init_eci / eci_base * self._eci[0]
-                        )
+                        self._eci.append(self._search_states[e].init_eci / eci_base * self._eci[0])
                     self._estimator_index = 0
                     min_budget = max(10 * self._eci[0], sum(self._eci))
                     max_budget = 10000 * self._eci[0]
@@ -2536,14 +2382,10 @@ class AutoML(BaseEstimator):
                 if (
                     self._hpo_method in ("cfo", "bs")
                     and all(
-                        state.search_alg
-                        and state.search_alg.searcher.is_ls_ever_converged
+                        state.search_alg and state.search_alg.searcher.is_ls_ever_converged
                         for state in self._search_states.values()
                     )
-                    and (
-                        self._state.time_from_start
-                        > self._warn_threshold * self._time_taken_best_iter
-                    )
+                    and (self._state.time_from_start > self._warn_threshold * self._time_taken_best_iter)
                 ):
                     logger.warning(
                         "All estimator hyperparameters local search has "
@@ -2566,8 +2408,7 @@ class AutoML(BaseEstimator):
                 and best_config_sig
                 and est_retrain_time
                 and not better
-                and self._search_states[self._best_estimator].sample_size
-                == self._state.data_size[0]
+                and self._search_states[self._best_estimator].sample_size == self._state.data_size[0]
                 and (
                     est_retrain_time
                     <= self._state.time_budget - self._state.time_from_start
@@ -2580,18 +2421,11 @@ class AutoML(BaseEstimator):
                     state.best_config,
                     self.data_size_full,
                 )
-                logger.info(
-                    "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)
-                )
-                self._retrained_config[
-                    best_config_sig
-                ] = state.best_config_train_time = retrain_time
+                logger.info("retrain {} for {:.1f}s".format(self._best_estimator, retrain_time))
+                self._retrained_config[best_config_sig] = state.best_config_train_time = retrain_time
                 est_retrain_time = 0
             self._state.time_from_start = time.time() - self._start_time_flag
-            if (
-                self._state.time_from_start >= self._state.time_budget >= 0
-                or not self._active_estimators
-            ):
+            if self._state.time_from_start >= self._state.time_budget >= 0 or not self._active_estimators:
                 break
             if self._ensemble and self._best_estimator:
                 time_left = self._state.time_budget - self._state.time_from_start
@@ -2636,9 +2470,7 @@ class AutoML(BaseEstimator):
         self._state.time_from_start = time.time() - self._start_time_flag
         if self._best_estimator:
             self._selected = self._search_states[self._best_estimator]
-            self.modelcount = sum(
-                search_state.total_iter for search_state in self._search_states.values()
-            )
+            self.modelcount = sum(search_state.total_iter for search_state in self._search_states.values())
             if self._trained_estimator:
                 logger.info(f"selected model: {self._trained_estimator.model}")
             estimators = []
@@ -2647,9 +2479,7 @@ class AutoML(BaseEstimator):
                 "multiclass",
                 "regression",
             ):
-                search_states = list(
-                    x for x in self._search_states.items() if x[1].best_config
-                )
+                search_states = list(x for x in self._search_states.items() if x[1].best_config)
                 search_states.sort(key=lambda x: x[1].best_loss)
                 estimators = [
                     (
@@ -2674,9 +2504,7 @@ class AutoML(BaseEstimator):
                     for x in search_states[2:]
                     if x[1].best_loss < 4 * self._selected.best_loss
                 ]
-                logger.info(
-                    [(estimator[0], estimator[1].params) for estimator in estimators]
-                )
+                logger.info([(estimator[0], estimator[1].params) for estimator in estimators])
             if len(estimators) > 1:
                 if self._state.task.is_classification():
                     from sklearn.ensemble import StackingClassifier as Stacker
@@ -2685,11 +2513,7 @@ class AutoML(BaseEstimator):
                 if self._use_ray is not False:
                     import ray
 
-                    n_cpus = (
-                        ray.is_initialized()
-                        and ray.available_resources()["CPU"]
-                        or os.cpu_count()
-                    )
+                    n_cpus = ray.is_initialized() and ray.available_resources()["CPU"] or os.cpu_count()
                 elif self._use_spark:
                     from flaml.tune.spark.utils import get_n_cpus
 
@@ -2698,15 +2522,12 @@ class AutoML(BaseEstimator):
                     n_cpus = os.cpu_count()
                 ensemble_n_jobs = (
                     -self._state.n_jobs  # maximize total parallelization degree
-                    if abs(self._state.n_jobs)
-                    == 1  # 1 and -1 correspond to min/max parallelization
+                    if abs(self._state.n_jobs) == 1  # 1 and -1 correspond to min/max parallelization
                     else max(1, int(n_cpus / 2 / self._state.n_jobs))
                     # the total degree of parallelization = parallelization degree per estimator * parallelization degree of ensemble
                 )
                 if isinstance(self._ensemble, dict):
-                    final_estimator = self._ensemble.get(
-                        "final_estimator", self._trained_estimator
-                    )
+                    final_estimator = self._ensemble.get("final_estimator", self._trained_estimator)
                     passthrough = self._ensemble.get("passthrough", True)
                     ensemble_n_jobs = self._ensemble.get("n_jobs", ensemble_n_jobs)
                 else:
@@ -2719,9 +2540,7 @@ class AutoML(BaseEstimator):
                     passthrough=passthrough,
                 )
                 sample_weight_dict = (
-                    (self._sample_weight_full is not None)
-                    and {"sample_weight": self._sample_weight_full}
-                    or {}
+                    (self._sample_weight_full is not None) and {"sample_weight": self._sample_weight_full} or {}
                 )
                 for e in estimators:
                     e[1].__class__.init()
@@ -2776,8 +2595,7 @@ class AutoML(BaseEstimator):
                         or self._state.time_budget - self._state.time_from_start
                         > self._selected.est_retrain_time(self.data_size_full)
                     )
-                    and self._selected.best_config_sample_size
-                    == self._state.data_size[0]
+                    and self._selected.best_config_sample_size == self._state.data_size[0]
                 ):
                     state = self._search_states[self._best_estimator]
                     (
@@ -2788,11 +2606,7 @@ class AutoML(BaseEstimator):
                         state.best_config,
                         self.data_size_full,
                     )
-                    logger.info(
-                        "retrain {} for {:.1f}s".format(
-                            self._best_estimator, retrain_time
-                        )
-                    )
+                    logger.info("retrain {} for {:.1f}s".format(self._best_estimator, retrain_time))
                     state.best_config_train_time = retrain_time
                     if self._trained_estimator:
                         logger.info(f"retrained model: {self._trained_estimator.model}")
@@ -2827,16 +2641,12 @@ class AutoML(BaseEstimator):
                     self._state.time_budget >= 0
                     and self._search_states[estimator].time2eval_best
                     > self._state.time_budget - self._state.time_from_start
-                    or self._iter_per_learner_fullsize[estimator]
-                    >= self._max_iter_per_learner
+                    or self._iter_per_learner_fullsize[estimator] >= self._max_iter_per_learner
                 ):
                     inv.append(0)
                     continue
                 estimated_cost = search_state.estimated_cost4improvement
-                if (
-                    search_state.sample_size < self._state.data_size[0]
-                    and self._state.time_budget >= 0
-                ):
+                if search_state.sample_size < self._state.data_size[0] and self._state.time_budget >= 0:
                     estimated_cost = min(
                         estimated_cost,
                         search_state.time2eval_best
@@ -2847,12 +2657,8 @@ class AutoML(BaseEstimator):
                     )
                 gap = search_state.best_loss - self._state.best_loss
                 if gap > 0 and not self._ensemble:
-                    delta_loss = (
-                        search_state.best_loss_old - search_state.best_loss
-                    ) or search_state.best_loss
-                    delta_time = (
-                        search_state.total_time_used - search_state.time_best_found_old
-                    ) or 1e-10
+                    delta_loss = (search_state.best_loss_old - search_state.best_loss) or search_state.best_loss
+                    delta_time = (search_state.total_time_used - search_state.time_best_found_old) or 1e-10
                     speed = delta_loss / delta_time
                     if speed:
                         estimated_cost = max(2 * gap / speed, estimated_cost)
diff --git a/flaml/automl/data.py b/flaml/automl/data.py
index 8f3e994fc..c2b22cf9f 100644
--- a/flaml/automl/data.py
+++ b/flaml/automl/data.py
@@ -35,9 +35,7 @@ TS_TIMESTAMP_COL = "ds"
 TS_VALUE_COL = "y"
 
 
-def load_openml_dataset(
-    dataset_id, data_dir=None, random_state=0, dataset_format="dataframe"
-):
+def load_openml_dataset(dataset_id, data_dir=None, random_state=0, dataset_format="dataframe"):
     """Load dataset from open ML.
 
     If the file is not cached locally, download it from open ML.
@@ -77,9 +75,7 @@ def load_openml_dataset(
             pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
     print("Dataset name:", dataset.name)
     try:
-        X, y, *__ = dataset.get_data(
-            target=dataset.default_target_attribute, dataset_format=dataset_format
-        )
+        X, y, *__ = dataset.get_data(target=dataset.default_target_attribute, dataset_format=dataset_format)
     except ValueError:
         from sklearn.datasets import fetch_openml
 
@@ -267,9 +263,7 @@ def add_time_idx_col(X):
 class DataTransformer:
     """Transform input training data."""
 
-    def fit_transform(
-        self, X: Union[DataFrame, np.ndarray], y, task: Union[str, "Task"]
-    ):
+    def fit_transform(self, X: Union[DataFrame, np.ndarray], y, task: Union[str, "Task"]):
         """Fit transformer and process the input training data according to the task type.
 
         Args:
@@ -312,21 +306,13 @@ class DataTransformer:
             for column in X.columns:
                 # sklearn\utils\validation.py needs int/float values
                 if X[column].dtype.name in ("object", "category"):
-                    if (
-                        X[column].nunique() == 1
-                        or X[column].nunique(dropna=True)
-                        == n - X[column].isnull().sum()
-                    ):
+                    if X[column].nunique() == 1 or X[column].nunique(dropna=True) == n - X[column].isnull().sum():
                         X.drop(columns=column, inplace=True)
                         drop = True
                     elif X[column].dtype.name == "category":
                         current_categories = X[column].cat.categories
                         if "__NAN__" not in current_categories:
-                            X[column] = (
-                                X[column]
-                                .cat.add_categories("__NAN__")
-                                .fillna("__NAN__")
-                            )
+                            X[column] = X[column].cat.add_categories("__NAN__").fillna("__NAN__")
                         cat_columns.append(column)
                     else:
                         X[column] = X[column].fillna("__NAN__")
@@ -349,10 +335,7 @@ class DataTransformer:
                             f"quarter_{column}": tmp_dt.quarter,
                         }
                         for key, value in new_columns_dict.items():
-                            if (
-                                key not in X.columns
-                                and value.nunique(dropna=False) >= 2
-                            ):
+                            if key not in X.columns and value.nunique(dropna=False) >= 2:
                                 X[key] = value
                                 num_columns.append(key)
                         X[column] = X[column].map(datetime.toordinal)
@@ -368,9 +351,7 @@ class DataTransformer:
             if num_columns:
                 X_num = X[num_columns]
                 if np.issubdtype(X_num.columns.dtype, np.integer) and (
-                    drop
-                    or min(X_num.columns) != 0
-                    or max(X_num.columns) != X_num.shape[1] - 1
+                    drop or min(X_num.columns) != 0 or max(X_num.columns) != X_num.shape[1] - 1
                 ):
                     X_num.columns = range(X_num.shape[1])
                     drop = True
@@ -395,11 +376,7 @@ class DataTransformer:
                 datetime_columns,
             )
             self._drop = drop
-        if (
-            task.is_classification()
-            or not pd.api.types.is_numeric_dtype(y)
-            and not task.is_nlg()
-        ):
+        if task.is_classification() or not pd.api.types.is_numeric_dtype(y) and not task.is_nlg():
             if not task.is_token_classification():
                 from sklearn.preprocessing import LabelEncoder
 
@@ -466,9 +443,7 @@ class DataTransformer:
                 elif X[column].dtype.name == "category":
                     current_categories = X[column].cat.categories
                     if "__NAN__" not in current_categories:
-                        X[column] = (
-                            X[column].cat.add_categories("__NAN__").fillna("__NAN__")
-                        )
+                        X[column] = X[column].cat.add_categories("__NAN__").fillna("__NAN__")
             if cat_columns:
                 X[cat_columns] = X[cat_columns].astype("category")
             if num_columns:
diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py
index c38694908..d81587b3e 100644
--- a/flaml/automl/ml.py
+++ b/flaml/automl/ml.py
@@ -132,9 +132,7 @@ def get_estimator_class(task: str, estimator_name: str) -> EstimatorSubclass:
     if "xgboost" == estimator_name:
         estimator_class = XGBoost_TS if task in TS_FORECAST else XGBoostSklearnEstimator
     elif "xgb_limitdepth" == estimator_name:
-        estimator_class = (
-            XGBoostLimitDepth_TS if task in TS_FORECAST else XGBoostLimitDepthEstimator
-        )
+        estimator_class = XGBoostLimitDepth_TS if task in TS_FORECAST else XGBoostLimitDepthEstimator
     elif "rf" == estimator_name:
         estimator_class = RF_TS if task in TS_FORECAST else RandomForestEstimator
     elif "lgbm" == estimator_name:
@@ -203,40 +201,27 @@ def metric_loss_score(
         try:
             import datasets
 
-            datasets_metric_name = huggingface_submetric_to_metric.get(
-                metric_name, metric_name.split(":")[0]
-            )
+            datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0])
             metric = datasets.load_metric(datasets_metric_name)
             metric_mode = huggingface_metric_to_mode[datasets_metric_name]
 
             if metric_name.startswith("seqeval"):
-                y_processed_true = [
-                    [labels[tr] for tr in each_list] for each_list in y_processed_true
-                ]
+                y_processed_true = [[labels[tr] for tr in each_list] for each_list in y_processed_true]
             elif metric in ("pearsonr", "spearmanr"):
                 y_processed_true = (
-                    y_processed_true.to_list()
-                    if isinstance(y_processed_true, pd.Series)
-                    else list(y_processed_true)
+                    y_processed_true.to_list() if isinstance(y_processed_true, pd.Series) else list(y_processed_true)
                 )
-            score_dict = metric.compute(
-                predictions=y_processed_predict, references=y_processed_true
-            )
+            score_dict = metric.compute(predictions=y_processed_predict, references=y_processed_true)
             if "rouge" in metric_name:
                 score = score_dict[metric_name].mid.fmeasure
             elif metric_name.startswith("seqeval"):
                 metric_submetric_names = metric_name.split(":")
-                score = score_dict[
-                    metric_submetric_names[1]
-                    if len(metric_submetric_names) > 1
-                    else "overall_accuracy"
-                ]
+                score = score_dict[metric_submetric_names[1] if len(metric_submetric_names) > 1 else "overall_accuracy"]
             else:
                 score = score_dict[metric_name]
         except ImportError:
             raise ValueError(
-                metric_name
-                + " is not an built-in sklearn metric and [hf] is not installed. "
+                metric_name + " is not an built-in sklearn metric and [hf] is not installed. "
                 "Currently built-in sklearn metrics are: "
                 "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
                 "log_loss, mape, f1, micro_f1, macro_f1, ap. "
@@ -303,9 +288,7 @@ def sklearn_metric_loss_score(
     if "r2" == metric_name:
         score = 1.0 - r2_score(y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == "rmse":
-        score = np.sqrt(
-            mean_squared_error(y_true, y_predict, sample_weight=sample_weight)
-        )
+        score = np.sqrt(mean_squared_error(y_true, y_predict, sample_weight=sample_weight))
     elif metric_name == "mae":
         score = mean_absolute_error(y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == "mse":
@@ -315,17 +298,11 @@ def sklearn_metric_loss_score(
     elif metric_name == "roc_auc":
         score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == "roc_auc_ovr":
-        score = 1.0 - roc_auc_score(
-            y_true, y_predict, sample_weight=sample_weight, multi_class="ovr"
-        )
+        score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight, multi_class="ovr")
     elif metric_name == "roc_auc_ovo":
-        score = 1.0 - roc_auc_score(
-            y_true, y_predict, sample_weight=sample_weight, multi_class="ovo"
-        )
+        score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight, multi_class="ovo")
     elif metric_name == "roc_auc_weighted":
-        score = 1.0 - roc_auc_score(
-            y_true, y_predict, sample_weight=sample_weight, average="weighted"
-        )
+        score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight, average="weighted")
     elif metric_name == "roc_auc_ovo_weighted":
         score = 1.0 - roc_auc_score(
             y_true,
@@ -350,19 +327,13 @@ def sklearn_metric_loss_score(
         except ValueError:
             return np.inf
     elif "micro_f1" == metric_name:
-        score = 1 - f1_score(
-            y_true, y_predict, sample_weight=sample_weight, average="micro"
-        )
+        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average="micro")
     elif "macro_f1" == metric_name:
-        score = 1 - f1_score(
-            y_true, y_predict, sample_weight=sample_weight, average="macro"
-        )
+        score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average="macro")
     elif "f1" == metric_name:
         score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
     elif "ap" == metric_name:
-        score = 1 - average_precision_score(
-            y_true, y_predict, sample_weight=sample_weight
-        )
+        score = 1 - average_precision_score(y_true, y_predict, sample_weight=sample_weight)
     elif "ndcg" in metric_name:
         if "@" in metric_name:
             k = int(metric_name.split("@", 1)[-1])
@@ -524,9 +495,7 @@ def default_cv_score_agg_func(val_loss_folds, log_metrics_folds):
     if metrics_to_log:
         n = len(val_loss_folds)
         metrics_to_log = (
-            {k: v / n for k, v in metrics_to_log.items()}
-            if isinstance(metrics_to_log, dict)
-            else metrics_to_log / n
+            {k: v / n for k, v in metrics_to_log.items()} if isinstance(metrics_to_log, dict) else metrics_to_log / n
         )
     return metric_to_minimize, metrics_to_log
 
@@ -546,9 +515,7 @@ def compute_estimator(
     eval_method: str,
     eval_metric: Union[str, Callable],
     best_val_loss=np.Inf,
-    n_jobs: Optional[
-        int
-    ] = 1,  # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
+    n_jobs: Optional[int] = 1,  # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
     estimator_class: Optional[EstimatorSubclass] = None,
     cv_score_agg_func: Optional[callable] = None,
     log_training_metric: Optional[bool] = False,
@@ -583,9 +550,7 @@ def compute_estimator(
             groups_val,
             eval_metric,
             task,
-            labels=fit_kwargs.get(
-                "label_list"
-            ),  # pass the label list on to compute the evaluation metric
+            labels=fit_kwargs.get("label_list"),  # pass the label list on to compute the evaluation metric
             budget=budget,
             log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs,
@@ -619,9 +584,7 @@ def train_estimator(
     y_train,
     task: str,
     estimator_name: str,
-    n_jobs: Optional[
-        int
-    ] = 1,  # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
+    n_jobs: Optional[int] = 1,  # some estimators of EstimatorSubclass don't accept n_jobs. Should be None in that case.
     estimator_class: Optional[EstimatorSubclass] = None,
     budget=None,
     fit_kwargs: Optional[dict] = None,
@@ -642,18 +605,14 @@ def train_estimator(
         fit_kwargs["metric"] = eval_metric
 
     if X_train is not None:
-        train_time = estimator.fit(
-            X_train, y_train, budget, free_mem_ratio, **fit_kwargs
-        )
+        train_time = estimator.fit(X_train, y_train, budget, free_mem_ratio, **fit_kwargs)
     else:
         estimator = estimator.estimator_class(**estimator.params)
     train_time = time.time() - start_time
     return estimator, train_time
 
 
-def norm_confusion_matrix(
-    y_true: Union[np.array, pd.Series], y_pred: Union[np.array, pd.Series]
-):
+def norm_confusion_matrix(y_true: Union[np.array, pd.Series], y_pred: Union[np.array, pd.Series]):
     """normalized confusion matrix.
 
     Args:
diff --git a/flaml/automl/model.py b/flaml/automl/model.py
index 7208674c5..5bd0d0ba4 100644
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -139,9 +139,7 @@ class BaseEstimator:
         if "_estimator_type" in config:
             self._estimator_type = self.params.pop("_estimator_type")
         else:
-            self._estimator_type = (
-                "classifier" if task in CLASSIFICATION else "regressor"
-            )
+            self._estimator_type = "classifier" if task in CLASSIFICATION else "regressor"
 
     def get_params(self, deep=False):
         params = self.params.copy()
@@ -255,8 +253,7 @@ class BaseEstimator:
             mem = psutil.virtual_memory() if psutil is not None else None
             try:
                 with limit_resource(
-                    mem.available * (1 - free_mem_ratio)
-                    + psutil.Process(os.getpid()).memory_info().rss
+                    mem.available * (1 - free_mem_ratio) + psutil.Process(os.getpid()).memory_info().rss
                     if mem is not None
                     else -1,
                     budget,
@@ -290,9 +287,7 @@ class BaseEstimator:
             X = self._preprocess(X)
             return self._model.predict(X, **kwargs)
         else:
-            logger.warning(
-                "Estimator is not fit yet. Please run fit() before predict()."
-            )
+            logger.warning("Estimator is not fit yet. Please run fit() before predict().")
             return np.ones(X.shape[0])
 
     def predict_proba(self, X, **kwargs):
@@ -341,9 +336,7 @@ class BaseEstimator:
 
         if self._model is not None:
             if self._task == "rank":
-                raise NotImplementedError(
-                    "AutoML.score() is not implemented for ranking"
-                )
+                raise NotImplementedError("AutoML.score() is not implemented for ranking")
             else:
                 X_val = self._preprocess(X_val)
                 metric = kwargs.pop("metric", None)
@@ -356,9 +349,7 @@ class BaseEstimator:
                 else:
                     return self._model.score(X_val, y_val, **kwargs)
         else:
-            logger.warning(
-                "Estimator is not fit yet. Please run fit() before predict()."
-            )
+            logger.warning("Estimator is not fit yet. Please run fit() before predict().")
             return 0.0
 
     def cleanup(self):
@@ -425,9 +416,7 @@ class SparkEstimator(BaseEstimator):
 
     def __init__(self, task="binary", **config):
         if not _have_spark:
-            raise ImportError(
-                "pyspark is not installed. Try `pip install flaml[spark]`."
-            )
+            raise ImportError("pyspark is not installed. Try `pip install flaml[spark]`.")
         super().__init__(task, **config)
         self.df_train = None
 
@@ -473,9 +462,7 @@ class SparkEstimator(BaseEstimator):
         current_time = time.time()
         pipeline_model = self.estimator_class(**self.params, **kwargs)
         if logger.level == logging.DEBUG:
-            logger.debug(
-                f"flaml.model - {pipeline_model} fit started with params {self.params}"
-            )
+            logger.debug(f"flaml.model - {pipeline_model} fit started with params {self.params}")
         pipeline_model.fit(df_train)
         if logger.level == logging.DEBUG:
             logger.debug(f"flaml.model - {pipeline_model} fit finished")
@@ -494,9 +481,7 @@ class SparkEstimator(BaseEstimator):
         """
         if self._model is not None:
             X = self._preprocess(X, index_col=index_col)
-            predictions = to_pandas_on_spark(
-                self._model.transform(X), index_col=index_col
-            )
+            predictions = to_pandas_on_spark(self._model.transform(X), index_col=index_col)
             predictions.index.name = None
             pred_y = predictions["prediction"]
             if return_all:
@@ -504,9 +489,7 @@ class SparkEstimator(BaseEstimator):
             else:
                 return pred_y
         else:
-            logger.warning(
-                "Estimator is not fit yet. Please run fit() before predict()."
-            )
+            logger.warning("Estimator is not fit yet. Please run fit() before predict().")
             return np.ones(X.shape[0])
 
     def predict_proba(self, X, index_col="tmp_index_col", return_all=False, **kwargs):
@@ -524,9 +507,7 @@ class SparkEstimator(BaseEstimator):
         assert self._task in CLASSIFICATION, "predict_proba() only for classification."
         if self._model is not None:
             X = self._preprocess(X, index_col=index_col)
-            predictions = to_pandas_on_spark(
-                self._model.transform(X), index_col=index_col
-            )
+            predictions = to_pandas_on_spark(self._model.transform(X), index_col=index_col)
             predictions.index.name = None
             pred_y = predictions["probability"]
 
@@ -535,9 +516,7 @@ class SparkEstimator(BaseEstimator):
             else:
                 return pred_y
         else:
-            logger.warning(
-                "Estimator is not fit yet. Please run fit() before predict()."
-            )
+            logger.warning("Estimator is not fit yet. Please run fit() before predict().")
             return np.ones(X.shape[0])
 
 
@@ -600,9 +579,7 @@ class SparkLGBMEstimator(SparkEstimator):
 
     @classmethod
     def size(cls, config):
-        num_leaves = int(
-            round(config.get("numLeaves") or 1 << config.get("maxDepth", 16))
-        )
+        num_leaves = int(round(config.get("numLeaves") or 1 << config.get("maxDepth", 16)))
         n_estimators = int(round(config["numIterations"]))
         return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
 
@@ -653,23 +630,17 @@ class SparkLGBMEstimator(SparkEstimator):
     ):
         start_time = time.time()
         if self.model_n_classes_ is None and self._task not in ["regression", "rank"]:
-            self.model_n_classes_, self.model_classes_ = len_labels(
-                y_train, return_labels=True
-            )
+            self.model_n_classes_, self.model_classes_ = len_labels(y_train, return_labels=True)
         df_train = self._preprocess(X_train, y_train, index_col=index_col)
         # n_iter = self.params.get(self.ITER_HP, self.DEFAULT_ITER)
         # trained = False
         # mem0 = psutil.virtual_memory().available if psutil is not None else 1
         _kwargs = kwargs.copy()
         if self._task not in ["regression", "rank"] and "objective" not in _kwargs:
-            _kwargs["objective"] = (
-                "binary" if self.model_n_classes_ == 2 else "multiclass"
-            )
+            _kwargs["objective"] = "binary" if self.model_n_classes_ == 2 else "multiclass"
         for k in list(_kwargs.keys()):
             if k not in self.estimator_params:
-                logger.warning(
-                    f"[SparkLGBMEstimator] [Warning] Ignored unknown parameter: {k}"
-                )
+                logger.warning(f"[SparkLGBMEstimator] [Warning] Ignored unknown parameter: {k}")
                 _kwargs.pop(k)
         # TODO: find a better estimation of early stopping
         # if (
@@ -803,9 +774,7 @@ class TransformersEstimator(BaseEstimator):
                 "If you need to fix the value of {} to {}, the only way is to add a single-value domain in the search "
                 "space by adding:\n '{}': {{ 'domain': {} }} to 'custom_hp'. For example:"
                 'automl_settings["custom_hp"] = {{ "transformer": {{ "model_path": {{ "domain" : '
-                '"google/electra-small-discriminator" }} }} }}'.format(
-                    key, key, val, key, val
-                )
+                '"google/electra-small-discriminator" }} }} }}'.format(key, key, val, key, val)
             )
 
         """
@@ -823,25 +792,18 @@ class TransformersEstimator(BaseEstimator):
         """
             Update the attributes in TrainingArguments that depends on the values of self.params
         """
-        local_dir = os.path.join(
-            self._training_args.output_dir, "train_{}".format(date_str())
-        )
+        local_dir = os.path.join(self._training_args.output_dir, "train_{}".format(date_str()))
         if self._use_ray is True:
             import ray
 
             self._training_args.output_dir = ray.tune.get_trial_dir()
         else:
-            self._training_args.output_dir = Counter.get_trial_fold_name(
-                local_dir, self.params, self.trial_id
-            )
+            self._training_args.output_dir = Counter.get_trial_fold_name(local_dir, self.params, self.trial_id)
 
         self._training_args.fp16 = self.fp16
         self._training_args.no_cuda = self.no_cuda
 
-        if (
-            self._task == TOKENCLASSIFICATION
-            and self._training_args.max_seq_length is not None
-        ):
+        if self._task == TOKENCLASSIFICATION and self._training_args.max_seq_length is not None:
             logger.warning(
                 "For token classification task, FLAML currently does not support customizing the max_seq_length, max_seq_length will be reset to None."
             )
@@ -938,10 +900,7 @@ class TransformersEstimator(BaseEstimator):
             }
 
             for key in list(kwargs.keys()):
-                if (
-                    key not in data_collator_class.__dict__.keys()
-                    and key != "tokenizer"
-                ):
+                if key not in data_collator_class.__dict__.keys() and key != "tokenizer":
                     del kwargs[key]
             return data_collator_class(**kwargs)
         else:
@@ -984,9 +943,7 @@ class TransformersEstimator(BaseEstimator):
         )  # If using roberta model, must set add_prefix_space to True to avoid the assertion error at
         # https://github.com/huggingface/transformers/blob/main/src/transformers/models/roberta/tokenization_roberta_fast.py#L249
 
-        train_dataset, self._X_train, self._y_train = self._preprocess_data(
-            X_train, y_train
-        )
+        train_dataset, self._X_train, self._y_train = self._preprocess_data(X_train, y_train)
         if X_val is not None:
             eval_dataset, self._X_val, self._y_val = self._preprocess_data(X_val, y_val)
         else:
@@ -1007,10 +964,7 @@ class TransformersEstimator(BaseEstimator):
                     self.time_per_iter = time.time() - self.step_begin_time
                 if (
                     budget
-                    and (
-                        time.time() + self.time_per_iter
-                        > self.train_begin_time + budget
-                    )
+                    and (time.time() + self.time_per_iter > self.train_begin_time + budget)
                     or state.global_step >= this_params[TransformersEstimator.ITER_HP]
                 ):
                     control.should_training_stop = True
@@ -1019,10 +973,7 @@ class TransformersEstimator(BaseEstimator):
                 return control
 
             def on_epoch_end(self, args, state, control, **callback_kwargs):
-                if (
-                    control.should_training_stop
-                    or state.epoch + 1 >= args.num_train_epochs
-                ):
+                if control.should_training_stop or state.epoch + 1 >= args.num_train_epochs:
                     control.should_save = True
                     control.should_evaluate = True
 
@@ -1051,9 +1002,7 @@ class TransformersEstimator(BaseEstimator):
             # if gpu_per_trial == 0:
             #     os.environ["CUDA_VISIBLE_DEVICES"] = ""
             if tmp_cuda_visible_devices.count(",") != math.ceil(gpu_per_trial) - 1:
-                os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
-                    [str(x) for x in range(math.ceil(gpu_per_trial))]
-                )
+                os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(x) for x in range(math.ceil(gpu_per_trial))])
 
         import time
 
@@ -1070,10 +1019,7 @@ class TransformersEstimator(BaseEstimator):
 
         if hasattr(self._trainer, "intermediate_results"):
             self.intermediate_results = [
-                x[1]
-                for x in sorted(
-                    self._trainer.intermediate_results.items(), key=lambda x: x[0]
-                )
+                x[1] for x in sorted(self._trainer.intermediate_results.items(), key=lambda x: x[0])
             ]
         self._trainer = None
 
@@ -1094,9 +1040,7 @@ class TransformersEstimator(BaseEstimator):
         from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
         if trainer.ckpt_to_metric:
-            best_ckpt, _ = min(
-                trainer.ckpt_to_metric.items(), key=lambda x: x[1]["eval_automl_metric"]
-            )
+            best_ckpt, _ = min(trainer.ckpt_to_metric.items(), key=lambda x: x[1]["eval_automl_metric"])
             best_ckpt_global_step = trainer.ckpt_to_global_step[best_ckpt]
             for each_ckpt in list(trainer.ckpt_to_metric):
                 if each_ckpt != best_ckpt:
@@ -1158,9 +1102,7 @@ class TransformersEstimator(BaseEstimator):
             Need to reinit training_args because of a bug in deepspeed: if not reinit, the deepspeed config will be inconsistent
             with HF config https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py#L947
         """
-        training_args = self._TrainingArguments(
-            local_rank=-1, model_path=self._checkpoint_path, fp16=self.fp16
-        )
+        training_args = self._TrainingArguments(local_rank=-1, model_path=self._checkpoint_path, fp16=self.fp16)
         for key, val in self._training_args.__dict__.items():
             if key not in ("local_rank", "model_path", "fp16"):
                 setattr(training_args, key, val)
@@ -1183,9 +1125,7 @@ class TransformersEstimator(BaseEstimator):
             for key, val in pred_kwargs.items():
                 setattr(self._training_args, key, val)
 
-        assert (
-            self._task in CLASSIFICATION
-        ), "predict_proba() only for classification tasks."
+        assert self._task in CLASSIFICATION, "predict_proba() only for classification tasks."
 
         X_test, _ = self._tokenize_text(X, **self._kwargs)
         test_dataset = Dataset.from_pandas(X_test)
@@ -1245,9 +1185,7 @@ class TransformersEstimator(BaseEstimator):
 
     def config2params(self, config: dict) -> dict:
         params = super().config2params(config)
-        params[TransformersEstimator.ITER_HP] = params.get(
-            TransformersEstimator.ITER_HP, sys.maxsize
-        )
+        params[TransformersEstimator.ITER_HP] = params.get(TransformersEstimator.ITER_HP, sys.maxsize)
         return params
 
 
@@ -1257,9 +1195,7 @@ class TransformersEstimatorModelSelection(TransformersEstimator):
 
     @classmethod
     def search_space(cls, data_size, task, **params):
-        search_space_dict = TransformersEstimator.search_space(
-            data_size, task, **params
-        )
+        search_space_dict = TransformersEstimator.search_space(data_size, task, **params)
 
         """
             For model selection, use the same search space regardless of memory constraint
@@ -1368,11 +1304,7 @@ class LGBMEstimator(BaseEstimator):
     @classmethod
     def size(cls, config):
         num_leaves = int(
-            round(
-                config.get("num_leaves")
-                or config.get("max_leaves")
-                or 1 << config.get("max_depth", 16)
-            )
+            round(config.get("num_leaves") or config.get("max_leaves") or 1 << config.get("max_depth", 16))
         )
         n_estimators = int(round(config["n_estimators"]))
         return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
@@ -1399,11 +1331,7 @@ class LGBMEstimator(BaseEstimator):
         self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0, 0) is not None
 
     def _preprocess(self, X):
-        if (
-            not isinstance(X, DataFrame)
-            and issparse(X)
-            and np.issubdtype(X.dtype, np.integer)
-        ):
+        if not isinstance(X, DataFrame) and issparse(X) and np.issubdtype(X.dtype, np.integer):
             X = X.astype(float)
         elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
             # numpy array is not of numeric dtype
@@ -1422,10 +1350,7 @@ class LGBMEstimator(BaseEstimator):
         if not self.HAS_CALLBACK:
             mem0 = psutil.virtual_memory().available if psutil is not None else 1
             if (
-                (
-                    not self._time_per_iter
-                    or abs(self._train_size - X_train.shape[0]) > 4
-                )
+                (not self._time_per_iter or abs(self._train_size - X_train.shape[0]) > 4)
                 and budget is not None
                 or self._mem_per_iter < 0
                 and psutil is not None
@@ -1445,9 +1370,7 @@ class LGBMEstimator(BaseEstimator):
                 # elif self._mem2 <= 0:
                 #     self._mem_per_iter = self._mem1
                 # else:
-                self._mem_per_iter = min(
-                    self._mem1, self._mem2 / self.params[self.ITER_HP]
-                )
+                self._mem_per_iter = min(self._mem1, self._mem2 / self.params[self.ITER_HP])
                 # if self._mem_per_iter <= 1 and psutil is not None:
                 #     n_iter = self.params[self.ITER_HP]
                 self._time_per_iter = (
@@ -1458,11 +1381,7 @@ class LGBMEstimator(BaseEstimator):
                     else 0.001
                 )
                 self._train_size = X_train.shape[0]
-                if (
-                    budget is not None
-                    and self._t1 + self._t2 >= budget
-                    or n_iter == self.params[self.ITER_HP]
-                ):
+                if budget is not None and self._t1 + self._t2 >= budget or n_iter == self.params[self.ITER_HP]:
                     # self.params[self.ITER_HP] = n_iter
                     return time.time() - start_time
                 trained = True
@@ -1471,11 +1390,7 @@ class LGBMEstimator(BaseEstimator):
             if n_iter > 1:
                 max_iter = min(
                     n_iter,
-                    int(
-                        (budget - time.time() + start_time - self._t1)
-                        / self._time_per_iter
-                        + 1
-                    )
+                    int((budget - time.time() + start_time - self._t1) / self._time_per_iter + 1)
                     if budget is not None
                     else n_iter,
                     int((1 - free_mem_ratio) * mem0 / self._mem_per_iter)
@@ -1489,9 +1404,7 @@ class LGBMEstimator(BaseEstimator):
         if self.HAS_CALLBACK:
             kwargs_callbacks = kwargs.get("callbacks")
             if kwargs_callbacks:
-                callbacks = kwargs_callbacks + self._callbacks(
-                    start_time, deadline, free_mem_ratio
-                )
+                callbacks = kwargs_callbacks + self._callbacks(start_time, deadline, free_mem_ratio)
                 kwargs.pop("callbacks")
             else:
                 callbacks = self._callbacks(start_time, deadline, free_mem_ratio)
@@ -1816,9 +1729,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
     def config2params(self, config: dict) -> dict:
         params = super().config2params(config)
         if "max_leaves" in params:
-            params["max_leaf_nodes"] = params.get(
-                "max_leaf_nodes", params.pop("max_leaves")
-            )
+            params["max_leaf_nodes"] = params.get("max_leaf_nodes", params.pop("max_leaves"))
         if self._task not in CLASSIFICATION and "criterion" in config:
             params.pop("criterion")
         if "random_state" not in params:
@@ -1952,12 +1863,7 @@ class CatBoostEstimator(BaseEstimator):
             if not cat_columns.empty:
                 X = X.copy()
                 X[cat_columns] = X[cat_columns].apply(
-                    lambda x: x.cat.rename_categories(
-                        [
-                            str(c) if isinstance(c, float) else c
-                            for c in x.cat.categories
-                        ]
-                    )
+                    lambda x: x.cat.rename_categories([str(c) if isinstance(c, float) else c for c in x.cat.categories])
                 )
         elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
             # numpy array is not of numeric dtype
@@ -2005,19 +1911,11 @@ class CatBoostEstimator(BaseEstimator):
         else:
             cat_features = []
         use_best_model = kwargs.get("use_best_model", True)
-        n = (
-            max(int(len(y_train) * 0.9), len(y_train) - 1000)
-            if use_best_model
-            else len(y_train)
-        )
+        n = max(int(len(y_train) * 0.9), len(y_train) - 1000) if use_best_model else len(y_train)
         X_tr, y_tr = X_train[:n], y_train[:n]
         from catboost import Pool, __version__
 
-        eval_set = (
-            Pool(data=X_train[n:], label=y_train[n:], cat_features=cat_features)
-            if use_best_model
-            else None
-        )
+        eval_set = Pool(data=X_train[n:], label=y_train[n:], cat_features=cat_features) if use_best_model else None
         if "sample_weight" in kwargs:
             weight = kwargs["sample_weight"]
             if weight is not None:
@@ -2190,9 +2088,7 @@ class Prophet(SKLearnEstimator):
             forecast = self._model.predict(X, **kwargs)
             return forecast["yhat"]
         else:
-            logger.warning(
-                "Estimator is not fit yet. Please run fit() before predict()."
-            )
+            logger.warning("Estimator is not fit yet. Please run fit() before predict().")
             return np.ones(X.shape[0])
 
     def score(self, X_val: DataFrame, y_val: Series, **kwargs):
@@ -2279,9 +2175,7 @@ class ARIMA(Prophet):
                 if len(X.columns) > 1:
                     X = self._preprocess(X.drop(columns=TS_TIMESTAMP_COL))
                     regressors = list(X)
-                    forecast = self._model.predict(
-                        start=start, end=end, exog=X[regressors], **kwargs
-                    )
+                    forecast = self._model.predict(start=start, end=end, exog=X[regressors], **kwargs)
                 else:
                     forecast = self._model.predict(start=start, end=end, **kwargs)
             else:
@@ -2398,9 +2292,7 @@ class HoltWinters(ARIMA):
             },
             "use_boxcox": {"domain": tune.choice([False, True]), "init_value": False},
             "seasonal_periods": {  # statsmodels casts this to None if "seasonal" is None
-                "domain": tune.choice(
-                    [7, 12, 4, 52, 6]
-                ),  # weekly, yearly, quarterly, weekly w yearly data
+                "domain": tune.choice([7, 12, 4, 52, 6]),  # weekly, yearly, quarterly, weekly w yearly data
                 "init_value": 7,
             },
         }
@@ -2486,9 +2378,7 @@ class TS_SKLearn(SKLearnEstimator):
                     "low_cost_init_value": False,
                 },
                 "lags": {
-                    "domain": tune.randint(
-                        lower=1, upper=max(2, int(np.sqrt(data_size[0])))
-                    ),
+                    "domain": tune.randint(lower=1, upper=max(2, int(np.sqrt(data_size[0])))),
                     "init_value": 3,
                 },
             }
@@ -2498,9 +2388,7 @@ class TS_SKLearn(SKLearnEstimator):
     def __init__(self, task="ts_forecast", **params):
         super().__init__(task, **params)
         self.hcrystaball_model = None
-        self.ts_task = (
-            "regression" if task in TS_FORECASTREGRESSION else "classification"
-        )
+        self.ts_task = "regression" if task in TS_FORECASTREGRESSION else "classification"
 
     def transform_X(self, X):
         cols = list(X)
@@ -2532,9 +2420,7 @@ class TS_SKLearn(SKLearnEstimator):
                 (
                     X_fit,
                     y_fit,
-                ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
-                    X_train, y_train, i
-                )
+                ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, i)
                 self.hcrystaball_model.model.set_params(**estimator.params)
                 model = self.hcrystaball_model.model.fit(X_fit, y_fit)
                 model_list.append(model)
@@ -2543,9 +2429,7 @@ class TS_SKLearn(SKLearnEstimator):
             (
                 X_fit,
                 y_fit,
-            ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
-                X_train, y_train, kwargs["period"]
-            )
+            ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, kwargs["period"])
             self.hcrystaball_model.model.set_params(**estimator.params)
             model = self.hcrystaball_model.model.fit(X_fit, y_fit)
             self._model = model
@@ -2569,9 +2453,7 @@ class TS_SKLearn(SKLearnEstimator):
                     (
                         X_pred,
                         _,
-                    ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
-                        X.iloc[:i, :]
-                    )
+                    ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X.iloc[:i, :])
                     preds.append(self._model[i - 1].predict(X_pred, **kwargs)[-1])
                 forecast = Series(preds)
             else:
@@ -2582,9 +2464,7 @@ class TS_SKLearn(SKLearnEstimator):
                 forecast = self._model.predict(X_pred, **kwargs)
             return forecast
         else:
-            logger.warning(
-                "Estimator is not fit yet. Please run fit() before predict()."
-            )
+            logger.warning("Estimator is not fit yet. Please run fit() before predict().")
             return np.ones(X.shape[0])
 
 
@@ -2681,13 +2561,9 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
             max_prediction_length=max_prediction_length,
             static_categoricals=kwargs.get("static_categoricals", []),
             static_reals=kwargs.get("static_reals", []),
-            time_varying_known_categoricals=kwargs.get(
-                "time_varying_known_categoricals", []
-            ),
+            time_varying_known_categoricals=kwargs.get("time_varying_known_categoricals", []),
             time_varying_known_reals=kwargs.get("time_varying_known_reals", []),
-            time_varying_unknown_categoricals=kwargs.get(
-                "time_varying_unknown_categoricals", []
-            ),
+            time_varying_unknown_categoricals=kwargs.get("time_varying_unknown_categoricals", []),
             time_varying_unknown_reals=kwargs.get("time_varying_unknown_reals", []),
             variable_groups=kwargs.get(
                 "variable_groups", {}
@@ -2703,18 +2579,12 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
 
         # create validation set (predict=True) which means to predict the last max_prediction_length points in time
         # for each series
-        validation = TimeSeriesDataSet.from_dataset(
-            training, self.data, predict=True, stop_randomization=True
-        )
+        validation = TimeSeriesDataSet.from_dataset(training, self.data, predict=True, stop_randomization=True)
 
         # create dataloaders for model
         batch_size = kwargs.get("batch_size", 64)
-        train_dataloader = training.to_dataloader(
-            train=True, batch_size=batch_size, num_workers=0
-        )
-        val_dataloader = validation.to_dataloader(
-            train=False, batch_size=batch_size * 10, num_workers=0
-        )
+        train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
+        val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
 
         return training, train_dataloader, val_dataloader
 
@@ -2728,27 +2598,19 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
 
         warnings.filterwarnings("ignore")
         current_time = time.time()
-        training, train_dataloader, val_dataloader = self.transform_ds(
-            X_train, y_train, **kwargs
-        )
+        training, train_dataloader, val_dataloader = self.transform_ds(X_train, y_train, **kwargs)
         params = self.params.copy()
         gradient_clip_val = params.pop("gradient_clip_val")
         params.pop("n_jobs")
         max_epochs = kwargs.get("max_epochs", 20)
-        early_stop_callback = EarlyStopping(
-            monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min"
-        )
+        early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
 
         def _fit(log):
             default_trainer_kwargs = dict(
-                gpus=kwargs.get("gpu_per_trial", [0])
-                if torch.cuda.is_available()
-                else None,
+                gpus=kwargs.get("gpu_per_trial", [0]) if torch.cuda.is_available() else None,
                 max_epochs=max_epochs,
                 gradient_clip_val=gradient_clip_val,
-                callbacks=[LearningRateMonitor(), early_stop_callback]
-                if log
-                else [early_stop_callback],
+                callbacks=[LearningRateMonitor(), early_stop_callback] if log else [early_stop_callback],
                 logger=log,
             )
             trainer = pl.Trainer(
@@ -2794,9 +2656,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
 
         ids = self.group_ids.copy()
         ids.append(TS_TIMESTAMP_COL)
-        encoder_data = self.data[
-            lambda x: x.time_idx > x.time_idx.max() - self.max_encoder_length
-        ]
+        encoder_data = self.data[lambda x: x.time_idx > x.time_idx.max() - self.max_encoder_length]
         # following pytorchforecasting example, make all target values equal to the last data
         last_data_cols = self.group_ids.copy()
         last_data_cols.append(TS_VALUE_COL)
@@ -2804,9 +2664,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):
         decoder_data = X
         if "time_idx" not in decoder_data:
             decoder_data = add_time_idx_col(decoder_data)
-        decoder_data["time_idx"] += (
-            encoder_data["time_idx"].max() + 1 - decoder_data["time_idx"].min()
-        )
+        decoder_data["time_idx"] += encoder_data["time_idx"].max() + 1 - decoder_data["time_idx"].min()
         # decoder_data[TS_VALUE_COL] = 0
         decoder_data = decoder_data.merge(last_data, how="inner", on=self.group_ids)
         decoder_data = decoder_data.sort_values(ids)
diff --git a/flaml/automl/nlp/huggingface/data_collator.py b/flaml/automl/nlp/huggingface/data_collator.py
index 51cfda109..8ae1cab16 100644
--- a/flaml/automl/nlp/huggingface/data_collator.py
+++ b/flaml/automl/nlp/huggingface/data_collator.py
@@ -22,22 +22,15 @@ class DataCollatorForMultipleChoiceClassification(DataCollatorWithPadding):
         import torch
 
         label_name = "label" if "label" in features[0].keys() else "labels"
-        labels = (
-            [feature.pop(label_name) for feature in features]
-            if label_name in features[0]
-            else None
-        )
+        labels = [feature.pop(label_name) for feature in features] if label_name in features[0] else None
 
         batch_size = len(features)
         num_choices = len(features[0]["input_ids"])
         flattened_features = [
-            [{k: v[i] for k, v in feature.items()} for i in range(num_choices)]
-            for feature in features
+            [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
         ]
         flattened_features = list(chain(*flattened_features))
-        batch = super(DataCollatorForMultipleChoiceClassification, self).__call__(
-            flattened_features
-        )
+        batch = super(DataCollatorForMultipleChoiceClassification, self).__call__(flattened_features)
         # Un-flatten
         batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
         # Add back labels
diff --git a/flaml/automl/nlp/huggingface/trainer.py b/flaml/automl/nlp/huggingface/trainer.py
index 96151c31c..041cb4de1 100644
--- a/flaml/automl/nlp/huggingface/trainer.py
+++ b/flaml/automl/nlp/huggingface/trainer.py
@@ -24,9 +24,7 @@ class TrainerForAuto(Seq2SeqTrainer):
                 num_beams=num_beams,
             )
         else:
-            return super(Seq2SeqTrainer, self).predict(
-                test_dataset, ignore_keys, metric_key_prefix
-            )
+            return super(Seq2SeqTrainer, self).predict(test_dataset, ignore_keys, metric_key_prefix)
 
     def prediction_step(
         self,
@@ -36,13 +34,9 @@ class TrainerForAuto(Seq2SeqTrainer):
         ignore_keys,
     ):
         if getattr(self, "_is_seq2seq", None):
-            return super().prediction_step(
-                model, inputs, prediction_loss_only, ignore_keys
-            )
+            return super().prediction_step(model, inputs, prediction_loss_only, ignore_keys)
         else:
-            return super(Seq2SeqTrainer, self).prediction_step(
-                model, inputs, prediction_loss_only, ignore_keys
-            )
+            return super(Seq2SeqTrainer, self).prediction_step(model, inputs, prediction_loss_only, ignore_keys)
 
     def log(self, logs) -> None:
         if getattr(self, "_is_seq2seq", None):
@@ -66,9 +60,7 @@ class TrainerForAuto(Seq2SeqTrainer):
         """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path."""
         from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
-        ckpt_dir = os.path.join(
-            self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
-        )
+        ckpt_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
         eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
 
         # TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below (add indentation before metrics = eval_dataset...
diff --git a/flaml/automl/nlp/huggingface/training_args.py b/flaml/automl/nlp/huggingface/training_args.py
index 9fed71e1d..60984e273 100644
--- a/flaml/automl/nlp/huggingface/training_args.py
+++ b/flaml/automl/nlp/huggingface/training_args.py
@@ -69,21 +69,13 @@ class TrainingArgumentsForAuto(TrainingArguments):
         metadata={"help": "per gpu evaluation batch size"},
     )
 
-    label_list: Optional[List[str]] = field(
-        default=None, metadata={"help": "The string list of the label names. "}
-    )
+    label_list: Optional[List[str]] = field(default=None, metadata={"help": "The string list of the label names. "})
 
-    eval_steps: int = field(
-        default=500, metadata={"help": "Run an evaluation every X steps."}
-    )
+    eval_steps: int = field(default=500, metadata={"help": "Run an evaluation every X steps."})
 
-    save_steps: int = field(
-        default=500, metadata={"help": "Save checkpoint every X updates steps."}
-    )
+    save_steps: int = field(default=500, metadata={"help": "Save checkpoint every X updates steps."})
 
-    logging_steps: int = field(
-        default=500, metadata={"help": "Log every X updates steps."}
-    )
+    logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})
 
     @staticmethod
     def load_args_from_console():
@@ -96,12 +88,8 @@ class TrainingArgumentsForAuto(TrainingArguments):
                 "--" + each_field.name,
                 type=each_field.type,
                 help=each_field.metadata["help"],
-                required=each_field.metadata["required"]
-                if "required" in each_field.metadata
-                else False,
-                choices=each_field.metadata["choices"]
-                if "choices" in each_field.metadata
-                else None,
+                required=each_field.metadata["required"] if "required" in each_field.metadata else False,
+                choices=each_field.metadata["choices"] if "choices" in each_field.metadata else None,
                 default=each_field.default,
             )
         console_args, unknown = arg_parser.parse_known_args()
@@ -112,19 +100,13 @@ class TrainingArgumentsForAuto(TrainingArguments):
 class Seq2SeqTrainingArgumentsForAuto(TrainingArgumentsForAuto):
     model_path: str = field(
         default="t5-small",
-        metadata={
-            "help": "model path for HPO natural language generation tasks, default is set to t5-small"
-        },
+        metadata={"help": "model path for HPO natural language generation tasks, default is set to t5-small"},
     )
 
-    sortish_sampler: bool = field(
-        default=False, metadata={"help": "Whether to use SortishSampler or not."}
-    )
+    sortish_sampler: bool = field(default=False, metadata={"help": "Whether to use SortishSampler or not."})
     predict_with_generate: bool = field(
         default=True,
-        metadata={
-            "help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."
-        },
+        metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."},
     )
     generation_max_length: Optional[int] = field(
         default=None,
diff --git a/flaml/automl/nlp/huggingface/utils.py b/flaml/automl/nlp/huggingface/utils.py
index d9cc0244c..88a555578 100644
--- a/flaml/automl/nlp/huggingface/utils.py
+++ b/flaml/automl/nlp/huggingface/utils.py
@@ -38,16 +38,12 @@ def tokenize_text(X, Y=None, task=None, hf_args=None, tokenizer=None):
         Y_tokenized = Y
         label_col_name = ["label"]
     elif task == TOKENCLASSIFICATION:
-        X_tokenized, Y_tokenized = tokenize_text_tokclassification(
-            X, Y, tokenizer=tokenizer, hf_args=hf_args
-        )
+        X_tokenized, Y_tokenized = tokenize_text_tokclassification(X, Y, tokenizer=tokenizer, hf_args=hf_args)
         label_col_name = ["labels"]
     elif task in NLG_TASKS:
         return tokenize_seq2seq(X, Y, tokenizer=tokenizer, task=task, hf_args=hf_args)
     elif task == MULTICHOICECLASSIFICATION:
-        X_tokenized = tokenize_text_multiplechoice(
-            X, tokenizer=tokenizer, hf_args=hf_args
-        )
+        X_tokenized = tokenize_text_multiplechoice(X, tokenizer=tokenizer, hf_args=hf_args)
         label_col_name = ["label"]
         Y_tokenized = Y
     Y_tokenized = todf(X_tokenized, Y_tokenized, label_col_name)
@@ -75,9 +71,7 @@ def tokenize_seq2seq(X, Y, tokenizer, task=None, hf_args=None):
             [(each_l if each_l != tokenizer.pad_token_id else -100) for each_l in label]
             for label in model_outputs["input_ids"]
         ]
-        model_outputs = model_outputs.drop(
-            columns=["attention_mask", "input_ids", "decoder_input_ids"]
-        )
+        model_outputs = model_outputs.drop(columns=["attention_mask", "input_ids", "decoder_input_ids"])
     return model_inputs, model_outputs
 
 
@@ -116,9 +110,7 @@ def tokenize_and_align_labels(
                 # Use the label_all_tokens to control whether to copy the label to all subtokens or to pad the additional tokens as -100
                 if hf_args.label_all_tokens:
                     # If the B- word is converted into multiple subtokens, map the additional subtokens to I-
-                    label_ids.append(
-                        b_to_i_label[label_to_id[examples[Y_sent_key][word_idx]]]
-                    )
+                    label_ids.append(b_to_i_label[label_to_id[examples[Y_sent_key][word_idx]]])
                 else:
                     label_ids.append(-100)
             previous_word_idx = word_idx
@@ -173,9 +165,7 @@ def tokenize_text_tokclassification(X, Y, tokenizer, hf_args=None):
             result_type="expand",
         )
         label_idx = tokenized_column_names.index("labels")
-        other_indices = sorted(
-            set(range(len(tokenized_column_names))).difference({label_idx})
-        )
+        other_indices = sorted(set(range(len(tokenized_column_names))).difference({label_idx}))
         other_column_names = [tokenized_column_names[x] for x in other_indices]
         d = X_and_Y_tokenized.iloc[:, other_indices]
         y_tokenized = X_and_Y_tokenized.iloc[:, label_idx]
@@ -298,10 +288,7 @@ def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
     # get each 1st sentence, multiply to 4 sentences
     question_headers = this_row["sent2"]
     # sent2 are the noun part of 2nd line
-    second_sentences = [
-        question_headers + " " + this_row[key]
-        for key in ["ending0", "ending1", "ending2", "ending3"]
-    ]
+    second_sentences = [question_headers + " " + this_row[key] for key in ["ending0", "ending1", "ending2", "ending3"]]
     # now the 2nd-sentences are formed by combing the noun part and 4 ending parts
 
     # Flatten out
@@ -322,18 +309,14 @@ def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
         return [tokenized_example[x] for x in tmp_column_names]
 
 
-def postprocess_prediction_and_true(
-    task, y_pred, tokenizer, hf_args, y_true=None, X=None
-):
+def postprocess_prediction_and_true(task, y_pred, tokenizer, hf_args, y_true=None, X=None):
     # postprocess the matrix prediction y_pred and ground truth y_true into user readable format, e.g., for summarization, decode into text
     if task == SEQCLASSIFICATION:
         return np.argmax(y_pred, axis=1), y_true
     elif task == SEQREGRESSION:
         return np.squeeze(y_pred), y_true  # predictions.reshape((len(predictions),))
     elif task == TOKENCLASSIFICATION:
-        assert (y_true is not None) or (
-            X is not None
-        ), "One of y_true and X must not be None"
+        assert (y_true is not None) or (X is not None), "One of y_true and X must not be None"
         ## If y_true is not None, we use y_true to remove the -100 in the prediction (postprocessing), and return the postprocessed y_true and prediction
         # If y_true is None, we use X to compute y_is_pad (i.e., whether y_true is -100 in that position), and use y_is_pad to remove the -100 in the prediction, and return the postprocessed prediction (not the y_true)
         y_predict = pd.Series(np.argmax(y_pred, axis=2).tolist())
@@ -354,17 +337,12 @@ def postprocess_prediction_and_true(
             for (each_pred, each_is_pad) in zip(y_predict, y_is_pad)
         ]
         y_pred_label = [
-            [
-                hf_args.label_list[p] if 0 <= p < label_len else -1
-                for (p, ispd) in each_list
-            ]
+            [hf_args.label_list[p] if 0 <= p < label_len else -1 for (p, ispd) in each_list]
             for each_list in zip_pred_ispad
         ]  # To compute precision and recall, y_pred and y_true must be converted to string labels
         # (B-PER, I-PER, etc.), so that the category-based precision/recall (i.e., PER, LOC, etc.) scores can be computed
         if y_true is not None:
-            y_true_label = [
-                [tr for (p, tr) in each_list] for each_list in zip_pred_ispad
-            ]
+            y_true_label = [[tr for (p, tr) in each_list] for each_list in zip_pred_ispad]
         else:
             y_true_label = None
         return y_pred_label, y_true_label
@@ -381,13 +359,9 @@ def postprocess_prediction_and_true(
 
         if y_true is not None:
             y_true_labels = np.where(y_true != -100, y_true, tokenizer.pad_token_id)
-            decoded_y_true_labels = tokenizer.batch_decode(
-                y_true_labels, skip_special_tokens=True
-            )
+            decoded_y_true_labels = tokenizer.batch_decode(y_true_labels, skip_special_tokens=True)
             decoded_y_true_labels = [label.strip() for label in decoded_y_true_labels]
-            decoded_y_true_labels = [
-                "\n".join(nltk.sent_tokenize(label)) for label in decoded_y_true_labels
-            ]
+            decoded_y_true_labels = ["\n".join(nltk.sent_tokenize(label)) for label in decoded_y_true_labels]
         else:
             decoded_y_true_labels = None
 
@@ -419,17 +393,11 @@ def load_model(checkpoint_path, task, num_labels=None):
                 checkpoint_path, config=model_config, ignore_mismatched_sizes=True
             )
         elif task == TOKENCLASSIFICATION:
-            return AutoModelForTokenClassification.from_pretrained(
-                checkpoint_path, config=model_config
-            )
+            return AutoModelForTokenClassification.from_pretrained(checkpoint_path, config=model_config)
         elif task in NLG_TASKS:
-            return AutoModelForSeq2SeqLM.from_pretrained(
-                checkpoint_path, config=model_config
-            )
+            return AutoModelForSeq2SeqLM.from_pretrained(checkpoint_path, config=model_config)
         elif task == MULTICHOICECLASSIFICATION:
-            return AutoModelForMultipleChoice.from_pretrained(
-                checkpoint_path, config=model_config
-            )
+            return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config)
 
     def _set_model_config(checkpoint_path):
         if task in (SEQCLASSIFICATION, SEQREGRESSION, TOKENCLASSIFICATION):
diff --git a/flaml/automl/nlp/utils.py b/flaml/automl/nlp/utils.py
index 87dc940e0..f6038a2cd 100644
--- a/flaml/automl/nlp/utils.py
+++ b/flaml/automl/nlp/utils.py
@@ -85,12 +85,8 @@ class Counter:
     @staticmethod
     def get_trial_fold_name(local_dir, trial_config, trial_id):
         Counter.counter += 1
-        experiment_tag = "{0}_{1}".format(
-            str(Counter.counter), format_vars(trial_config)
-        )
-        logdir = get_logdir_name(
-            _generate_dirname(experiment_tag, trial_id=trial_id), local_dir
-        )
+        experiment_tag = "{0}_{1}".format(str(Counter.counter), format_vars(trial_config))
+        logdir = get_logdir_name(_generate_dirname(experiment_tag, trial_id=trial_id), local_dir)
         return logdir
 
 
@@ -99,15 +95,11 @@ class LabelEncoderforTokenClassification:
         # if the labels are tokens, convert them to ids
         if any(isinstance(id, str) for id in y[0]):
             self.label_list = sorted(list(set().union(*y)))
-            self._tokenlabel_to_id = {
-                self.label_list[id]: id for id in range(len(self.label_list))
-            }
+            self._tokenlabel_to_id = {self.label_list[id]: id for id in range(len(self.label_list))}
             y = y.apply(lambda sent: [self._tokenlabel_to_id[token] for token in sent])
         # if the labels are not tokens, they must be ids
         else:
-            assert all(
-                isinstance(id, (int, np.integer)) for id in y[0]
-            ), "The labels must either be tokens or ids"
+            assert all(isinstance(id, (int, np.integer)) for id in y[0]), "The labels must either be tokens or ids"
         return y
 
     def transform(self, y):
diff --git a/flaml/automl/spark/metrics.py b/flaml/automl/spark/metrics.py
index bd9840d9e..6a5ae0c08 100644
--- a/flaml/automl/spark/metrics.py
+++ b/flaml/automl/spark/metrics.py
@@ -40,9 +40,7 @@ def _process_df(df, label_col, prediction_col):
 def _compute_label_from_probability(df, probability_col, prediction_col):
     # array_max finds the maximum value in the 'probability' array
     # array_position finds the index of the maximum value in the 'probability' array
-    max_index_expr = F.expr(
-        f"array_position({probability_col}, array_max({probability_col}))-1"
-    )
+    max_index_expr = F.expr(f"array_position({probability_col}, array_max({probability_col}))-1")
     # Create a new column 'prediction' based on the maximum probability value
     df = df.withColumn(prediction_col, max_index_expr.cast("double"))
     return df
@@ -143,9 +141,7 @@ def spark_metric_loss_score(
         )
     elif metric_name == "log_loss":
         # For log_loss, prediction_col should be probability, and we need to convert it to label
-        df = _compute_label_from_probability(
-            df, prediction_col, prediction_col + "_label"
-        )
+        df = _compute_label_from_probability(df, prediction_col, prediction_col + "_label")
         evaluator = MulticlassClassificationEvaluator(
             metricName="logLoss",
             labelCol=label_col,
@@ -214,17 +210,11 @@ def spark_metric_loss_score(
                 score /= len(counts)
                 score += 1
         else:
-            evaluator = RankingEvaluator(
-                metricName="ndcgAtK", labelCol=label_col, predictionCol=prediction_col
-            )
+            evaluator = RankingEvaluator(metricName="ndcgAtK", labelCol=label_col, predictionCol=prediction_col)
             df = _process_df(df, label_col, prediction_col)
             score = 1 - evaluator.evaluate(df)
         return score
     else:
         raise ValueError(f"Unknown metric name: {metric_name} for spark models.")
 
-    return (
-        evaluator.evaluate(df)
-        if metric_name in min_mode_metrics
-        else 1 - evaluator.evaluate(df)
-    )
+    return evaluator.evaluate(df) if metric_name in min_mode_metrics else 1 - evaluator.evaluate(df)
diff --git a/flaml/automl/spark/utils.py b/flaml/automl/spark/utils.py
index 1e7eac7f8..c3b48be8d 100644
--- a/flaml/automl/spark/utils.py
+++ b/flaml/automl/spark/utils.py
@@ -73,9 +73,7 @@ def to_pandas_on_spark(
     elif isinstance(df, (ps.DataFrame, ps.Series)):
         return df
     else:
-        raise TypeError(
-            f"{type(df)} is not one of pandas.DataFrame, pandas.Series and pyspark.sql.DataFrame"
-        )
+        raise TypeError(f"{type(df)} is not one of pandas.DataFrame, pandas.Series and pyspark.sql.DataFrame")
 
 
 def train_test_split_pyspark(
@@ -106,10 +104,7 @@ def train_test_split_pyspark(
     if stratify_column:
         # Test data
         test_fraction_dict = (
-            df.select(stratify_column)
-            .distinct()
-            .withColumn("fraction", F.lit(test_fraction))
-            .rdd.collectAsMap()
+            df.select(stratify_column).distinct().withColumn("fraction", F.lit(test_fraction)).rdd.collectAsMap()
         )
         df_test = df.stat.sampleBy(stratify_column, test_fraction_dict, seed)
         # Train data
@@ -128,9 +123,7 @@ def train_test_split_pyspark(
     return [df_train, df_test]
 
 
-def unique_pandas_on_spark(
-    psds: Union[ps.Series, ps.DataFrame]
-) -> Tuple[np.ndarray, np.ndarray]:
+def unique_pandas_on_spark(psds: Union[ps.Series, ps.DataFrame]) -> Tuple[np.ndarray, np.ndarray]:
     """Get the unique values and counts of a pandas_on_spark series."""
     if isinstance(psds, ps.DataFrame):
         psds = psds.iloc[:, 0]
@@ -140,9 +133,7 @@ def unique_pandas_on_spark(
     return label_set, counts
 
 
-def len_labels(
-    y: Union[ps.Series, np.ndarray], return_labels=False
-) -> Union[int, Optional[np.ndarray]]:
+def len_labels(y: Union[ps.Series, np.ndarray], return_labels=False) -> Union[int, Optional[np.ndarray]]:
     """Get the number of unique labels in y."""
     if not isinstance(y, (ps.DataFrame, ps.Series)):
         labels = np.unique(y)
@@ -153,9 +144,7 @@ def len_labels(
     return len(labels)
 
 
-def unique_value_first_index(
-    y: Union[pd.Series, ps.Series, np.ndarray]
-) -> Tuple[np.ndarray, np.ndarray]:
+def unique_value_first_index(y: Union[pd.Series, ps.Series, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
     """Get the unique values and indices of a pandas series,
     pandas_on_spark series or numpy array."""
     if isinstance(y, ps.Series):
@@ -196,9 +185,7 @@ def iloc_pandas_on_spark(
             psdfiloc = psdfiloc.drop(columns=[index_col])
         return psdfiloc
     else:
-        raise TypeError(
-            f"{type(index)} is not one of int, slice and list for pandas_on_spark iloc"
-        )
+        raise TypeError(f"{type(index)} is not one of int, slice and list for pandas_on_spark iloc")
 
 
 def spark_kFold(
@@ -241,9 +228,7 @@ def spark_kFold(
             condition = (df[randCol] >= validateLB) & (df[randCol] < validateUB)
             validation = to_pandas_on_spark(df.filter(condition), index_col=index_col)
             train = to_pandas_on_spark(df.filter(~condition), index_col=index_col)
-            datasets.append(
-                (train.drop(columns=[randCol]), validation.drop(columns=[randCol]))
-            )
+            datasets.append((train.drop(columns=[randCol]), validation.drop(columns=[randCol])))
     else:
         # Use user-specified fold column
         def get_fold_num(foldNum: int) -> int:
diff --git a/flaml/automl/state.py b/flaml/automl/state.py
index 773275020..f739a7556 100644
--- a/flaml/automl/state.py
+++ b/flaml/automl/state.py
@@ -63,9 +63,7 @@ class SearchState:
             Notice (2) include the case starting point not in user specified search space custom_hp
         """
         if isinstance(domain_one_dim, sample.Domain):
-            renamed_type = list(
-                inspect.signature(domain_one_dim.is_valid).parameters.values()
-            )[0].annotation
+            renamed_type = list(inspect.signature(domain_one_dim.is_valid).parameters.values())[0].annotation
             type_match = (
                 renamed_type == Any
                 or isinstance(value_one_dim, renamed_type)
@@ -106,9 +104,7 @@ class SearchState:
         self.learner_class = learner_class
         self._budget = budget
         if task in TS_FORECAST:
-            search_space = learner_class.search_space(
-                data_size=data_size, task=task, pred_horizon=period
-            )
+            search_space = learner_class.search_space(data_size=data_size, task=task, pred_horizon=period)
         else:
             search_space = learner_class.search_space(data_size=data_size, task=task)
 
@@ -117,14 +113,10 @@ class SearchState:
 
         if isinstance(starting_point, dict):
             starting_point = AutoMLState.sanitize(starting_point)
-            if max_iter > 1 and not self.valid_starting_point(
-                starting_point, search_space
-            ):
+            if max_iter > 1 and not self.valid_starting_point(starting_point, search_space):
                 # If the number of iterations is larger than 1, remove invalid point
                 logger.warning(
-                    "Starting point {} removed because it is outside of the search space".format(
-                        starting_point
-                    )
+                    "Starting point {} removed because it is outside of the search space".format(starting_point)
                 )
                 starting_point = None
         elif isinstance(starting_point, list):
@@ -132,11 +124,7 @@ class SearchState:
             if max_iter > len(starting_point):
                 # If the number of starting points is no smaller than max iter, avoid the checking
                 starting_point_len = len(starting_point)
-                starting_point = [
-                    x
-                    for x in starting_point
-                    if self.valid_starting_point(x, search_space)
-                ]
+                starting_point = [x for x in starting_point if self.valid_starting_point(x, search_space)]
                 if starting_point_len > len(starting_point):
                     logger.warning(
                         "Starting points outside of the search space are removed. "
@@ -145,9 +133,7 @@ class SearchState:
                 starting_point = starting_point or None
 
         for name, space in search_space.items():
-            assert (
-                "domain" in space
-            ), f"{name}'s domain is missing in the search space spec {space}"
+            assert "domain" in space, f"{name}'s domain is missing in the search space spec {space}"
             if space["domain"] is None:
                 # don't search this hp
                 continue
@@ -159,19 +145,14 @@ class SearchState:
                 self.cat_hp_cost[name] = space["cat_hp_cost"]
             # if a starting point is provided, set the init config to be
             # the starting point provided
-            if (
-                isinstance(starting_point, dict)
-                and starting_point.get(name) is not None
-            ):
+            if isinstance(starting_point, dict) and starting_point.get(name) is not None:
                 if self.init_config is None:
                     self.init_config = {}
                 self.init_config[name] = starting_point[name]
             elif (
                 not isinstance(starting_point, list)
                 and "init_value" in space
-                and self.valid_starting_point_one_dim(
-                    space["init_value"], space["domain"]
-                )
+                and self.valid_starting_point_one_dim(space["init_value"], space["domain"])
             ):
                 if self.init_config is None:
                     self.init_config = {}
@@ -241,11 +222,7 @@ class SearchState:
             if time2eval:
                 self.time2eval_best_old = self.time2eval_best
                 self.time2eval_best = time2eval
-            if (
-                self.trained_estimator
-                and trained_estimator
-                and self.trained_estimator != trained_estimator
-            ):
+            if self.trained_estimator and trained_estimator and self.trained_estimator != trained_estimator:
                 self.trained_estimator.cleanup()
             if trained_estimator:
                 self.trained_estimator = trained_estimator
@@ -260,9 +237,7 @@ class SearchState:
         return config_sig
 
     def est_retrain_time(self, retrain_sample_size):
-        assert (
-            self.best_config_sample_size is not None
-        ), "need to first get best_config_sample_size"
+        assert self.best_config_sample_size is not None, "need to first get best_config_sample_size"
         return self.time2eval_best * retrain_sample_size / self.best_config_sample_size
 
 
@@ -283,9 +258,7 @@ class AutoMLState:
             )  # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
             if weight is not None:
                 sampled_weight = (
-                    weight.iloc[:sample_size]
-                    if isinstance(weight, (pd.Series, psSeries))
-                    else weight[:sample_size]
+                    weight.iloc[:sample_size] if isinstance(weight, (pd.Series, psSeries)) else weight[:sample_size]
                 )
             if self.groups is not None:
                 groups = (
@@ -338,10 +311,7 @@ class AutoMLState:
             if state.time_budget < 0
             else state.time_budget - state.time_from_start
             if sample_size == state.data_size[0]
-            else (state.time_budget - state.time_from_start)
-            / 2
-            * sample_size
-            / state.data_size[0]
+            else (state.time_budget - state.time_from_start) / 2 * sample_size / state.data_size[0]
         )
 
         (
@@ -357,9 +327,7 @@ class AutoMLState:
             state.y_val,
             state.weight_val,
             state.groups_val,
-            state.train_time_limit
-            if budget is None
-            else min(budget, state.train_time_limit or np.inf),
+            state.train_time_limit if budget is None else min(budget, state.train_time_limit or np.inf),
             state.kf,
             config,
             state.task,
@@ -406,9 +374,7 @@ class AutoMLState:
         sample_size: Optional[int] = None,
     ):
         if not sample_size:
-            sample_size = config_w_resource.get(
-                "FLAML_sample_size", len(self.y_train_all)
-            )
+            sample_size = config_w_resource.get("FLAML_sample_size", len(self.y_train_all))
         config = AutoMLState.sanitize(config_w_resource)
 
         this_estimator_kwargs = self.fit_kwargs_by_estimator.get(
@@ -432,9 +398,7 @@ class AutoMLState:
                 "groups"
             ] = groups  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
 
-        budget = (
-            None if self.time_budget < 0 else self.time_budget - self.time_from_start
-        )
+        budget = None if self.time_budget < 0 else self.time_budget - self.time_from_start
 
         estimator, train_time = train_estimator(
             X_train=sampled_X_train,
diff --git a/flaml/automl/task/generic_task.py b/flaml/automl/task/generic_task.py
index 95afeacc8..7c683a03a 100644
--- a/flaml/automl/task/generic_task.py
+++ b/flaml/automl/task/generic_task.py
@@ -108,44 +108,28 @@ class GenericTask(Task):
         groups=None,
     ):
         if X_train_all is not None and y_train_all is not None:
-            assert isinstance(
-                X_train_all, (np.ndarray, pd.DataFrame, psDataFrame)
-            ) or issparse(X_train_all), (
+            assert isinstance(X_train_all, (np.ndarray, pd.DataFrame, psDataFrame)) or issparse(X_train_all), (
                 "X_train_all must be a numpy array, a pandas dataframe, "
                 "a Scipy sparse matrix or a pyspark.pandas dataframe."
             )
             assert isinstance(
                 y_train_all, (np.ndarray, pd.Series, psSeries)
             ), "y_train_all must be a numpy array, a pandas series or a pyspark.pandas series."
-            assert (
-                X_train_all.size != 0 and y_train_all.size != 0
-            ), "Input data must not be empty."
+            assert X_train_all.size != 0 and y_train_all.size != 0, "Input data must not be empty."
             if isinstance(X_train_all, np.ndarray) and len(X_train_all.shape) == 1:
                 X_train_all = np.reshape(X_train_all, (X_train_all.size, 1))
             if isinstance(y_train_all, np.ndarray):
                 y_train_all = y_train_all.flatten()
-            assert (
-                X_train_all.shape[0] == y_train_all.shape[0]
-            ), "# rows in X_train must match length of y_train."
+            assert X_train_all.shape[0] == y_train_all.shape[0], "# rows in X_train must match length of y_train."
             if isinstance(X_train_all, psDataFrame):
-                X_train_all = (
-                    X_train_all.spark.cache()
-                )  # cache data to improve compute speed
+                X_train_all = X_train_all.spark.cache()  # cache data to improve compute speed
                 y_train_all = y_train_all.to_frame().spark.cache()[y_train_all.name]
-                logger.debug(
-                    f"X_train_all and y_train_all cached, shape of X_train_all: {X_train_all.shape}"
-                )
+                logger.debug(f"X_train_all and y_train_all cached, shape of X_train_all: {X_train_all.shape}")
             automl._df = isinstance(X_train_all, (pd.DataFrame, psDataFrame))
             automl._nrow, automl._ndim = X_train_all.shape
             if self.is_ts_forecast():
-                X_train_all = (
-                    pd.DataFrame(X_train_all)
-                    if isinstance(X_train_all, np.ndarray)
-                    else X_train_all
-                )
-                X_train_all, y_train_all = self._validate_ts_data(
-                    X_train_all, y_train_all
-                )
+                X_train_all = pd.DataFrame(X_train_all) if isinstance(X_train_all, np.ndarray) else X_train_all
+                X_train_all, y_train_all = self._validate_ts_data(X_train_all, y_train_all)
             X, y = X_train_all, y_train_all
         elif dataframe is not None and label is not None:
             assert isinstance(
@@ -155,9 +139,7 @@ class GenericTask(Task):
                 label in dataframe.columns
             ), f"The provided label column name `{label}` doesn't exist in the provided dataframe."
             if isinstance(dataframe, psDataFrame):
-                dataframe = (
-                    dataframe.spark.cache()
-                )  # cache data to improve compute speed
+                dataframe = dataframe.spark.cache()  # cache data to improve compute speed
                 logger.debug(f"dataframe cached, shape of dataframe: {dataframe.shape}")
             automl._df = True
             if self.is_ts_forecast():
@@ -183,9 +165,7 @@ class GenericTask(Task):
                 for _, each_cell in X[column].items():
                     if each_cell is not None:
                         is_str = isinstance(each_cell, str)
-                        is_list_of_int = isinstance(each_cell, list) and all(
-                            isinstance(x, int) for x in each_cell
-                        )
+                        is_list_of_int = isinstance(each_cell, list) and all(isinstance(x, int) for x in each_cell)
                         is_list_of_str = is_a_list_of_str(each_cell)
                         if self.is_token_classification():
                             assert is_list_of_str, (
@@ -222,9 +202,7 @@ class GenericTask(Task):
             automl._label_transformer = automl._transformer.label_transformer
             if self.is_token_classification():
                 if hasattr(automl._label_transformer, "label_list"):
-                    state.fit_kwargs.update(
-                        {"label_list": automl._label_transformer.label_list}
-                    )
+                    state.fit_kwargs.update({"label_list": automl._label_transformer.label_list})
                 elif "label_list" not in state.fit_kwargs:
                     for each_fit_kwargs in state.fit_kwargs_by_estimator.values():
                         assert (
@@ -232,34 +210,26 @@ class GenericTask(Task):
                         ), "For the token-classification task, you must either (1) pass token labels; or (2) pass id labels and the label list. "
                         "Please refer to the documentation for more details: https://microsoft.github.io/FLAML/docs/Examples/AutoML-NLP#a-simple-token-classification-example"
             automl._feature_names_in_ = (
-                automl._X_train_all.columns.to_list()
-                if hasattr(automl._X_train_all, "columns")
-                else None
+                automl._X_train_all.columns.to_list() if hasattr(automl._X_train_all, "columns") else None
             )
 
         automl._sample_weight_full = state.fit_kwargs.get(
             "sample_weight"
         )  # NOTE: _validate_data is before kwargs is updated to fit_kwargs_by_estimator
         if X_val is not None and y_val is not None:
-            assert isinstance(
-                X_val, (np.ndarray, pd.DataFrame, psDataFrame)
-            ) or issparse(X_train_all), (
+            assert isinstance(X_val, (np.ndarray, pd.DataFrame, psDataFrame)) or issparse(X_train_all), (
                 "X_val must be None, a numpy array, a pandas dataframe, "
                 "a Scipy sparse matrix or a pyspark.pandas dataframe."
             )
             assert isinstance(y_val, (np.ndarray, pd.Series, psSeries)), (
-                "y_val must be None, a numpy array, a pandas series "
-                "or a pyspark.pandas series."
+                "y_val must be None, a numpy array, a pandas series " "or a pyspark.pandas series."
             )
             assert X_val.size != 0 and y_val.size != 0, (
-                "Validation data are expected to be nonempty. "
-                "Use None for X_val and y_val if no validation data."
+                "Validation data are expected to be nonempty. " "Use None for X_val and y_val if no validation data."
             )
             if isinstance(y_val, np.ndarray):
                 y_val = y_val.flatten()
-            assert (
-                X_val.shape[0] == y_val.shape[0]
-            ), "# rows in X_val must match length of y_val."
+            assert X_val.shape[0] == y_val.shape[0], "# rows in X_val must match length of y_val."
             if automl._transformer:
                 state.X_val = automl._transformer.transform(X_val)
             else:
@@ -276,13 +246,9 @@ class GenericTask(Task):
         if groups is not None and len(groups) != automl._nrow:
             # groups is given as group counts
             state.groups = np.concatenate([[i] * c for i, c in enumerate(groups)])
-            assert (
-                len(state.groups) == automl._nrow
-            ), "the sum of group counts must match the number of examples"
+            assert len(state.groups) == automl._nrow, "the sum of group counts must match the number of examples"
             state.groups_val = (
-                np.concatenate([[i] * c for i, c in enumerate(groups_val)])
-                if groups_val is not None
-                else None
+                np.concatenate([[i] * c for i, c in enumerate(groups_val)]) if groups_val is not None else None
             )
         else:
             state.groups_val = groups_val
@@ -345,11 +311,7 @@ class GenericTask(Task):
         if not isinstance(y_train_all, (psDataFrame, psSeries)):
             raise ValueError("y_train_all must be a pyspark.pandas dataframe or series")
         df_all_in_one = X_train_all.join(y_train_all)
-        stratify_column = (
-            y_train_all.name
-            if isinstance(y_train_all, psSeries)
-            else y_train_all.columns[0]
-        )
+        stratify_column = y_train_all.name if isinstance(y_train_all, psSeries) else y_train_all.columns[0]
         ret_sample_weight = False
         if (
             "sample_weight" in state.fit_kwargs
@@ -367,9 +329,7 @@ class GenericTask(Task):
             test_fraction=split_ratio,
             seed=RANDOM_SEED,
         )
-        columns_to_drop = [
-            c for c in df_all_train.columns if c in [stratify_column, "sample_weight"]
-        ]
+        columns_to_drop = [c for c in df_all_train.columns if c in [stratify_column, "sample_weight"]]
         X_train = df_all_train.drop(columns_to_drop)
         X_val = df_all_val.drop(columns_to_drop)
         y_train = df_all_train[stratify_column]
@@ -387,17 +347,13 @@ class GenericTask(Task):
         return X_train, X_val, y_train, y_val
 
     @staticmethod
-    def _train_test_split(
-        state, X, y, first=None, rest=None, split_ratio=0.2, stratify=None
-    ):
+    def _train_test_split(state, X, y, first=None, rest=None, split_ratio=0.2, stratify=None):
         condition_type = isinstance(X, (psDataFrame, psSeries))
         # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
         condition_param = "sample_weight" in state.fit_kwargs
         if not condition_type and condition_param:
             sample_weight = (
-                state.fit_kwargs["sample_weight"]
-                if rest is None
-                else state.fit_kwargs["sample_weight"][rest]
+                state.fit_kwargs["sample_weight"] if rest is None else state.fit_kwargs["sample_weight"][rest]
             )
             (
                 X_train,
@@ -448,9 +404,7 @@ class GenericTask(Task):
                 state.weight_val = weight_val
                 state.fit_kwargs["sample_weight"] = weight_train
         else:
-            X_train, X_val, y_train, y_val = GenericTask._split_pyspark(
-                state, X, y, split_ratio, stratify
-            )
+            X_train, X_val, y_train, y_val = GenericTask._split_pyspark(state, X, y, split_ratio, stratify)
         return X_train, X_val, y_train, y_val
 
     def prepare_data(
@@ -498,21 +452,13 @@ class GenericTask(Task):
                 n = len(y_train_all)
                 while count < rare_threshld:
                     if data_is_df:
-                        X_train_all = concat(
-                            X_train_all, X_train_all.iloc[:n].loc[rare_index]
-                        )
+                        X_train_all = concat(X_train_all, X_train_all.iloc[:n].loc[rare_index])
                     else:
-                        X_train_all = concat(
-                            X_train_all, X_train_all[:n][rare_index, :]
-                        )
+                        X_train_all = concat(X_train_all, X_train_all[:n][rare_index, :])
                     if isinstance(y_train_all, (pd.Series, psSeries)):
-                        y_train_all = concat(
-                            y_train_all, y_train_all.iloc[:n].loc[rare_index]
-                        )
+                        y_train_all = concat(y_train_all, y_train_all.iloc[:n].loc[rare_index])
                     else:
-                        y_train_all = np.concatenate(
-                            [y_train_all, y_train_all[:n][rare_index]]
-                        )
+                        y_train_all = np.concatenate([y_train_all, y_train_all[:n][rare_index]])
                     count += rare_count
                 logger.info(f"class {label} augmented from {rare_count} to {count}")
         SHUFFLE_SPLIT_TYPES = ["uniform", "stratified"]
@@ -535,9 +481,7 @@ class GenericTask(Task):
                 if isinstance(state.sample_weight_all, pd.Series):
                     state.sample_weight_all.reset_index(drop=True, inplace=True)
             else:
-                X_train_all, y_train_all = shuffle(
-                    X_train_all, y_train_all, random_state=RANDOM_SEED
-                )
+                X_train_all, y_train_all = shuffle(X_train_all, y_train_all, random_state=RANDOM_SEED)
             if data_is_df:
                 X_train_all.reset_index(drop=True, inplace=True)
             if isinstance(y_train_all, pd.Series):
@@ -569,21 +513,13 @@ class GenericTask(Task):
                         X_train_all = X_train_all.sort_values(ids)
                         y_train_all = y_train_all.sort_values(ids)
                         training_cutoff = X_train_all["time_idx"].max() - period
-                        X_train = X_train_all[
-                            X_train_all["time_idx"] <= training_cutoff
-                        ]
-                        y_train = y_train_all[
-                            y_train_all["time_idx"] <= training_cutoff
-                        ].drop(columns=ids)
+                        X_train = X_train_all[X_train_all["time_idx"] <= training_cutoff]
+                        y_train = y_train_all[y_train_all["time_idx"] <= training_cutoff].drop(columns=ids)
                         X_val = X_train_all[X_train_all["time_idx"] > training_cutoff]
-                        y_val = y_train_all[
-                            y_train_all["time_idx"] > training_cutoff
-                        ].drop(columns=ids)
+                        y_val = y_train_all[y_train_all["time_idx"] > training_cutoff].drop(columns=ids)
                     else:
                         num_samples = X_train_all.shape[0]
-                        assert (
-                            period < num_samples
-                        ), f"period={period}>#examples={num_samples}"
+                        assert period < num_samples, f"period={period}>#examples={num_samples}"
                         split_idx = num_samples - period
                         X_train = X_train_all[:split_idx]
                         y_train = y_train_all[:split_idx]
@@ -627,20 +563,14 @@ class GenericTask(Task):
                                 "sample_weight"
                             ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                             state.weight_val,
-                        ) = self._split_pyspark(
-                            state, X_train_all, y_train_all, split_ratio
-                        )
+                        ) = self._split_pyspark(state, X_train_all, y_train_all, split_ratio)
                     else:
                         X_train, X_val, y_train, y_val = self._split_pyspark(
                             state, X_train_all, y_train_all, split_ratio
                         )
             elif split_type == "group":
-                gss = GroupShuffleSplit(
-                    n_splits=1, test_size=split_ratio, random_state=RANDOM_SEED
-                )
-                for train_idx, val_idx in gss.split(
-                    X_train_all, y_train_all, state.groups_all
-                ):
+                gss = GroupShuffleSplit(n_splits=1, test_size=split_ratio, random_state=RANDOM_SEED)
+                for train_idx, val_idx in gss.split(X_train_all, y_train_all, state.groups_all):
                     if data_is_df:
                         X_train = X_train_all.iloc[train_idx]
                         X_val = X_train_all.iloc[val_idx]
@@ -674,17 +604,9 @@ class GenericTask(Task):
                     state, X_rest, y_rest, first, rest, split_ratio, stratify
                 )
                 X_train = concat(X_first, X_train)
-                y_train = (
-                    concat(label_set, y_train)
-                    if data_is_df
-                    else np.concatenate([label_set, y_train])
-                )
+                y_train = concat(label_set, y_train) if data_is_df else np.concatenate([label_set, y_train])
                 X_val = concat(X_first, X_val)
-                y_val = (
-                    concat(label_set, y_val)
-                    if data_is_df
-                    else np.concatenate([label_set, y_val])
-                )
+                y_val = concat(label_set, y_val) if data_is_df else np.concatenate([label_set, y_val])
             elif self.is_regression():
                 X_train, X_val, y_train, y_val = self._train_test_split(
                     state, X_train_all, y_train_all, split_ratio=split_ratio
@@ -700,9 +622,7 @@ class GenericTask(Task):
             return
         if split_type == "group":
             # logger.info("Using GroupKFold")
-            assert (
-                len(state.groups_all) == y_train_all_size
-            ), "the length of groups must match the number of examples"
+            assert len(state.groups_all) == y_train_all_size, "the length of groups must match the number of examples"
             assert (
                 len_labels(state.groups_all) >= n_splits
             ), "the number of groups must be equal or larger than n_splits"
@@ -710,16 +630,13 @@ class GenericTask(Task):
         elif split_type == "stratified":
             # logger.info("Using StratifiedKFold")
             assert y_train_all_size >= n_splits, (
-                f"{n_splits}-fold cross validation"
-                f" requires input data with at least {n_splits} examples."
+                f"{n_splits}-fold cross validation" f" requires input data with at least {n_splits} examples."
             )
             assert y_train_all_size >= 2 * n_splits, (
                 f"{n_splits}-fold cross validation with metric=r2 "
                 f"requires input data with at least {n_splits*2} examples."
             )
-            state.kf = RepeatedStratifiedKFold(
-                n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED
-            )
+            state.kf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
         elif split_type == "time":
             # logger.info("Using TimeSeriesSplit")
             if self.is_ts_forecast() and not self.is_ts_forecastpanel():
@@ -735,20 +652,14 @@ class GenericTask(Task):
                     logger.info(f"Using nsplits={n_splits} due to data size limit.")
                 state.kf = TimeSeriesSplit(n_splits=n_splits, test_size=period)
             elif self.is_ts_forecastpanel():
-                n_groups = len(
-                    X_train.groupby(state.fit_kwargs.get("group_ids")).size()
-                )
+                n_groups = len(X_train.groupby(state.fit_kwargs.get("group_ids")).size())
                 period = state.fit_kwargs.get("period")
-                state.kf = TimeSeriesSplit(
-                    n_splits=n_splits, test_size=period * n_groups
-                )
+                state.kf = TimeSeriesSplit(n_splits=n_splits, test_size=period * n_groups)
             else:
                 state.kf = TimeSeriesSplit(n_splits=n_splits)
         elif isinstance(split_type, str):
             # logger.info("Using RepeatedKFold")
-            state.kf = RepeatedKFold(
-                n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED
-            )
+            state.kf = RepeatedKFold(n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
         else:
             # logger.info("Using splitter object")
             state.kf = split_type
@@ -790,11 +701,7 @@ class GenericTask(Task):
 
         elif self.is_classification():
             assert split_type in ["auto", "stratified", "uniform", "time", "group"]
-            return (
-                split_type
-                if split_type != "auto"
-                else groups is None and "stratified" or "group"
-            )
+            return split_type if split_type != "auto" else groups is None and "stratified" or "group"
 
         elif self.is_regression():
             assert split_type in ["auto", "uniform", "time", "group"]
@@ -825,9 +732,7 @@ class GenericTask(Task):
                     )
                 )
             except IndexError:
-                raise IndexError(
-                    "Test data contains more columns than training data, exiting"
-                )
+                raise IndexError("Test data contains more columns than training data, exiting")
         elif isinstance(X, int):
             return X
         elif isinstance(X, psDataFrame):
@@ -872,9 +777,7 @@ class GenericTask(Task):
         if self.is_classification():
             labels = _, labels = len_labels(y_train_all, return_labels=True)
         else:
-            labels = fit_kwargs.get(
-                "label_list"
-            )  # pass the label list on to compute the evaluation metric
+            labels = fit_kwargs.get("label_list")  # pass the label list on to compute the evaluation metric
         if "sample_weight" in fit_kwargs:
             weight = fit_kwargs["sample_weight"]
             weight_val = None
@@ -889,9 +792,7 @@ class GenericTask(Task):
             if isinstance(kf, (GroupKFold, StratifiedGroupKFold)):
                 groups = kf.groups
                 dataframe = dataframe.join(groups)
-            kf = spark_kFold(
-                dataframe, nFolds=n, foldCol=groups.name if groups is not None else ""
-            )
+            kf = spark_kFold(dataframe, nFolds=n, foldCol=groups.name if groups is not None else "")
             shuffle = False
         else:
             X_train_split, y_train_split = X_train_all, y_train_all
@@ -934,15 +835,9 @@ class GenericTask(Task):
                     )
                 if groups is not None:
                     fit_kwargs["groups"] = (
-                        groups[train_index]
-                        if isinstance(groups, np.ndarray)
-                        else groups.iloc[train_index]
-                    )
-                    groups_val = (
-                        groups[val_index]
-                        if isinstance(groups, np.ndarray)
-                        else groups.iloc[val_index]
+                        groups[train_index] if isinstance(groups, np.ndarray) else groups.iloc[train_index]
                     )
+                    groups_val = groups[val_index] if isinstance(groups, np.ndarray) else groups.iloc[val_index]
                 else:
                     groups_val = None
 
@@ -983,16 +878,12 @@ class GenericTask(Task):
         pred_time /= n
         return val_loss, metric, train_time, pred_time
 
-    def default_estimator_list(
-        self, estimator_list: List[str], is_spark_dataframe: bool = False
-    ) -> List[str]:
+    def default_estimator_list(self, estimator_list: List[str], is_spark_dataframe: bool = False) -> List[str]:
         if "auto" != estimator_list:
             n_estimators = len(estimator_list)
             if is_spark_dataframe:
                 # For spark dataframe, only estimators ending with '_spark' are supported
-                estimator_list = [
-                    est for est in estimator_list if est.endswith("_spark")
-                ]
+                estimator_list = [est for est in estimator_list if est.endswith("_spark")]
                 if len(estimator_list) == 0:
                     raise ValueError(
                         "Spark dataframes only support estimator names ending with `_spark`. Non-supported "
@@ -1005,9 +896,7 @@ class GenericTask(Task):
                     )
             else:
                 # For non-spark dataframe, only estimators not ending with '_spark' are supported
-                estimator_list = [
-                    est for est in estimator_list if not est.endswith("_spark")
-                ]
+                estimator_list = [est for est in estimator_list if not est.endswith("_spark")]
                 if len(estimator_list) == 0:
                     raise ValueError(
                         "Non-spark dataframes only support estimator names not ending with `_spark`. Non-supported "
@@ -1069,11 +958,7 @@ class GenericTask(Task):
         estimator_list = [
             est
             for est in estimator_list
-            if (
-                est.endswith("_spark")
-                if is_spark_dataframe
-                else not est.endswith("_spark")
-            )
+            if (est.endswith("_spark") if is_spark_dataframe else not est.endswith("_spark"))
         ]
         return estimator_list
 
diff --git a/flaml/automl/task/task.py b/flaml/automl/task/task.py
index 0f7275bb8..5c8ecee91 100644
--- a/flaml/automl/task/task.py
+++ b/flaml/automl/task/task.py
@@ -333,9 +333,7 @@ class Task(ABC):
         return self.name == other
 
     @classmethod
-    def estimator_class_from_str(
-        cls, estimator_name: str
-    ) -> "flaml.automl.ml.BaseEstimator":
+    def estimator_class_from_str(cls, estimator_name: str) -> "flaml.automl.ml.BaseEstimator":
         """Determine the estimator class corresponding to the provided name.
 
         Args:
diff --git a/flaml/automl/training_log.py b/flaml/automl/training_log.py
index 8f3fc246b..c49c6e592 100644
--- a/flaml/automl/training_log.py
+++ b/flaml/automl/training_log.py
@@ -111,9 +111,7 @@ class TrainingLogWriter(object):
         if self.file is None:
             raise IOError("Call open() to open the output file first.")
         if self.current_best_loss_record_id is None:
-            logger.warning(
-                "flaml.training_log: checkpoint() called before any record is written, skipped."
-            )
+            logger.warning("flaml.training_log: checkpoint() called before any record is written, skipped.")
             return
         record = TrainingLogCheckPoint(self.current_best_loss_record_id)
         record.dump(self.file)
diff --git a/flaml/default/estimator.py b/flaml/default/estimator.py
index 3465d8709..67598702b 100644
--- a/flaml/default/estimator.py
+++ b/flaml/default/estimator.py
@@ -75,10 +75,7 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
                         break
             estimator_name = (
                 "choose_xgb"
-                if (
-                    estimator_name == "xgb_limitdepth"
-                    and "max_depth" not in self._params
-                )
+                if (estimator_name == "xgb_limitdepth" and "max_depth" not in self._params)
                 else estimator_name
             )
             (
@@ -88,18 +85,14 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
                 y_transformed,
                 self._feature_transformer,
                 self._label_transformer,
-            ) = preprocess_and_suggest_hyperparams(
-                task, X, y, estimator_name, self._default_location
-            )
+            ) = preprocess_and_suggest_hyperparams(task, X, y, estimator_name, self._default_location)
             assert estimator_class == super_class
             hyperparams.update(self._params)
             return hyperparams, estimator_name, X_transformed, y_transformed
 
         @wraps(super_class.fit)
         def fit(self, X, y, *args, **params):
-            hyperparams, estimator_name, X, y_transformed = self.suggest_hyperparams(
-                X, y
-            )
+            hyperparams, estimator_name, X, y_transformed = self.suggest_hyperparams(X, y)
             self.set_params(**hyperparams)
             if self._label_transformer and estimator_name in [
                 "rf",
@@ -150,26 +143,16 @@ def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
     return EstimatorClass
 
 
-RandomForestRegressor = flamlize_estimator(
-    ensemble.RandomForestRegressor, "rf", "regression"
-)
-RandomForestClassifier = flamlize_estimator(
-    ensemble.RandomForestClassifier, "rf", "classification"
-)
-ExtraTreesRegressor = flamlize_estimator(
-    ensemble.ExtraTreesRegressor, "extra_tree", "regression"
-)
-ExtraTreesClassifier = flamlize_estimator(
-    ensemble.ExtraTreesClassifier, "extra_tree", "classification"
-)
+RandomForestRegressor = flamlize_estimator(ensemble.RandomForestRegressor, "rf", "regression")
+RandomForestClassifier = flamlize_estimator(ensemble.RandomForestClassifier, "rf", "classification")
+ExtraTreesRegressor = flamlize_estimator(ensemble.ExtraTreesRegressor, "extra_tree", "regression")
+ExtraTreesClassifier = flamlize_estimator(ensemble.ExtraTreesClassifier, "extra_tree", "classification")
 
 try:
     import lightgbm
 
     LGBMRegressor = flamlize_estimator(lightgbm.LGBMRegressor, "lgbm", "regression")
-    LGBMClassifier = flamlize_estimator(
-        lightgbm.LGBMClassifier, "lgbm", "classification"
-    )
+    LGBMClassifier = flamlize_estimator(lightgbm.LGBMClassifier, "lgbm", "classification")
 except ImportError:
     pass
 
diff --git a/flaml/default/greedy.py b/flaml/default/greedy.py
index 0b2fbb028..5306758e4 100644
--- a/flaml/default/greedy.py
+++ b/flaml/default/greedy.py
@@ -71,17 +71,12 @@ def construct_portfolio(regret_matrix, meta_features, regret_bound):
         sorted_losses = np.sort(losses)
         if sorted_losses[1] - sorted_losses[0] < eps:
             minloss = np.nanmin(losses)
-            print(
-                f"tie detected at loss = {sorted_losses[0]}, using alternative metric."
-            )
+            print(f"tie detected at loss = {sorted_losses[0]}, using alternative metric.")
             tied = np.flatnonzero(losses - minloss < eps)
             losses = [(avg_regret[i], i) for i in tied]
             minloss, ind = min(losses)
             if minloss > prev - eps:
-                print(
-                    f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, "
-                    f"prev = {prev:.5f}. Stopping."
-                )
+                print(f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, " f"prev = {prev:.5f}. Stopping.")
                 break
             configs = candidates[ind]
             prev = minloss
@@ -89,9 +84,7 @@ def construct_portfolio(regret_matrix, meta_features, regret_bound):
             configs = candidates[np.nanargmin(losses)]
         i += 1
         if sorted_losses[0] <= eps:
-            print(
-                f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!"
-            )
+            print(f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!")
             break
 
     return configs
diff --git a/flaml/default/portfolio.py b/flaml/default/portfolio.py
index 527dae05e..59165784e 100644
--- a/flaml/default/portfolio.py
+++ b/flaml/default/portfolio.py
@@ -109,9 +109,7 @@ def serialize(configs, regret, meta_features, output_file, config_path):
     except FileNotFoundError:
         pass
 
-    meta_features_norm, preferences, proc = config_predictor_tuple(
-        regret.columns, configs, meta_features, regret
-    )
+    meta_features_norm, preferences, proc = config_predictor_tuple(regret.columns, configs, meta_features, regret)
     portfolio = [load_json(config_path.joinpath(m + ".json")) for m in configs]
     regret = regret.loc[configs]
 
@@ -122,9 +120,7 @@ def serialize(configs, regret, meta_features, output_file, config_path):
         "preprocessing": proc,
         "neighbors": [
             {"features": tuple(x), "choice": _filter(preferences[y], regret[y])}
-            for x, y in zip(
-                meta_features_norm.to_records(index=False), preferences.columns
-            )
+            for x, y in zip(meta_features_norm.to_records(index=False), preferences.columns)
         ],
         "configsource": list(configs),
     }
@@ -164,9 +160,7 @@ def serialize(configs, regret, meta_features, output_file, config_path):
 
 def main():
     parser = argparse.ArgumentParser(description="Build a portfolio.")
-    parser.add_argument(
-        "--strategy", help="One of {greedy, greedy-feedback}", default="greedy"
-    )
+    parser.add_argument("--strategy", help="One of {greedy, greedy-feedback}", default="greedy")
     parser.add_argument("--input", help="Input path")
     parser.add_argument("--metafeatures", help="CSV of task metafeatures")
     parser.add_argument("--exclude", help="One task name to exclude (for LOO purposes)")
@@ -188,9 +182,7 @@ def main():
     all_results = None
     for estimator in args.estimator:
         # produce regret
-        all, baseline = load_result(
-            f"{args.input}/{estimator}/results.csv", args.task, "result"
-        )
+        all, baseline = load_result(f"{args.input}/{estimator}/results.csv", args.task, "result")
         regret = build_regret(all, baseline)
         regret = regret.replace(np.inf, np.nan).dropna(axis=1, how="all")
 
@@ -198,9 +190,7 @@ def main():
             regret = regret.loc[[i for i in regret.index if args.exclude not in i]]
             regret = regret[[c for c in regret.columns if args.exclude not in c]]
 
-        print(
-            f"Regret matrix complete: {100 * regret.count().sum() / regret.shape[0] / regret.shape[1]}%"
-        )
+        print(f"Regret matrix complete: {100 * regret.count().sum() / regret.shape[0] / regret.shape[1]}%")
         print(f"Num models considered: {regret.shape[0]}")
 
         configs = build_portfolio(meta_features, regret, args.strategy)
@@ -214,11 +204,7 @@ def main():
         configsource = meta_predictor["configsource"]
         all = all.loc[configsource]
         all.rename({x: f"{estimator}/{x}" for x in regret.index.values}, inplace=True)
-        baseline_best = (
-            baseline
-            if baseline_best is None
-            else pd.DataFrame({0: baseline_best, 1: baseline}).max(1)
-        )
+        baseline_best = baseline if baseline_best is None else pd.DataFrame({0: baseline_best, 1: baseline}).max(1)
         all_results = all if all_results is None else pd.concat([all_results, all])
         # analyze(regret, meta_predictor)
     regrets = build_regret(all_results, baseline_best)
diff --git a/flaml/default/regret.py b/flaml/default/regret.py
index e566ac6e7..475d610b5 100644
--- a/flaml/default/regret.py
+++ b/flaml/default/regret.py
@@ -18,14 +18,8 @@ def load_result(filename, task_type, metric):
         (df[metric].notnull()) & (df.type == task_type),
         ["task", "fold", "params", metric],
     ]
-    df["params"] = df["params"].apply(
-        lambda x: path.splitext(path.basename(eval(x)["_modeljson"]))[0]
-    )
-    baseline = (
-        df.loc[df["task"] == df["params"], ["task", metric]]
-        .groupby("task")
-        .mean()[metric]
-    )
+    df["params"] = df["params"].apply(lambda x: path.splitext(path.basename(eval(x)["_modeljson"]))[0])
+    baseline = df.loc[df["task"] == df["params"], ["task", metric]].groupby("task").mean()[metric]
     df = df.pivot_table(index="params", columns="task", values=metric)
     return df, baseline
 
@@ -34,9 +28,7 @@ def main():
     parser = argparse.ArgumentParser(description="Build a regret matrix.")
     parser.add_argument("--result_csv", help="File of experiment results")
     parser.add_argument("--task_type", help="Type of task")
-    parser.add_argument(
-        "--metric", help="Metric for calculating regret", default="result"
-    )
+    parser.add_argument("--metric", help="Metric for calculating regret", default="result")
     parser.add_argument("--output", help="Location to write regret CSV to")
     args = parser.parse_args()
 
diff --git a/flaml/default/suggest.py b/flaml/default/suggest.py
index 429ff67a2..ec0a706ad 100644
--- a/flaml/default/suggest.py
+++ b/flaml/default/suggest.py
@@ -53,10 +53,7 @@ def meta_feature(task, X_train, y_train, meta_feature_names):
             try:
                 # this feature is only supported for dataframe
                 this_feature.append(
-                    X_train.select_dtypes(
-                        include=[np.number, "float", "int", "long"]
-                    ).shape[1]
-                    / n_feat
+                    X_train.select_dtypes(include=[np.number, "float", "int", "long"]).shape[1] / n_feat
                 )
             except AttributeError:
                 # 'numpy.ndarray' object has no attribute 'select_dtypes'
@@ -79,9 +76,7 @@ def load_config_predictor(estimator_name, task, location=None):
         with open(f"{location}/{estimator_name}/{task}.json", "r") as f:
             CONFIG_PREDICTORS[key] = predictor = json.load(f)
     except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Portfolio has not been built for {estimator_name} on {task} task."
-        )
+        raise FileNotFoundError(f"Portfolio has not been built for {estimator_name} on {task} task.")
     return predictor
 
 
@@ -99,11 +94,7 @@ def suggest_config(
     The returned configs can be used as starting points for AutoML.fit().
     `FLAML_sample_size` is removed from the configs.
     """
-    task = (
-        get_classification_objective(len_labels(y))
-        if task == "classification" and y is not None
-        else task
-    )
+    task = get_classification_objective(len_labels(y)) if task == "classification" and y is not None else task
     predictor = (
         load_config_predictor(estimator_or_predictor, task, location)
         if isinstance(estimator_or_predictor, str)
@@ -112,15 +103,9 @@ def suggest_config(
 
     older_version = "1.0.2"
     # TODO: update older_version when the newer code can no longer handle the older version json file
-    assert (
-        version_parse(__version__)
-        >= version_parse(predictor["version"])
-        >= version_parse(older_version)
-    )
+    assert version_parse(__version__) >= version_parse(predictor["version"]) >= version_parse(older_version)
     prep = predictor["preprocessing"]
-    feature = meta_feature_fn(
-        task, X_train=X, y_train=y, meta_feature_names=predictor["meta_feature_names"]
-    )
+    feature = meta_feature_fn(task, X_train=X, y_train=y, meta_feature_names=predictor["meta_feature_names"])
     feature = (np.array(feature) - np.array(prep["center"])) / np.array(prep["scale"])
     neighbors = predictor["neighbors"]
     nn = NearestNeighbors(n_neighbors=1)
@@ -138,9 +123,7 @@ def suggest_config(
     return configs
 
 
-def suggest_learner(
-    task, X, y, estimator_or_predictor="all", estimator_list=None, location=None
-):
+def suggest_learner(task, X, y, estimator_or_predictor="all", estimator_list=None, location=None):
     """Suggest best learner within estimator_list."""
     configs = suggest_config(task, X, y, estimator_or_predictor, location)
     if not estimator_list:
@@ -193,9 +176,7 @@ def suggest_hyperparams(task, X, y, estimator_or_predictor, location=None):
         hyperparams: A dict of the hyperparameter configurations.
         estiamtor_class: A class of the underlying estimator, e.g., lightgbm.LGBMClassifier.
     """
-    config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[
-        0
-    ]
+    config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[0]
     estimator = config["class"]
     model_class = get_estimator_class(task, estimator)
     hyperparams = config["hyperparameters"]
@@ -279,9 +260,7 @@ def preprocess_and_suggest_hyperparams(
             estimator_list=["xgb_limitdepth", "xgboost"],
             location=location,
         )
-    config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[
-        0
-    ]
+    config = suggest_config(task, X, y, estimator_or_predictor, location=location, k=1)[0]
     estimator = config["class"]
     model_class = get_estimator_class(task, estimator)
     hyperparams = config["hyperparameters"]
diff --git a/flaml/onlineml/autovw.py b/flaml/onlineml/autovw.py
index da0e520b7..f4c1ea754 100644
--- a/flaml/onlineml/autovw.py
+++ b/flaml/onlineml/autovw.py
@@ -113,12 +113,8 @@ class AutoVW:
         search_space = self._search_space.copy()
         for k, v in self._search_space.items():
             if k == self.VW_INTERACTION_ARG_NAME and v == self.AUTOMATIC:
-                raw_namespaces = self.get_ns_feature_dim_from_vw_example(
-                    vw_example
-                ).keys()
-                search_space[k] = polynomial_expansion_set(
-                    init_monomials=set(raw_namespaces)
-                )
+                raw_namespaces = self.get_ns_feature_dim_from_vw_example(vw_example).keys()
+                search_space[k] = polynomial_expansion_set(init_monomials=set(raw_namespaces))
         # setup the init config based on the input _init_config and search space
         init_config = self._init_config.copy()
         for k, v in search_space.items():
@@ -158,10 +154,7 @@ class AutoVW:
         self._best_trial = self._select_best_trial()
         self._y_predict = self._best_trial.predict(data_sample)
         # code for debugging purpose
-        if (
-            self._prediction_trial_id is None
-            or self._prediction_trial_id != self._best_trial.trial_id
-        ):
+        if self._prediction_trial_id is None or self._prediction_trial_id != self._best_trial.trial_id:
             self._prediction_trial_id = self._best_trial.trial_id
             logger.info(
                 "prediction trial id changed to %s at iter %s, resource used: %s",
@@ -183,14 +176,11 @@ class AutoVW:
 
     def _select_best_trial(self):
         """Select a best trial from the running trials according to the _model_select_policy."""
-        best_score = (
-            float("+inf") if self._model_selection_mode == "min" else float("-inf")
-        )
+        best_score = float("+inf") if self._model_selection_mode == "min" else float("-inf")
         new_best_trial = None
         for trial in self._trial_runner.running_trials:
             if trial.result is not None and (
-                "threshold" not in self._model_select_policy
-                or trial.result.resource_used >= self.WARMSTART_NUM
+                "threshold" not in self._model_select_policy or trial.result.resource_used >= self.WARMSTART_NUM
             ):
                 score = trial.result.get_score(self._model_select_policy)
                 if ("min" == self._model_selection_mode and score < best_score) or (
@@ -199,18 +189,13 @@ class AutoVW:
                     best_score = score
                     new_best_trial = trial
         if new_best_trial is not None:
-            logger.debug(
-                "best_trial resource used: %s", new_best_trial.result.resource_used
-            )
+            logger.debug("best_trial resource used: %s", new_best_trial.result.resource_used)
             return new_best_trial
         else:
             # This branch will be triggered when the resource consumption all trials are smaller
             # than the WARMSTART_NUM threshold. In this case, we will select the _best_trial
             # selected in the previous iteration.
-            if (
-                self._best_trial is not None
-                and self._best_trial.status == Trial.RUNNING
-            ):
+            if self._best_trial is not None and self._best_trial.status == Trial.RUNNING:
                 logger.debug("old best trial %s", self._best_trial.trial_id)
                 return self._best_trial
             else:
diff --git a/flaml/onlineml/trial.py b/flaml/onlineml/trial.py
index d04126c05..134211bc8 100644
--- a/flaml/onlineml/trial.py
+++ b/flaml/onlineml/trial.py
@@ -106,9 +106,7 @@ class OnlineResult:
         self._loss_cb = self._update_loss_cb(bound_of_range, data_dimension)
         self._loss_queue.append(new_loss)
 
-    def _update_loss_cb(
-        self, bound_of_range, data_dim, bound_name="sample_complexity_bound"
-    ):
+    def _update_loss_cb(self, bound_of_range, data_dim, bound_name="sample_complexity_bound"):
         """Calculate the coefficient of the confidence bound."""
         if bound_name == "sample_complexity_bound":
             # set the coefficient in the loss bound
@@ -119,9 +117,7 @@ class OnlineResult:
 
             comp_F = math.sqrt(data_dim)
             n = self.observation_count
-            return (
-                coef * comp_F * math.sqrt((np.log10(n / OnlineResult.prob_delta)) / n)
-            )
+            return coef * comp_F * math.sqrt((np.log10(n / OnlineResult.prob_delta)) / n)
         else:
             raise NotImplementedError
 
@@ -147,11 +143,7 @@ class OnlineResult:
 
     @property
     def loss_avg_recent(self):
-        return (
-            sum(self._loss_queue) / len(self._loss_queue)
-            if len(self._loss_queue) != 0
-            else self._init_loss
-        )
+        return sum(self._loss_queue) / len(self._loss_queue) if len(self._loss_queue) != 0 else self._init_loss
 
     def get_score(self, score_name, cb_ratio=1):
         if "lcb" in score_name:
@@ -282,9 +274,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
         try:
             from vowpalwabbit import pyvw
         except ImportError:
-            raise ImportError(
-                "To use AutoVW, please run pip install flaml[vw] to install vowpalwabbit"
-            )
+            raise ImportError("To use AutoVW, please run pip install flaml[vw] to install vowpalwabbit")
         # attributes
         self.trial_id = self._config_to_id(config) if trial_id is None else trial_id
         logger.info("Create trial with trial_id: %s", self.trial_id)
@@ -327,14 +317,10 @@ class VowpalWabbitTrial(BaseOnlineTrial):
     def _initialize_vw_model(self, vw_example):
         """Initialize a vw model using the trainable_class"""
         self._vw_config = self.config.copy()
-        ns_interactions = self.config.get(
-            VowpalWabbitTrial.interactions_config_key, None
-        )
+        ns_interactions = self.config.get(VowpalWabbitTrial.interactions_config_key, None)
         # ensure the feature interaction config is a list (required by VW)
         if ns_interactions is not None:
-            self._vw_config[VowpalWabbitTrial.interactions_config_key] = list(
-                ns_interactions
-            )
+            self._vw_config[VowpalWabbitTrial.interactions_config_key] = list(ns_interactions)
         # get the dimensionality of the feature according to the namespace configuration
         namespace_feature_dim = get_ns_feature_dim_from_vw_example(vw_example)
         self._dim = self._get_dim_from_ns(namespace_feature_dim, ns_interactions)
@@ -361,9 +347,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
         # do one step of learning
         self.model.learn(data_sample)
         # update training related results accordingly
-        new_loss = self._get_loss(
-            y, y_pred, self._metric, self._y_min_observed, self._y_max_observed
-        )
+        new_loss = self._get_loss(y, y_pred, self._metric, self._y_min_observed, self._y_max_observed)
         # udpate sample size, sum of loss, and cost
         data_sample_size = 1
         bound_of_range = self._y_max_observed - self._y_min_observed
@@ -391,11 +375,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
             loss_func = mean_squared_error
         elif "mae" in loss_func_name or "absolute" in loss_func_name:
             loss_func = mean_absolute_error
-            if (
-                y_min_observed is not None
-                and y_max_observed is not None
-                and "clip" in loss_func_name
-            ):
+            if y_min_observed is not None and y_max_observed is not None and "clip" in loss_func_name:
                 # clip y_pred in the observed range of y
                 y_pred = min(y_max_observed, max(y_pred, y_min_observed))
         else:
@@ -410,9 +390,7 @@ class VowpalWabbitTrial(BaseOnlineTrial):
             self._y_max_observed = y
 
     @staticmethod
-    def _get_dim_from_ns(
-        namespace_feature_dim: dict, namespace_interactions: Union[set, list]
-    ):
+    def _get_dim_from_ns(namespace_feature_dim: dict, namespace_interactions: Union[set, list]):
         """Get the dimensionality of the corresponding feature of input namespace set."""
         total_dim = sum(namespace_feature_dim.values())
         if namespace_interactions:
diff --git a/flaml/onlineml/trial_runner.py b/flaml/onlineml/trial_runner.py
index 3860d0afb..81669da18 100644
--- a/flaml/onlineml/trial_runner.py
+++ b/flaml/onlineml/trial_runner.py
@@ -33,12 +33,7 @@ class OnlineTrialRunner:
     WARMSTART_NUM = 100
 
     def __init__(
-        self,
-        max_live_model_num: int,
-        searcher=None,
-        scheduler=None,
-        champion_test_policy="loss_ucb",
-        **kwargs
+        self, max_live_model_num: int, searcher=None, scheduler=None, champion_test_policy="loss_ucb", **kwargs
     ):
         """Constructor.
 
@@ -192,9 +187,7 @@ class OnlineTrialRunner:
 
     def get_top_running_trials(self, top_ratio=None, top_metric="ucb") -> list:
         """Get a list of trial ids, whose performance is among the top running trials."""
-        running_valid_trials = [
-            trial for trial in self._running_trials if trial.result is not None
-        ]
+        running_valid_trials = [trial for trial in self._running_trials if trial.result is not None]
         if not running_valid_trials:
             return
         if top_ratio is None:
@@ -215,20 +208,14 @@ class OnlineTrialRunner:
         else:
             raise NotImplementedError
         top_running_valid_trials = []
-        logger.info(
-            "Running trial ids %s", [trial.trial_id for trial in running_valid_trials]
-        )
+        logger.info("Running trial ids %s", [trial.trial_id for trial in running_valid_trials])
         self._random_state.shuffle(running_valid_trials)
-        results = [
-            trial.result.get_score(test_attribute) for trial in running_valid_trials
-        ]
+        results = [trial.result.get_score(test_attribute) for trial in running_valid_trials]
         # sorted result (small to large) index
         sorted_index = np.argsort(np.array(results))
         for i in range(min(top_number, len(running_valid_trials))):
             top_running_valid_trials.append(running_valid_trials[sorted_index[i]])
-        logger.info(
-            "Top running ids %s", [trial.trial_id for trial in top_running_valid_trials]
-        )
+        logger.info("Top running ids %s", [trial.trial_id for trial in top_running_valid_trials])
         return top_running_valid_trials
 
     def _add_trial_from_searcher(self):
@@ -240,16 +227,9 @@ class OnlineTrialRunner:
         """
         # (optionally) upper bound the number of trials in the OnlineTrialRunner
         if self._bound_trial_num and self._first_challenger_pool_size is not None:
-            active_trial_size = len(
-                [t for t in self._trials if t.status != Trial.TERMINATED]
-            )
+            active_trial_size = len([t for t in self._trials if t.status != Trial.TERMINATED])
             trial_num_upper_bound = (
-                int(
-                    round(
-                        (np.log10(self._total_steps) + 1)
-                        * self._first_challenger_pool_size
-                    )
-                )
+                int(round((np.log10(self._total_steps) + 1) * self._first_challenger_pool_size))
                 if self._first_challenger_pool_size
                 else np.inf
             )
@@ -286,9 +266,7 @@ class OnlineTrialRunner:
         if self._best_challenger_trial is not None:
             assert self._best_challenger_trial.trial_id != self._champion_trial.trial_id
             # test whether a new champion is found and set the trial properties accordingly
-            is_new_champion_found = self._better_than_champion_test(
-                self._best_challenger_trial
-            )
+            is_new_champion_found = self._better_than_champion_test(self._best_challenger_trial)
             if is_new_champion_found:
                 self._set_champion(new_champion_trial=self._best_challenger_trial)
 
@@ -303,10 +281,7 @@ class OnlineTrialRunner:
                     if worse_than_champion:
                         to_stop.append(trial_to_test)
             # we want to ensure there are at least #max_live_model_num of challengers remaining
-            max_to_stop_num = (
-                len([t for t in self._trials if t.status != Trial.TERMINATED])
-                - self._max_live_model_num
-            )
+            max_to_stop_num = len([t for t in self._trials if t.status != Trial.TERMINATED]) - self._max_live_model_num
             for i in range(min(max_to_stop_num, len(to_stop))):
                 self.stop_trial(to_stop[i])
 
@@ -331,9 +306,7 @@ class OnlineTrialRunner:
         ]
         if active_trials:
             self._random_state.shuffle(active_trials)
-            results = [
-                trial.result.get_score(test_attribute) for trial in active_trials
-            ]
+            results = [trial.result.get_score(test_attribute) for trial in active_trials]
             best_index = np.argmin(results)
             self._best_challenger_trial = active_trials[best_index]
 
@@ -358,9 +331,7 @@ class OnlineTrialRunner:
             # calling set_search_properties of searcher will trigger
             # new challenger generation. we do not do this for init champion
             # as this step is already done when first constructing the searcher
-            self._searcher.set_search_properties(
-                setting={self._searcher.CHAMPION_TRIAL_NAME: self._champion_trial}
-            )
+            self._searcher.set_search_properties(setting={self._searcher.CHAMPION_TRIAL_NAME: self._champion_trial})
         else:
             self._champion_update_times = 0
 
@@ -450,13 +421,9 @@ class OnlineTrialRunner:
         """
         if trial_to_test.result is not None and self._champion_trial.result is not None:
             if "ucb" in self._champion_test_policy:
-                return self._test_lcb_ucb(
-                    self._champion_trial, trial_to_test, self.WARMSTART_NUM
-                )
+                return self._test_lcb_ucb(self._champion_trial, trial_to_test, self.WARMSTART_NUM)
             elif "avg" in self._champion_test_policy:
-                return self._test_avg_loss(
-                    self._champion_trial, trial_to_test, self.WARMSTART_NUM
-                )
+                return self._test_avg_loss(self._champion_trial, trial_to_test, self.WARMSTART_NUM)
             elif "martingale" in self._champion_test_policy:
                 return self._test_martingale(self._champion_trial, trial_to_test)
             else:
@@ -474,9 +441,7 @@ class OnlineTrialRunner:
                     trial.trial_id,
                     champion_trial.trial_id,
                 )
-                logger.info(
-                    "trial %s %s %s", trial.config, trial.result, trial.resource_lease
-                )
+                logger.info("trial %s %s %s", trial.config, trial.result, trial.resource_lease)
                 logger.info(
                     "trial loss_avg:%s, trial loss_cb %s",
                     trial.result.loss_avg,
@@ -508,13 +473,8 @@ class OnlineTrialRunner:
         """
         assert trial.trial_id != champion_trial.trial_id
         if trial.result.resource_used >= warmstart_num:
-            if (
-                trial.result.loss_ucb
-                < champion_trial.result.loss_lcb - champion_trial.result.loss_cb
-            ):
-                logger.info(
-                    "======new champion condition satisfied: using lcb vs ucb====="
-                )
+            if trial.result.loss_ucb < champion_trial.result.loss_lcb - champion_trial.result.loss_cb:
+                logger.info("======new champion condition satisfied: using lcb vs ucb=====")
                 logger.info(
                     "new champion trial %s %s %s",
                     trial.trial_id,
diff --git a/flaml/tune/analysis.py b/flaml/tune/analysis.py
index 3350f2bbe..dab5f2dee 100644
--- a/flaml/tune/analysis.py
+++ b/flaml/tune/analysis.py
@@ -126,9 +126,7 @@ class ExperimentAnalysis:
                 'metric {} for scope {} not in ["all", "last", "avg", '
                 '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
-                "you have to pass one when fetching the best trial.".format(
-                    metric, scope
-                )
+                "you have to pass one when fetching the best trial.".format(metric, scope)
             )
         best_trial = None
         best_metric_score = None
@@ -155,10 +153,7 @@ class ExperimentAnalysis:
                 best_metric_score = metric_score
                 best_trial = trial
         if not best_trial:
-            logger.warning(
-                "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?"
-            )
+            logger.warning("Could not find best trial. Did you pass the correct `metric` " "parameter?")
         return best_trial
 
     def get_best_config(
diff --git a/flaml/tune/result.py b/flaml/tune/result.py
index 461c991f1..5793a9245 100644
--- a/flaml/tune/result.py
+++ b/flaml/tune/result.py
@@ -124,9 +124,7 @@ STDERR_FILE = "__stderr_file__"
 
 # Where Tune writes result files by default
 DEFAULT_RESULTS_DIR = (
-    os.environ.get("TEST_TMPDIR")
-    or os.environ.get("TUNE_RESULT_DIR")
-    or os.path.expanduser("~/ray_results")
+    os.environ.get("TEST_TMPDIR") or os.environ.get("TUNE_RESULT_DIR") or os.path.expanduser("~/ray_results")
 )
 
 # Meta file about status under each experiment directory, can be
diff --git a/flaml/tune/sample.py b/flaml/tune/sample.py
index b59ced92d..e022a5275 100644
--- a/flaml/tune/sample.py
+++ b/flaml/tune/sample.py
@@ -53,13 +53,9 @@ except ImportError:
 
         def __init__(
             self,
-            generator_or_seed: Optional[
-                Union["np_random_generator", np.random.RandomState, int]
-            ] = None,
+            generator_or_seed: Optional[Union["np_random_generator", np.random.RandomState, int]] = None,
         ):
-            if generator_or_seed is None or isinstance(
-                generator_or_seed, (np.random.RandomState, np_random_generator)
-            ):
+            if generator_or_seed is None or isinstance(generator_or_seed, (np.random.RandomState, np_random_generator)):
                 self._rng = generator_or_seed
             elif LEGACY_RNG:
                 self._rng = np.random.RandomState(generator_or_seed)
@@ -85,9 +81,7 @@ except ImportError:
             return getattr(self.rng, name)
 
 
-RandomState = Union[
-    None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int
-]
+RandomState = Union[None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int]
 
 
 class Domain:
@@ -112,9 +106,7 @@ class Domain:
             raise ValueError(
                 "You can only choose one sampler for parameter "
                 "domains. Existing sampler for parameter {}: "
-                "{}. Tried to add {}".format(
-                    self.__class__.__name__, self.sampler, sampler
-                )
+                "{}. Tried to add {}".format(self.__class__.__name__, self.sampler, sampler)
             )
         self.sampler = sampler
 
@@ -231,9 +223,7 @@ class Float(Domain):
             if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
                 random_state = _BackwardsCompatibleNumpyRng(random_state)
             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
-            assert (
-                0 < domain.upper < float("inf")
-            ), "LogUniform needs a upper bound greater than 0"
+            assert 0 < domain.upper < float("inf"), "LogUniform needs a upper bound greater than 0"
             logmin = np.log(domain.lower) / np.log(self.base)
             logmax = np.log(domain.upper) / np.log(self.base)
 
@@ -271,15 +261,9 @@ class Float(Domain):
 
     def uniform(self):
         if not self.lower > float("-inf"):
-            raise ValueError(
-                "Uniform requires a lower bound. Make sure to set the "
-                "`lower` parameter of `Float()`."
-            )
+            raise ValueError("Uniform requires a lower bound. Make sure to set the " "`lower` parameter of `Float()`.")
         if not self.upper < float("inf"):
-            raise ValueError(
-                "Uniform requires a upper bound. Make sure to set the "
-                "`upper` parameter of `Float()`."
-            )
+            raise ValueError("Uniform requires a upper bound. Make sure to set the " "`upper` parameter of `Float()`.")
         new = copy(self)
         new.set_sampler(self._Uniform())
         return new
@@ -309,20 +293,10 @@ class Float(Domain):
         return new
 
     def quantized(self, q: float):
-        if self.lower > float("-inf") and not isclose(
-            self.lower / q, round(self.lower / q)
-        ):
-            raise ValueError(
-                f"Your lower variable bound {self.lower} is not divisible by "
-                f"quantization factor {q}."
-            )
-        if self.upper < float("inf") and not isclose(
-            self.upper / q, round(self.upper / q)
-        ):
-            raise ValueError(
-                f"Your upper variable bound {self.upper} is not divisible by "
-                f"quantization factor {q}."
-            )
+        if self.lower > float("-inf") and not isclose(self.lower / q, round(self.lower / q)):
+            raise ValueError(f"Your lower variable bound {self.lower} is not divisible by " f"quantization factor {q}.")
+        if self.upper < float("inf") and not isclose(self.upper / q, round(self.upper / q)):
+            raise ValueError(f"Your upper variable bound {self.upper} is not divisible by " f"quantization factor {q}.")
 
         new = copy(self)
         new.set_sampler(Quantized(new.get_sampler(), q), allow_override=True)
@@ -361,9 +335,7 @@ class Integer(Domain):
             if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
                 random_state = _BackwardsCompatibleNumpyRng(random_state)
             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
-            assert (
-                0 < domain.upper < float("inf")
-            ), "LogUniform needs a upper bound greater than 0"
+            assert 0 < domain.upper < float("inf"), "LogUniform needs a upper bound greater than 0"
             logmin = np.log(domain.lower) / np.log(self.base)
             logmax = np.log(domain.upper) / np.log(self.base)
 
@@ -430,9 +402,7 @@ class Categorical(Domain):
                 random_state = _BackwardsCompatibleNumpyRng(random_state)
             # do not use .choice() directly on domain.categories
             # as that will coerce them to a single dtype
-            indices = random_state.choice(
-                np.arange(0, len(domain.categories)), size=size
-            )
+            indices = random_state.choice(np.arange(0, len(domain.categories)), size=size)
             items = [domain.categories[index] for index in indices]
             return items if len(items) > 1 else domain.cast(items[0])
 
@@ -491,9 +461,7 @@ class Quantized(Sampler):
         quantized_domain = copy(domain)
         quantized_domain.lower = np.ceil(domain.lower / self.q) * self.q
         quantized_domain.upper = np.floor(domain.upper / self.q) * self.q
-        values = self.sampler.sample(
-            quantized_domain, spec, size, random_state=random_state
-        )
+        values = self.sampler.sample(quantized_domain, spec, size, random_state=random_state)
         quantized = np.round(np.divide(values, self.q)) * self.q
 
         if not isinstance(quantized, np.ndarray):
@@ -509,11 +477,7 @@ class PolynomialExpansionSet:
         allow_self_inter: bool = False,
     ):
         self._init_monomials = init_monomials
-        self._highest_poly_order = (
-            highest_poly_order
-            if highest_poly_order is not None
-            else len(self._init_monomials)
-        )
+        self._highest_poly_order = highest_poly_order if highest_poly_order is not None else len(self._init_monomials)
         self._allow_self_inter = allow_self_inter
 
     @property
@@ -644,7 +608,5 @@ def qrandn(mean: float, sd: float, q: float):
     return Float(None, None).normal(mean, sd).quantized(q)
 
 
-def polynomial_expansion_set(
-    init_monomials: set, highest_poly_order: int = None, allow_self_inter: bool = False
-):
+def polynomial_expansion_set(init_monomials: set, highest_poly_order: int = None, allow_self_inter: bool = False):
     return PolynomialExpansionSet(init_monomials, highest_poly_order, allow_self_inter)
diff --git a/flaml/tune/scheduler/online_scheduler.py b/flaml/tune/scheduler/online_scheduler.py
index bf235afcd..626427c33 100644
--- a/flaml/tune/scheduler/online_scheduler.py
+++ b/flaml/tune/scheduler/online_scheduler.py
@@ -27,10 +27,7 @@ class OnlineScheduler(TrialScheduler):
         min_paused_resource_trial = None
         for trial in trial_runner.get_trials():
             # if there is a tie, prefer the earlier added ones
-            if (
-                trial.status == Trial.PAUSED
-                and trial.resource_lease < min_paused_resource
-            ):
+            if trial.status == Trial.PAUSED and trial.resource_lease < min_paused_resource:
                 min_paused_resource = trial.resource_lease
                 min_paused_resource_trial = trial
         if min_paused_resource_trial is not None:
@@ -122,8 +119,6 @@ class ChaChaScheduler(OnlineSuccessiveDoublingScheduler):
                 )
                 logger.debug("top_learners: %s", top_trials)
                 if trial in top_trials:
-                    logger.debug(
-                        "top runner %s: set from PAUSE to CONTINUE", trial.trial_id
-                    )
+                    logger.debug("top runner %s: set from PAUSE to CONTINUE", trial.trial_id)
                     return TrialScheduler.CONTINUE
         return decision
diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py
index 92a50e264..b6e2fa76d 100644
--- a/flaml/tune/searcher/blendsearch.py
+++ b/flaml/tune/searcher/blendsearch.py
@@ -56,9 +56,7 @@ class BlendSearch(Searcher):
         max_resource: Optional[float] = None,
         reduction_factor: Optional[float] = None,
         global_search_alg: Optional[Searcher] = None,
-        config_constraints: Optional[
-            List[Tuple[Callable[[dict], float], str, float]]
-        ] = None,
+        config_constraints: Optional[List[Tuple[Callable[[dict], float], str, float]]] = None,
         metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
         seed: Optional[int] = 20,
         cost_attr: Optional[str] = "auto",
@@ -196,9 +194,7 @@ class BlendSearch(Searcher):
         self._config_constraints = config_constraints
         self._metric_constraints = metric_constraints
         if metric_constraints:
-            assert all(
-                x[1] in ["<=", ">="] for x in metric_constraints
-            ), "sign of metric constraints must be <= or >=."
+            assert all(x[1] in ["<=", ">="] for x in metric_constraints), "sign of metric constraints must be <= or >=."
             # metric modified by lagrange
             metric += self.lagrange
         self._cat_hp_cost = cat_hp_cost or {}
@@ -232,9 +228,7 @@ class BlendSearch(Searcher):
             if experimental:
                 import optuna as ot
 
-                sampler = ot.samplers.TPESampler(
-                    seed=gs_seed, multivariate=True, group=True
-                )
+                sampler = ot.samplers.TPESampler(seed=gs_seed, multivariate=True, group=True)
             else:
                 sampler = None
             try:
@@ -260,11 +254,7 @@ class BlendSearch(Searcher):
         else:
             self._gs = None
         self._experimental = experimental
-        if (
-            getattr(self, "__name__", None) == "CFO"
-            and points_to_evaluate
-            and len(self._points_to_evaluate) > 1
-        ):
+        if getattr(self, "__name__", None) == "CFO" and points_to_evaluate and len(self._points_to_evaluate) > 1:
             # use the best config in points_to_evaluate as the start point
             self._candidate_start_points = {}
             self._started_from_low_cost = not low_cost_partial_config
@@ -383,9 +373,7 @@ class BlendSearch(Searcher):
 
         if self._metric_constraints:
             self._metric_constraint_satisfied = False
-            self._metric_constraint_penalty = [
-                self.penalty for _ in self._metric_constraints
-            ]
+            self._metric_constraint_penalty = [self.penalty for _ in self._metric_constraints]
         else:
             self._metric_constraint_satisfied = True
             self._metric_constraint_penalty = None
@@ -424,9 +412,7 @@ class BlendSearch(Searcher):
     def is_ls_ever_converged(self):
         return self._is_ls_ever_converged
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         """search thread updater and cleaner."""
         metric_constraint_satisfied = True
         if result and not error and self._metric_constraints:
@@ -440,11 +426,7 @@ class BlendSearch(Searcher):
                     violation = (value - threshold) * sign_op
                     if violation > 0:
                         # add penalty term to the metric
-                        objective += (
-                            self._metric_constraint_penalty[i]
-                            * violation
-                            * self._ls.metric_op
-                        )
+                        objective += self._metric_constraint_penalty[i] * violation * self._ls.metric_op
                         metric_constraint_satisfied = False
                         if self._metric_constraint_penalty[i] < self.penalty:
                             self._metric_constraint_penalty[i] += violation
@@ -455,9 +437,7 @@ class BlendSearch(Searcher):
             self._metric_constraint_satisfied |= metric_constraint_satisfied
         thread_id = self._trial_proposed_by.get(trial_id)
         if thread_id in self._search_thread_pool:
-            self._search_thread_pool[thread_id].on_trial_complete(
-                trial_id, result, error
-            )
+            self._search_thread_pool[thread_id].on_trial_complete(trial_id, result, error)
             del self._trial_proposed_by[trial_id]
         if result:
             config = result.get("config", {})
@@ -467,9 +447,7 @@ class BlendSearch(Searcher):
                         config[key[7:]] = value
             if self._allow_empty_config and not config:
                 return
-            signature = self._ls.config_signature(
-                config, self._subspace.get(trial_id, {})
-            )
+            signature = self._ls.config_signature(config, self._subspace.get(trial_id, {}))
             if error:  # remove from result cache
                 del self._result[signature]
             else:  # add to result cache
@@ -489,11 +467,7 @@ class BlendSearch(Searcher):
                             self._ls_bound_max,
                             self._subspace.get(trial_id, self._ls.space),
                         )
-                    if (
-                        self._gs is not None
-                        and self._experimental
-                        and (not self._ls.hierarchical)
-                    ):
+                    if self._gs is not None and self._experimental and (not self._ls.hierarchical):
                         self._gs.add_evaluated_point(flatten_dict(config), objective)
                         # TODO: recover when supported
                         # converted = convert_key(config, self._gs.space)
@@ -502,17 +476,12 @@ class BlendSearch(Searcher):
                 elif metric_constraint_satisfied and self._create_condition(result):
                     # thread creator
                     thread_id = self._thread_count
-                    self._started_from_given = (
-                        self._candidate_start_points
-                        and trial_id in self._candidate_start_points
-                    )
+                    self._started_from_given = self._candidate_start_points and trial_id in self._candidate_start_points
                     if self._started_from_given:
                         del self._candidate_start_points[trial_id]
                     else:
                         self._started_from_low_cost = True
-                    self._create_thread(
-                        config, result, self._subspace.get(trial_id, self._ls.space)
-                    )
+                    self._create_thread(config, result, self._subspace.get(trial_id, self._ls.space))
                 # reset admissible region to ls bounding box
                 self._gs_admissible_min.update(self._ls_bound_min)
                 self._gs_admissible_max.update(self._ls_bound_max)
@@ -595,9 +564,7 @@ class BlendSearch(Searcher):
         """create thread condition"""
         if len(self._search_thread_pool) < 2:
             return True
-        obj_median = np.median(
-            [thread.obj_best1 for id, thread in self._search_thread_pool.items() if id]
-        )
+        obj_median = np.median([thread.obj_best1 for id, thread in self._search_thread_pool.items() if id])
         return result[self._ls.metric] * self._ls.metric_op < obj_median
 
     def _clean(self, thread_id: int):
@@ -648,10 +615,7 @@ class BlendSearch(Searcher):
         best_trial_id = None
         obj_best = None
         for trial_id, r in self._candidate_start_points.items():
-            if r and (
-                best_trial_id is None
-                or r[self._ls.metric] * self._ls.metric_op < obj_best
-            ):
+            if r and (best_trial_id is None or r[self._ls.metric] * self._ls.metric_op < obj_best):
                 best_trial_id = trial_id
                 obj_best = r[self._ls.metric] * self._ls.metric_op
         if best_trial_id:
@@ -663,9 +627,7 @@ class BlendSearch(Searcher):
                     config[key[7:]] = value
             self._started_from_given = True
             del self._candidate_start_points[best_trial_id]
-            self._create_thread(
-                config, result, self._subspace.get(best_trial_id, self._ls.space)
-            )
+            self._create_thread(config, result, self._subspace.get(best_trial_id, self._ls.space))
 
     def _expand_admissible_region(self, lower, upper, space):
         """expand the admissible region for the subspace `space`"""
@@ -674,9 +636,7 @@ class BlendSearch(Searcher):
             if isinstance(ub, list):
                 choice = space[key].get("_choice_")
                 if choice:
-                    self._expand_admissible_region(
-                        lower[key][choice], upper[key][choice], space[key]
-                    )
+                    self._expand_admissible_region(lower[key][choice], upper[key][choice], space[key])
             elif isinstance(ub, dict):
                 self._expand_admissible_region(lower[key], ub, space[key])
             else:
@@ -752,9 +712,7 @@ class BlendSearch(Searcher):
                 if choice == backup:
                     # use CFO's init point
                     init_config = self._ls.init_config
-                    config, space = self._ls.complete_config(
-                        init_config, self._ls_bound_min, self._ls_bound_max
-                    )
+                    config, space = self._ls.complete_config(init_config, self._ls_bound_min, self._ls_bound_max)
                     self._trial_proposed_by[trial_id] = choice
                     self._search_thread_pool[choice].running += 1
                 else:
@@ -801,9 +759,7 @@ class BlendSearch(Searcher):
             if self._allow_empty_config and not init_config:
                 assert reward is None, "Empty config can't have reward."
                 return init_config
-            config, space = self._ls.complete_config(
-                init_config, self._ls_bound_min, self._ls_bound_max
-            )
+            config, space = self._ls.complete_config(init_config, self._ls_bound_min, self._ls_bound_max)
             config_signature = self._ls.config_signature(config, space)
             if reward is None:
                 result = self._result.get(config_signature)
@@ -827,9 +783,7 @@ class BlendSearch(Searcher):
                 return
         if self._use_incumbent_result_in_evaluation:
             if self._trial_proposed_by[trial_id] > 0:
-                choice_thread = self._search_thread_pool[
-                    self._trial_proposed_by[trial_id]
-                ]
+                choice_thread = self._search_thread_pool[self._trial_proposed_by[trial_id]]
                 config[INCUMBENT_RESULT] = choice_thread.best_result
         return config
 
@@ -874,9 +828,7 @@ class BlendSearch(Searcher):
             if choice >= 0:  # not fallback to rs
                 result = self._result.get(config_signature)
                 if result:  # finished
-                    self._search_thread_pool[choice].on_trial_complete(
-                        trial_id, result, error=False
-                    )
+                    self._search_thread_pool[choice].on_trial_complete(trial_id, result, error=False)
                     if choice:
                         # local search thread
                         self._clean(choice)
@@ -938,9 +890,7 @@ class BlendSearch(Searcher):
                     backup_thread_id = thread_id
         return top_thread_id, backup_thread_id
 
-    def _valid(
-        self, config: Dict, space: Dict, subspace: Dict, lower: Dict, upper: Dict
-    ) -> bool:
+    def _valid(self, config: Dict, space: Dict, subspace: Dict, lower: Dict, upper: Dict) -> bool:
         """config validator"""
         normalized_config = normalize(config, subspace, config, {})
         for key, lb in lower.items():
@@ -962,10 +912,7 @@ class BlendSearch(Searcher):
                     valid = self._valid(value, domain, nestedspace, lb, ub)
                     if not valid:
                         return False
-                elif (
-                    value + self._ls.STEPSIZE < lower[key]
-                    or value > upper[key] + self._ls.STEPSIZE
-                ):
+                elif value + self._ls.STEPSIZE < lower[key] or value > upper[key] + self._ls.STEPSIZE:
                     return False
         return True
 
@@ -1033,9 +980,7 @@ class BlendSearchTuner(BlendSearch, NNITuner):
         result = {
             "config": parameters,
             self._metric: extract_scalar_reward(value),
-            self.cost_attr: 1
-            if isinstance(value, float)
-            else value.get(self.cost_attr, value.get("sequence", 1))
+            self.cost_attr: 1 if isinstance(value, float) else value.get(self.cost_attr, value.get("sequence", 1))
             # if nni does not report training cost,
             # using sequence as an approximation.
             # if no sequence, using a constant 1
@@ -1145,11 +1090,7 @@ class CFO(BlendSearchTuner):
         if self._candidate_start_points and self._thread_count == 1:
             # result needs to match or exceed the best candidate start point
             obj_best = min(
-                (
-                    self._ls.metric_op * r[self._ls.metric]
-                    for r in self._candidate_start_points.values()
-                    if r
-                ),
+                (self._ls.metric_op * r[self._ls.metric] for r in self._candidate_start_points.values() if r),
                 default=-np.inf,
             )
 
@@ -1157,9 +1098,7 @@ class CFO(BlendSearchTuner):
         else:
             return True
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         super().on_trial_complete(trial_id, result, error)
         if self._candidate_start_points and trial_id in self._candidate_start_points:
             # the trial is a candidate start point
@@ -1177,9 +1116,7 @@ class RandomSearch(CFO):
         config, _ = self._ls.complete_config({})
         return config
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         return
 
     def on_trial_result(self, trial_id: str, result: Dict):
diff --git a/flaml/tune/searcher/flow2.py b/flaml/tune/searcher/flow2.py
index 035e3d868..fc9d5212d 100644
--- a/flaml/tune/searcher/flow2.py
+++ b/flaml/tune/searcher/flow2.py
@@ -124,21 +124,13 @@ class FLOW2(Searcher):
         self.lexico_objectives = lexico_objectives
         if self.lexico_objectives is not None:
             if "modes" not in self.lexico_objectives.keys():
-                self.lexico_objectives["modes"] = ["min"] * len(
-                    self.lexico_objectives["metrics"]
-                )
-            for t_metric, t_mode in zip(
-                self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
-            ):
+                self.lexico_objectives["modes"] = ["min"] * len(self.lexico_objectives["metrics"])
+            for t_metric, t_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
                 if t_metric not in self.lexico_objectives["tolerances"].keys():
                     self.lexico_objectives["tolerances"][t_metric] = 0
                 if t_metric not in self.lexico_objectives["targets"].keys():
-                    self.lexico_objectives["targets"][t_metric] = (
-                        -float("inf") if t_mode == "min" else float("inf")
-                    )
-        self.resource_multiple_factor = (
-            resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
-        )
+                    self.lexico_objectives["targets"][t_metric] = -float("inf") if t_mode == "min" else float("inf")
+        self.resource_multiple_factor = resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
         self.cost_attr = cost_attr
         self.max_resource = max_resource
         self._resource = None
@@ -166,13 +158,9 @@ class FLOW2(Searcher):
                     q = sampler.q
                     sampler = sampler.get_sampler()
                     if str(sampler) == "Uniform":
-                        self._step_lb = min(
-                            self._step_lb, q / (domain.upper - domain.lower + 1)
-                        )
+                        self._step_lb = min(self._step_lb, q / (domain.upper - domain.lower + 1))
                 elif isinstance(domain, sample.Integer) and str(sampler) == "Uniform":
-                    self._step_lb = min(
-                        self._step_lb, 1.0 / (domain.upper - domain.lower)
-                    )
+                    self._step_lb = min(self._step_lb, 1.0 / (domain.upper - domain.lower))
                 if isinstance(domain, sample.Categorical):
                     if not domain.ordered:
                         self._unordered_cat_hp[key] = len(domain.categories)
@@ -186,11 +174,7 @@ class FLOW2(Searcher):
         if not hier:
             self._space_keys = sorted(self._tunable_keys)
         self.hierarchical = hier
-        if (
-            self.resource_attr
-            and self.resource_attr not in self._space
-            and self.max_resource
-        ):
+        if self.resource_attr and self.resource_attr not in self._space and self.max_resource:
             self.min_resource = self.min_resource or self._min_resource()
             self._resource = self._round(self.min_resource)
             if not hier:
@@ -244,14 +228,12 @@ class FLOW2(Searcher):
                 if str(sampler_inner) == "LogUniform":
                     step_lb = min(
                         step_lb,
-                        np.log(1.0 + q / self.best_config[key])
-                        / np.log(domain.upper / domain.lower),
+                        np.log(1.0 + q / self.best_config[key]) / np.log(domain.upper / domain.lower),
                     )
             elif isinstance(domain, sample.Integer) and str(sampler) == "LogUniform":
                 step_lb = min(
                     step_lb,
-                    np.log(1.0 + 1.0 / self.best_config[key])
-                    / np.log((domain.upper - 1) / domain.lower),
+                    np.log(1.0 + 1.0 / self.best_config[key]) / np.log((domain.upper - 1) / domain.lower),
                 )
         if np.isinf(step_lb):
             step_lb = self.STEP_LOWER_BOUND
@@ -288,18 +270,14 @@ class FLOW2(Searcher):
         """
         disturb = self._reset_times and partial_config == self.init_config
         # if not the first time to complete init_config, use random gaussian
-        config, space = complete_config(
-            partial_config, self.space, self, disturb, lower, upper
-        )
+        config, space = complete_config(partial_config, self.space, self, disturb, lower, upper)
         if partial_config == self.init_config:
             self._reset_times += 1
         if self._resource:
             config[self.resource_attr] = self.min_resource
         return config, space
 
-    def create(
-        self, init_config: Dict, obj: float, cost: float, space: Dict
-    ) -> Searcher:
+    def create(self, init_config: Dict, obj: float, cost: float, space: Dict) -> Searcher:
         # space is the subspace where the init_config is located
         flow2 = self.__class__(
             init_config,
@@ -318,12 +296,7 @@ class FLOW2(Searcher):
             flow2.best_obj = {}
             for k, v in obj.items():
                 flow2.best_obj[k] = (
-                    -v
-                    if self.lexico_objectives["modes"][
-                        self.lexico_objectives["metrics"].index(k)
-                    ]
-                    == "max"
-                    else v
+                    -v if self.lexico_objectives["modes"][self.lexico_objectives["metrics"].index(k)] == "max" else v
                 )
         else:
             flow2.best_obj = obj * self.metric_op  # minimize internally
@@ -333,15 +306,11 @@ class FLOW2(Searcher):
 
     def normalize(self, config, recursive=False) -> Dict:
         """normalize each dimension in config to [0,1]."""
-        return normalize(
-            config, self._space, self.best_config, self.incumbent, recursive
-        )
+        return normalize(config, self._space, self.best_config, self.incumbent, recursive)
 
     def denormalize(self, config):
         """denormalize each dimension in config from [0,1]."""
-        return denormalize(
-            config, self._space, self.best_config, self.incumbent, self._random
-        )
+        return denormalize(config, self._space, self.best_config, self.incumbent, self._random)
 
     def set_search_properties(
         self,
@@ -374,20 +343,13 @@ class FLOW2(Searcher):
             feasible_value = k_values.take(feasible_index)
             self._f_best[k_metric] = np.min(feasible_value)
             if not isinstance(self.lexico_objectives["tolerances"][k_metric], str):
-                tolerance_bound = (
-                    self._f_best[k_metric]
-                    + self.lexico_objectives["tolerances"][k_metric]
-                )
+                tolerance_bound = self._f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
             else:
                 assert (
                     self.lexico_objectives["tolerances"][k_metric][-1] == "%"
                 ), "String tolerance of {} should use %% as the suffix".format(k_metric)
                 tolerance_bound = self._f_best[k_metric] * (
-                    1
-                    + 0.01
-                    * float(
-                        self.lexico_objectives["tolerances"][k_metric].replace("%", "")
-                    )
+                    1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))
                 )
             feasible_index_filter = np.where(
                 feasible_value
@@ -409,33 +371,20 @@ class FLOW2(Searcher):
             for k in self.lexico_objectives["metrics"]:
                 self._histories[k].append(result[k])
             self.update_fbest()
-            for k_metric, k_mode in zip(
-                self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
-            ):
+            for k_metric, k_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
                 k_target = (
                     self.lexico_objectives["targets"][k_metric]
                     if k_mode == "min"
                     else -self.lexico_objectives["targets"][k_metric]
                 )
                 if not isinstance(self.lexico_objectives["tolerances"][k_metric], str):
-                    tolerance_bound = (
-                        self._f_best[k_metric]
-                        + self.lexico_objectives["tolerances"][k_metric]
-                    )
+                    tolerance_bound = self._f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
                 else:
                     assert (
                         self.lexico_objectives["tolerances"][k_metric][-1] == "%"
-                    ), "String tolerance of {} should use %% as the suffix".format(
-                        k_metric
-                    )
+                    ), "String tolerance of {} should use %% as the suffix".format(k_metric)
                     tolerance_bound = self._f_best[k_metric] * (
-                        1
-                        + 0.01
-                        * float(
-                            self.lexico_objectives["tolerances"][k_metric].replace(
-                                "%", ""
-                            )
-                        )
+                        1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))
                     )
                 if (result[k_metric] < max(tolerance_bound, k_target)) and (
                     self.best_obj[k_metric]
@@ -457,9 +406,7 @@ class FLOW2(Searcher):
                 else:
                     return False
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         """
         Compare with incumbent.
         If better, move, reset num_complete and num_proposed.
@@ -512,21 +459,12 @@ class FLOW2(Searcher):
         proposed_by = self._proposed_by.get(trial_id)
         if proposed_by == self.incumbent:
             self._num_complete4incumbent += 1
-            cost = (
-                result.get(self.cost_attr, 1)
-                if result
-                else self._trial_cost.get(trial_id)
-            )
+            cost = result.get(self.cost_attr, 1) if result else self._trial_cost.get(trial_id)
             if cost:
                 self._cost_complete4incumbent += cost
-            if (
-                self._num_complete4incumbent >= 2 * self.dim
-                and self._num_allowed4incumbent == 0
-            ):
+            if self._num_complete4incumbent >= 2 * self.dim and self._num_allowed4incumbent == 0:
                 self._num_allowed4incumbent = 2
-            if self._num_complete4incumbent == self.dir and (
-                not self._resource or self._resource == self.max_resource
-            ):
+            if self._num_complete4incumbent == self.dir and (not self._resource or self._resource == self.max_resource):
                 self._num_complete4incumbent -= 2
                 self._num_allowed4incumbent = max(self._num_allowed4incumbent, 2)
 
@@ -593,10 +531,7 @@ class FLOW2(Searcher):
             and self.cost_incumbent
             and self._resource
             and self._resource < self.max_resource
-            and (
-                self._cost_complete4incumbent
-                >= self.cost_incumbent * self.resource_multiple_factor
-            )
+            and (self._cost_complete4incumbent >= self.cost_incumbent * self.resource_multiple_factor)
         ):
             return self._increase_resource(trial_id)
         self._num_allowed4incumbent -= 1
@@ -608,9 +543,7 @@ class FLOW2(Searcher):
             self._direction_tried = None
         else:
             # propose a new direction
-            self._direction_tried = (
-                self.rand_vector_unit_sphere(self.dim, self._trunc) * self.step
-            )
+            self._direction_tried = self.rand_vector_unit_sphere(self.dim, self._trunc) * self.step
             for i, key in enumerate(self._tunable_keys):
                 move[key] += self._direction_tried[i]
         self._project(move)
@@ -622,25 +555,17 @@ class FLOW2(Searcher):
         if self._init_phase:
             if self._direction_tried is None:
                 if self._same:
-                    same = not any(
-                        key not in best_config or value != best_config[key]
-                        for key, value in config.items()
-                    )
+                    same = not any(key not in best_config or value != best_config[key] for key, value in config.items())
 
                     if same:
                         # increase step size
                         self.step += self.STEPSIZE
                         self.step = min(self.step, self.step_ub)
             else:
-                same = not any(
-                    key not in best_config or value != best_config[key]
-                    for key, value in config.items()
-                )
+                same = not any(key not in best_config or value != best_config[key] for key, value in config.items())
 
                 self._same = same
-        if self._num_proposedby_incumbent == self.dir and (
-            not self._resource or self._resource == self.max_resource
-        ):
+        if self._num_proposedby_incumbent == self.dir and (not self._resource or self._resource == self.max_resource):
             # check stuck condition if using max resource
             self._num_proposedby_incumbent -= 2
             self._init_phase = False
@@ -714,9 +639,7 @@ class FLOW2(Searcher):
                 # key must be in space
                 domain = space[key]
                 if self.hierarchical and not (
-                    domain is None
-                    or type(domain) in (str, int, float)
-                    or isinstance(domain, sample.Domain)
+                    domain is None or type(domain) in (str, int, float) or isinstance(domain, sample.Domain)
                 ):
                     # not domain or hashable
                     # get rid of list type for hierarchical search space.
@@ -746,10 +669,5 @@ class FLOW2(Searcher):
             # unordered cat choice is hard to reach by chance
             if config1[key] != config2.get(key):
                 return False
-        delta = np.array(
-            [
-                incumbent1[key] - incumbent2.get(key, np.inf)
-                for key in self._tunable_keys
-            ]
-        )
+        delta = np.array([incumbent1[key] - incumbent2.get(key, np.inf) for key in self._tunable_keys])
         return np.linalg.norm(delta) <= self.step
diff --git a/flaml/tune/searcher/online_searcher.py b/flaml/tune/searcher/online_searcher.py
index d56710f09..d142b0569 100644
--- a/flaml/tune/searcher/online_searcher.py
+++ b/flaml/tune/searcher/online_searcher.py
@@ -128,9 +128,7 @@ class ChampionFrontierSearcher(BaseSearcher):
 
         self._challenger_list = []
         # initialize the search in set_search_properties
-        self.set_search_properties(
-            setting={self.CHAMPION_TRIAL_NAME: None}, init_call=True
-        )
+        self.set_search_properties(setting={self.CHAMPION_TRIAL_NAME: None}, init_call=True)
         logger.debug("using random seed %s in config oracle", self._seed)
 
     def set_search_properties(
@@ -202,16 +200,12 @@ class ChampionFrontierSearcher(BaseSearcher):
             config_domain = self._space[k]
             if isinstance(config_domain, PolynomialExpansionSet):
                 # get candidate configs for hyperparameters of the PolynomialExpansionSet type
-                partial_new_configs = self._generate_independent_hp_configs(
-                    k, v, config_domain
-                )
+                partial_new_configs = self._generate_independent_hp_configs(k, v, config_domain)
                 if partial_new_configs:
                     hyperparameter_config_groups.append(partial_new_configs)
                     # does not have searcher_trial_ids
                     searcher_trial_ids_groups.append([])
-            elif isinstance(config_domain, Float) or isinstance(
-                config_domain, Categorical
-            ):
+            elif isinstance(config_domain, Float) or isinstance(config_domain, Categorical):
                 # otherwise we need to deal with them in group
                 nonpoly_config[k] = v
                 if k not in self._space_of_nonpoly_hp:
@@ -229,29 +223,17 @@ class ChampionFrontierSearcher(BaseSearcher):
                         metric=self.CFO_SEARCHER_METRIC_NAME,
                     )
                     # initialize the search in set_search_properties
-                    self._searcher_for_nonpoly_hp[
-                        seed_config_trial_id
-                    ].set_search_properties(
+                    self._searcher_for_nonpoly_hp[seed_config_trial_id].set_search_properties(
                         setting={"metric_target": self.CFO_SEARCHER_LARGE_LOSS}
                     )
                     # We need to call this for once, such that the seed config in points_to_evaluate will be called
                     # to be tried
-                    self._searcher_for_nonpoly_hp[seed_config_trial_id].suggest(
-                        seed_config_searcher_trial_id
-                    )
+                    self._searcher_for_nonpoly_hp[seed_config_trial_id].suggest(seed_config_searcher_trial_id)
                 # assuming minimization
-                if (
-                    self._searcher_for_nonpoly_hp[seed_config_trial_id].metric_target
-                    is None
-                ):
+                if self._searcher_for_nonpoly_hp[seed_config_trial_id].metric_target is None:
                     pseudo_loss = self.CFO_SEARCHER_LARGE_LOSS
                 else:
-                    pseudo_loss = (
-                        self._searcher_for_nonpoly_hp[
-                            seed_config_trial_id
-                        ].metric_target
-                        * 0.95
-                    )
+                    pseudo_loss = self._searcher_for_nonpoly_hp[seed_config_trial_id].metric_target * 0.95
                 pseudo_result_to_report = {}
                 for k, v in nonpoly_config.items():
                     pseudo_result_to_report["config/" + str(k)] = v
@@ -264,14 +246,10 @@ class ChampionFrontierSearcher(BaseSearcher):
                     # suggest multiple times
                     new_searcher_trial_id = Trial.generate_id()
                     new_searcher_trial_ids.append(new_searcher_trial_id)
-                    suggestion = self._searcher_for_nonpoly_hp[
-                        seed_config_trial_id
-                    ].suggest(new_searcher_trial_id)
+                    suggestion = self._searcher_for_nonpoly_hp[seed_config_trial_id].suggest(new_searcher_trial_id)
                     if suggestion is not None:
                         partial_new_nonpoly_configs.append(suggestion)
-                logger.info(
-                    "partial_new_nonpoly_configs %s", partial_new_nonpoly_configs
-                )
+                logger.info("partial_new_nonpoly_configs %s", partial_new_nonpoly_configs)
             else:
                 raise NotImplementedError
             if partial_new_nonpoly_configs:
@@ -298,20 +276,14 @@ class ChampionFrontierSearcher(BaseSearcher):
                     new_searcher_trial_id = searcher_trial_ids_groups[i][j]
                 else:
                     new_searcher_trial_id = None
-                new_trial = self._create_trial_from_config(
-                    new_seed_config, new_searcher_trial_id
-                )
+                new_trial = self._create_trial_from_config(new_seed_config, new_searcher_trial_id)
                 new_trials.append(new_trial)
         logger.info("new_configs %s", [t.trial_id for t in new_trials])
         return new_trials
 
-    def _generate_independent_hp_configs(
-        self, hp_name, current_config_value, config_domain
-    ) -> List:
+    def _generate_independent_hp_configs(self, hp_name, current_config_value, config_domain) -> List:
         if isinstance(config_domain, PolynomialExpansionSet):
-            seed_interactions = list(current_config_value) + list(
-                config_domain.init_monomials
-            )
+            seed_interactions = list(current_config_value) + list(config_domain.init_monomials)
             logger.info(
                 "**Important** Seed namespaces (singletons and interactions): %s",
                 seed_interactions,
@@ -340,13 +312,7 @@ class ChampionFrontierSearcher(BaseSearcher):
         champion_all_combinations = self._generate_all_comb(
             seed_interactions, order, allow_self_inter, highest_poly_order
         )
-        space = sorted(
-            list(
-                itertools.combinations(
-                    champion_all_combinations, interaction_num_to_add
-                )
-            )
-        )
+        space = sorted(list(itertools.combinations(champion_all_combinations, interaction_num_to_add)))
         self._random_state.shuffle(space)
         candidate_configs = [set(seed_interactions) | set(item) for item in space]
         final_candidate_configs = []
@@ -413,15 +379,10 @@ class ChampionFrontierSearcher(BaseSearcher):
             all_interactions_no_self_inter = []
             for s in all_interactions:
                 s_no_inter = strip_self_inter(s)
-                if (
-                    len(s_no_inter) > 1
-                    and s_no_inter not in all_interactions_no_self_inter
-                ):
+                if len(s_no_inter) > 1 and s_no_inter not in all_interactions_no_self_inter:
                     all_interactions_no_self_inter.append(s_no_inter)
             all_interactions = all_interactions_no_self_inter
         if highest_poly_order is not None:
-            all_interactions = [
-                c for c in all_interactions if len(c) <= highest_poly_order
-            ]
+            all_interactions = [c for c in all_interactions if len(c) <= highest_poly_order]
         logger.info("all_combinations %s", all_interactions)
         return all_interactions
diff --git a/flaml/tune/searcher/search_thread.py b/flaml/tune/searcher/search_thread.py
index 9bb58a8ea..f0488c818 100644
--- a/flaml/tune/searcher/search_thread.py
+++ b/flaml/tune/searcher/search_thread.py
@@ -38,14 +38,10 @@ class SearchThread:
         self._is_ls = isinstance(search_alg, FLOW2)
         self._mode = mode
         self._metric_op = 1 if mode == "min" else -1
-        self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = getattr(
-            search_alg, "cost_incumbent", 0
-        )
+        self.cost_best = self.cost_last = self.cost_total = self.cost_best1 = getattr(search_alg, "cost_incumbent", 0)
         self._eps = eps
         self.cost_best2 = 0
-        self.obj_best1 = self.obj_best2 = getattr(
-            search_alg, "best_obj", np.inf
-        )  # inherently minimize
+        self.obj_best1 = self.obj_best2 = getattr(search_alg, "best_obj", np.inf)  # inherently minimize
         self.best_result = None
         # eci: estimated cost for improvement
         self.eci = self.cost_best
@@ -55,11 +51,7 @@ class SearchThread:
         self.cost_attr = cost_attr
         if search_alg:
             self.space = self._space = search_alg.space  # unflattened space
-            if (
-                self.space
-                and not isinstance(search_alg, FLOW2)
-                and isinstance(search_alg._space, dict)
-            ):
+            if self.space and not isinstance(search_alg, FLOW2) and isinstance(search_alg._space, dict):
                 # remember const config
                 self._const = add_cost_to_space(self.space, {}, {})
 
@@ -76,10 +68,7 @@ class SearchThread:
                     # define by run
                     config, self.space = unflatten_hierarchical(config, self._space)
             except FloatingPointError:
-                logger.warning(
-                    "The global search method raises FloatingPointError. "
-                    "Ignoring for this iteration."
-                )
+                logger.warning("The global search method raises FloatingPointError. " "Ignoring for this iteration.")
                 config = None
         if config is not None:
             self.running += 1
@@ -94,9 +83,7 @@ class SearchThread:
         best_obj = metric_target * self._metric_op
         if not self.speed:
             self.speed = max_speed
-        self.eci = max(
-            self.cost_total - self.cost_best1, self.cost_best1 - self.cost_best2
-        )
+        self.eci = max(self.cost_total - self.cost_best1, self.cost_best1 - self.cost_best2)
         if self.obj_best1 > best_obj and self.speed > 0:
             self.eci = max(self.eci, 2 * (self.obj_best1 - best_obj) / self.speed)
 
@@ -105,31 +92,23 @@ class SearchThread:
         if self.obj_best2 > self.obj_best1:
             # discount the speed if there are unfinished trials
             self.speed = (
-                (self.obj_best2 - self.obj_best1)
-                / self.running
-                / (max(self.cost_total - self.cost_best2, self._eps))
+                (self.obj_best2 - self.obj_best1) / self.running / (max(self.cost_total - self.cost_best2, self._eps))
             )
         else:
             self.speed = 0
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         """Update the statistics of the thread."""
         if not self._search_alg:
             return
-        if not hasattr(self._search_alg, "_ot_trials") or (
-            not error and trial_id in self._search_alg._ot_trials
-        ):
+        if not hasattr(self._search_alg, "_ot_trials") or (not error and trial_id in self._search_alg._ot_trials):
             # optuna doesn't handle error
             if self._is_ls or not self._init_config:
                 try:
                     self._search_alg.on_trial_complete(trial_id, result, error)
                 except RuntimeError as e:
                     # rs is used in place of optuna sometimes
-                    if not str(e).endswith(
-                        "has already finished and can not be updated."
-                    ):
+                    if not str(e).endswith("has already finished and can not be updated."):
                         raise e
             else:
                 # init config is not proposed by self._search_alg
@@ -138,9 +117,7 @@ class SearchThread:
         if result:
             self.cost_last = result.get(self.cost_attr, 1)
             self.cost_total += self.cost_last
-            if self._search_alg.metric in result and (
-                getattr(self._search_alg, "lexico_objectives", None) is None
-            ):
+            if self._search_alg.metric in result and (getattr(self._search_alg, "lexico_objectives", None) is None):
                 # TODO: Improve this behavior. When lexico_objectives is provided to CFO,
                 # related variables are not callable.
                 obj = result[self._search_alg.metric] * self._metric_op
@@ -162,9 +139,7 @@ class SearchThread:
         # TODO update the statistics of the thread with partial result?
         if not self._search_alg:
             return
-        if not hasattr(self._search_alg, "_ot_trials") or (
-            trial_id in self._search_alg._ot_trials
-        ):
+        if not hasattr(self._search_alg, "_ot_trials") or (trial_id in self._search_alg._ot_trials):
             try:
                 self._search_alg.on_trial_result(trial_id, result)
             except RuntimeError as e:
diff --git a/flaml/tune/searcher/suggestion.py b/flaml/tune/searcher/suggestion.py
index a349c2f14..518bab9da 100644
--- a/flaml/tune/searcher/suggestion.py
+++ b/flaml/tune/searcher/suggestion.py
@@ -112,22 +112,16 @@ class Searcher:
             # Early return to avoid assertions
             return
 
-        assert isinstance(
-            metric, type(mode)
-        ), "metric and mode must be of the same type"
+        assert isinstance(metric, type(mode)), "metric and mode must be of the same type"
         if isinstance(mode, str):
             assert mode in ["min", "max"], "if `mode` is a str must be 'min' or 'max'!"
         elif isinstance(mode, list):
             assert len(mode) == len(metric), "Metric and mode must be the same length"
-            assert all(
-                mod in ["min", "max", "obs"] for mod in mode
-            ), "All of mode must be 'min' or 'max' or 'obs'!"
+            assert all(mod in ["min", "max", "obs"] for mod in mode), "All of mode must be 'min' or 'max' or 'obs'!"
         else:
             raise ValueError("Mode must either be a list or string")
 
-    def set_search_properties(
-        self, metric: Optional[str], mode: Optional[str], config: Dict
-    ) -> bool:
+    def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool:
         """Pass search properties to searcher.
         This method acts as an alternative to instantiating search algorithms
         with their own specific search spaces. Instead they can accept a
@@ -193,18 +187,13 @@ class ConcurrencyLimiter(Searcher):
         self.batch = batch
         self.live_trials = set()
         self.cached_results = {}
-        super(ConcurrencyLimiter, self).__init__(
-            metric=self.searcher.metric, mode=self.searcher.mode
-        )
+        super(ConcurrencyLimiter, self).__init__(metric=self.searcher.metric, mode=self.searcher.mode)
 
     def suggest(self, trial_id: str) -> Optional[Dict]:
-        assert (
-            trial_id not in self.live_trials
-        ), f"Trial ID {trial_id} must be unique: already found in set."
+        assert trial_id not in self.live_trials, f"Trial ID {trial_id} must be unique: already found in set."
         if len(self.live_trials) >= self.max_concurrent:
             logger.debug(
-                f"Not providing a suggestion for {trial_id} due to "
-                "concurrency limit: %s/%s.",
+                f"Not providing a suggestion for {trial_id} due to " "concurrency limit: %s/%s.",
                 len(self.live_trials),
                 self.max_concurrent,
             )
@@ -215,9 +204,7 @@ class ConcurrencyLimiter(Searcher):
             self.live_trials.add(trial_id)
         return suggestion
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         if trial_id not in self.live_trials:
             return
         elif self.batch:
@@ -226,9 +213,7 @@ class ConcurrencyLimiter(Searcher):
                 # Update the underlying searcher once the
                 # full batch is completed.
                 for trial_id, (result, error) in self.cached_results.items():
-                    self.searcher.on_trial_complete(
-                        trial_id, result=result, error=error
-                    )
+                    self.searcher.on_trial_complete(trial_id, result=result, error=error)
                     self.live_trials.remove(trial_id)
                 self.cached_results = {}
             else:
@@ -257,9 +242,7 @@ class ConcurrencyLimiter(Searcher):
     def on_unpause(self, trial_id: str):
         self.searcher.on_unpause(trial_id)
 
-    def set_search_properties(
-        self, metric: Optional[str], mode: Optional[str], config: Dict
-    ) -> bool:
+    def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool:
         return self.searcher.set_search_properties(metric, mode, config)
 
 
@@ -301,17 +284,10 @@ def validate_warmstart(
     """
     if points_to_evaluate:
         if not isinstance(points_to_evaluate, list):
-            raise TypeError(
-                "points_to_evaluate expected to be a list, got {}.".format(
-                    type(points_to_evaluate)
-                )
-            )
+            raise TypeError("points_to_evaluate expected to be a list, got {}.".format(type(points_to_evaluate)))
         for point in points_to_evaluate:
             if not isinstance(point, (dict, list)):
-                raise TypeError(
-                    f"points_to_evaluate expected to include list or dict, "
-                    f"got {point}."
-                )
+                raise TypeError(f"points_to_evaluate expected to include list or dict, " f"got {point}.")
 
             if validate_point_name_lengths and (not len(point) == len(parameter_names)):
                 raise ValueError(
@@ -322,11 +298,7 @@ def validate_warmstart(
 
     if points_to_evaluate and evaluated_rewards:
         if not isinstance(evaluated_rewards, list):
-            raise TypeError(
-                "evaluated_rewards expected to be a list, got {}.".format(
-                    type(evaluated_rewards)
-                )
-            )
+            raise TypeError("evaluated_rewards expected to be a list, got {}.".format(type(evaluated_rewards)))
         if not len(evaluated_rewards) == len(points_to_evaluate):
             raise ValueError(
                 "Dim of evaluated_rewards {}".format(evaluated_rewards)
@@ -461,16 +433,12 @@ class OptunaSearch(Searcher):
         evaluated_rewards: Optional[List] = None,
     ):
         assert ot is not None, "Optuna must be installed! Run `pip install optuna`."
-        super(OptunaSearch, self).__init__(
-            metric=metric, mode=mode, max_concurrent=None, use_early_stopped_trials=None
-        )
+        super(OptunaSearch, self).__init__(metric=metric, mode=mode, max_concurrent=None, use_early_stopped_trials=None)
 
         if isinstance(space, dict) and space:
             resolved_vars, domain_vars, grid_vars = parse_spec_vars(space)
             if domain_vars or grid_vars:
-                logger.warning(
-                    UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self).__name__)
-                )
+                logger.warning(UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self).__name__))
                 space = self.convert_search_space(space)
             else:
                 # Flatten to support nested dicts
@@ -493,8 +461,7 @@ class OptunaSearch(Searcher):
         self._sampler = sampler or ot.samplers.TPESampler(seed=seed)
 
         assert isinstance(self._sampler, BaseSampler), (
-            "You can only pass an instance of `optuna.samplers.BaseSampler` "
-            "as a sampler to `OptunaSearcher`."
+            "You can only pass an instance of `optuna.samplers.BaseSampler` " "as a sampler to `OptunaSearcher`."
         )
 
         self._ot_trials = {}
@@ -527,17 +494,13 @@ class OptunaSearch(Searcher):
                 validate_point_name_lengths=not callable(self._space),
             )
             if self._evaluated_rewards:
-                for point, reward in zip(
-                    self._points_to_evaluate, self._evaluated_rewards
-                ):
+                for point, reward in zip(self._points_to_evaluate, self._evaluated_rewards):
                     self.add_evaluated_point(point, reward)
             else:
                 for point in self._points_to_evaluate:
                     self._ot_study.enqueue_trial(point)
 
-    def set_search_properties(
-        self, metric: Optional[str], mode: Optional[str], config: Dict
-    ) -> bool:
+    def set_search_properties(self, metric: Optional[str], mode: Optional[str], config: Dict) -> bool:
         if self._space:
             return False
         space = self.convert_search_space(config)
@@ -585,16 +548,10 @@ class OptunaSearch(Searcher):
 
     def suggest(self, trial_id: str) -> Optional[Dict]:
         if not self._space:
-            raise RuntimeError(
-                UNDEFINED_SEARCH_SPACE.format(
-                    cls=self.__class__.__name__, space="space"
-                )
-            )
+            raise RuntimeError(UNDEFINED_SEARCH_SPACE.format(cls=self.__class__.__name__, space="space"))
         if not self._metric or not self._mode:
             raise RuntimeError(
-                UNDEFINED_METRIC_MODE.format(
-                    cls=self.__class__.__name__, metric=self._metric, mode=self._mode
-                )
+                UNDEFINED_METRIC_MODE.format(cls=self.__class__.__name__, metric=self._metric, mode=self._mode)
             )
 
         if isinstance(self._space, list):
@@ -607,9 +564,7 @@ class OptunaSearch(Searcher):
 
             # getattr will fetch the trial.suggest_ function on Optuna trials
             params = {
-                args[0]
-                if len(args) > 0
-                else kwargs["name"]: getattr(ot_trial, fn)(*args, **kwargs)
+                args[0] if len(args) > 0 else kwargs["name"]: getattr(ot_trial, fn)(*args, **kwargs)
                 for (fn, args, kwargs) in self._space
             }
         elif callable(self._space):
@@ -622,9 +577,7 @@ class OptunaSearch(Searcher):
         else:
             # Use Optuna ask interface (since version 2.6.0)
             if trial_id not in self._ot_trials:
-                self._ot_trials[trial_id] = self._ot_study.ask(
-                    fixed_distributions=self._space
-                )
+                self._ot_trials[trial_id] = self._ot_study.ask(fixed_distributions=self._space)
             ot_trial = self._ot_trials[trial_id]
             params = ot_trial.params
 
@@ -636,9 +589,7 @@ class OptunaSearch(Searcher):
         ot_trial = self._ot_trials[trial_id]
         ot_trial.report(metric, step)
 
-    def on_trial_complete(
-        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
-    ):
+    def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, error: bool = False):
         ot_trial = self._ot_trials[trial_id]
 
         val = result.get(self.metric, None) if result else None
@@ -662,16 +613,10 @@ class OptunaSearch(Searcher):
         intermediate_values: Optional[List[float]] = None,
     ):
         if not self._space:
-            raise RuntimeError(
-                UNDEFINED_SEARCH_SPACE.format(
-                    cls=self.__class__.__name__, space="space"
-                )
-            )
+            raise RuntimeError(UNDEFINED_SEARCH_SPACE.format(cls=self.__class__.__name__, space="space"))
         if not self._metric or not self._mode:
             raise RuntimeError(
-                UNDEFINED_METRIC_MODE.format(
-                    cls=self.__class__.__name__, metric=self._metric, mode=self._mode
-                )
+                UNDEFINED_METRIC_MODE.format(cls=self.__class__.__name__, metric=self._metric, mode=self._mode)
             )
 
         ot_trial_state = OptunaTrialState.COMPLETE
@@ -681,9 +626,7 @@ class OptunaSearch(Searcher):
             ot_trial_state = OptunaTrialState.PRUNED
 
         if intermediate_values:
-            intermediate_values_dict = {
-                i: value for i, value in enumerate(intermediate_values)
-            }
+            intermediate_values_dict = {i: value for i, value in enumerate(intermediate_values)}
         else:
             intermediate_values_dict = None
 
@@ -736,10 +679,7 @@ class OptunaSearch(Searcher):
             return {}
 
         if grid_vars:
-            raise ValueError(
-                "Grid search parameters cannot be automatically converted "
-                "to an Optuna search space."
-            )
+            raise ValueError("Grid search parameters cannot be automatically converted " "to an Optuna search space.")
 
         # Flatten and resolve again after checking for grid search.
         spec = flatten_dict(spec, prevent_delimiter=True)
@@ -766,18 +706,12 @@ class OptunaSearch(Searcher):
                             "Optuna does not support both quantization and "
                             "sampling from LogUniform. Dropped quantization."
                         )
-                    return ot.distributions.LogUniformDistribution(
-                        domain.lower, domain.upper
-                    )
+                    return ot.distributions.LogUniformDistribution(domain.lower, domain.upper)
 
                 elif isinstance(sampler, Uniform):
                     if quantize:
-                        return ot.distributions.DiscreteUniformDistribution(
-                            domain.lower, domain.upper, quantize
-                        )
-                    return ot.distributions.UniformDistribution(
-                        domain.lower, domain.upper
-                    )
+                        return ot.distributions.DiscreteUniformDistribution(domain.lower, domain.upper, quantize)
+                    return ot.distributions.UniformDistribution(domain.lower, domain.upper)
 
             elif isinstance(domain, Integer):
                 if isinstance(sampler, LogUniform):
@@ -798,9 +732,7 @@ class OptunaSearch(Searcher):
 
             raise ValueError(
                 "Optuna search does not support parameters of type "
-                "`{}` with samplers of type `{}`".format(
-                    type(domain).__name__, type(domain.sampler).__name__
-                )
+                "`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__)
             )
 
         # Parameter name is e.g. "a/b/c" for nested dicts
diff --git a/flaml/tune/searcher/variant_generator.py b/flaml/tune/searcher/variant_generator.py
index 9d7decc0a..5b8a24d08 100644
--- a/flaml/tune/searcher/variant_generator.py
+++ b/flaml/tune/searcher/variant_generator.py
@@ -143,9 +143,7 @@ def _generate_variants(
     for resolved_spec in grid_search:
         if not constant_grid_search or not all_resolved:
             # In this path, we sample the remaining random variables
-            _, resolved_vars = _resolve_domain_vars(
-                resolved_spec, to_resolve, random_state=random_state
-            )
+            _, resolved_vars = _resolve_domain_vars(resolved_spec, to_resolve, random_state=random_state)
 
         for resolved, spec in _generate_variants(
             resolved_spec,
@@ -155,11 +153,7 @@ def _generate_variants(
             for path, value in grid_vars:
                 resolved_vars[path] = _get_value(spec, path)
             for k, v in resolved.items():
-                if (
-                    k in resolved_vars
-                    and v != resolved_vars[k]
-                    and _is_resolved(resolved_vars[k])
-                ):
+                if k in resolved_vars and v != resolved_vars[k] and _is_resolved(resolved_vars[k]):
                     raise ValueError(
                         "The variable `{}` could not be unambiguously "
                         "resolved to a single value. Consider simplifying "
@@ -197,9 +191,7 @@ def _resolve_domain_vars(
             if path in resolved:
                 continue
             try:
-                value = domain.sample(
-                    _UnresolvedAccessGuard(spec), random_state=random_state
-                )
+                value = domain.sample(_UnresolvedAccessGuard(spec), random_state=random_state)
             except RecursiveDependencyError as e:
                 error = e
             # except Exception:
@@ -217,9 +209,7 @@ def _resolve_domain_vars(
     return True, resolved
 
 
-def _grid_search_generator(
-    unresolved_spec: Dict, grid_vars: List
-) -> Generator[Dict, None, None]:
+def _grid_search_generator(unresolved_spec: Dict, grid_vars: List) -> Generator[Dict, None, None]:
     value_indices = [0] * len(grid_vars)
 
     def increment(i):
@@ -260,9 +250,7 @@ def _try_resolve(v) -> Tuple[bool, Any]:
         # Grid search values
         grid_values = v["grid_search"]
         if not isinstance(grid_values, list):
-            raise TuneError(
-                "Grid search expected list of values, got: {}".format(grid_values)
-            )
+            raise TuneError("Grid search expected list of values, got: {}".format(grid_values))
         return False, Categorical(grid_values).grid()
     return True, v
 
@@ -318,9 +306,7 @@ class _UnresolvedAccessGuard(dict):
     def __getattribute__(self, item):
         value = dict.__getattribute__(self, item)
         if not _is_resolved(value):
-            raise RecursiveDependencyError(
-                "`{}` recursively depends on {}".format(item, value)
-            )
+            raise RecursiveDependencyError("`{}` recursively depends on {}".format(item, value))
         elif isinstance(value, dict):
             return _UnresolvedAccessGuard(value)
         else:
diff --git a/flaml/tune/space.py b/flaml/tune/space.py
index f2200aceb..72a08cf1c 100644
--- a/flaml/tune/space.py
+++ b/flaml/tune/space.py
@@ -70,15 +70,11 @@ def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str,
             else:
                 raise ValueError(
                     "Optuna search does not support parameters of type "
-                    "`{}` with samplers of type `{}`".format(
-                        type(domain).__name__, type(domain.sampler).__name__
-                    )
+                    "`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__)
                 )
         elif isinstance(domain, sample.Integer):
             if isinstance(sampler, sample.LogUniform):
-                trial.suggest_int(
-                    key, domain.lower, domain.upper - int(bool(not quantize)), log=True
-                )
+                trial.suggest_int(key, domain.lower, domain.upper - int(bool(not quantize)), log=True)
             elif isinstance(sampler, sample.Uniform):
                 # Upper bound should be inclusive for quantization and
                 # exclusive otherwise
@@ -103,9 +99,7 @@ def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str,
         else:
             raise ValueError(
                 "Optuna search does not support parameters of type "
-                "`{}` with samplers of type `{}`".format(
-                    type(domain).__name__, type(domain.sampler).__name__
-                )
+                "`{}` with samplers of type `{}`".format(type(domain).__name__, type(domain.sampler).__name__)
             )
     # Return all constants in a dictionary.
     return config
@@ -148,9 +142,7 @@ def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
             pos = key.rfind(":")
             true_key = key[:pos]
             choice = int(key[pos + 1 :])
-            hier[true_key], subspace[true_key] = unflatten_hierarchical(
-                value, space[true_key][choice]
-            )
+            hier[true_key], subspace[true_key] = unflatten_hierarchical(value, space[true_key][choice])
         else:
             if key.endswith("_choice_"):
                 key = key[:-8]
@@ -212,9 +204,7 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
                         choice_cost_dict = choice_cost_list[i]
                     else:
                         choice_cost_dict = {}
-                    domain.const.append(
-                        add_cost_to_space(cat, low_cost_dict, choice_cost_dict)
-                    )
+                    domain.const.append(add_cost_to_space(cat, low_cost_dict, choice_cost_dict))
                 else:
                     domain.const.append(None)
             if choice_cost_list:
@@ -233,18 +223,14 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
                 ordered = getattr(domain, "ordered", None)
                 if ordered is None:
                     # automatically decide whether to order the choices based on the value type
-                    domain.ordered = ordered = all(
-                        isinstance(x, (int, float)) for x in domain.categories
-                    )
+                    domain.ordered = ordered = all(isinstance(x, (int, float)) for x in domain.categories)
                 if ordered:
                     # sort the choices by value
                     ind = np.argsort(domain.categories)
                     domain.categories = [domain.categories[i] for i in ind]
 
             if low_cost and low_cost not in domain.categories:
-                assert isinstance(
-                    low_cost, list
-                ), f"low cost {low_cost} not in domain {domain.categories}"
+                assert isinstance(low_cost, list), f"low cost {low_cost} not in domain {domain.categories}"
                 if domain.ordered:
                     sorted_points = [low_cost[i] for i in ind]
                     for i, point in enumerate(sorted_points):
@@ -292,11 +278,7 @@ def normalize(
                     # low_cost_point list
                     norm = []
                     for i, cat in enumerate(domain.categories):
-                        norm.append(
-                            normalize(value[i], cat, reference_config[key][i], {})
-                            if recursive
-                            else value[i]
-                        )
+                        norm.append(normalize(value[i], cat, reference_config[key][i], {}) if recursive else value[i])
                     if len(value) > len(domain.categories):
                         # the low cost index was appended to low_cost_point list
                         index = value[-1]
@@ -335,16 +317,10 @@ def normalize(
         else:
             quantize = None
         if str(sampler) == "LogUniform":
-            upper = domain.upper - (
-                isinstance(domain, sample.Integer) & (quantize is None)
-            )
-            config_norm[key] = np.log(value / domain.lower) / np.log(
-                upper / domain.lower
-            )
+            upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
+            config_norm[key] = np.log(value / domain.lower) / np.log(upper / domain.lower)
         elif str(sampler) == "Uniform":
-            upper = domain.upper - (
-                isinstance(domain, sample.Integer) & (quantize is None)
-            )
+            upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
             config_norm[key] = (value - domain.lower) / (upper - domain.lower)
         elif str(sampler) == "Normal":
             # N(mean, sd) -> N(0,1)
@@ -366,9 +342,7 @@ def denormalize(
         if key in space:
             # domain: sample.Categorical/Integer/Float/Function
             domain = space[key]
-            if isinstance(value, dict) or not callable(
-                getattr(domain, "get_sampler", None)
-            ):
+            if isinstance(value, dict) or not callable(getattr(domain, "get_sampler", None)):
                 config_denorm[key] = value
             else:
                 if isinstance(domain, sample.Categorical):
@@ -376,16 +350,12 @@ def denormalize(
                     n = len(domain.categories)
                     if isinstance(value, list):
                         # denormalize list
-                        choice = min(
-                            n - 1, int(np.floor(value[-1] * n))
-                        )  # max choice is n-1
+                        choice = min(n - 1, int(np.floor(value[-1] * n)))  # max choice is n-1
                         config_denorm[key] = point = value[choice]
                         point["_choice_"] = choice
                         continue
                     if domain.ordered:
-                        config_denorm[key] = domain.categories[
-                            min(n - 1, int(np.floor(value * n)))
-                        ]
+                        config_denorm[key] = domain.categories[min(n - 1, int(np.floor(value * n)))]
                     else:
                         assert key in normalized_reference_config
                         if min(n - 1, np.floor(value * n)) == min(
@@ -394,11 +364,7 @@ def denormalize(
                             config_denorm[key] = reference_config[key]
                         else:  # ****random value each time!****
                             config_denorm[key] = random_state.choice(
-                                [
-                                    x
-                                    for x in domain.categories
-                                    if x != reference_config[key]
-                                ]
+                                [x for x in domain.categories if x != reference_config[key]]
                             )
                     continue
                 # Uniform/LogUniform/Normal/Base
@@ -411,14 +377,10 @@ def denormalize(
                     quantize = None
                 # Handle Log/Uniform
                 if str(sampler) == "LogUniform":
-                    upper = domain.upper - (
-                        isinstance(domain, sample.Integer) & (quantize is None)
-                    )
+                    upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
                     config_denorm[key] = (upper / domain.lower) ** value * domain.lower
                 elif str(sampler) == "Uniform":
-                    upper = domain.upper - (
-                        isinstance(domain, sample.Integer) & (quantize is None)
-                    )
+                    upper = domain.upper - (isinstance(domain, sample.Integer) & (quantize is None))
                     config_denorm[key] = value * (upper - domain.lower) + domain.lower
                 elif str(sampler) == "Normal":
                     # denormalization for 'Normal'
@@ -427,9 +389,7 @@ def denormalize(
                 #     config_denorm[key] = value
                 # Handle quantized
                 if quantize is not None:
-                    config_denorm[key] = (
-                        np.round(np.divide(config_denorm[key], quantize)) * quantize
-                    )
+                    config_denorm[key] = np.round(np.divide(config_denorm[key], quantize)) * quantize
                 # Handle int (4.6 -> 5)
                 if isinstance(domain, sample.Integer):
                     config_denorm[key] = int(round(config_denorm[key]))
@@ -525,9 +485,7 @@ def complete_config(
     for key, value in space.items():
         if key not in config:
             config[key] = value
-    for _, generated in generate_variants_compatible(
-        {"config": config}, random_state=flow2.rs_random
-    ):
+    for _, generated in generate_variants_compatible({"config": config}, random_state=flow2.rs_random):
         config = generated["config"]
         break
     subspace = {}
@@ -550,9 +508,7 @@ def complete_config(
                     lower and lower.get(key) and lower[key][index],
                     upper and upper.get(key) and upper[key][index],
                 )
-                assert (
-                    "_choice_" not in subspace[key]
-                ), "_choice_ is a reserved key for hierarchical search space"
+                assert "_choice_" not in subspace[key], "_choice_ is a reserved key for hierarchical search space"
                 subspace[key]["_choice_"] = index
             else:
                 config[key], subspace[key] = complete_config(
diff --git a/flaml/tune/spark/utils.py b/flaml/tune/spark/utils.py
index 19ce77b4a..c42b670e7 100644
--- a/flaml/tune/spark/utils.py
+++ b/flaml/tune/spark/utils.py
@@ -69,11 +69,7 @@ def get_n_cpus(node="driver"):
     """
     assert node in ["driver", "executor"]
     try:
-        n_cpus = int(
-            SparkSession.builder.getOrCreate()
-            .sparkContext.getConf()
-            .get(f"spark.{node}.cores")
-        )
+        n_cpus = int(SparkSession.builder.getOrCreate().sparkContext.getConf().get(f"spark.{node}.cores"))
     except (TypeError, RuntimeError):
         n_cpus = os.cpu_count()
     return n_cpus
@@ -113,9 +109,7 @@ def with_parameters(trainable, **kwargs):
 
     if not callable(trainable):
         raise ValueError(
-            f"`with_parameters() only works with function trainables`. "
-            f"Got type: "
-            f"{type(trainable)}."
+            f"`with_parameters() only works with function trainables`. " f"Got type: " f"{type(trainable)}."
         )
 
     spark_available, spark_error_msg = check_spark()
diff --git a/flaml/tune/trial.py b/flaml/tune/trial.py
index 30d4fe663..eac1a4e61 100644
--- a/flaml/tune/trial.py
+++ b/flaml/tune/trial.py
@@ -116,25 +116,19 @@ class Trial:
                         self.metric_n_steps[metric][str(n)] = deque([value], maxlen=n)
                 else:
                     step = result["training_iteration"] or 1
-                    self.metric_analysis[metric]["max"] = max(
-                        value, self.metric_analysis[metric]["max"]
-                    )
-                    self.metric_analysis[metric]["min"] = min(
-                        value, self.metric_analysis[metric]["min"]
-                    )
+                    self.metric_analysis[metric]["max"] = max(value, self.metric_analysis[metric]["max"])
+                    self.metric_analysis[metric]["min"] = min(value, self.metric_analysis[metric]["min"])
                     self.metric_analysis[metric]["avg"] = (
-                        1
-                        / step
-                        * (value + (step - 1) * self.metric_analysis[metric]["avg"])
+                        1 / step * (value + (step - 1) * self.metric_analysis[metric]["avg"])
                     )
                     self.metric_analysis[metric]["last"] = value
 
                     for n in self.n_steps:
                         key = "last-{:d}-avg".format(n)
                         self.metric_n_steps[metric][str(n)].append(value)
-                        self.metric_analysis[metric][key] = sum(
+                        self.metric_analysis[metric][key] = sum(self.metric_n_steps[metric][str(n)]) / len(
                             self.metric_n_steps[metric][str(n)]
-                        ) / len(self.metric_n_steps[metric][str(n)])
+                        )
 
     def set_status(self, status):
         """Sets the status of the trial."""
diff --git a/flaml/tune/trial_runner.py b/flaml/tune/trial_runner.py
index 59e9d0d03..245abc4d9 100644
--- a/flaml/tune/trial_runner.py
+++ b/flaml/tune/trial_runner.py
@@ -96,17 +96,13 @@ class BaseTrialRunner:
         """Stops trial."""
         if trial.status not in [Trial.ERROR, Trial.TERMINATED]:
             if self._scheduler_alg:
-                self._scheduler_alg.on_trial_complete(
-                    self, trial.trial_id, trial.last_result
-                )
+                self._scheduler_alg.on_trial_complete(self, trial.trial_id, trial.last_result)
             self._search_alg.on_trial_complete(trial.trial_id, trial.last_result)
             trial.set_status(Trial.TERMINATED)
         elif self._scheduler_alg:
             self._scheduler_alg.on_trial_remove(self, trial)
             if trial.status == Trial.ERROR:
-                self._search_alg.on_trial_complete(
-                    trial.trial_id, trial.last_result, error=True
-                )
+                self._search_alg.on_trial_complete(trial.trial_id, trial.last_result, error=True)
 
 
 class SequentialTrialRunner(BaseTrialRunner):
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index f447ac112..b0f5a07bb 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -65,11 +65,7 @@ class ExperimentAnalysis(EA):
             return self.get_best_config(self.default_metric, self.default_mode)
 
     def lexico_best(self, trials):
-        results = {
-            index: trial.last_result
-            for index, trial in enumerate(trials)
-            if trial.last_result
-        }
+        results = {index: trial.last_result for index, trial in enumerate(trials) if trial.last_result}
         metrics = self.lexico_objectives["metrics"]
         modes = self.lexico_objectives["modes"]
         f_best = {}
@@ -79,15 +75,11 @@ class ExperimentAnalysis(EA):
         for time_index in range(length):
             for objective, mode in zip(metrics, modes):
                 histories[objective].append(
-                    results[keys[time_index]][objective]
-                    if mode == "min"
-                    else -results[keys[time_index]][objective]
+                    results[keys[time_index]][objective] if mode == "min" else -results[keys[time_index]][objective]
                 )
         obj_initial = self.lexico_objectives["metrics"][0]
         feasible_index = np.array([*range(len(histories[obj_initial]))])
-        for k_metric, k_mode in zip(
-            self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
-        ):
+        for k_metric, k_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
             k_values = np.array(histories[k_metric])
             k_target = (
                 -self.lexico_objectives["targets"][k_metric]
@@ -101,19 +93,9 @@ class ExperimentAnalysis(EA):
                 feasible_value
                 <= max(
                     f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric]
-                    if not isinstance(
-                        self.lexico_objectives["tolerances"][k_metric], str
-                    )
+                    if not isinstance(self.lexico_objectives["tolerances"][k_metric], str)
                     else f_best[k_metric]
-                    * (
-                        1
-                        + 0.01
-                        * float(
-                            self.lexico_objectives["tolerances"][k_metric].replace(
-                                "%", ""
-                            )
-                        )
-                    ),
+                    * (1 + 0.01 * float(self.lexico_objectives["tolerances"][k_metric].replace("%", ""))),
                     k_target,
                 )
             )[0]
@@ -237,9 +219,7 @@ def run(
     local_dir: Optional[str] = None,
     num_samples: Optional[int] = 1,
     resources_per_trial: Optional[dict] = None,
-    config_constraints: Optional[
-        List[Tuple[Callable[[dict], float], str, float]]
-    ] = None,
+    config_constraints: Optional[List[Tuple[Callable[[dict], float], str, float]]] = None,
     metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
     max_failure: Optional[int] = 100,
     use_ray: Optional[bool] = False,
@@ -463,9 +443,7 @@ def run(
             os.makedirs(dir_name, exist_ok=True)
     elif local_dir and verbose > 0:
         os.makedirs(local_dir, exist_ok=True)
-        log_file_name = os.path.join(
-            local_dir, "tune_" + str(datetime.datetime.now()).replace(":", "-") + ".log"
-        )
+        log_file_name = os.path.join(local_dir, "tune_" + str(datetime.datetime.now()).replace(":", "-") + ".log")
     if use_ray and use_spark:
         raise ValueError("use_ray and use_spark cannot be both True.")
     if not use_ray:
@@ -506,9 +484,7 @@ def run(
     from .searcher.blendsearch import BlendSearch, CFO
 
     if lexico_objectives is not None:
-        logger.warning(
-            "If lexico_objectives is not None, search_alg is forced to be CFO"
-        )
+        logger.warning("If lexico_objectives is not None, search_alg is forced to be CFO")
         search_alg = None
     if search_alg is None:
         flaml_scheduler_resource_attr = (
@@ -529,14 +505,10 @@ def run(
                 import optuna as _
 
                 SearchAlgorithm = BlendSearch
-                logger.info(
-                    "Using search algorithm {}.".format(SearchAlgorithm.__name__)
-                )
+                logger.info("Using search algorithm {}.".format(SearchAlgorithm.__name__))
             except ImportError:
                 SearchAlgorithm = CFO
-                logger.warning(
-                    "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
-                )
+                logger.warning("Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]")
             metric = metric or DEFAULT_METRIC
         else:
             SearchAlgorithm = CFO
@@ -581,14 +553,8 @@ def run(
             ]
             and use_incumbent_result_in_evaluation is not None
         ):
-            search_alg.use_incumbent_result_in_evaluation = (
-                use_incumbent_result_in_evaluation
-            )
-        searcher = (
-            search_alg.searcher
-            if isinstance(search_alg, ConcurrencyLimiter)
-            else search_alg
-        )
+            search_alg.use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
+        searcher = search_alg.searcher if isinstance(search_alg, ConcurrencyLimiter) else search_alg
         if isinstance(searcher, BlendSearch):
             setting = {}
             if time_budget_s:
@@ -617,10 +583,7 @@ def run(
         try:
             from ray import tune
         except ImportError:
-            raise ImportError(
-                "Failed to import ray tune. "
-                "Please install ray[tune] or set use_ray=False"
-            )
+            raise ImportError("Failed to import ray tune. " "Please install ray[tune] or set use_ray=False")
         _use_ray = True
         try:
             analysis = tune.run(
@@ -659,19 +622,14 @@ def run(
             from joblib import Parallel, delayed, parallel_backend
             from joblibspark import register_spark
         except ImportError as e:
-            raise ImportError(
-                f"{e}. Try pip install flaml[spark] or set use_spark=False."
-            )
+            raise ImportError(f"{e}. Try pip install flaml[spark] or set use_spark=False.")
         from flaml.tune.searcher.suggestion import ConcurrencyLimiter
         from .trial_runner import SparkTrialRunner
 
         register_spark()
         spark = SparkSession.builder.getOrCreate()
         sc = spark._jsc.sc()
-        num_executors = (
-            len([executor.host() for executor in sc.statusTracker().getExecutorInfos()])
-            - 1
-        )
+        num_executors = len([executor.host() for executor in sc.statusTracker().getExecutorInfos()]) - 1
         """
         By default, the number of executors is the number of VMs in the cluster. And we can
         launch one trial per executor. However, sometimes we can launch more trials than
@@ -708,9 +666,7 @@ def run(
             max_concurrent,
         )
         with parallel_backend("spark"):
-            with Parallel(
-                n_jobs=n_concurrent_trials, verbose=max(0, (verbose - 1) * 50)
-            ) as parallel:
+            with Parallel(n_jobs=n_concurrent_trials, verbose=max(0, (verbose - 1) * 50)) as parallel:
                 try:
                     _runner = SparkTrialRunner(
                         search_alg=search_alg,
@@ -722,9 +678,7 @@ def run(
                     if time_budget_s is None:
                         time_budget_s = np.inf
                     num_failures = 0
-                    upperbound_num_failures = (
-                        len(evaluated_rewards) if evaluated_rewards else 0
-                    ) + max_failure
+                    upperbound_num_failures = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
                     while (
                         time.time() - time_start < time_budget_s
                         and (num_samples < 0 or num_trials < num_samples)
@@ -742,9 +696,7 @@ def run(
                                     break
                         trials_to_run = _runner.running_trials
                         if not trials_to_run:
-                            logger.warning(
-                                f"fail to sample a trial for {max_failure} times in a row, stopping."
-                            )
+                            logger.warning(f"fail to sample a trial for {max_failure} times in a row, stopping.")
                             break
                         logger.info(
                             f"Number of trials: {num_trials}/{num_samples}, {len(_runner.running_trials)} RUNNING,"
@@ -754,12 +706,9 @@ def run(
                             f"Configs of Trials to run: {[trial_to_run.config for trial_to_run in trials_to_run]}"
                         )
                         results = None
-                        with PySparkOvertimeMonitor(
-                            time_start, time_budget_s, force_cancel, parallel=parallel
-                        ):
+                        with PySparkOvertimeMonitor(time_start, time_budget_s, force_cancel, parallel=parallel):
                             results = parallel(
-                                delayed(evaluation_function)(trial_to_run.config)
-                                for trial_to_run in trials_to_run
+                                delayed(evaluation_function)(trial_to_run.config) for trial_to_run in trials_to_run
                             )
                         # results = [evaluation_function(trial_to_run.config) for trial_to_run in trials_to_run]
                         while results:
@@ -775,9 +724,7 @@ def run(
                                         # When the result returned is an empty dict, set the trial status to error
                                         trial_to_run.set_status(Trial.ERROR)
                                 else:
-                                    logger.info(
-                                        "Brief result: {}".format({metric: result})
-                                    )
+                                    logger.info("Brief result: {}".format({metric: result}))
                                     report(_metric=result)
                             _runner.stop_trial(trial_to_run)
                         num_failures = 0
@@ -817,9 +764,7 @@ def run(
         if time_budget_s is None:
             time_budget_s = np.inf
         num_failures = 0
-        upperbound_num_failures = (
-            len(evaluated_rewards) if evaluated_rewards else 0
-        ) + max_failure
+        upperbound_num_failures = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
         while (
             time.time() - time_start < time_budget_s
             and (num_samples < 0 or num_trials < num_samples)
@@ -852,9 +797,7 @@ def run(
                 # break with upperbound_num_failures consecutive failures
                 num_failures += 1
         if num_failures == upperbound_num_failures:
-            logger.warning(
-                f"fail to sample a trial for {max_failure} times in a row, stopping."
-            )
+            logger.warning(f"fail to sample a trial for {max_failure} times in a row, stopping.")
         analysis = ExperimentAnalysis(
             _runner.get_trials(),
             metric=metric,
diff --git a/flaml/tune/utils.py b/flaml/tune/utils.py
index b67ca342f..9398162a3 100644
--- a/flaml/tune/utils.py
+++ b/flaml/tune/utils.py
@@ -23,9 +23,5 @@ def choice(categories: Sequence, order=None):
             Numerical categories have an order, while string categories do not.
     """
     domain = sample.Categorical(categories).uniform()
-    domain.ordered = (
-        order
-        if order is not None
-        else all(isinstance(x, (int, float)) for x in categories)
-    )
+    domain.ordered = order if order is not None else all(isinstance(x, (int, float)) for x in categories)
     return domain
diff --git a/test/automl/test_classification.py b/test/automl/test_classification.py
index adb308166..ecec9a6d4 100644
--- a/test/automl/test_classification.py
+++ b/test/automl/test_classification.py
@@ -265,9 +265,7 @@ class TestClassification(unittest.TestCase):
         import xgboost as xgb
 
         callback = xgb.callback.TrainingCallback()
-        automl.fit(
-            X_train=X_train, y_train=y_train, callbacks=[callback], **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, callbacks=[callback], **automl_settings)
         print(automl.predict(X_train))
         print(automl.model)
         print(automl.config_history)
@@ -279,16 +277,12 @@ class TestClassification(unittest.TestCase):
         import subprocess
         import sys
 
-        subprocess.check_call(
-            [sys.executable, "-m", "pip", "install", "xgboost==1.3.3", "--user"]
-        )
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost==1.3.3", "--user"])
         automl = AutoML()
         automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
         print(automl.feature_names_in_)
         print(automl.feature_importances_)
-        subprocess.check_call(
-            [sys.executable, "-m", "pip", "install", "-U", "xgboost", "--user"]
-        )
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "xgboost", "--user"])
 
     def test_ray_classification(self):
         X, y = load_breast_cancer(return_X_y=True)
@@ -337,9 +331,7 @@ class TestClassification(unittest.TestCase):
             import ray
 
             X_train_ref = ray.put(X_train)
-            automl_experiment.fit(
-                X_train=X_train_ref, y_train=y_train, **automl_settings
-            )
+            automl_experiment.fit(X_train=X_train_ref, y_train=y_train, **automl_settings)
             print(automl_experiment.predict(X_train))
             print(automl_experiment.model)
             print(automl_experiment.config_history)
@@ -355,9 +347,7 @@ class TestClassification(unittest.TestCase):
 
     def test_random_skip_oom(self):
         automl_experiment = AutoML()
-        automl_experiment.add_learner(
-            learner_name="large_lgbm", learner_class=MyLargeLGBM
-        )
+        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
         automl_settings = {
             "time_budget": 2,
             "task": "classification",
@@ -396,9 +386,7 @@ class TestClassification(unittest.TestCase):
         }
         X_train = scipy.sparse.random(3000, 3000, density=0.1)
         y_train = np.random.randint(2, size=3000)
-        automl_experiment.fit(
-            X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings
-        )
+        automl_experiment.fit(X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings)
         automl_settings["time_budget"] = 5
         automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
         print(automl_experiment.predict(X_train))
diff --git a/test/automl/test_constraints.py b/test/automl/test_constraints.py
index 1222e4ed0..37e42a50b 100644
--- a/test/automl/test_constraints.py
+++ b/test/automl/test_constraints.py
@@ -31,9 +31,7 @@ def test_metric_constraints():
         from sklearn.datasets import load_wine
 
         X, y = load_wine(return_X_y=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.33, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
     automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl.estimator_list)
     print(automl.search_space)
@@ -55,9 +53,7 @@ def test_metric_constraints():
         min_resource=automl.min_resource,
         max_resource=automl.max_resource,
         time_budget_s=automl._state.time_budget,
-        config_constraints=[
-            (partial(size, automl._state.learner_classes), "<=", automl._mem_thres)
-        ],
+        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
         metric_constraints=automl.metric_constraints,
         num_samples=5,
     )
@@ -121,18 +117,12 @@ def test_metric_constraints_custom():
         from sklearn.datasets import load_wine
 
         X, y = load_wine(return_X_y=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.33, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
     automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl.estimator_list)
     print(automl.search_space)
     print(automl.points_to_evaluate)
-    print(
-        "Best minimization objective on validation data: {0:.4g}".format(
-            automl.best_loss
-        )
-    )
+    print("Best minimization objective on validation data: {0:.4g}".format(automl.best_loss))
     print(
         "pred_time of the best config on validation data: {0:.4g}".format(
             automl.metrics_for_best_config[1]["pred_time"]
@@ -161,9 +151,7 @@ def test_metric_constraints_custom():
         min_resource=automl.min_resource,
         max_resource=automl.max_resource,
         time_budget_s=automl._state.time_budget,
-        config_constraints=[
-            (partial(size, automl._state.learner_classes), "<=", automl._mem_thres)
-        ],
+        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
         metric_constraints=automl.metric_constraints,
         num_samples=5,
     )
diff --git a/test/automl/test_forecast.py b/test/automl/test_forecast.py
index b71a58efb..044d11a72 100644
--- a/test/automl/test_forecast.py
+++ b/test/automl/test_forecast.py
@@ -2,20 +2,12 @@ import numpy as np
 from flaml import AutoML
 
 
-def test_forecast_automl(
-    budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]
-):
+def test_forecast_automl(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
     # using dataframe
     import statsmodels.api as sm
 
     data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
-    data = (
-        data.bfill()
-        .ffill()
-        .to_frame()
-        .reset_index()
-        .rename(columns={"index": "ds", "co2": "y"})
-    )
+    data = data.bfill().ffill().to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
     num_samples = data.shape[0]
     time_horizon = 12
     split_idx = num_samples - time_horizon
@@ -163,9 +155,7 @@ def load_multi_dataset():
     return df
 
 
-def test_multivariate_forecast_num(
-    budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]
-):
+def test_multivariate_forecast_num(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
     df = load_multi_dataset()
     # split data into train and test
     time_horizon = 180
@@ -282,9 +272,7 @@ def load_multi_dataset_cat(time_horizon):
             return 0
 
     df["season"] = df["timeStamp"].apply(season)
-    df["above_monthly_avg"] = df.apply(
-        lambda x: above_monthly_avg(x["timeStamp"], x["temp"]), axis=1
-    )
+    df["above_monthly_avg"] = df.apply(lambda x: above_monthly_avg(x["timeStamp"], x["temp"]), axis=1)
 
     # split data into train and test
     num_samples = df.shape[0]
@@ -297,9 +285,7 @@ def load_multi_dataset_cat(time_horizon):
     return train_df, test_df
 
 
-def test_multivariate_forecast_cat(
-    budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]
-):
+def test_multivariate_forecast_cat(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
     time_horizon = 180
     train_df, test_df = load_multi_dataset_cat(time_horizon)
     X_test = test_df[
@@ -456,16 +442,10 @@ def get_stalliion_data():
     data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
     data["time_idx"] -= data["time_idx"].min()
     # add additional features
-    data["month"] = data.date.dt.month.astype(str).astype(
-        "category"
-    )  # categories have be strings
+    data["month"] = data.date.dt.month.astype(str).astype("category")  # categories have to be strings
     data["log_volume"] = np.log(data.volume + 1e-8)
-    data["avg_volume_by_sku"] = data.groupby(
-        ["time_idx", "sku"], observed=True
-    ).volume.transform("mean")
-    data["avg_volume_by_agency"] = data.groupby(
-        ["time_idx", "agency"], observed=True
-    ).volume.transform("mean")
+    data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean")
+    data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean")
     # we want to encode special days as one variable and thus need to first reverse one-hot encoding
     special_days = [
         "easter_day",
@@ -479,11 +459,7 @@ def get_stalliion_data():
         "beer_capital",
         "music_fest",
     ]
-    data[special_days] = (
-        data[special_days]
-        .apply(lambda x: x.map({0: "-", 1: x.name}))
-        .astype("category")
-    )
+    data[special_days] = data[special_days].apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
     return data, special_days
 
 
@@ -571,8 +547,7 @@ def test_forecast_panel(budget=5):
 
         y_test, y_pred = np.array(y_test), np.array(y_pred)
         return round(
-            np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2))
-            * 100,
+            np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2)) * 100,
             2,
         )
 
diff --git a/test/automl/test_multiclass.py b/test/automl/test_multiclass.py
index cb82eb02b..a8bfba7d7 100644
--- a/test/automl/test_multiclass.py
+++ b/test/automl/test_multiclass.py
@@ -156,9 +156,7 @@ class TestMultiClass(unittest.TestCase):
             del settings["time_budget"]
             settings["max_iter"] = 5
             # test the "_choice_" issue when using ray
-            automl.fit(
-                X_train=X_train, y_train=y_train, n_concurrent_trials=2, **settings
-            )
+            automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=2, **settings)
         except ImportError:
             return
 
@@ -212,9 +210,7 @@ class TestMultiClass(unittest.TestCase):
         print(automl.best_iteration)
         print(automl.best_estimator)
         automl = AutoML()
-        estimator = automl.get_estimator_from_log(
-            settings["log_file_name"], record_id=0, task="multiclass"
-        )
+        estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=0, task="multiclass")
         print(estimator)
         (
             time_history,
@@ -233,9 +229,7 @@ class TestMultiClass(unittest.TestCase):
             del settings["time_budget"]
             settings["max_iter"] = 2
             automl.fit(**settings)
-            estimator = automl.get_estimator_from_log(
-                settings["log_file_name"], record_id=1, task="multiclass"
-            )
+            estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=1, task="multiclass")
         except ImportError:
             pass
 
@@ -290,12 +284,8 @@ class TestMultiClass(unittest.TestCase):
             "model_history": True,
         }
         X_train, y_train = load_iris(return_X_y=True)
-        automl_experiment_micro.fit(
-            X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings
-        )
-        automl_experiment_macro.fit(
-            X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings
-        )
+        automl_experiment_micro.fit(X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings)
+        automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings)
         estimator = automl_experiment_macro.model
         y_pred = estimator.predict(X_train)
         y_pred_proba = estimator.predict_proba(X_train)
@@ -389,9 +379,7 @@ class TestMultiClass(unittest.TestCase):
 
     def _test_memory_limit(self):
         automl_experiment = AutoML()
-        automl_experiment.add_learner(
-            learner_name="large_lgbm", learner_class=MyLargeLGBM
-        )
+        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
         automl_settings = {
             "time_budget": -1,
             "task": "classification",
@@ -403,19 +391,13 @@ class TestMultiClass(unittest.TestCase):
         }
         X_train, y_train = load_iris(return_X_y=True, as_frame=True)
 
-        automl_experiment.fit(
-            X_train=X_train, y_train=y_train, max_iter=1, **automl_settings
-        )
+        automl_experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **automl_settings)
         print(automl_experiment.model)
 
     def test_time_limit(self):
         automl_experiment = AutoML()
-        automl_experiment.add_learner(
-            learner_name="large_lgbm", learner_class=MyLargeLGBM
-        )
-        automl_experiment.add_learner(
-            learner_name="large_xgb", learner_class=MyLargeXGB
-        )
+        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
+        automl_experiment.add_learner(learner_name="large_xgb", learner_class=MyLargeXGB)
         automl_settings = {
             "time_budget": 0.5,
             "task": "classification",
@@ -450,21 +432,12 @@ class TestMultiClass(unittest.TestCase):
             # test drop column
             X_train.columns = range(X_train.shape[1])
             X_train[X_train.shape[1]] = np.zeros(len(y_train))
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            n_concurrent_trials=n_concurrent_trials,
-            **settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
         automl_val_accuracy = 1.0 - automl.best_loss
         print("Best ML leaner:", automl.best_estimator)
         print("Best hyperparmeter config:", automl.best_config)
         print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                automl.best_config_train_time
-            )
-        )
+        print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
 
         starting_points = automl.best_config_per_estimator
         print("starting_points", starting_points)
@@ -486,14 +459,8 @@ class TestMultiClass(unittest.TestCase):
         new_automl_val_accuracy = 1.0 - new_automl.best_loss
         print("Best ML leaner:", new_automl.best_estimator)
         print("Best hyperparmeter config:", new_automl.best_config)
-        print(
-            "Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
-        )
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                new_automl.best_config_train_time
-            )
-        )
+        print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
+        print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
 
     def test_fit_w_starting_point_2(self, as_frame=True):
         try:
@@ -520,21 +487,12 @@ class TestMultiClass(unittest.TestCase):
             # test drop column
             X_train.columns = range(X_train.shape[1])
             X_train[X_train.shape[1]] = np.zeros(len(y_train))
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            n_concurrent_trials=n_concurrent_trials,
-            **settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
         automl_val_accuracy = 1.0 - automl.best_loss
         print("Best ML leaner:", automl.best_estimator)
         print("Best hyperparmeter config:", automl.best_config)
         print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                automl.best_config_train_time
-            )
-        )
+        print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
 
         starting_points = {}
         log_file_name = settings["log_file_name"]
@@ -568,9 +526,7 @@ class TestMultiClass(unittest.TestCase):
         new_automl_val_accuracy = 1.0 - new_automl.best_loss
         # print('Best ML leaner:', new_automl.best_estimator)
         # print('Best hyperparmeter config:', new_automl.best_config)
-        print(
-            "Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
-        )
+        print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
         # print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))
 
 
diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py
index be86b3b5b..0de61ebcc 100644
--- a/test/automl/test_notebook_example.py
+++ b/test/automl/test_notebook_example.py
@@ -60,9 +60,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     print("Best ML leaner:", automl.best_estimator)
     print("Best hyperparmeter config:", automl.best_config)
     print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
-    print(
-        "Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
-    )
+    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
     print(automl.model.estimator)
     print(automl.best_config_per_estimator)
     print("time taken to find best model:", automl.time_to_find_best_model)
@@ -81,9 +79,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
 
     accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
     print("accuracy", "=", accuracy)
-    print(
-        "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
-    )
+    print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
     print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
     if budget is None:
         assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
@@ -122,9 +118,7 @@ def test_mlflow():
     from flaml.automl.data import load_openml_task
 
     try:
-        X_train, X_test, y_train, y_test = load_openml_task(
-            task_id=7592, data_dir="test/"
-        )
+        X_train, X_test, y_train, y_test = load_openml_task(task_id=7592, data_dir="test/")
     except (OpenMLServerException, ChunkedEncodingError, SSLError) as e:
         print(e)
         return
diff --git a/test/automl/test_python_log.py b/test/automl/test_python_log.py
index d1e38fa78..7de011752 100644
--- a/test/automl/test_python_log.py
+++ b/test/automl/test_python_log.py
@@ -40,11 +40,7 @@ class TestLogging(unittest.TestCase):
             n = len(y_train) >> 1
             print(automl.model, automl.classes_, automl.predict(X_train))
             automl.fit(
-                X_train=X_train[:n],
-                y_train=y_train[:n],
-                X_val=X_train[n:],
-                y_val=y_train[n:],
-                **automl_settings
+                X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings
             )
             logger.info(automl.search_space)
             logger.info(automl.low_cost_partial_config)
@@ -58,9 +54,7 @@ class TestLogging(unittest.TestCase):
             sample = define_by_run_func(study.ask(), automl.search_space)
             logger.info(sample)
             logger.info(unflatten_hierarchical(sample, automl.search_space))
-            add_cost_to_space(
-                automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost
-            )
+            add_cost_to_space(automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost)
             logger.info(automl.search_space["ml"].categories)
             if automl.best_config:
                 config = automl.best_config.copy()
diff --git a/test/automl/test_regression.py b/test/automl/test_regression.py
index 02d058c67..3ae4da7b7 100644
--- a/test/automl/test_regression.py
+++ b/test/automl/test_regression.py
@@ -45,13 +45,7 @@ class TestRegression(unittest.TestCase):
         }
         X_train, y_train = fetch_california_housing(return_X_y=True)
         n = int(len(y_train) * 9 // 10)
-        automl.fit(
-            X_train=X_train[:n],
-            y_train=y_train[:n],
-            X_val=X_train[n:],
-            y_val=y_train[n:],
-            **automl_settings
-        )
+        automl.fit(X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings)
         assert automl._state.eval_method == "holdout"
         y_pred = automl.predict(X_train)
         print(y_pred)
@@ -88,10 +82,7 @@ class TestRegression(unittest.TestCase):
         print(automl.model.estimator)
         y_pred2 = automl.predict(X_train)
         # In some rare case, the last config is early stopped and it's the best config. But the logged config's n_estimator is not reduced.
-        assert (
-            n_iter != automl.model.estimator.get_params("n_estimator")
-            or (y_pred == y_pred2).all()
-        )
+        assert n_iter != automl.model.estimator.get_params("n_estimator") or (y_pred == y_pred2).all()
 
     def test_sparse_matrix_regression(self):
         X_train = scipy.sparse.random(300, 900, density=0.0001)
@@ -110,9 +101,7 @@ class TestRegression(unittest.TestCase):
             "verbose": 0,
             "early_stop": True,
         }
-        automl.fit(
-            X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
         assert automl._state.X_val.shape == X_val.shape
         print(automl.predict(X_train))
         print(automl.model)
@@ -135,9 +124,7 @@ class TestRegression(unittest.TestCase):
                 "custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
             }
         )
-        automl.fit(
-            X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
 
     def test_parallel(self, hpo_method=None):
         automl_experiment = AutoML()
@@ -203,13 +190,7 @@ class TestRegression(unittest.TestCase):
             "keep_search_state": True,
             "early_stop": True,
         }
-        automl_experiment.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl_experiment.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
         assert automl_experiment._state.X_val.shape == X_val.shape
         print(automl_experiment.predict(X_train))
         print(automl_experiment.model)
@@ -231,9 +212,7 @@ def test_multioutput():
     X, y = make_regression(n_targets=3)
 
     # split into train and test data
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.30, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
 
     # train the model
     model = MultiOutputRegressor(AutoML(task="regression", time_budget=1))
diff --git a/test/automl/test_score.py b/test/automl/test_score.py
index 3fd94a24b..f6e5a99f4 100644
--- a/test/automl/test_score.py
+++ b/test/automl/test_score.py
@@ -11,12 +11,7 @@ class TestScore:
         import statsmodels.api as sm
 
         data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
-        data = (
-            data.fillna(data.bfill())
-            .to_frame()
-            .reset_index()
-            .rename(columns={"index": "ds", "co2": "y"})
-        )
+        data = data.fillna(data.bfill()).to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
         num_samples = data.shape[0]
         time_horizon = 12
         split_idx = num_samples - time_horizon
@@ -48,9 +43,7 @@ class TestScore:
             with open("automl.pkl", "rb") as f:
                 pickle.load(f)  # v1.1 of prophet raises RecursionError
         except (ImportError, RecursionError):
-            print(
-                "not using prophet due to ImportError or RecursionError (when unpickling in v1.1)"
-            )
+            print("not using prophet due to ImportError or RecursionError (when unpickling in v1.1)")
             automl.fit(
                 dataframe=df,
                 **settings,
diff --git a/test/automl/test_split.py b/test/automl/test_split.py
index 9223c520a..a97dc0fa6 100644
--- a/test/automl/test_split.py
+++ b/test/automl/test_split.py
@@ -29,13 +29,9 @@ def _test(split_type):
 
         X, y = load_wine(return_X_y=True)
     if split_type != "time":
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.33, random_state=42
-        )
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
     else:
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.33, shuffle=False
-        )
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)
     automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
 
     pred = automl.predict(X_test)
@@ -83,9 +79,7 @@ def test_groups():
     automl_settings["split_type"] = GroupKFold(n_splits=3)
     try:
         automl.fit(X, y, **automl_settings)
-        raise RuntimeError(
-            "GroupKFold object as split_type should fail when eval_method is holdout"
-        )
+        raise RuntimeError("GroupKFold object as split_type should fail when eval_method is holdout")
     except AssertionError:
         # eval_method must be 'auto' or 'cv' for custom data splitter.
         pass
@@ -140,9 +134,7 @@ def test_rank():
         "log_file_name": "test/{}.log".format(dataset),
         "model_history": True,
         "eval_method": "cv",
-        "groups": np.array(  # group labels
-            [0] * 200 + [1] * 200 + [2] * 200 + [3] * 200 + [4] * 100 + [5] * 100
-        ),
+        "groups": np.array([0] * 200 + [1] * 200 + [2] * 200 + [3] * 200 + [4] * 100 + [5] * 100),  # group labels
         "learner_selector": "roundrobin",
     }
     automl.fit(X, y, **automl_settings)
@@ -197,9 +189,7 @@ def test_object():
         "split_type": TestKFold(5),
     }
     automl.fit(X, y, **automl_settings)
-    assert (
-        automl._state.eval_method == "cv"
-    ), "eval_method must be 'cv' for custom data splitter"
+    assert automl._state.eval_method == "cv", "eval_method must be 'cv' for custom data splitter"
 
     kf = TestKFold(5)
     kf.shuffle = True
diff --git a/test/automl/test_training_log.py b/test/automl/test_training_log.py
index 8d15bacf0..37505dd0c 100644
--- a/test/automl/test_training_log.py
+++ b/test/automl/test_training_log.py
@@ -9,9 +9,7 @@ from flaml.automl.training_log import training_log_reader
 
 
 class TestTrainingLog(unittest.TestCase):
-    def test_training_log(
-        self, path="test_training_log.log", estimator_list="auto", use_ray=False
-    ):
+    def test_training_log(self, path="test_training_log.log", estimator_list="auto", use_ray=False):
         with TemporaryDirectory() as d:
             filename = os.path.join(d, path)
 
@@ -64,11 +62,9 @@ class TestTrainingLog(unittest.TestCase):
                 assert (
                     str(model.estimator) == str(automl.model.estimator)
                     or estimator == "xgboost"
-                    and str(model.estimator.get_dump())
-                    == str(automl.model.estimator.get_dump())
+                    and str(model.estimator.get_dump()) == str(automl.model.estimator.get_dump())
                     or estimator == "catboost"
-                    and str(model.estimator.get_all_params())
-                    == str(automl.model.estimator.get_all_params())
+                    and str(model.estimator.get_all_params()) == str(automl.model.estimator.get_all_params())
                 )
                 automl.fit(
                     X_train=X_train,
diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py
index 74cf8c37f..89d1b2e0d 100644
--- a/test/automl/test_warmstart.py
+++ b/test/automl/test_warmstart.py
@@ -29,11 +29,7 @@ class TestWarmStart(unittest.TestCase):
         print("Best ML leaner:", automl.best_estimator)
         print("Best hyperparmeter config:", automl.best_config)
         print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                automl.best_config_train_time
-            )
-        )
+        print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
         # 1. Get starting points from previous experiments.
         starting_points = automl.best_config_per_estimator
         print("starting_points", starting_points)
@@ -61,19 +57,13 @@ class TestWarmStart(unittest.TestCase):
                 revised_hps_to_search = {
                     "n_estimators": {
                         "domain": tune.lograndint(lower=10, upper=32768),
-                        "init_value": starting_point.get("n_estimators")
-                        or space["n_estimators"].get("init_value", 10),
-                        "low_cost_init_value": space["n_estimators"].get(
-                            "low_cost_init_value", 10
-                        ),
+                        "init_value": starting_point.get("n_estimators") or space["n_estimators"].get("init_value", 10),
+                        "low_cost_init_value": space["n_estimators"].get("low_cost_init_value", 10),
                     },
                     "num_leaves": {
                         "domain": tune.lograndint(lower=10, upper=3276),
-                        "init_value": starting_point.get("num_leaves")
-                        or space["num_leaves"].get("init_value", 10),
-                        "low_cost_init_value": space["num_leaves"].get(
-                            "low_cost_init_value", 10
-                        ),
+                        "init_value": starting_point.get("num_leaves") or space["num_leaves"].get("init_value", 10),
+                        "low_cost_init_value": space["num_leaves"].get("low_cost_init_value", 10),
                     },
                     # (3.2) Add a new hp which is not in the original search space
                     "subsample": {
@@ -86,9 +76,7 @@ class TestWarmStart(unittest.TestCase):
 
         new_estimator_name = "large_lgbm"
         new_automl = AutoML()
-        new_automl.add_learner(
-            learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM
-        )
+        new_automl.add_learner(learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM)
 
         automl_settings_resume = {
             "time_budget": 3,
@@ -108,14 +96,8 @@ class TestWarmStart(unittest.TestCase):
         new_automl_val_accuracy = 1.0 - new_automl.best_loss
         print("Best ML leaner:", new_automl.best_estimator)
         print("Best hyperparmeter config:", new_automl.best_config)
-        print(
-            "Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
-        )
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                new_automl.best_config_train_time
-            )
-        )
+        print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
+        print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
 
     def test_nobudget(self):
         automl = AutoML()
@@ -127,9 +109,7 @@ class TestWarmStart(unittest.TestCase):
         from flaml.automl.data import load_openml_dataset
         from flaml import AutoML
 
-        X_train, X_test, y_train, y_test = load_openml_dataset(
-            dataset_id=1169, data_dir="./"
-        )
+        X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
 
         automl_settings = {
             "time_budget": 3,
diff --git a/test/automl/test_xgboost2d.py b/test/automl/test_xgboost2d.py
index e31ac5314..b34275e64 100644
--- a/test/automl/test_xgboost2d.py
+++ b/test/automl/test_xgboost2d.py
@@ -49,9 +49,7 @@ def test_simple(method=None):
         from sklearn.datasets import load_wine
 
         X, y = load_wine(return_X_y=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.33, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
     automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl.estimator_list)
     print(automl.search_space)
@@ -77,9 +75,7 @@ def test_simple(method=None):
         min_resource=automl.min_resource,
         max_resource=automl.max_resource,
         time_budget_s=automl._state.time_budget,
-        config_constraints=[
-            (partial(size, automl._state.learner_classes), "<=", automl._mem_thres)
-        ],
+        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
         metric_constraints=automl.metric_constraints,
         num_samples=5,
     )
diff --git a/test/automl/test_xgboost2d_sample_size.py b/test/automl/test_xgboost2d_sample_size.py
index aabba9139..1f97d58ba 100644
--- a/test/automl/test_xgboost2d_sample_size.py
+++ b/test/automl/test_xgboost2d_sample_size.py
@@ -31,9 +31,7 @@ def _test_simple(method=None, size_ratio=1.0):
     automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
 
     X, y = fetch_openml(name=dataset, return_X_y=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.33, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
 
     final_size = int(len(y_train) * size_ratio)
     X_train = X_train[:final_size]
diff --git a/test/default/test_defaults.py b/test/default/test_defaults.py
index 22ef0a2d3..140fe71f6 100644
--- a/test/default/test_defaults.py
+++ b/test/default/test_defaults.py
@@ -69,23 +69,15 @@ def test_regret():
 def test_suggest_classification():
     location = "test/default"
     X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
-    suggested = suggest_hyperparams(
-        "classification", X_train, y_train, "lgbm", location=location
-    )
+    suggested = suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
     print(suggested)
-    suggested = preprocess_and_suggest_hyperparams(
-        "classification", X_train, y_train, "xgboost", location=location
-    )
+    suggested = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
     print(suggested)
-    suggested = suggest_hyperparams(
-        "classification", X_train, y_train, "xgb_limitdepth", location=location
-    )
+    suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
     print(suggested)
 
     X, y = load_iris(return_X_y=True, as_frame=True)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.33, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
     (
         hyperparams,
         estimator_class,
@@ -93,29 +85,21 @@ def test_suggest_classification():
         y,
         feature_transformer,
         label_transformer,
-    ) = preprocess_and_suggest_hyperparams(
-        "classification", X_train, y_train, "lgbm", location=location
-    )
+    ) = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
     with open("test/default/feature_transformer", "wb") as f:
         pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
     model = estimator_class(**hyperparams)  # estimator_class is LGBMClassifier
     model.fit(X, y)
     X_test = feature_transformer.transform(X_test)
-    y_pred = label_transformer.inverse_transform(
-        pd.Series(model.predict(X_test).astype(int))
-    )
+    y_pred = label_transformer.inverse_transform(pd.Series(model.predict(X_test).astype(int)))
     print(y_pred)
-    suggested = suggest_hyperparams(
-        "classification", X_train, y_train, "xgboost", location=location
-    )
+    suggested = suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
     print(suggested)
     suggested = preprocess_and_suggest_hyperparams(
         "classification", X_train, y_train, "xgb_limitdepth", location=location
     )
     print(suggested)
-    suggested = suggest_hyperparams(
-        "classification", X_train, y_train, "xgb_limitdepth", location=location
-    )
+    suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
     suggested = suggest_learner(
         "classification",
         X_train,
@@ -129,17 +113,11 @@ def test_suggest_classification():
 def test_suggest_regression():
     location = "test/default"
     X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True)
-    suggested = suggest_hyperparams(
-        "regression", X_train, y_train, "lgbm", location=location
-    )
+    suggested = suggest_hyperparams("regression", X_train, y_train, "lgbm", location=location)
     print(suggested)
-    suggested = preprocess_and_suggest_hyperparams(
-        "regression", X_train, y_train, "xgboost", location=location
-    )
+    suggested = preprocess_and_suggest_hyperparams("regression", X_train, y_train, "xgboost", location=location)
     print(suggested)
-    suggested = suggest_hyperparams(
-        "regression", X_train, y_train, "xgb_limitdepth", location=location
-    )
+    suggested = suggest_hyperparams("regression", X_train, y_train, "xgb_limitdepth", location=location)
     print(suggested)
     suggested = suggest_learner("regression", X_train, y_train, location=location)
     print(suggested)
diff --git a/test/default_lgbm.py b/test/default_lgbm.py
index 105a3d6ec..c94994b89 100644
--- a/test/default_lgbm.py
+++ b/test/default_lgbm.py
@@ -5,9 +5,7 @@ from flaml.automl.ml import sklearn_metric_loss_score
 X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")
 lgbm = LGBMRegressor()
 
-hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(
-    X_train, y_train
-)
+hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(X_train, y_train)
 print(hyperparams)
 
 lgbm.fit(X_train, y_train)
diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py
index d751200fd..a7321e495 100644
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@@ -21,13 +21,7 @@ def test_hf_data():
     automl_settings["preserve_checkpoint"] = False
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
         automl.score(X_val, y_val, **{"metric": "accuracy"})
         automl.pickle("automl.pkl")
     except requests.exceptions.HTTPError:
@@ -54,13 +48,7 @@ def test_hf_data():
     automl_settings.pop("use_ray", None)
     automl_settings.pop("estimator_list", None)
 
-    automl.retrain_from_log(
-        X_train=X_train,
-        y_train=y_train,
-        train_full=True,
-        record_id=0,
-        **automl_settings
-    )
+    automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)
     automl.predict(X_test, **{"per_device_eval_batch_size": 2})
     automl.predict(["", ""])
     automl.predict_proba(["", ""])
diff --git a/test/nlp/test_autohf_classificationhead.py b/test/nlp/test_autohf_classificationhead.py
index 4204d49f6..4df0192d8 100644
--- a/test/nlp/test_autohf_classificationhead.py
+++ b/test/nlp/test_autohf_classificationhead.py
@@ -23,65 +23,47 @@ model_path_list = [
 
 def test_switch_1_1():
     data_idx, model_path_idx = 0, 0
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_1_2():
     data_idx, model_path_idx = 0, 1
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_1_3():
     data_idx, model_path_idx = 0, 2
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_2_1():
     data_idx, model_path_idx = 1, 0
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_2_2():
     data_idx, model_path_idx = 1, 1
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_2_3():
     data_idx, model_path_idx = 1, 2
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_3_1():
     data_idx, model_path_idx = 2, 0
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_3_2():
     data_idx, model_path_idx = 2, 1
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def test_switch_3_3():
     data_idx, model_path_idx = 2, 2
-    _test_switch_classificationhead(
-        data_list[data_idx], model_path_list[model_path_idx]
-    )
+    _test_switch_classificationhead(data_list[data_idx], model_path_list[model_path_idx])
 
 
 def _test_switch_classificationhead(each_data, each_model_path):
@@ -102,13 +84,7 @@ def _test_switch_classificationhead(each_data, each_model_path):
         automl_settings["metric"] = "accuracy"
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     except requests.exceptions.HTTPError:
         return
 
diff --git a/test/nlp/test_autohf_custom_metric.py b/test/nlp/test_autohf_custom_metric.py
index 0918e0d85..72653ffd7 100644
--- a/test/nlp/test_autohf_custom_metric.py
+++ b/test/nlp/test_autohf_custom_metric.py
@@ -61,22 +61,14 @@ def test_custom_metric():
     automl_settings["use_ray"] = {"local_dir": "data/output/"}
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     except requests.exceptions.HTTPError:
         return
 
     # testing calling custom metric in TransformersEstimator._compute_metrics_by_dataset_name
 
     automl_settings["max_iter"] = 3
-    automl.fit(
-        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
-    )
+    automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     automl.score(X_val, y_val, **{"metric": custom_metric})
     automl.pickle("automl.pkl")
 
diff --git a/test/nlp/test_autohf_cv.py b/test/nlp/test_autohf_cv.py
index a8b40ae9e..b37dd6c57 100644
--- a/test/nlp/test_autohf_cv.py
+++ b/test/nlp/test_autohf_cv.py
@@ -5,9 +5,7 @@ import os
 import shutil
 
 
-@pytest.mark.skipif(
-    sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows"
-)
+@pytest.mark.skipif(sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows")
 def test_cv():
     from flaml import AutoML
     import requests
diff --git a/test/nlp/test_autohf_loadargs.py b/test/nlp/test_autohf_loadargs.py
index 0239d508d..e5933cbf5 100644
--- a/test/nlp/test_autohf_loadargs.py
+++ b/test/nlp/test_autohf_loadargs.py
@@ -2,6 +2,4 @@ def test_load_args():
     import subprocess
     import sys
 
-    subprocess.call(
-        [sys.executable, "load_args.py", "--output_dir", "data/"], shell=True
-    )
+    subprocess.call([sys.executable, "load_args.py", "--output_dir", "data/"], shell=True)
diff --git a/test/nlp/test_autohf_multichoice_classification.py b/test/nlp/test_autohf_multichoice_classification.py
index 918dec8f8..1670f2982 100644
--- a/test/nlp/test_autohf_multichoice_classification.py
+++ b/test/nlp/test_autohf_multichoice_classification.py
@@ -5,9 +5,7 @@ import os
 import shutil
 
 
-@pytest.mark.skipif(
-    sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows"
-)
+@pytest.mark.skipif(sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows")
 def test_mcc():
     from flaml import AutoML
     import requests
@@ -27,13 +25,7 @@ def test_mcc():
     automl_settings["metric"] = "accuracy"
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     except requests.exceptions.HTTPError:
         return
 
diff --git a/test/nlp/test_autohf_regression.py b/test/nlp/test_autohf_regression.py
index 2353c1425..63f7ca25f 100644
--- a/test/nlp/test_autohf_regression.py
+++ b/test/nlp/test_autohf_regression.py
@@ -29,9 +29,7 @@ def test_regression():
     ray.shutdown()
     ray.init()
 
-    automl.fit(
-        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
-    )
+    automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     automl.predict(X_val)
 
     if os.path.exists("test/data/output/"):
diff --git a/test/nlp/test_autohf_summarization.py b/test/nlp/test_autohf_summarization.py
index f1427e752..e54a42bdb 100644
--- a/test/nlp/test_autohf_summarization.py
+++ b/test/nlp/test_autohf_summarization.py
@@ -22,18 +22,10 @@ def test_summarization():
     automl_settings["task"] = "summarization"
     automl_settings["metric"] = "rouge1"
     automl_settings["time_budget"] = 2 * automl_settings["time_budget"]
-    automl_settings["fit_kwargs_by_estimator"]["transformer"][
-        "model_path"
-    ] = "patrickvonplaten/t5-tiny-random"
+    automl_settings["fit_kwargs_by_estimator"]["transformer"]["model_path"] = "patrickvonplaten/t5-tiny-random"
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     except requests.exceptions.HTTPError:
         return
 
@@ -41,13 +33,7 @@ def test_summarization():
     automl_settings.pop("use_ray", None)
     automl_settings.pop("estimator_list", None)
 
-    automl.retrain_from_log(
-        X_train=X_train,
-        y_train=y_train,
-        train_full=True,
-        record_id=0,
-        **automl_settings
-    )
+    automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)
     automl.predict(X_test)
 
     if os.path.exists("test/data/output/"):
diff --git a/test/nlp/test_autohf_tokenclassification.py b/test/nlp/test_autohf_tokenclassification.py
index ead6ce8e2..b55d465b3 100644
--- a/test/nlp/test_autohf_tokenclassification.py
+++ b/test/nlp/test_autohf_tokenclassification.py
@@ -22,9 +22,7 @@ def test_tokenclassification_idlabel():
 
     automl_settings = get_automl_settings()
     automl_settings["task"] = "token-classification"
-    automl_settings[
-        "metric"
-    ] = "seqeval:overall_f1"  # evaluating based on the overall_f1 of seqeval
+    automl_settings["metric"] = "seqeval:overall_f1"  # evaluating based on the overall_f1 of seqeval
     automl_settings["fit_kwargs_by_estimator"]["transformer"]["label_list"] = [
         "O",
         "B-PER",
@@ -38,13 +36,7 @@ def test_tokenclassification_idlabel():
     ]
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     except requests.exceptions.HTTPError:
         return
 
@@ -83,18 +75,10 @@ def test_tokenclassification_tokenlabel():
 
     automl_settings = get_automl_settings()
     automl_settings["task"] = "token-classification"
-    automl_settings[
-        "metric"
-    ] = "seqeval:overall_f1"  # evaluating based on the overall_f1 of seqeval
+    automl_settings["metric"] = "seqeval:overall_f1"  # evaluating based on the overall_f1 of seqeval
 
     try:
-        automl.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
-        )
+        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
     except requests.exceptions.HTTPError:
         return
 
diff --git a/test/nlp/test_default.py b/test/nlp/test_default.py
index d2e991b66..e55ed9fe7 100644
--- a/test/nlp/test_default.py
+++ b/test/nlp/test_default.py
@@ -32,15 +32,10 @@ def test_starting_point_not_in_search_space():
     automl = AutoML()
     automl_settings = get_automl_settings(estimator_name=this_estimator_name)
 
-    automl_settings["starting_points"] = {
-        this_estimator_name: [{"learning_rate": 2e-3}]
-    }
+    automl_settings["starting_points"] = {this_estimator_name: [{"learning_rate": 2e-3}]}
 
     automl.fit(X_train, y_train, **automl_settings)
-    assert (
-        automl._search_states[this_estimator_name].init_config[0]["learning_rate"]
-        != 2e-3
-    )
+    assert automl._search_states[this_estimator_name].init_config[0]["learning_rate"] != 2e-3
 
     """
         test starting_points located outside of the search space, and custom_hp is set
@@ -80,10 +75,7 @@ def test_starting_point_not_in_search_space():
             len(automl_settings["custom_hp"][this_estimator_name]),
         )
     )
-    assert (
-        automl._search_states[this_estimator_name].search_space["model_path"]
-        == "albert-base-v2"
-    )
+    assert automl._search_states[this_estimator_name].search_space["model_path"] == "albert-base-v2"
 
     if os.path.exists("test/data/output/"):
         try:
@@ -103,11 +95,7 @@ def test_points_to_evaluate():
 
     automl_settings["starting_points"] = "data:test/nlp/default/"
 
-    automl_settings["custom_hp"] = {
-        "transformer_ms": {
-            "model_path": {"domain": "google/electra-small-discriminator"}
-        }
-    }
+    automl_settings["custom_hp"] = {"transformer_ms": {"model_path": {"domain": "google/electra-small-discriminator"}}}
 
     automl.fit(X_train, y_train, **automl_settings)
 
@@ -137,13 +125,9 @@ def test_zero_shot_nomodel():
         y_train,
         _,
         _,
-    ) = preprocess_and_suggest_hyperparams(
-        "seq-classification", X_train, y_train, estimator_name, location=location
-    )
+    ) = preprocess_and_suggest_hyperparams("seq-classification", X_train, y_train, estimator_name, location=location)
 
-    model = estimator_class(
-        **hyperparams
-    )  # estimator_class is TransformersEstimatorModelSelection
+    model = estimator_class(**hyperparams)  # estimator_class is TransformersEstimatorModelSelection
 
     fit_kwargs = automl_settings.pop("fit_kwargs_by_estimator", {}).get(estimator_name)
     fit_kwargs.update(automl_settings)
diff --git a/test/nlp/utils.py b/test/nlp/utils.py
index 6d133a08c..f57dc5e8a 100644
--- a/test/nlp/utils.py
+++ b/test/nlp/utils.py
@@ -314,8 +314,7 @@ def get_toy_data_multiplechoiceclassification():
             " its false bottom. He stands and looks around, his eyes",
         ],
         "sent1": [
-            "Someone leans out of the drive - thru "
-            "window, grinning at her, holding bags filled with fast food.",
+            "Someone leans out of the drive - thru " "window, grinning at her, holding bags filled with fast food.",
             "Someone looks up suddenly when he hears.",
             "Someone drives; someone sits beside her.",
             "He opens the drawer in which we know"
@@ -343,8 +342,7 @@ def get_toy_data_multiplechoiceclassification():
         "ending2": [
             "attempts to block her ransacked.",
             "talks using the phone and walks away for a few seconds.",
-            "are too involved with each other to "
-            "notice someone watching them from the drive - thru window.",
+            "are too involved with each other to " "notice someone watching them from the drive - thru window.",
             "finally landing on: the digicam and a stack of cassettes on a shelf.",
         ],
         "ending3": [
diff --git a/test/nni/mnist.py b/test/nni/mnist.py
index 50d3c2e86..bbe55a588 100644
--- a/test/nni/mnist.py
+++ b/test/nni/mnist.py
@@ -107,9 +107,7 @@ def main(args):
             data_dir,
             train=True,
             download=True,
-            transform=transforms.Compose(
-                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-            ),
+            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
         ),
         batch_size=args["batch_size"],
         shuffle=True,
@@ -119,9 +117,7 @@ def main(args):
         datasets.MNIST(
             data_dir,
             train=False,
-            transform=transforms.Compose(
-                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-            ),
+            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
         ),
         batch_size=1000,
         shuffle=True,
@@ -188,12 +184,8 @@ def get_params():
         metavar="N",
         help="number of epochs to train (default: 10)",
     )
-    parser.add_argument(
-        "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
-    )
-    parser.add_argument(
-        "--no_cuda", action="store_true", default=False, help="disables CUDA training"
-    )
+    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")
+    parser.add_argument("--no_cuda", action="store_true", default=False, help="disables CUDA training")
     parser.add_argument(
         "--log_interval",
         type=int,
diff --git a/test/object_store.py b/test/object_store.py
index dc00914e6..175520e0b 100644
--- a/test/object_store.py
+++ b/test/object_store.py
@@ -8,9 +8,7 @@ import ray
 
 data = fetch_california_housing(return_X_y=False, as_frame=True)
 X, y = data.data, data.target
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=0.33, random_state=42
-)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
 X_train_ref = ray.put(X_train)
 print(isinstance(X_train_ref, ray.ObjectRef))
 
@@ -33,22 +31,14 @@ def train_lgbm(config: dict) -> dict:
 # load a built-in search space from flaml
 flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
 # specify the search space as a dict from hp name to domain; you can define your own search space same way
-config_search_space = {
-    hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
-}
+config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
 # give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
 low_cost_partial_config = {
-    hp: space["low_cost_init_value"]
-    for hp, space in flaml_lgbm_search_space.items()
-    if "low_cost_init_value" in space
+    hp: space["low_cost_init_value"] for hp, space in flaml_lgbm_search_space.items() if "low_cost_init_value" in space
 }
 # initial points to evaluate
 points_to_evaluate = [
-    {
-        hp: space["init_value"]
-        for hp, space in flaml_lgbm_search_space.items()
-        if "init_value" in space
-    }
+    {hp: space["init_value"] for hp, space in flaml_lgbm_search_space.items() if "init_value" in space}
 ]
 # run the tuning, minimizing mse, with total time budget 3 seconds
 analysis = tune.run(
diff --git a/test/openai/test_completion.py b/test/openai/test_completion.py
index 1d04ab443..4eec54a75 100644
--- a/test/openai/test_completion.py
+++ b/test/openai/test_completion.py
@@ -17,9 +17,7 @@ from flaml.autogen.math_utils import eval_math_responses
     reason="do not run on windows",
 )
 def test_humaneval(num_samples=1):
-    eval_with_generated_assertions = partial(
-        eval_function_completions, assertions=generate_assertions
-    )
+    eval_with_generated_assertions = partial(eval_function_completions, assertions=generate_assertions)
 
     seed = 41
     data = datasets.load_dataset("openai_humaneval")["test"].shuffle(seed=seed)
@@ -165,9 +163,7 @@ def test_math(num_samples=-1):
         "stop": "###",
     }
     test_data_sample = test_data[0:3]
-    result = oai.ChatCompletion.test(
-        test_data_sample, vanilla_config, eval_math_responses
-    )
+    result = oai.ChatCompletion.test(test_data_sample, vanilla_config, eval_math_responses)
     test_data_sample = test_data[3:6]
     result = oai.ChatCompletion.test(
         test_data_sample,
diff --git a/test/pipeline_tuning_example/submit_train_pipeline.py b/test/pipeline_tuning_example/submit_train_pipeline.py
index d01edef1a..07de3123a 100644
--- a/test/pipeline_tuning_example/submit_train_pipeline.py
+++ b/test/pipeline_tuning_example/submit_train_pipeline.py
@@ -83,9 +83,7 @@ def build_and_submit_aml_pipeline(config):
     ################################################
     # load component functions
     ################################################
-    data_prep_component = Component.from_yaml(
-        ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml"
-    )
+    data_prep_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml")
     train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "train/train.yaml")
 
     ################################################
diff --git a/test/pipeline_tuning_example/submit_tuner_pipeline.py b/test/pipeline_tuning_example/submit_tuner_pipeline.py
index a9cfdbecc..082a87bb0 100644
--- a/test/pipeline_tuning_example/submit_tuner_pipeline.py
+++ b/test/pipeline_tuning_example/submit_tuner_pipeline.py
@@ -24,9 +24,7 @@ def remote_run():
     # load component functions
     ################################################
 
-    pipeline_tuning_func = Component.from_yaml(
-        ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml"
-    )
+    pipeline_tuning_func = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml")
 
     ################################################
     # build pipeline
@@ -61,9 +59,7 @@ if __name__ == "__main__":
         help="your_subscription_id",
         required=False,
     )
-    parser.add_argument(
-        "--resource_group", type=str, help="your_resource_group", required=False
-    )
+    parser.add_argument("--resource_group", type=str, help="your_resource_group", required=False)
     parser.add_argument("--workspace", type=str, help="your_workspace", required=False)
 
     parser.add_argument("--remote", dest="remote", action="store_true")
diff --git a/test/ray/distribute_tune.py b/test/ray/distribute_tune.py
index e78c624b4..3d1c8366f 100644
--- a/test/ray/distribute_tune.py
+++ b/test/ray/distribute_tune.py
@@ -26,9 +26,7 @@ if __name__ == "__main__":
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
         X_train_ref = ray.put(X_train)
         flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
-        config_search_space = {
-            hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
-        }
+        config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
         low_cost_partial_config = {
             hp: space["low_cost_init_value"]
             for hp, space in flaml_lgbm_search_space.items()
diff --git a/test/rep.py b/test/rep.py
index 266ac0059..be9dac482 100644
--- a/test/rep.py
+++ b/test/rep.py
@@ -28,9 +28,7 @@ settings = {
 
 for trial_num in range(8):
     automl = AutoML()
-    automl.add_learner(
-        learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded
-    )
+    automl.add_learner(learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded)
     automl.fit(X_train=X_train, y_train=y_train, **settings)
     print(automl.best_loss)
     print(automl.best_config)
diff --git a/test/run_distribute_automl.py b/test/run_distribute_automl.py
index 8490a8c20..340d31d31 100644
--- a/test/run_distribute_automl.py
+++ b/test/run_distribute_automl.py
@@ -7,16 +7,12 @@ ray_environment_name = "aml-ray-cpu"
 ray_environment_dockerfile_path = "./Docker/Dockerfile-cpu"
 
 # Build CPU image for Ray
-ray_cpu_env = Environment.from_dockerfile(
-    name=ray_environment_name, dockerfile=ray_environment_dockerfile_path
-)
+ray_cpu_env = Environment.from_dockerfile(name=ray_environment_name, dockerfile=ray_environment_dockerfile_path)
 ray_cpu_env.register(workspace=ws)
 ray_cpu_build_details = ray_cpu_env.build(workspace=ws)
 
 while ray_cpu_build_details.status not in ["Succeeded", "Failed"]:
-    print(
-        f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}"
-    )
+    print(f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}")
     time.sleep(10)
 
 command = ["python distribute_automl.py"]
diff --git a/test/run_distribute_tune.py b/test/run_distribute_tune.py
index 34c35d9fd..4bc222726 100644
--- a/test/run_distribute_tune.py
+++ b/test/run_distribute_tune.py
@@ -7,16 +7,12 @@ ray_environment_name = "aml-ray-cpu"
 ray_environment_dockerfile_path = "./Docker/Dockerfile-cpu"
 
 # Build CPU image for Ray
-ray_cpu_env = Environment.from_dockerfile(
-    name=ray_environment_name, dockerfile=ray_environment_dockerfile_path
-)
+ray_cpu_env = Environment.from_dockerfile(name=ray_environment_name, dockerfile=ray_environment_dockerfile_path)
 ray_cpu_env.register(workspace=ws)
 ray_cpu_build_details = ray_cpu_env.build(workspace=ws)
 
 while ray_cpu_build_details.status not in ["Succeeded", "Failed"]:
-    print(
-        f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}"
-    )
+    print(f"Awaiting completion of ray CPU environment build. Current status is: {ray_cpu_build_details.status}")
     time.sleep(10)
 
 command = ["python distribute_tune.py"]
diff --git a/test/spark/test_0sparkml.py b/test/spark/test_0sparkml.py
index 6890f7f10..8069616f3 100644
--- a/test/spark/test_0sparkml.py
+++ b/test/spark/test_0sparkml.py
@@ -36,9 +36,7 @@ else:
         skip_spark = True
 
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 
 def _test_spark_synapseml_lightgbm(spark=None, task="classification"):
@@ -83,9 +81,7 @@ def _test_spark_synapseml_lightgbm(spark=None, task="classification"):
         columns = X_train.columns
         feature_cols = [col for col in columns if col != "label"]
         featurizer = VectorAssembler(inputCols=feature_cols, outputCol="features")
-        X_train = featurizer.transform(X_train.to_spark(index_col="index"))[
-            "index", "features"
-        ]
+        X_train = featurizer.transform(X_train.to_spark(index_col="index"))["index", "features"]
     X_train = to_pandas_on_spark(X_train)
 
     automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
@@ -138,9 +134,7 @@ def test_spark_input_df():
         spark.read.format("csv")
         .option("header", True)
         .option("inferSchema", True)
-        .load(
-            "wasbs://publicwasb@mmlspark.blob.core.windows.net/company_bankruptcy_prediction_data.csv"
-        )
+        .load("wasbs://publicwasb@mmlspark.blob.core.windows.net/company_bankruptcy_prediction_data.csv")
     )
     train, test = df.randomSplit([0.8, 0.2], seed=1)
     feature_cols = df.columns[1:]
@@ -151,9 +145,7 @@ def test_spark_input_df():
     settings = {
         "time_budget": 30,  # total running time in seconds
         "metric": "roc_auc",
-        "estimator_list": [
-            "lgbm_spark"
-        ],  # list of ML learners; we tune lightgbm in this example
+        "estimator_list": ["lgbm_spark"],  # list of ML learners; we tune lightgbm in this example
         "task": "classification",  # task type
         "log_file_name": "flaml_experiment.log",  # flaml log file
         "seed": 7654321,  # random seed
@@ -187,9 +179,7 @@ def test_spark_input_df():
     settings = {
         "time_budget": 10,  # total running time in seconds
         "metric": "roc_auc",
-        "estimator_list": [
-            "lgbm"
-        ],  # list of ML learners; we tune lightgbm in this example
+        "estimator_list": ["lgbm"],  # list of ML learners; we tune lightgbm in this example
         "task": "classification",  # task type
     }
     with pytest.raises(ValueError) as excinfo:
diff --git a/test/spark/test_automl.py b/test/spark/test_automl.py
index 79801c79e..96562f06a 100644
--- a/test/spark/test_automl.py
+++ b/test/spark/test_automl.py
@@ -23,9 +23,7 @@ os.environ["FLAML_MAX_CONCURRENT"] = "2"
 spark_available, _ = check_spark()
 skip_spark = not spark_available
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 
 def test_parallel_xgboost(hpo_method=None, data_size=1000):
@@ -59,9 +57,7 @@ def test_parallel_xgboost_others():
     test_parallel_xgboost(hpo_method="random")
 
 
-@pytest.mark.skip(
-    reason="currently not supporting too large data, will support spark dataframe in the future"
-)
+@pytest.mark.skip(reason="currently not supporting too large data, will support spark dataframe in the future")
 def test_large_dataset():
     test_parallel_xgboost(data_size=90000000)
 
@@ -101,8 +97,6 @@ if __name__ == "__main__":
     test_parallel_xgboost_others()
     # test_large_dataset()
     if skip_my_learner:
-        print(
-            "please run pytest in the root directory of FLAML, i.e., the directory that contains the setup.py file"
-        )
+        print("please run pytest in the root directory of FLAML, i.e., the directory that contains the setup.py file")
     else:
         test_custom_learner()
diff --git a/test/spark/test_exceptions.py b/test/spark/test_exceptions.py
index 0c31b8ade..a49247c0a 100644
--- a/test/spark/test_exceptions.py
+++ b/test/spark/test_exceptions.py
@@ -7,17 +7,13 @@ import pytest
 spark_available, _ = check_spark()
 skip_spark = not spark_available
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 os.environ["FLAML_MAX_CONCURRENT"] = "2"
 
 
 def base_automl(n_concurrent_trials=1, use_ray=False, use_spark=False, verbose=0):
-    X_train, X_test, y_train, y_test = load_openml_dataset(
-        dataset_id=537, data_dir="./"
-    )
+    X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")
     automl = AutoML()
     settings = {
         "time_budget": 3,  # total running time in seconds
@@ -37,9 +33,7 @@ def base_automl(n_concurrent_trials=1, use_ray=False, use_spark=False, verbose=0
     print("Best ML leaner:", automl.best_estimator)
     print("Best hyperparmeter config:", automl.best_config)
     print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
-    print(
-        "Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
-    )
+    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
 
 
 def test_both_ray_spark():
diff --git a/test/spark/test_multiclass.py b/test/spark/test_multiclass.py
index e0384e23f..6e9265b8c 100644
--- a/test/spark/test_multiclass.py
+++ b/test/spark/test_multiclass.py
@@ -107,9 +107,7 @@ class TestMultiClass(unittest.TestCase):
             valid_loss_history,
             config_history,
             metric_history,
-        ) = get_output_from_log(
-            filename=automl_settings["log_file_name"], time_budget=6
-        )
+        ) = get_output_from_log(filename=automl_settings["log_file_name"], time_budget=6)
         print(metric_history)
 
     def test_classification(self, as_frame=False):
@@ -167,12 +165,8 @@ class TestMultiClass(unittest.TestCase):
             "use_spark": True,
         }
         X_train, y_train = load_iris(return_X_y=True)
-        automl_experiment_micro.fit(
-            X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings
-        )
-        automl_experiment_macro.fit(
-            X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings
-        )
+        automl_experiment_micro.fit(X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings)
+        automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings)
         estimator = automl_experiment_macro.model
         y_pred = estimator.predict(X_train)
         y_pred_proba = estimator.predict_proba(X_train)
@@ -280,9 +274,7 @@ class TestMultiClass(unittest.TestCase):
     )
     def _test_memory_limit(self):
         automl_experiment = AutoML()
-        automl_experiment.add_learner(
-            learner_name="large_lgbm", learner_class=MyLargeLGBM
-        )
+        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
         automl_settings = {
             "time_budget": -1,
             "task": "classification",
@@ -296,9 +288,7 @@ class TestMultiClass(unittest.TestCase):
         }
         X_train, y_train = load_iris(return_X_y=True, as_frame=True)
 
-        automl_experiment.fit(
-            X_train=X_train, y_train=y_train, max_iter=1, **automl_settings
-        )
+        automl_experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **automl_settings)
         print(automl_experiment.model)
 
     @unittest.skipIf(
@@ -307,12 +297,8 @@ class TestMultiClass(unittest.TestCase):
     )
     def test_time_limit(self):
         automl_experiment = AutoML()
-        automl_experiment.add_learner(
-            learner_name="large_lgbm", learner_class=MyLargeLGBM
-        )
-        automl_experiment.add_learner(
-            learner_name="large_xgb", learner_class=MyLargeXGB
-        )
+        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
+        automl_experiment.add_learner(learner_name="large_xgb", learner_class=MyLargeXGB)
         automl_settings = {
             "time_budget": 0.5,
             "task": "classification",
@@ -356,11 +342,7 @@ class TestMultiClass(unittest.TestCase):
         print("Best ML leaner:", automl_experiment.best_estimator)
         print("Best hyperparmeter config:", automl_experiment.best_config)
         print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                automl_experiment.best_config_train_time
-            )
-        )
+        print("Training duration of best run: {0:.4g} s".format(automl_experiment.best_config_train_time))
 
         starting_points = automl_experiment.best_config_per_estimator
         print("starting_points", starting_points)
@@ -379,21 +361,13 @@ class TestMultiClass(unittest.TestCase):
             "use_spark": True,
         }
         new_automl_experiment = AutoML()
-        new_automl_experiment.fit(
-            X_train=X_train, y_train=y_train, **automl_settings_resume
-        )
+        new_automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
 
         new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
         print("Best ML leaner:", new_automl_experiment.best_estimator)
         print("Best hyperparmeter config:", new_automl_experiment.best_config)
-        print(
-            "Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
-        )
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                new_automl_experiment.best_config_train_time
-            )
-        )
+        print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
+        print("Training duration of best run: {0:.4g} s".format(new_automl_experiment.best_config_train_time))
 
     def test_fit_w_starting_points_list(self, as_frame=True):
         automl_experiment = AutoML()
@@ -418,11 +392,7 @@ class TestMultiClass(unittest.TestCase):
         print("Best ML leaner:", automl_experiment.best_estimator)
         print("Best hyperparmeter config:", automl_experiment.best_config)
         print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
-        print(
-            "Training duration of best run: {0:.4g} s".format(
-                automl_experiment.best_config_train_time
-            )
-        )
+        print("Training duration of best run: {0:.4g} s".format(automl_experiment.best_config_train_time))
 
         starting_points = {}
         log_file_name = automl_settings["log_file_name"]
@@ -453,16 +423,12 @@ class TestMultiClass(unittest.TestCase):
             "use_spark": True,
         }
         new_automl_experiment = AutoML()
-        new_automl_experiment.fit(
-            X_train=X_train, y_train=y_train, **automl_settings_resume
-        )
+        new_automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
 
         new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
         # print('Best ML leaner:', new_automl_experiment.best_estimator)
         # print('Best hyperparmeter config:', new_automl_experiment.best_config)
-        print(
-            "Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy)
-        )
+        print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
         # print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))
 
 
diff --git a/test/spark/test_notebook.py b/test/spark/test_notebook.py
index 2065a097a..08a28a85c 100644
--- a/test/spark/test_notebook.py
+++ b/test/spark/test_notebook.py
@@ -8,9 +8,7 @@ import pytest
 spark_available, _ = check_spark()
 skip_spark = not spark_available
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 here = os.path.abspath(os.path.dirname(__file__))
 os.environ["FLAML_MAX_CONCURRENT"] = "2"
diff --git a/test/spark/test_overtime.py b/test/spark/test_overtime.py
index ae17f1f50..4842faec4 100644
--- a/test/spark/test_overtime.py
+++ b/test/spark/test_overtime.py
@@ -24,9 +24,7 @@ try:
 except ImportError:
     skip_spark = True
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 
 def test_overtime():
@@ -56,11 +54,7 @@ def test_overtime():
     start_time = time.time()
     automl_experiment.fit(**automl_settings)
     elapsed_time = time.time() - start_time
-    print(
-        "time budget: {:.2f}s, actual elapsed time: {:.2f}s".format(
-            time_budget, elapsed_time
-        )
-    )
+    print("time budget: {:.2f}s, actual elapsed time: {:.2f}s".format(time_budget, elapsed_time))
     # assert abs(elapsed_time - time_budget) < 5  # cancel assertion because github VM sometimes is super slow, causing the test to fail
     print(automl_experiment.predict(df))
     print(automl_experiment.model)
diff --git a/test/spark/test_performance.py b/test/spark/test_performance.py
index 2bf72b945..160fca992 100644
--- a/test/spark/test_performance.py
+++ b/test/spark/test_performance.py
@@ -8,9 +8,7 @@ import pytest
 spark_available, _ = check_spark()
 skip_spark = not spark_available
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 os.environ["FLAML_MAX_CONCURRENT"] = "2"
 
@@ -72,9 +70,7 @@ def run_automl(budget=3, dataset_format="dataframe", hpo_method=None):
     print("Best ML leaner:", automl.best_estimator)
     print("Best hyperparmeter config:", automl.best_config)
     print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
-    print(
-        "Training duration of best run: {0:.4g} s".format(automl.best_config_train_time)
-    )
+    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
     print(automl.model.estimator)
     print(automl.best_config_per_estimator)
     print("time taken to find best model:", automl.time_to_find_best_model)
@@ -89,9 +85,7 @@ def run_automl(budget=3, dataset_format="dataframe", hpo_method=None):
 
     accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
     print("accuracy", "=", accuracy)
-    print(
-        "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
-    )
+    print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
     print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
     if performance_check_budget is None:
         assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
diff --git a/test/spark/test_tune.py b/test/spark/test_tune.py
index 39098ea55..b54b802b4 100644
--- a/test/spark/test_tune.py
+++ b/test/spark/test_tune.py
@@ -12,9 +12,7 @@ import pytest
 spark_available, _ = check_spark()
 skip_spark = not spark_available
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 os.environ["FLAML_MAX_CONCURRENT"] = "2"
 X, y = load_breast_cancer(return_X_y=True)
@@ -35,9 +33,7 @@ def train_breast_cancer(config):
 
 def test_tune_spark():
     flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
-    config_search_space = {
-        hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
-    }
+    config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
 
     analysis = tune.run(
         train_breast_cancer,
diff --git a/test/spark/test_utils.py b/test/spark/test_utils.py
index aa80844da..b6e86b6f1 100644
--- a/test/spark/test_utils.py
+++ b/test/spark/test_utils.py
@@ -34,9 +34,7 @@ except ImportError:
     print("Spark is not installed. Skip all spark tests.")
     skip_spark = True
 
-pytestmark = pytest.mark.skipif(
-    skip_spark, reason="Spark is not installed. Skip all spark tests."
-)
+pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
 
 
 def test_with_parameters_spark():
@@ -53,9 +51,7 @@ def test_with_parameters_spark():
     spark = SparkSession.builder.getOrCreate()
     rdd = spark.sparkContext.parallelize(list(range(2)))
 
-    t_partial = timeit(
-        lambda: rdd.map(lambda x: partial_train(config=x)).collect(), number=5
-    )
+    t_partial = timeit(lambda: rdd.map(lambda x: partial_train(config=x)).collect(), number=5)
     print("python_partial_train: " + str(t_partial))
 
     t_spark = timeit(
@@ -139,12 +135,8 @@ def test_train_test_split_pyspark():
     spark = SparkSession.builder.getOrCreate()
     sdf = spark.createDataFrame(pdf).repartition(1)
     psdf = to_pandas_on_spark(sdf).spark.repartition(1)
-    train_sdf, test_sdf = train_test_split_pyspark(
-        sdf, test_fraction=0.5, to_pandas_spark=False, seed=1
-    )
-    train_psdf, test_psdf = train_test_split_pyspark(
-        psdf, test_fraction=0.5, stratify_column="y", seed=1
-    )
+    train_sdf, test_sdf = train_test_split_pyspark(sdf, test_fraction=0.5, to_pandas_spark=False, seed=1)
+    train_psdf, test_psdf = train_test_split_pyspark(psdf, test_fraction=0.5, stratify_column="y", seed=1)
     assert isinstance(train_sdf, pyspark.sql.dataframe.DataFrame)
     assert isinstance(test_sdf, pyspark.sql.dataframe.DataFrame)
     assert isinstance(train_psdf, ps.DataFrame)
@@ -190,9 +182,7 @@ def test_unique_value_first_index():
 def test_n_current_trials():
     spark = SparkSession.builder.getOrCreate()
     sc = spark._jsc.sc()
-    num_executors = (
-        len([executor.host() for executor in sc.statusTracker().getExecutorInfos()]) - 1
-    )
+    num_executors = len([executor.host() for executor in sc.statusTracker().getExecutorInfos()]) - 1
 
     def get_n_current_trials(n_concurrent_trials=0, num_executors=num_executors):
         try:
diff --git a/test/test_autovw.py b/test/test_autovw.py
index 59f7b9943..d4d1db6cd 100644
--- a/test/test_autovw.py
+++ b/test/test_autovw.py
@@ -18,9 +18,7 @@ NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
 logger = logging.getLogger(__name__)
 
 
-def oml_to_vw_w_grouping(
-    X, y, ds_dir, fname, orginal_dim, group_num, grouping_method="sequential"
-):
+def oml_to_vw_w_grouping(X, y, ds_dir, fname, orginal_dim, group_num, grouping_method="sequential"):
     # split all_indexes into # group_num of groups
     max_size_per_group = int(np.ceil(orginal_dim / float(group_num)))
     # sequential grouping
@@ -49,17 +47,11 @@ def oml_to_vw_w_grouping(
                 for i in range(len(X)):
                     NS_content = []
                     for zz in range(len(group_indexes)):
-                        ns_features = " ".join(
-                            "{}:{:.6f}".format(ind, X[i][ind])
-                            for ind in group_indexes[zz]
-                        )
+                        ns_features = " ".join("{}:{:.6f}".format(ind, X[i][ind]) for ind in group_indexes[zz])
                         NS_content.append(ns_features)
                     ns_line = "{} |{}".format(
                         str(y[i]),
-                        "|".join(
-                            "{} {}".format(NS_LIST[j], NS_content[j])
-                            for j in range(len(group_indexes))
-                        ),
+                        "|".join("{} {}".format(NS_LIST[j], NS_content[j]) for j in range(len(group_indexes))),
                     )
                     f.write(ns_line)
                     f.write("\n")
@@ -140,10 +132,7 @@ def load_vw_dataset(did, ds_dir, is_regression, max_ns_num):
         fname = "ds_{}_{}_{}.vw".format(did, max_ns_num, 0)
         vw_dataset_file = os.path.join(ds_dir, fname)
         # if file does not exist, generate and save the datasets
-        if (
-            not os.path.exists(vw_dataset_file)
-            or os.stat(vw_dataset_file).st_size < 1000
-        ):
+        if not os.path.exists(vw_dataset_file) or os.stat(vw_dataset_file).st_size < 1000:
             get_oml_to_vw(did, max_ns_num)
         print(ds_dir, vw_dataset_file)
         if not os.path.exists(ds_dir):
@@ -175,9 +164,7 @@ def get_data(
     # Y = data.Y
     if vw_format:
         # vw_examples = data.vw_examples
-        vw_examples = load_vw_dataset(
-            did=data_id, ds_dir=VW_DS_DIR, is_regression=True, max_ns_num=max_ns_num
-        )
+        vw_examples = load_vw_dataset(did=data_id, ds_dir=VW_DS_DIR, is_regression=True, max_ns_num=max_ns_num)
         Y = []
         for i, e in enumerate(vw_examples):
             Y.append(float(e.split("|")[0]))
@@ -230,9 +217,7 @@ class VowpalWabbitNamesspaceTuningProblem:
         }
         self._problem_info.update(kwargs)
         self._fixed_hp_config = kwargs.get("fixed_hp_config", {})
-        self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(
-            self.vw_examples[0]
-        )
+        self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(self.vw_examples[0])
         self._raw_namespaces = list(self.namespace_feature_dim.keys())
         self._setup_search()
 
@@ -355,13 +340,9 @@ def get_vw_tuning_problem(tuning_hp="NamesapceInteraction"):
         "fixed_hp_config": online_vw_exp_setting["fixed_hp_config"],
     }
     if tuning_hp == "NamesapceInteraction":
-        vw_online_aml_problem = VowpalWabbitNamesspaceTuningProblem(
-            **vw_oml_problem_args
-        )
+        vw_online_aml_problem = VowpalWabbitNamesspaceTuningProblem(**vw_oml_problem_args)
     elif tuning_hp == "NamesapceInteraction+LearningRate":
-        vw_online_aml_problem = VowpalWabbitNamesspaceLRTuningProblem(
-            **vw_oml_problem_args
-        )
+        vw_online_aml_problem = VowpalWabbitNamesspaceLRTuningProblem(**vw_oml_problem_args)
     else:
         NotImplementedError
 
@@ -382,13 +363,9 @@ class TestAutoVW(unittest.TestCase):
             vw_online_aml_problem.max_iter_num,
             vw_online_aml_problem.vw_examples,
             vanilla_vw,
-            loss_func=vw_oml_problem_args["fixed_hp_config"].get(
-                "loss_function", "squared"
-            ),
-        )
-        print(
-            "final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list)
+            loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
         )
+        print("final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list))
 
     def test_supervised_vw_tune_namespace(self):
         # basic experiment setting
@@ -405,13 +382,9 @@ class TestAutoVW(unittest.TestCase):
             vw_online_aml_problem.max_iter_num,
             vw_online_aml_problem.vw_examples,
             autovw,
-            loss_func=vw_oml_problem_args["fixed_hp_config"].get(
-                "loss_function", "squared"
-            ),
-        )
-        print(
-            "final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list)
+            loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
         )
+        print("final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list))
 
     def test_supervised_vw_tune_namespace_learningrate(self):
         # basic experiment setting
@@ -430,13 +403,9 @@ class TestAutoVW(unittest.TestCase):
             vw_online_aml_problem.max_iter_num,
             vw_online_aml_problem.vw_examples,
             autovw,
-            loss_func=vw_oml_problem_args["fixed_hp_config"].get(
-                "loss_function", "squared"
-            ),
-        )
-        print(
-            "final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list)
+            loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
         )
+        print("final average loss:", sum(cumulative_loss_list) / len(cumulative_loss_list))
 
     def test_bandit_vw_tune_namespace(self):
         pass
diff --git a/test/test_gpu.py b/test/test_gpu.py
index 22a6aea65..2db05d85d 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -23,9 +23,7 @@ def test_xgboost():
             gpu_per_trial=1,
         )
 
-        train, label = make_moons(
-            n_samples=300000, shuffle=True, noise=0.3, random_state=None
-        )
+        train, label = make_moons(n_samples=300000, shuffle=True, noise=0.3, random_state=None)
         automl = AutoML()
         automl.fit(
             train,
@@ -89,18 +87,10 @@ def _test_hf_data():
         }
     }
 
-    automl.fit(
-        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
-    )
+    automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
 
     automl = AutoML()
-    automl.retrain_from_log(
-        X_train=X_train,
-        y_train=y_train,
-        train_full=True,
-        record_id=0,
-        **automl_settings
-    )
+    automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)
     with open("automl.pkl", "wb") as f:
         pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
     with open("automl.pkl", "rb") as f:
diff --git a/test/tune/test_flaml_raytune_consistency.py b/test/tune/test_flaml_raytune_consistency.py
index 736a64d9b..e8ad93d76 100644
--- a/test/tune/test_flaml_raytune_consistency.py
+++ b/test/tune/test_flaml_raytune_consistency.py
@@ -31,9 +31,7 @@ def setup_searcher(searcher_name):
     from flaml.tune.searcher.blendsearch import BlendSearch, CFO, RandomSearch
 
     if "cfo" in searcher_name:
-        searcher = CFO(
-            space=config_search_space, low_cost_partial_config=low_cost_partial_config
-        )
+        searcher = CFO(space=config_search_space, low_cost_partial_config=low_cost_partial_config)
     elif searcher_name == "bs":
         searcher = BlendSearch(
             metric="metric",
@@ -48,9 +46,7 @@ def setup_searcher(searcher_name):
     return searcher
 
 
-def _test_flaml_raytune_consistency(
-    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
-):
+def _test_flaml_raytune_consistency(num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"):
     try:
         from ray import tune as raytune, __version__ as ray_version
 
@@ -59,9 +55,7 @@ def _test_flaml_raytune_consistency(
         else:
             from ray.tune.search import ConcurrencyLimiter
     except ImportError:
-        print(
-            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
-        )
+        print("skip _test_flaml_raytune_consistency because ray tune cannot be imported.")
         return
     searcher = setup_searcher(searcher_name)
     analysis = tune.run(
@@ -110,21 +104,13 @@ def _test_flaml_raytune_consistency(
     print("flaml config in results", searcher_name, flaml_config_in_results)
     print("ray config in results", searcher_name, ray_config_in_results)
     assert ray_best_config == flaml_best_config, "best config should be the same"
-    assert (
-        flaml_config_in_results == ray_config_in_results
-    ), "results from raytune and flaml should be the same"
+    assert flaml_config_in_results == ray_config_in_results, "results from raytune and flaml should be the same"
 
 
 def test_consistency():
-    _test_flaml_raytune_consistency(
-        num_samples=5, max_concurrent_trials=1, searcher_name="random"
-    )
-    _test_flaml_raytune_consistency(
-        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
-    )
-    _test_flaml_raytune_consistency(
-        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
-    )
+    _test_flaml_raytune_consistency(num_samples=5, max_concurrent_trials=1, searcher_name="random")
+    _test_flaml_raytune_consistency(num_samples=5, max_concurrent_trials=1, searcher_name="cfo")
+    _test_flaml_raytune_consistency(num_samples=5, max_concurrent_trials=1, searcher_name="bs")
 
 
 if __name__ == "__main__":
diff --git a/test/tune/test_lexiflow.py b/test/tune/test_lexiflow.py
index cad772af7..2d0274634 100644
--- a/test/tune/test_lexiflow.py
+++ b/test/tune/test_lexiflow.py
@@ -89,9 +89,7 @@ def test_lexiflow():
                 correct += pred.eq(target.view_as(pred)).sum().item()
 
         accuracy = correct / N_VALID_EXAMPLES
-        flops, params = thop.profile(
-            model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
-        )
+        flops, params = thop.profile(model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False)
         return np.log2(flops), 1 - accuracy, params
 
     def evaluate_function(configuration):
@@ -198,9 +196,7 @@ def test_lexiflow_performance():
     print(analysis.best_config)
     print(analysis.best_result)
 
-    assert (
-        analysis.best_result["currin"] <= 2.2
-    ), "the value of currin function should be less than 2.2"
+    assert analysis.best_result["currin"] <= 2.2, "the value of currin function should be less than 2.2"
 
 
 if __name__ == "__main__":
diff --git a/test/tune/test_pytorch_cifar10.py b/test/tune/test_pytorch_cifar10.py
index 188d9750f..b43db7253 100644
--- a/test/tune/test_pytorch_cifar10.py
+++ b/test/tune/test_pytorch_cifar10.py
@@ -48,17 +48,11 @@ except ImportError:
 
 # __load_data_begin__
 def load_data(data_dir="test/data"):
-    transform = transforms.Compose(
-        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
-    )
+    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
 
-    trainset = torchvision.datasets.CIFAR10(
-        root=data_dir, train=True, download=True, transform=transform
-    )
+    trainset = torchvision.datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
 
-    testset = torchvision.datasets.CIFAR10(
-        root=data_dir, train=False, download=True, transform=transform
-    )
+    testset = torchvision.datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transform)
 
     return trainset, testset
 
@@ -93,9 +87,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
     trainset, testset = load_data(data_dir)
 
     test_abs = int(len(trainset) * 0.8)
-    train_subset, val_subset = random_split(
-        trainset, [test_abs, len(trainset) - test_abs]
-    )
+    train_subset, val_subset = random_split(trainset, [test_abs, len(trainset) - test_abs])
 
     trainloader = torch.utils.data.DataLoader(
         train_subset,
@@ -112,9 +104,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
 
     from ray import tune
 
-    for epoch in range(
-        int(round(config["num_epochs"]))
-    ):  # loop over the dataset multiple times
+    for epoch in range(int(round(config["num_epochs"]))):  # loop over the dataset multiple times
         running_loss = 0.0
         epoch_steps = 0
         for i, data in enumerate(trainloader, 0):
@@ -135,10 +125,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
             running_loss += loss.item()
             epoch_steps += 1
             if i % 2000 == 1999:  # print every 2000 mini-batches
-                print(
-                    "[%d, %5d] loss: %.3f"
-                    % (epoch + 1, i + 1, running_loss / epoch_steps)
-                )
+                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / epoch_steps))
                 running_loss = 0.0
 
         # Validation loss
@@ -178,9 +165,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None):
 def _test_accuracy(net, device="cpu"):
     trainset, testset = load_data()
 
-    testloader = torch.utils.data.DataLoader(
-        testset, batch_size=4, shuffle=False, num_workers=2
-    )
+    testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)
 
     correct = 0
     total = 0
@@ -200,9 +185,7 @@ def _test_accuracy(net, device="cpu"):
 
 
 # __main_begin__
-def cifar10_main(
-    method="BlendSearch", num_samples=10, max_num_epochs=100, gpus_per_trial=1
-):
+def cifar10_main(method="BlendSearch", num_samples=10, max_num_epochs=100, gpus_per_trial=1):
     data_dir = os.path.abspath("test/data")
     load_data(data_dir)  # Download data for all trials before starting the run
     if method == "BlendSearch":
@@ -294,16 +277,8 @@ def cifar10_main(
     logger.info(f"time={time.time()-start_time}")
     best_trial = result.get_best_trial("loss", "min", "all")
     logger.info("Best trial config: {}".format(best_trial.config))
-    logger.info(
-        "Best trial final validation loss: {}".format(
-            best_trial.metric_analysis["loss"]["min"]
-        )
-    )
-    logger.info(
-        "Best trial final validation accuracy: {}".format(
-            best_trial.metric_analysis["accuracy"]["max"]
-        )
-    )
+    logger.info("Best trial final validation loss: {}".format(best_trial.metric_analysis["loss"]["min"]))
+    logger.info("Best trial final validation accuracy: {}".format(best_trial.metric_analysis["accuracy"]["max"]))
 
     best_trained_model = Net(2 ** best_trial.config["l1"], 2 ** best_trial.config["l2"])
     device = "cpu"
@@ -313,10 +288,7 @@ def cifar10_main(
             best_trained_model = nn.DataParallel(best_trained_model)
     best_trained_model.to(device)
 
-    checkpoint_value = (
-        getattr(best_trial.checkpoint, "dir_or_data", None)
-        or best_trial.checkpoint.value
-    )
+    checkpoint_value = getattr(best_trial.checkpoint, "dir_or_data", None) or best_trial.checkpoint.value
     checkpoint_path = os.path.join(checkpoint_value, "checkpoint")
 
     model_state, optimizer_state = torch.load(checkpoint_path)
diff --git a/test/tune/test_reproducibility.py b/test/tune/test_reproducibility.py
index 8b3c867a5..cfa4a1c85 100644
--- a/test/tune/test_reproducibility.py
+++ b/test/tune/test_reproducibility.py
@@ -41,9 +41,7 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
             metric="mean_loss",
             mode="min",
         )
-        assert (
-            searcher.cost_attr == "time_total_s"
-        ), "when time_budget_s is provided, cost_attr should be time_total_s"
+        assert searcher.cost_attr == "time_total_s", "when time_budget_s is provided, cost_attr should be time_total_s"
 
         searcher = BlendSearch(
             space=search_space,
@@ -51,9 +49,7 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
             metric="mean_loss",
             mode="min",
         )
-        assert (
-            searcher.cost_attr is None
-        ), "when time_budget_s is not provided, cost_attr should be None."
+        assert searcher.cost_attr is None, "when time_budget_s is not provided, cost_attr should be None."
 
         searcher = BlendSearch(
             space=search_space,
@@ -116,9 +112,7 @@ def test_reproducibility():
     best_config_2 = test_tune(externally_setup_searcher=True)
     print(best_config_1)
     print(best_config_2)
-    assert (
-        best_config_1 == best_config_2
-    ), "flaml.tune not reproducible when the searcher is set up externally"
+    assert best_config_1 == best_config_2, "flaml.tune not reproducible when the searcher is set up externally"
 
 
 def test_gs_reproducibility():
diff --git a/test/tune/test_restore.py b/test/tune/test_restore.py
index ef3c95425..745d9984d 100644
--- a/test/tune/test_restore.py
+++ b/test/tune/test_restore.py
@@ -25,9 +25,7 @@ class AbstractWarmStartTest:
         np.random.seed(162)
         search_alg, cost = self.set_basic_conf()
         search_alg = ConcurrencyLimiter(search_alg, 1)
-        results_exp_1 = tune.run(
-            cost, num_samples=5, search_alg=search_alg, verbose=0, local_dir=self.tmpdir
-        )
+        results_exp_1 = tune.run(cost, num_samples=5, search_alg=search_alg, verbose=0, local_dir=self.tmpdir)
         checkpoint_path = os.path.join(self.tmpdir, self.experiment_name)
         search_alg.save(checkpoint_path)
         return results_exp_1, np.random.get_state(), checkpoint_path
diff --git a/test/tune/test_scheduler.py b/test/tune/test_scheduler.py
index e12f0f6bb..5960a3f0d 100644
--- a/test/tune/test_scheduler.py
+++ b/test/tune/test_scheduler.py
@@ -120,9 +120,7 @@ def test_asha_scheduler(use_ray=False, time_budget_s=1):
     except ImportError:
         print("skip the test as ray tune cannot be imported.")
         return
-    best_config = test_scheduler(
-        scheduler="asha", use_ray=use_ray, time_budget_s=time_budget_s
-    )
+    best_config = test_scheduler(scheduler="asha", use_ray=use_ray, time_budget_s=time_budget_s)
     print("Auto ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
 
 
@@ -132,9 +130,7 @@ def test_custom_scheduler():
     except ImportError:
         print("skip the test as ray tune cannot be imported.")
         return
-    my_scheduler = HyperBandScheduler(
-        time_attr="samplesize", max_t=1000, reduction_factor=2
-    )
+    my_scheduler = HyperBandScheduler(time_attr="samplesize", max_t=1000, reduction_factor=2)
     best_config = test_scheduler(scheduler=my_scheduler)
     print("Custom ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
 
diff --git a/test/tune/test_searcher.py b/test/tune/test_searcher.py
index cb8c9e06a..f4bfc11be 100644
--- a/test/tune/test_searcher.py
+++ b/test/tune/test_searcher.py
@@ -91,9 +91,7 @@ def test_searchers():
         # 'set' object has no attribute 'keys'
         pass
     try:
-        searcher.add_evaluated_point(
-            {"a": 1, "b": 0.01}, None, intermediate_values=[0.1]
-        )
+        searcher.add_evaluated_point({"a": 1, "b": 0.01}, None, intermediate_values=[0.1])
     except ValueError:
         # `value` is supposed to be set for a complete trial.
         pass
@@ -113,16 +111,12 @@ def test_searchers():
         # Dim of point {'a': 1} and parameter_names {'a': UniformDistribution(high=8.0, low=6.0), 'b': LogUniformDistribution(high=0.01, low=0.0001)} do not match.
         pass
     try:
-        searcher = OptunaSearch(
-            config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=1
-        )
+        searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=1)
     except TypeError:
         # valuated_rewards expected to be a list, got <class 'int'>.
         pass
     try:
-        searcher = OptunaSearch(
-            config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=[1, 2]
-        )
+        searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=[1, 2])
     except ValueError:
         # Dim of evaluated_rewards [1, 2] and points_to_evaluate [{'a': 1, 'b': 0.01}] do not match.
         pass
@@ -197,9 +191,7 @@ def test_searchers():
     searcher.save("test/tune/optuna.pkl")
     searcher.restore("test/tune/optuna.pkl")
     try:
-        searcher = BlendSearch(
-            metric="m", global_search_alg=searcher, metric_constraints=[("c", "<", 1)]
-        )
+        searcher = BlendSearch(metric="m", global_search_alg=searcher, metric_constraints=[("c", "<", 1)])
     except AssertionError:
         # sign of metric constraints must be <= or >=.
         pass
@@ -303,12 +295,8 @@ def test_searchers():
     from flaml import tune
 
     tune.run(lambda x: 1, config={}, use_ray=use_ray, log_file_name="logs/searcher.log")
-    searcher = BlendSearch(
-        space=config, cost_attr="cost", cost_budget=10, metric="m", mode="min"
-    )
-    analysis = tune.run(
-        lambda x: {"cost": 2, "m": x["b"]}, search_alg=searcher, num_samples=10
-    )
+    searcher = BlendSearch(space=config, cost_attr="cost", cost_budget=10, metric="m", mode="min")
+    analysis = tune.run(lambda x: {"cost": 2, "m": x["b"]}, search_alg=searcher, num_samples=10)
     assert len(analysis.trials) == 5
 
 
diff --git a/test/tune/test_tune.py b/test/tune/test_tune.py
index 371b664d6..8ada5519a 100644
--- a/test/tune/test_tune.py
+++ b/test/tune/test_tune.py
@@ -85,9 +85,7 @@ def _test_xgboost(method="BlendSearch"):
     else:
         from ray import tune
     search_space = {
-        "max_depth": tune.randint(1, 9)
-        if method in ["BlendSearch", "BOHB", "Optuna"]
-        else tune.randint(1, 9),
+        "max_depth": tune.randint(1, 9) if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
         "min_child_weight": tune.choice([1, 2, 3]),
         "subsample": tune.uniform(0.5, 1.0),
         "eta": tune.loguniform(1e-4, 1e-1),
@@ -226,9 +224,7 @@ def test_nested_space():
     }
 
     def simple_func(config):
-        obj = (config["cost_related"]["a"] - 4) ** 2 + (
-            config["b"] - config["cost_related"]["a"]
-        ) ** 2
+        obj = (config["cost_related"]["a"] - 4) ** 2 + (config["b"] - config["cost_related"]["a"]) ** 2
         tune.report(obj=obj)
         tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])
 
@@ -291,8 +287,7 @@ def test_nested_space():
         low_cost_partial_config={"cost_related": {"a": 1}},
         points_to_evaluate=points_to_evaluate,
         evaluated_rewards=[
-            (config["cost_related"]["a"] - 4) ** 2
-            + (config["b"] - config["cost_related"]["a"]) ** 2
+            (config["cost_related"]["a"] - 4) ** 2 + (config["b"] - config["cost_related"]["a"]) ** 2
             for config in points_to_evaluate[:-1]
         ],
         metric="obj",
diff --git a/test/tune_example.py b/test/tune_example.py
index 0dc770790..e8afb4f02 100644
--- a/test/tune_example.py
+++ b/test/tune_example.py
@@ -7,9 +7,7 @@ from sklearn.metrics import mean_squared_error
 
 data = fetch_california_housing(return_X_y=False, as_frame=True)
 df, X, y = data.frame, data.data, data.target
-df_train, _, X_train, X_test, _, y_test = train_test_split(
-    df, X, y, test_size=0.33, random_state=42
-)
+df_train, _, X_train, X_test, _, y_test = train_test_split(df, X, y, test_size=0.33, random_state=42)
 csv_file_name = "test/housing.csv"
 df_train.to_csv(csv_file_name, index=False)
 # X, y = fetch_california_housing(return_X_y=True, as_frame=True)
@@ -24,9 +22,7 @@ def train_lgbm(config: dict) -> dict:
     # train the model
     # train_set = lightgbm.Dataset(X_train, y_train)
     # LightGBM only accepts the csv with valid number format, if even these string columns are set to ignore.
-    train_set = lightgbm.Dataset(
-        csv_file_name, params={"label_column": "name:MedHouseVal", "header": True}
-    )
+    train_set = lightgbm.Dataset(csv_file_name, params={"label_column": "name:MedHouseVal", "header": True})
     model = lightgbm.train(params, train_set)
     # evaluate the model
     pred = model.predict(X_test)
@@ -39,9 +35,7 @@ def test_tune_lgbm_csv():
     # load a built-in search space from flaml
     flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
     # specify the search space as a dict from hp name to domain; you can define your own search space same way
-    config_search_space = {
-        hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
-    }
+    config_search_space = {hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()}
     # give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
     low_cost_partial_config = {
         hp: space["low_cost_init_value"]
@@ -50,11 +44,7 @@ def test_tune_lgbm_csv():
     }
     # initial points to evaluate
     points_to_evaluate = [
-        {
-            hp: space["init_value"]
-            for hp, space in flaml_lgbm_search_space.items()
-            if "init_value" in space
-        }
+        {hp: space["init_value"] for hp, space in flaml_lgbm_search_space.items() if "init_value" in space}
     ]
     # run the tuning, minimizing mse, with total time budget 3 seconds
     analysis = tune.run(