From a1c49ca27be8439e69904bec222eed65705a5f0d Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Sat, 23 Apr 2022 16:22:34 -0700
Subject: [PATCH] allow evaluated_rewards shorter than points_to_evaluate (#522)

* allow evaluated_rewards shorter than points_to_evaluate

* docstr update
---
 flaml/searcher/blendsearch.py | 49 ++++++++++++++++++++++++++++++-----
 flaml/tune/tune.py            |  9 +++----
 test/tune/test_tune.py        | 32 ++++++++++++-----------
 3 files changed, 63 insertions(+), 27 deletions(-)

diff --git a/flaml/searcher/blendsearch.py b/flaml/searcher/blendsearch.py
index da1913c5b..f2cedfd15 100644
--- a/flaml/searcher/blendsearch.py
+++ b/flaml/searcher/blendsearch.py
@@ -79,8 +79,8 @@ class BlendSearch(Searcher):
             parameters passed in as points_to_evaluate you can avoid
             re-running those trials by passing in the reward attributes
             as a list so the optimiser can be told the results without
-            needing to re-compute the trial. Must be the same length as
-            points_to_evaluate.
+            needing to re-compute the trial. Must be the same length as or
+            shorter than points_to_evaluate. When provided, `mode` must be specified.
         time_budget_s: int or float | Time budget in seconds.
         num_samples: int | The number of configs to try.
         resource_attr: A string to specify the resource dimension and the best
@@ -115,9 +115,14 @@ class BlendSearch(Searcher):
                 "'low_cost_partial_config'. More info can be found at "
                 "https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune"
             )
-        if evaluated_rewards and mode:
+        if evaluated_rewards:
+            assert mode, "mode must be specified when evaluated_rewards is provided."
             self._points_to_evaluate = []
             self._evaluated_rewards = []
+            n = len(evaluated_rewards)
+            self._evaluated_points = points_to_evaluate[:n]
+            new_points_to_evaluate = points_to_evaluate[n:]
+            self._all_rewards = evaluated_rewards
             best = max(evaluated_rewards) if mode == "max" else min(evaluated_rewards)
             # only keep the best points as start points
             for i, r in enumerate(evaluated_rewards):
@@ -125,6 +130,7 @@
                     p = points_to_evaluate[i]
                     self._points_to_evaluate.append(p)
                     self._evaluated_rewards.append(r)
+            self._points_to_evaluate.extend(new_points_to_evaluate)
         else:
             self._points_to_evaluate = points_to_evaluate or []
             self._evaluated_rewards = evaluated_rewards or []
@@ -178,7 +184,7 @@
                     mode=mode,
                     seed=gs_seed,
                     sampler=sampler,
-                    points_to_evaluate=points_to_evaluate,
+                    points_to_evaluate=self._evaluated_points,
                     evaluated_rewards=evaluated_rewards,
                 )
             except (AssertionError, ValueError):
@@ -305,7 +311,26 @@
         )
         self._gs_admissible_min = self._ls_bound_min.copy()
         self._gs_admissible_max = self._ls_bound_max.copy()
-        self._result = {}  # config_signature: tuple -> result: Dict
+        # config_signature: tuple -> result: Dict
+        self._result = (
+            {
+                self._ls.config_signature(
+                    *self._ls.complete_config(
+                        self._evaluated_points[i],
+                        self._ls_bound_min,
+                        self._ls_bound_max,
+                    )
+                ): {
+                    self._metric: r,
+                    self.cost_attr: 1,
+                    "config": self._evaluated_points[i],
+                }
+                for i, r in enumerate(self._all_rewards)
+            }
+            if self._evaluated_rewards  # store all the evaluated rewards
+            else {}
+        )
+
         if self._metric_constraints:
             self._metric_constraint_satisfied = False
             self._metric_constraint_penalty = [
@@ -708,8 +733,8 @@
             config, space = self._ls.complete_config(
                 init_config, self._ls_bound_min, self._ls_bound_max
             )
+            config_signature = self._ls.config_signature(config, space)
             if reward is None:
-                config_signature = self._ls.config_signature(config, space)
                 result = self._result.get(config_signature)
                 if result:  # tried before
                     return None
@@ -722,7 +747,8 @@
                 self._search_thread_pool[0].running += 1
             self._subspace[trial_id] = space
         if reward is not None:
-            result = {self._metric: reward, self.cost_attr: 1, "config": config}
+            # result = {self._metric: reward, self.cost_attr: 1, "config": config}
+            result = self._result[config_signature]
             self.on_trial_complete(trial_id, result)
             return None
         if self._use_incumbent_result_in_evaluation:
@@ -852,6 +878,15 @@
                     return False
         return True
 
+    @property
+    def results(self) -> List[Dict]:
+        """A list of dicts of results for each evaluated configuration.
+
+        Each dict has "config" and metric names as keys.
+        The returned list includes the initial results provided via `evaluated_rewards`.
+        """
+        return [x for x in getattr(self, "_result", {}).values() if x]
+
 
 try:
     from ray import __version__ as ray_version
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index 8044f5f51..f281cb81a 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -201,7 +201,7 @@ def run(
             parameters passed in as points_to_evaluate you can avoid
             re-running those trials by passing in the reward attributes
             as a list so the optimiser can be told the results without
-            needing to re-compute the trial. Must be the same length as
+            needing to re-compute the trial. Must be the same length as or shorter than
             points_to_evaluate.
             e.g.,
 
@@ -210,12 +210,11 @@
         {"b": .99, "cost_related": {"a": 3}},
         {"b": .99, "cost_related": {"a": 2}},
     ]
-    evaluated_rewards=[3.0, 1.0]
+    evaluated_rewards = [3.0]
     ```
 
-            means that you know the reward for the two configs in
-            points_to_evaluate are 3.0 and 1.0 respectively and want to
-            inform run().
+            means that you know the reward for the first config in
+            points_to_evaluate is 3.0 and want to inform run().
 
         resource_attr: A string to specify the resource dimension used by
             the scheduler via "scheduler".
diff --git a/test/tune/test_tune.py b/test/tune/test_tune.py
index f816f6adb..9aa14bedf 100644
--- a/test/tune/test_tune.py
+++ b/test/tune/test_tune.py
@@ -223,26 +223,27 @@ def test_nested():
     logger.info(f"CFO best config: {best_trial.config}")
     logger.info(f"CFO best result: {best_trial.last_result}")
 
+    bs = BlendSearch(
+        experimental=True,
+        space=search_space,
+        metric="obj",
+        mode="min",
+        low_cost_partial_config={"cost_related": {"a": 1}},
+        points_to_evaluate=[
+            {"b": 0.99, "cost_related": {"a": 3}},
+            {"b": 0.99, "cost_related": {"a": 2}},
+            {"cost_related": {"a": 8}},
+        ],
+        metric_constraints=[("ab", "<=", 4)],
+    )
     analysis = tune.run(
         simple_func,
-        search_alg=BlendSearch(
-            experimental=True,
-            space=search_space,
-            metric="obj",
-            mode="min",
-            low_cost_partial_config={"cost_related": {"a": 1}},
-            points_to_evaluate=[
-                {"b": 0.99, "cost_related": {"a": 3}},
-                {"b": 0.99, "cost_related": {"a": 2}},
-                {"cost_related": {"a": 8}},
-            ],
-            metric_constraints=[("ab", "<=", 4)],
-        ),
+        search_alg=bs,
         local_dir="logs/",
         num_samples=-1,
         time_budget_s=1,
     )
-
+    print(bs.results)
     best_trial = analysis.get_best_trial()
     logger.info(f"BlendSearch exp best config: {best_trial.config}")
     logger.info(f"BlendSearch exp best result: {best_trial.last_result}")
@@ -250,6 +251,7 @@
     points_to_evaluate = [
         {"b": 0.99, "cost_related": {"a": 3}},
         {"b": 0.99, "cost_related": {"a": 2}},
+        {"cost_related": {"a": 8}},
     ]
     analysis = tune.run(
         simple_func,
@@ -259,7 +261,7 @@
         evaluated_rewards=[
             (config["cost_related"]["a"] - 4) ** 2
             + (config["b"] - config["cost_related"]["a"]) ** 2
-            for config in points_to_evaluate
+            for config in points_to_evaluate[:-1]
         ],
         metric="obj",
         mode="min",
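Below the patch, a minimal usage sketch of the new behavior; it is not part of the commit, and the objective function, the `a`/`b` search space, and the point values are illustrative only. It passes an `evaluated_rewards` list that is shorter than `points_to_evaluate`, specifies `mode` as now required when rewards are supplied, and reads the new `results` property, which includes the pre-supplied results alongside the trials that actually ran.

```python
from flaml import tune
from flaml import BlendSearch


def objective(config):
    # Toy objective; any evaluation function returning a dict of metrics works.
    return {"obj": (config["a"] - 4) ** 2 + (config["b"] - config["a"]) ** 2}


points_to_evaluate = [
    {"a": 3, "b": 0.99},
    {"a": 2, "b": 0.99},
    {"a": 8, "b": 0.99},  # no known reward for this point; it gets evaluated normally
]

bs = BlendSearch(
    space={"a": tune.randint(1, 9), "b": tune.uniform(0.5, 1.0)},
    metric="obj",
    mode="min",  # required whenever evaluated_rewards is provided
    points_to_evaluate=points_to_evaluate,
    # Rewards for the first two points only; the list may now be shorter
    # than points_to_evaluate.
    evaluated_rewards=[5.0401, 5.0201],
)
analysis = tune.run(objective, search_alg=bs, num_samples=-1, time_budget_s=1)
print(bs.results)  # the two pre-supplied results plus the completed trials
```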