diff --git a/flaml/automl.py b/flaml/automl.py index b6f1c434c..be0fb18ca 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -845,7 +845,7 @@ class AutoML: if eval_method == 'auto' or self._state.X_val is not None: eval_method = self._decide_eval_method(time_budget) self._state.eval_method = eval_method - if not mlflow or not mlflow.active_run() and not logger.handler: + if (not mlflow or not mlflow.active_run()) and not logger.handlers: # Add the console handler. _ch = logging.StreamHandler() _ch.setFormatter(logger_formatter) @@ -1074,7 +1074,7 @@ class AutoML: search_state.best_config, estimator, search_state.sample_size) - if mlflow is not None: + if mlflow is not None and mlflow.active_run(): with mlflow.start_run(nested=True) as run: mlflow.log_metric('iter_counter', self._iter_per_learner[estimator]) diff --git a/flaml/searcher/blendsearch.py b/flaml/searcher/blendsearch.py index 62701e435..278a53ce9 100644 --- a/flaml/searcher/blendsearch.py +++ b/flaml/searcher/blendsearch.py @@ -25,6 +25,8 @@ class BlendSearch(Searcher): '''class for BlendSearch algorithm ''' + cost_attr = "time_total_s" # cost attribute in result + def __init__(self, metric: Optional[str] = None, mode: Optional[str] = None, @@ -193,7 +195,7 @@ class BlendSearch(Searcher): self._search_thread_pool[self._thread_count] = SearchThread( self._ls.mode, self._ls.create(config, result[self._metric], cost=result[ - "time_total_s"]) + self.cost_attr]) ) thread_id = self._thread_count self._thread_count += 1 @@ -393,7 +395,89 @@ class BlendSearch(Searcher): return True -class CFO(BlendSearch): +try: + from nni.tuner import Tuner as NNITuner + from nni.utils import extract_scalar_reward + try: + from ray.tune import (uniform, quniform, choice, randint, qrandint, randn, + qrandn, loguniform, qloguniform) + except: + from .sample import (uniform, quniform, choice, randint, qrandint, randn, + qrandn, loguniform, qloguniform) + + class BlendSearchTuner(BlendSearch, NNITuner): + '''Tuner class for NNI + ''' + + def receive_trial_result(self, parameter_id, parameters, value, + **kwargs): + ''' + Receive trial's final result. + parameter_id: int + parameters: object created by 'generate_parameters()' + value: final metrics of the trial, including default metric + ''' + result = {} + for key, value in parameters: + result['config/'+key] = value + reward = extract_scalar_reward(value) + result[self._metric] = reward + # if nni does not report training cost, + # using sequence as an approximation. + # if no sequence, using a constant 1 + result[self.cost_attr] = value.get(self.cost_attr, value.get( + 'sequence', 1)) + self.on_trial_complete(str(parameter_id), result) + ... + + def generate_parameters(self, parameter_id, **kwargs) -> Dict: + ''' + Returns a set of trial (hyper-)parameters, as a serializable object + parameter_id: int + ''' + return self.suggest(str(parameter_id)) + ... + + def update_search_space(self, search_space): + ''' + Tuners are advised to support updating search space at run-time. + If a tuner can only set search space once before generating first hyper-parameters, + it should explicitly document this behaviour. 
+ search_space: JSON object created by experiment owner + ''' + config = {} + for key, value in search_space: + v = value.get("_value") + _type = value['_type'] + if _type == 'choice': + config[key] = choice(v) + elif _type == 'randint': + config[key] = randint(v[0], v[1]-1) + elif _type == 'uniform': + config[key] = uniform(v[0], v[1]) + elif _type == 'quniform': + config[key] = quniform(v[0], v[1], v[2]) + elif _type == 'loguniform': + config[key] = loguniform(v[0], v[1]) + elif _type == 'qloguniform': + config[key] = qloguniform(v[0], v[1], v[2]) + elif _type == 'normal': + config[key] = randn(v[1], v[2]) + elif _type == 'qnormal': + config[key] = qrandn(v[1], v[2], v[3]) + else: + raise ValueError( + f'unsupported type in search_space {_type}') + self._ls.set_search_properties(None, None, config) + if self._gs is not None: + self._gs.set_search_properties(None, None, config) + self._init_search() + +except: + class BlendSearchTuner(BlendSearch): pass + + +class CFO(BlendSearchTuner): ''' class for CFO algorithm ''' @@ -416,3 +500,5 @@ class CFO(BlendSearch): ''' create thread condition ''' return len(self._search_thread_pool) < 2 + + diff --git a/flaml/searcher/flow2.py b/flaml/searcher/flow2.py index 3784ef957..681956867 100644 --- a/flaml/searcher/flow2.py +++ b/flaml/searcher/flow2.py @@ -9,9 +9,10 @@ try: from ray.tune.suggest import Searcher from ray.tune.suggest.variant_generator import generate_variants from ray.tune import sample + from ray.tune.utils.util import flatten_dict, unflatten_dict except ImportError: from .suggestion import Searcher - from .variant_generator import generate_variants + from .variant_generator import generate_variants, flatten_dict, unflatten_dict from ..tune import sample @@ -86,6 +87,7 @@ class FLOW2(Searcher): elif mode == "min": self.metric_op = 1. self.space = space or {} + self.space = flatten_dict(self.space, prevent_delimiter=True) self._random = np.random.RandomState(seed) self._seed = seed if not init_config: @@ -95,7 +97,8 @@ class FLOW2(Searcher): "consider providing init values for cost-related hps via " "'init_config'." 
) - self.init_config = self.best_config = init_config + self.init_config = init_config + self.best_config = flatten_dict(init_config) self.cat_hp_cost = cat_hp_cost self.prune_attr = prune_attr self.min_resource = min_resource @@ -171,7 +174,7 @@ class FLOW2(Searcher): # logger.info(self._resource) else: self._resource = None self.incumbent = {} - self.incumbent = self.normalize(self.init_config) + self.incumbent = self.normalize(self.best_config) # flattened self.best_obj = self.cost_incumbent = None self.dim = len(self._tunable_keys) # total # tunable dimensions self._direction_tried = None @@ -247,7 +250,7 @@ class FLOW2(Searcher): if key not in self._unordered_cat_hp: if upper and lower: u, l = upper[key], lower[key] - gauss_std = u-l + gauss_std = u-l or self.STEPSIZE # allowed bound u += self.STEPSIZE l -= self.STEPSIZE @@ -261,11 +264,11 @@ class FLOW2(Searcher): normalized[key] = max(l, min(u, normalized[key] + delta)) # use best config for unordered cat choice config = self.denormalize(normalized) - self._reset_times += 1 else: # first time init_config, or other configs, take as is config = partial_config.copy() - + if partial_config == self.init_config: self._reset_times += 1 + config = flatten_dict(config) for key, value in self.space.items(): if key not in config: config[key] = value @@ -277,13 +280,13 @@ class FLOW2(Searcher): if self._resource: config[self.prune_attr] = self.min_resource - return config + return unflatten_dict(config) def create(self, init_config: Dict, obj: float, cost: float) -> Searcher: flow2 = FLOW2(init_config, self.metric, self.mode, self._cat_hp_cost, - self.space, self.prune_attr, self.min_resource, - self.max_resource, self.resource_multiple_factor, - self._seed+1) + unflatten_dict(self.space), self.prune_attr, + self.min_resource, self.max_resource, + self.resource_multiple_factor, self._seed+1) flow2.best_obj = obj * self.metric_op # minimize internally flow2.cost_incumbent = cost return flow2 @@ -292,7 +295,7 @@ class FLOW2(Searcher): ''' normalize each dimension in config to [0,1] ''' config_norm = {} - for key, value in config.items(): + for key, value in flatten_dict(config).items(): if key in self.space: # domain: sample.Categorical/Integer/Float/Function domain = self.space[key] @@ -426,7 +429,7 @@ class FLOW2(Searcher): obj = result.get(self._metric) if obj: obj *= self.metric_op - if obj < self.best_obj: + if self.best_obj is None or obj < self.best_obj: self.best_obj, self.best_config = obj, self._configs[ trial_id] self.incumbent = self.normalize(self.best_config) @@ -437,7 +440,8 @@ class FLOW2(Searcher): self._cost_complete4incumbent = 0 self._num_allowed4incumbent = 2 * self.dim self._proposed_by.clear() - if self._K > 0: + if self._K > 0: + # self._oldK must have been set when self._K>0 self.step *= np.sqrt(self._K/self._oldK) if self.step > self.step_ub: self.step = self.step_ub self._iter_best_config = self.trial_count @@ -474,7 +478,7 @@ class FLOW2(Searcher): obj = result.get(self._metric) if obj: obj *= self.metric_op - if obj < self.best_obj: + if self.best_obj is None or obj < self.best_obj: self.best_obj = obj config = self._configs[trial_id] if self.best_config != config: @@ -533,7 +537,7 @@ class FLOW2(Searcher): config = self.denormalize(move) self._proposed_by[trial_id] = self.incumbent self._configs[trial_id] = config - return config + return unflatten_dict(config) def _project(self, config): ''' project normalized config in the feasible region and set prune_attr @@ -553,6 +557,7 @@ class FLOW2(Searcher): def 
config_signature(self, config) -> tuple: ''' return the signature tuple of a config ''' + config = flatten_dict(config) value_list = [] for key in self._space_keys: if key in config: diff --git a/flaml/searcher/search_thread.py b/flaml/searcher/search_thread.py index ed280ff46..84cb2f9f1 100644 --- a/flaml/searcher/search_thread.py +++ b/flaml/searcher/search_thread.py @@ -20,6 +20,7 @@ class SearchThread: ''' cost_attr = 'time_total_s' + eps = 1e-10 def __init__(self, mode: str = "min", search_alg: Optional[Searcher] = None): @@ -70,7 +71,7 @@ class SearchThread: # calculate speed; use 0 for invalid speed temporarily if self.obj_best2 > self.obj_best1: self.speed = (self.obj_best2 - self.obj_best1) / ( - self.cost_total - self.cost_best2) + self.cost_total - self.cost_best2 + self.eps) else: self.speed = 0 def on_trial_complete(self, trial_id: str, result: Optional[Dict] = None, diff --git a/flaml/searcher/variant_generator.py b/flaml/searcher/variant_generator.py index d1a80e0b2..5427604ee 100644 --- a/flaml/searcher/variant_generator.py +++ b/flaml/searcher/variant_generator.py @@ -28,6 +28,46 @@ from ..tune.sample import Categorical, Domain, Function logger = logging.getLogger(__name__) +def flatten_dict(dt, delimiter="/", prevent_delimiter=False): + dt = copy.deepcopy(dt) + if prevent_delimiter and any(delimiter in key for key in dt): + # Raise if delimiter is any of the keys + raise ValueError( + "Found delimiter `{}` in key when trying to flatten array." + "Please avoid using the delimiter in your specification.") + while any(isinstance(v, dict) for v in dt.values()): + remove = [] + add = {} + for key, value in dt.items(): + if isinstance(value, dict): + for subkey, v in value.items(): + if prevent_delimiter and delimiter in subkey: + # Raise if delimiter is in any of the subkeys + raise ValueError( + "Found delimiter `{}` in key when trying to " + "flatten array. Please avoid using the delimiter " + "in your specification.") + add[delimiter.join([key, str(subkey)])] = v + remove.append(key) + dt.update(add) + for k in remove: + del dt[k] + return dt + + +def unflatten_dict(dt, delimiter="/"): + """Unflatten dict. 
Does not support unflattening lists.""" + dict_type = type(dt) + out = dict_type() + for key, val in dt.items(): + path = key.split(delimiter) + item = out + for k in path[:-1]: + item = item.setdefault(k, dict_type()) + item[path[-1]] = val + return out + + class TuneError(Exception): """General error class raised by ray.tune.""" pass diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index 7ce35e85d..ad8bebf0a 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -17,6 +17,8 @@ logger = logging.getLogger(__name__) _use_ray = True _runner = None _verbose = 0 +_running_trial = None +_training_iteration = 0 class ExperimentAnalysis(EA): @@ -68,6 +70,8 @@ def report(_metric=None, **kwargs): ''' global _use_ray global _verbose + global _running_trial + global _training_iteration if _use_ray: from ray import tune return tune.report(_metric, **kwargs) @@ -77,6 +81,12 @@ def report(_metric=None, **kwargs): logger.info(f"result: {kwargs}") if _metric: result['_default_anonymous_metric'] = _metric trial = _runner.running_trial + if _running_trial == trial: + _training_iteration += 1 + else: + _training_iteration = 0 + _running_trial = trial + result["training_iteration"] = _training_iteration result['config'] = trial.config for key, value in trial.config.items(): result['config/'+key] = value @@ -213,7 +223,7 @@ def run(training_function, import os os.makedirs(local_dir, exist_ok=True) logger.addHandler(logging.FileHandler(local_dir+'/tune_'+str( - datetime.datetime.now())+'.log')) + datetime.datetime.now()).replace(':', '-')+'.log')) if verbose<=2: logger.setLevel(logging.INFO) else: diff --git a/flaml/version.py b/flaml/version.py index fe404ae57..01ef12070 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "0.2.5" +__version__ = "0.2.6" diff --git a/notebook/flaml_finetune_transformer.ipynb b/notebook/flaml_finetune_transformer.ipynb index 9bea4ef61..3e3943053 100644 --- a/notebook/flaml_finetune_transformer.ipynb +++ b/notebook/flaml_finetune_transformer.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 102, "metadata": {}, "outputs": [], "source": [ @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 103, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 105, "metadata": {}, "outputs": [], "source": [ @@ -54,18 +54,18 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 106, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'input_ids': [101, 2023, 2003, 1037, 3231, 102], 'attention_mask': [1, 1, 1, 1, 1, 1]}" ] }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "execution_count": 106 } ], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ @@ -99,16 +99,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 109, "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ - "Reusing dataset glue 
(/home/amin/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n", - "/home/amin/miniconda/lib/python3.7/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at /pytorch/c10/cuda/CUDAFunctions.cpp:100.)\n", - " return torch._C._cuda_getDeviceCount() > 0\n" + "Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n" ] } ], @@ -118,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 110, "metadata": {}, "outputs": [], "source": [ @@ -130,70 +128,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 111, "metadata": {}, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5bd7b23a478043eaaf6e14e119143fcd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", "output_type": "stream", + "name": "stderr", "text": [ - "\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7b648c2dbdc4fb9907e43da7db8af9a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "36a9d6e62dbe462d94b1769f36fbd0f3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c3dd50f05994d4a5.arrow\n", + "Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-f2290a23c3c6f190.arrow\n", + "Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-6868a7b57fb52895.arrow\n" ] } ], @@ -203,10 +147,11 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 112, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n", @@ -234,9 +179,8 @@ " 'sentence': \"Our friends won't buy this analysis, let alone the next one we propose.\"}" ] }, - "execution_count": 19, "metadata": {}, - "output_type": "execute_result" + "execution_count": 112 } ], "source": [ @@ -252,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -261,36 +205,15 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 114, "metadata": {}, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": 
"35b76e51b5c8406fae416fcdc3dd885e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=267967963.0, style=ProgressStyle(descri…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", "output_type": "stream", - "text": [ - "\n" - ] - }, - { "name": "stderr", - "output_type": "stream", "text": [ "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n", - "- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n", + "- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" @@ -304,10 +227,11 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 115, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "DistilBertForSequenceClassification(\n", @@ -425,9 +349,8 @@ ")" ] }, - "execution_count": 31, "metadata": {}, - "output_type": "execute_result" + "execution_count": 115 } ], "source": [ @@ -443,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -452,31 +375,60 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 117, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "Metric(name: \"glue\", features: {'predictions': Value(dtype='int64', id=None), 'references': Value(dtype='int64', id=None)}, usage: \"\"\"\n", "Compute GLUE evaluation metric associated to each GLUE dataset.\n", "Args:\n", - " predictions: list of translations to score.\n", + " predictions: list of predictions to score.\n", " Each translation should be tokenized into a list of tokens.\n", " references: list of lists of references for each translation.\n", " Each reference should be tokenized into a list of tokens.\n", "Returns: depending on the GLUE subset, one or several of:\n", " \"accuracy\": Accuracy\n", - " \"f1\": F1\n", + " \"f1\": F1 score\n", " \"pearson\": Pearson Correlation\n", " \"spearmanr\": Spearman Correlation\n", " \"matthews_correlation\": Matthew Correlation\n", + "Examples:\n", + "\n", + " >>> glue_metric = datasets.load_metric('glue', 'sst2') # 'sst2' or any of [\"mnli\", \"mnli_mismatched\", \"mnli_matched\", \"qnli\", \"rte\", \"wnli\", \"hans\"]\n", + " 
>>> references = [0, 1]\n", + " >>> predictions = [0, 1]\n", + " >>> results = glue_metric.compute(predictions=predictions, references=references)\n", + " >>> print(results)\n", + " {'accuracy': 1.0}\n", + "\n", + " >>> glue_metric = datasets.load_metric('glue', 'mrpc') # 'mrpc' or 'qqp'\n", + " >>> references = [0, 1]\n", + " >>> predictions = [0, 1]\n", + " >>> results = glue_metric.compute(predictions=predictions, references=references)\n", + " >>> print(results)\n", + " {'accuracy': 1.0, 'f1': 1.0}\n", + "\n", + " >>> glue_metric = datasets.load_metric('glue', 'stsb')\n", + " >>> references = [0., 1., 2., 3., 4., 5.]\n", + " >>> predictions = [0., 1., 2., 3., 4., 5.]\n", + " >>> results = glue_metric.compute(predictions=predictions, references=references)\n", + " >>> print({\"pearson\": round(results[\"pearson\"], 2), \"spearmanr\": round(results[\"spearmanr\"], 2)})\n", + " {'pearson': 1.0, 'spearmanr': 1.0}\n", + "\n", + " >>> glue_metric = datasets.load_metric('glue', 'cola')\n", + " >>> references = [0, 1]\n", + " >>> predictions = [0, 1]\n", + " >>> results = glue_metric.compute(predictions=predictions, references=references)\n", + " >>> print(results)\n", + " {'matthews_correlation': 1.0}\n", "\"\"\", stored examples: 0)" ] }, - "execution_count": 25, "metadata": {}, - "output_type": "execute_result" + "execution_count": 117 } ], "source": [ @@ -485,10 +437,11 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ + "import numpy as np\n", "def compute_metrics(eval_pred):\n", " predictions, labels = eval_pred\n", " predictions = np.argmax(predictions, axis=1)\n", @@ -504,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -514,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -526,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ @@ -542,46 +495,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
-      "| Step | Training Loss |\n",
-      "|---|"
-     ],
-     "text/plain": [
-      "Memory usage on this node: 11.7/251.8 GiB\nUsing FIFO scheduling algorithm.\nResources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)\nResult logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21\nNumber of trials: 1/infinite (1 RUNNING)"
+ },
+ "metadata": {}
+ },
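(Aside, illustrative only and not part of the diff: the flatten_dict/unflatten_dict helpers added to flaml/searcher/variant_generator.py above are what let FLOW2 work on nested search spaces; a minimal usage sketch with hypothetical config keys.)

    from flaml.searcher.variant_generator import flatten_dict, unflatten_dict

    nested = {"optimizer": {"lr": 1e-3, "beta1": 0.9}, "seed": 1}
    flat = flatten_dict(nested)
    # flat == {"optimizer/lr": 0.001, "optimizer/beta1": 0.9, "seed": 1}
    assert unflatten_dict(flat) == nested  # the round trip restores the nesting
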
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m {'train_runtime': 37.2833, 'train_samples_per_second': 7.188, 'epoch': 1.0}\n",
+ "Trial train_distilbert_21b2c490 reported matthews_correlation=0.00 with parameters={'num_train_epochs': 1, 'learning_rate': 5.61151641533451e-06, 'adam_epsilon': 7.969454818643929e-08, 'adam_beta1': 0.9390788489441669, 'adam_beta2': 0.99186521389353}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.0/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 21b2c490 with matthews_correlation=0.0 and parameters={'num_train_epochs': 1, 'learning_rate': 5.61151641533451e-06, 'adam_epsilon': 7.969454818643929e-08, 'adam_beta1': 0.9390788489441669, 'adam_beta2': 0.99186521389353}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 2/infinite (1 PENDING, 1 RUNNING)
"
+ },
+ "metadata": {}
+ },
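(Aside, illustrative only: BlendSearchTuner.update_search_space in the diff above converts NNI-style search-space JSON into flaml/tune domains; the space below is hypothetical and the comments show what each '_type' maps to per that code.)

    # hypothetical NNI search space and the tune domains update_search_space builds from it
    nni_space = {
        "learning_rate": {"_type": "loguniform", "_value": [1e-6, 1e-3]},  # -> loguniform(1e-6, 1e-3)
        "num_train_epochs": {"_type": "quniform", "_value": [1, 10, 1]},   # -> quniform(1, 10, 1)
        "warmup_steps": {"_type": "randint", "_value": [0, 500]},          # -> randint(0, 499), note the v[1]-1 adjustment
        "scheduler": {"_type": "choice", "_value": ["linear", "cosine"]},  # -> choice(["linear", "cosine"])
    }
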
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_21b2c490 completed. Last result: loss=0.5786514282226562,matthews_correlation=0.0\n",
+ "\u001b[2m\u001b[36m(pid=29589)\u001b[0m {'eval_loss': 0.5786514282226562, 'eval_matthews_correlation': 0.0, 'eval_runtime': 1.8133, 'eval_samples_per_second': 575.184, 'epoch': 1.0}\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m {'train_runtime': 205.6814, 'train_samples_per_second': 8.469, 'epoch': 6.5}\n",
+ "Trial train_distilbert_21b2c491 reported matthews_correlation=0.51 with parameters={'num_train_epochs': 6.496661243646011, 'learning_rate': 3.1345403715761375e-05, 'adam_epsilon': 1.2428131101359459e-08, 'adam_beta1': 0.9100859688137786, 'adam_beta2': 0.9850788361346603}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.5/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 21b2c491 with matthews_correlation=0.5093030018169853 and parameters={'num_train_epochs': 6.496661243646011, 'learning_rate': 3.1345403715761375e-05, 'adam_epsilon': 1.2428131101359459e-08, 'adam_beta1': 0.9100859688137786, 'adam_beta2': 0.9850788361346603}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 3/infinite (1 PENDING, 1 RUNNING, 1 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_21b2c491 completed. Last result: loss=0.9910964965820312,matthews_correlation=0.5093030018169853\n",
+ "\u001b[2m\u001b[36m(pid=29588)\u001b[0m {'eval_loss': 0.9910964965820312, 'eval_matthews_correlation': 0.5093030018169853, 'eval_runtime': 1.8366, 'eval_samples_per_second': 567.883, 'epoch': 6.5}\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m {'train_runtime': 37.2801, 'train_samples_per_second': 7.189, 'epoch': 1.0}\n",
+ "Trial train_distilbert_3f0da820 reported matthews_correlation=0.00 with parameters={'num_train_epochs': 1.0, 'learning_rate': 5.265428651017862e-06, 'adam_epsilon': 1e-07, 'adam_beta1': 0.9093950363089345, 'adam_beta2': 0.9937145453421068}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.7/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 21b2c491 with matthews_correlation=0.5093030018169853 and parameters={'num_train_epochs': 6.496661243646011, 'learning_rate': 3.1345403715761375e-05, 'adam_epsilon': 1.2428131101359459e-08, 'adam_beta1': 0.9100859688137786, 'adam_beta2': 0.9850788361346603}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 4/infinite (1 PENDING, 1 RUNNING, 2 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_3f0da820 completed. Last result: loss=0.5775065422058105,matthews_correlation=0.0\n",
+ "\u001b[2m\u001b[36m(pid=29591)\u001b[0m {'eval_loss': 0.5775065422058105, 'eval_matthews_correlation': 0.0, 'eval_runtime': 1.7547, 'eval_samples_per_second': 594.388, 'epoch': 1.0}\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m {'train_runtime': 197.3016, 'train_samples_per_second': 8.591, 'epoch': 6.32}\n",
+ "Trial train_distilbert_c1106c22 reported matthews_correlation=0.55 with parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 15.9/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: c1106c22 with matthews_correlation=0.5451837431775948 and parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 5/infinite (1 PENDING, 1 RUNNING, 3 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_c1106c22 completed. Last result: loss=0.8939734101295471,matthews_correlation=0.5451837431775948\n",
+ "\u001b[2m\u001b[36m(pid=29590)\u001b[0m {'eval_loss': 0.8939734101295471, 'eval_matthews_correlation': 0.5451837431775948, 'eval_runtime': 1.8277, 'eval_samples_per_second': 570.669, 'epoch': 6.32}\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m {'train_runtime': 105.8952, 'train_samples_per_second': 7.847, 'epoch': 3.1}\n",
+ "Trial train_distilbert_de95f5e6 reported matthews_correlation=0.48 with parameters={'num_train_epochs': 3.097601049860023, 'learning_rate': 3.015866216468612e-05, 'adam_epsilon': 6.092346813998939e-09, 'adam_beta1': 0.9628888910610184, 'adam_beta2': 0.9832186589335725}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.3/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: c1106c22 with matthews_correlation=0.5451837431775948 and parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 6/infinite (1 PENDING, 1 RUNNING, 4 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_de95f5e6 completed. Last result: loss=0.5720887780189514,matthews_correlation=0.48369222635456827\n",
+ "\u001b[2m\u001b[36m(pid=8754)\u001b[0m {'eval_loss': 0.5720887780189514, 'eval_matthews_correlation': 0.48369222635456827, 'eval_runtime': 1.8561, 'eval_samples_per_second': 561.936, 'epoch': 3.1}\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m {'train_runtime': 330.1466, 'train_samples_per_second': 8.732, 'epoch': 10.76}\n",
+ "Trial train_distilbert_5bb0a1fc reported matthews_correlation=0.53 with parameters={'num_train_epochs': 10.755455977982155, 'learning_rate': 5.858103269448852e-05, 'adam_epsilon': 5.045085830072572e-08, 'adam_beta1': 0.845137019185222, 'adam_beta2': 0.9882166289933315}.\n",
+ "\u001b[2m\u001b[36m(pid=12777)\u001b[0m {'eval_loss': 1.5075323581695557, 'eval_matthews_correlation': 0.5282404248888111, 'eval_runtime': 1.7504, 'eval_samples_per_second': 595.853, 'epoch': 10.76}\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 15.9/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: c1106c22 with matthews_correlation=0.5451837431775948 and parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 7/infinite (1 PENDING, 1 RUNNING, 5 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_5bb0a1fc completed. Last result: loss=1.5075323581695557,matthews_correlation=0.5282404248888111\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m {'train_runtime': 182.3796, 'train_samples_per_second': 8.724, 'epoch': 5.94}\n",
+ "Trial train_distilbert_a247fb2e reported matthews_correlation=0.54 with parameters={'num_train_epochs': 5.933063389003551, 'learning_rate': 1.845204084769373e-05, 'adam_epsilon': 1.372505378696326e-08, 'adam_beta1': 0.8534841230874768, 'adam_beta2': 0.9858475457825921}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.4/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: c1106c22 with matthews_correlation=0.5451837431775948 and parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 8/infinite (1 PENDING, 1 RUNNING, 6 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_a247fb2e completed. Last result: loss=0.6974263191223145,matthews_correlation=0.5399503104637741\n",
+ "\u001b[2m\u001b[36m(pid=39770)\u001b[0m {'eval_loss': 0.6974263191223145, 'eval_matthews_correlation': 0.5399503104637741, 'eval_runtime': 1.8585, 'eval_samples_per_second': 561.204, 'epoch': 5.94}\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m {'train_runtime': 189.7562, 'train_samples_per_second': 8.59, 'epoch': 6.08}\n",
+ "Trial train_distilbert_6e9e8ec2 reported matthews_correlation=0.52 with parameters={'num_train_epochs': 6.078693989748608, 'learning_rate': 1.8357895987910622e-05, 'adam_epsilon': 1.5849146381322022e-08, 'adam_beta1': 0.8904370071918882, 'adam_beta2': 0.9844583428325462}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 17.1/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: c1106c22 with matthews_correlation=0.5451837431775948 and parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 9/infinite (1 PENDING, 1 RUNNING, 7 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_6e9e8ec2 completed. Last result: loss=0.7202959656715393,matthews_correlation=0.5185394246694179\n",
+ "\u001b[2m\u001b[36m(pid=7123)\u001b[0m {'eval_loss': 0.7202959656715393, 'eval_matthews_correlation': 0.5185394246694179, 'eval_runtime': 1.6051, 'eval_samples_per_second': 649.814, 'epoch': 6.08}\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m {'train_runtime': 329.789, 'train_samples_per_second': 8.448, 'epoch': 10.4}\n",
+ "Trial train_distilbert_e30fd860 reported matthews_correlation=0.54 with parameters={'num_train_epochs': 10.39182109947885, 'learning_rate': 6.762356226483751e-05, 'adam_epsilon': 5.0195217227379364e-08, 'adam_beta1': 0.8951148565195837, 'adam_beta2': 0.9914274194005184}.\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m {'eval_loss': 1.505250334739685, 'eval_matthews_correlation': 0.5353569722427551, 'eval_runtime': 1.8314, 'eval_samples_per_second': 569.522, 'epoch': 10.4}\n",
+ "\u001b[2m\u001b[36m(pid=14798)\u001b[0m \n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 15.9/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: c1106c22 with matthews_correlation=0.5451837431775948 and parameters={'num_train_epochs': 6.324445967486241, 'learning_rate': 2.9412189965562634e-05, 'adam_epsilon': 2.256452443236495e-08, 'adam_beta1': 0.880402156178546, 'adam_beta2': 0.9869155143904086}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 10/infinite (1 PENDING, 1 RUNNING, 8 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_e30fd860 completed. Last result: loss=1.505250334739685,matthews_correlation=0.5353569722427551\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m {'train_runtime': 259.759, 'train_samples_per_second': 9.078, 'epoch': 8.8}\n",
+ "Trial train_distilbert_5bddb1ae reported matthews_correlation=0.55 with parameters={'num_train_epochs': 8.797715187430134, 'learning_rate': 2.72412577596775e-05, 'adam_epsilon': 7.4151444539151255e-09, 'adam_beta1': 0.869942964703411, 'adam_beta2': 0.9852670758817403}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.8/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 5bddb1ae with matthews_correlation=0.5492247863049868 and parameters={'num_train_epochs': 8.797715187430134, 'learning_rate': 2.72412577596775e-05, 'adam_epsilon': 7.4151444539151255e-09, 'adam_beta1': 0.869942964703411, 'adam_beta2': 0.9852670758817403}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 11/infinite (1 PENDING, 1 RUNNING, 9 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_5bddb1ae completed. Last result: loss=1.0900800228118896,matthews_correlation=0.5492247863049868\n",
+ "\u001b[2m\u001b[36m(pid=27867)\u001b[0m {'eval_loss': 1.0900800228118896, 'eval_matthews_correlation': 0.5492247863049868, 'eval_runtime': 1.6198, 'eval_samples_per_second': 643.889, 'epoch': 8.8}\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m {'train_runtime': 251.169, 'train_samples_per_second': 8.544, 'epoch': 8.01}\n",
+ "Trial train_distilbert_27da6108 reported matthews_correlation=0.55 with parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.1/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 12/infinite (1 PENDING, 1 RUNNING, 10 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_27da6108 completed. Last result: loss=0.8646725416183472,matthews_correlation=0.550740569901542\n",
+ "\u001b[2m\u001b[36m(pid=38727)\u001b[0m {'eval_loss': 0.8646725416183472, 'eval_matthews_correlation': 0.550740569901542, 'eval_runtime': 1.7453, 'eval_samples_per_second': 597.588, 'epoch': 8.01}\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m {'train_runtime': 150.7963, 'train_samples_per_second': 8.641, 'epoch': 4.86}\n",
+ "Trial train_distilbert_ca4167f2 reported matthews_correlation=0.55 with parameters={'num_train_epochs': 4.8609021804212205, 'learning_rate': 3.0765755916918634e-05, 'adam_epsilon': 3.2784085089990583e-09, 'adam_beta1': 0.9001311340399742, 'adam_beta2': 0.9865549219923857}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.7/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 13/infinite (1 PENDING, 1 RUNNING, 11 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_ca4167f2 completed. Last result: loss=0.7426601052284241,matthews_correlation=0.5474713423103301\n",
+ "\u001b[2m\u001b[36m(pid=8698)\u001b[0m {'eval_loss': 0.7426601052284241, 'eval_matthews_correlation': 0.5474713423103301, 'eval_runtime': 1.6955, 'eval_samples_per_second': 615.172, 'epoch': 4.86}\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m {'train_runtime': 168.574, 'train_samples_per_second': 8.56, 'epoch': 5.38}\n",
+ "Trial train_distilbert_6776ad66 reported matthews_correlation=0.50 with parameters={'num_train_epochs': 5.381515555130151, 'learning_rate': 1.4923436298344364e-05, 'adam_epsilon': 4.718609673277113e-08, 'adam_beta1': 0.8855356638050199, 'adam_beta2': 0.9817714112199931}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 15.7/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 14/infinite (1 PENDING, 1 RUNNING, 12 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[2m\u001b[36m(pid=26401)\u001b[0m {'eval_loss': 0.6062898635864258, 'eval_matthews_correlation': 0.5039642659976749, 'eval_runtime': 1.8481, 'eval_samples_per_second': 564.358, 'epoch': 5.38}\n",
+ "Trial train_distilbert_6776ad66 completed. Last result: loss=0.6062898635864258,matthews_correlation=0.5039642659976749\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m {'train_runtime': 267.304, 'train_samples_per_second': 8.694, 'epoch': 8.67}\n",
+ "Trial train_distilbert_c904a63c reported matthews_correlation=0.54 with parameters={'num_train_epochs': 8.670157213614129, 'learning_rate': 3.589310669581693e-05, 'adam_epsilon': 1e-07, 'adam_beta1': 0.9159421419473668, 'adam_beta2': 0.9870278515925665}.\n",
+ "\u001b[2m\u001b[36m(pid=36494)\u001b[0m {'eval_loss': 1.15528404712677, 'eval_matthews_correlation': 0.541934635424655, 'eval_runtime': 1.8046, 'eval_samples_per_second': 577.975, 'epoch': 8.67}\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.4/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 15/infinite (1 PENDING, 1 RUNNING, 13 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_c904a63c completed. Last result: loss=1.15528404712677,matthews_correlation=0.541934635424655\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m {'train_runtime': 401.1267, 'train_samples_per_second': 8.808, 'epoch': 13.18}\n",
+ "Trial train_distilbert_34cd23b2 reported matthews_correlation=0.54 with parameters={'num_train_epochs': 13.180325143440442, 'learning_rate': 1.1392631517503339e-05, 'adam_epsilon': 8.551227707433237e-08, 'adam_beta1': 0.8917360114521684, 'adam_beta2': 0.9933954023113967}.\n",
+ "\u001b[2m\u001b[36m(pid=7128)\u001b[0m {'eval_loss': 0.9118097424507141, 'eval_matthews_correlation': 0.5361146089547957, 'eval_runtime': 1.6269, 'eval_samples_per_second': 641.089, 'epoch': 13.18}\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.4/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 16/infinite (1 PENDING, 1 RUNNING, 14 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_34cd23b2 completed. Last result: loss=0.9118097424507141,matthews_correlation=0.5361146089547957\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m {'train_runtime': 261.9267, 'train_samples_per_second': 8.548, 'epoch': 8.35}\n",
+ "Trial train_distilbert_dbc01c60 reported matthews_correlation=0.53 with parameters={'num_train_epochs': 8.351740081197375, 'learning_rate': 4.14474164779562e-05, 'adam_epsilon': 2.5536744573294183e-08, 'adam_beta1': 0.9010345773126118, 'adam_beta2': 0.98213801095907}.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 16.0/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 4/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 17/infinite (1 PENDING, 1 RUNNING, 15 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trial train_distilbert_dbc01c60 completed. Last result: loss=1.270609974861145,matthews_correlation=0.5331291095663535\n",
+ "\u001b[2m\u001b[36m(pid=23493)\u001b[0m {'eval_loss': 1.270609974861145, 'eval_matthews_correlation': 0.5331291095663535, 'eval_runtime': 1.7863, 'eval_samples_per_second': 583.876, 'epoch': 8.35}\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m Reusing dataset glue (/home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-bec756fc24993464.arrow\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-3b411a778de4d998.arrow\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m Loading cached processed dataset at /home/chiw/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4/cache-c7231adac87a0159.arrow\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m {'train_runtime': 307.947, 'train_samples_per_second': 8.501, 'epoch': 9.77}\n",
+ "2021-02-24 15:01:18,861\tINFO stopper.py:193 -- Reached timeout of 3600 seconds. Stopping all trials.\n",
+ "Trial train_distilbert_d1e00f7e reported matthews_correlation=0.50 with parameters={'num_train_epochs': 9.768470529742105, 'learning_rate': 7.278242504625585e-06, 'adam_epsilon': 9.024121328462365e-08, 'adam_beta1': 0.9568651413276459, 'adam_beta2': 0.9898624818542463}.\n",
+ "\u001b[2m\u001b[36m(pid=33982)\u001b[0m {'eval_loss': 0.6356746554374695, 'eval_matthews_correlation': 0.502884728860933, 'eval_runtime': 1.7441, 'eval_samples_per_second': 598.03, 'epoch': 9.77}\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 15.9/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 18/infinite (18 TERMINATED)
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
Memory usage on this node: 15.9/251.8 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/4 GPUs, 0.0/161.91 GiB heap, 0.0/50.63 GiB objects (0/1.0 accelerator_type:V100)
Current best trial: 27da6108 with matthews_correlation=0.550740569901542 and parameters={'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}
Result logdir: /raid/chiw/FLAML/notebook/logs/train_distilbert_2021-02-24_13-56-21
Number of trials: 18/infinite (18 TERMINATED)

Trial name                 status      loc   adam_beta1   adam_beta2   adam_epsilon   learning_rate   num_train_epochs   iter   total time (s)   loss       matthews_correlation
train_distilbert_21b2c490  TERMINATED        0.939079     0.991865     7.96945e-08    5.61152e-06     1                  1      46.9698          0.578651   0
train_distilbert_21b2c491  TERMINATED        0.910086     0.985079     1.24281e-08    3.13454e-05     6.49666            1      215.872          0.991096   0.509303
train_distilbert_3f0da820  TERMINATED        0.909395     0.993715     1e-07          5.26543e-06     1                  1      47.3068          0.577507   0
train_distilbert_c1106c22  TERMINATED        0.880402     0.986916     2.25645e-08    2.94122e-05     6.32445            1      207.618          0.893973   0.545184
train_distilbert_de95f5e6  TERMINATED        0.962889     0.983219     6.09235e-09    3.01587e-05     3.0976             1      115.872          0.572089   0.483692
train_distilbert_5bb0a1fc  TERMINATED        0.845137     0.988217     5.04509e-08    5.8581e-05      10.7555            1      340.281          1.50753    0.52824
train_distilbert_a247fb2e  TERMINATED        0.853484     0.985848     1.37251e-08    1.8452e-05      5.93306            1      192.779          0.697426   0.53995
train_distilbert_6e9e8ec2  TERMINATED        0.890437     0.984458     1.58491e-08    1.83579e-05     6.07869            1      200.122          0.720296   0.518539
train_distilbert_e30fd860  TERMINATED        0.895115     0.991427     5.01952e-08    6.76236e-05     10.3918            1      339.615          1.50525    0.535357
train_distilbert_5bddb1ae  TERMINATED        0.869943     0.985267     7.41514e-09    2.72413e-05     8.79772            1      269.864          1.09008    0.549225
train_distilbert_27da6108  TERMINATED        0.911674     0.98694      6.69698e-08    1.93183e-05     8.00568            1      261.261          0.864673   0.550741
train_distilbert_ca4167f2  TERMINATED        0.900131     0.986555     3.27841e-09    3.07658e-05     4.8609             1      161.146          0.74266    0.547471
train_distilbert_6776ad66  TERMINATED        0.885536     0.981771     4.71861e-08    1.49234e-05     5.38152            1      178.269          0.60629    0.503964
train_distilbert_c904a63c  TERMINATED        0.915942     0.987028     1e-07          3.58931e-05     8.67016            1      277.56           1.15528    0.541935
train_distilbert_34cd23b2  TERMINATED        0.891736     0.993395     8.55123e-08    1.13926e-05     13.1803            1      410.4            0.91181    0.536115
train_distilbert_dbc01c60  TERMINATED        0.901035     0.982138     2.55367e-08    4.14474e-05     8.35174            1      272.136          1.27061    0.533129
train_distilbert_d1e00f7e  TERMINATED        0.956865     0.989862     9.02412e-08    7.27824e-06     9.76847            1      317.557          0.635675   0.502885
train_distilbert_759d8c04  TERMINATED        0.852308     0.986484     3.65877e-08    2.91155e-05     4.12326
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2021-02-24 15:01:18,957\tINFO tune.py:448 -- Total run time: 3897.00 seconds (3896.97 seconds for the tuning loop).\n"
+ ]
+ }
+ ],
"source": [
"import time\n",
"import ray\n",
"start_time = time.time()\n",
+ "ray.shutdown()\n",
"ray.init(num_cpus=num_cpus, num_gpus=num_gpus)\n",
"\n",
"print(\"Tuning started...\")\n",
@@ -734,7 +1153,7 @@
" report_intermediate_result=False,\n",
" # uncomment the following if report_intermediate_result = True\n",
" # max_resource=max_num_epoch, min_resource=1,\n",
- " resources_per_trial={\"gpu\": 1},\n",
+ " resources_per_trial={\"gpu\": num_gpus, \"cpu\": num_cpus},\n",
" local_dir='logs/',\n",
" num_samples=num_samples,\n",
" time_budget_s=time_budget_s,\n",
@@ -746,9 +1165,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 127,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "n_trials=18\ntime=3903.5583679676056\nBest model eval matthews_correlation: 0.5507\nBest model parameters: {'num_train_epochs': 8.005678804316002, 'learning_rate': 1.931832460928058e-05, 'adam_epsilon': 6.696984191794608e-08, 'adam_beta1': 0.9116736888940158, 'adam_beta2': 0.9869397626562693}\n"
+ ]
+ }
+ ],
"source": [
"best_trial = analysis.get_best_trial(HP_METRIC, MODE, \"all\")\n",
"metric = best_trial.metric_analysis[HP_METRIC][MODE]\n",
@@ -765,7 +1192,7 @@
"Notice that we only reported the metric with `flaml.tune.report` at the end of full training loop. It is possible to enable reporting of intermediate performance - allowing early stopping - as follows:\n",
"\n",
"- Huggingface provides _Callbacks_ which can be used to insert the `flaml.tune.report` call inside the training loop\n",
- "- Make sure to set `do_eval=True` in the `TrainingArguments` provided to `Trainer` and adjust theevaluation frequency accordingly"
+ "- Make sure to set `do_eval=True` in the `TrainingArguments` provided to `Trainer` and adjust the evaluation frequency accordingly"
],
"cell_type": "markdown",
"metadata": {}
@@ -774,12 +1201,8 @@
"metadata": {
"kernelspec": {
"name": "python3",
- "display_name": "Python 3.7.7 64-bit ('flaml': conda)",
- "metadata": {
- "interpreter": {
- "hash": "bfcd9a6a9254a5e160761a1fd7a9e444f011592c6770d9f4180dde058a9df5dd"
- }
- }
+ "display_name": "Python 3",
+ "language": "python"
},
"language_info": {
"codemirror_mode": {
@@ -791,7 +1214,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.7-final"
+ "version": "3.7.9-final"
}
},
"nbformat": 4,
diff --git a/notebook/flaml_xgboost.ipynb b/notebook/flaml_xgboost.ipynb
new file mode 100644
index 000000000..425414daf
--- /dev/null
+++ b/notebook/flaml_xgboost.ipynb
@@ -0,0 +1,556 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved. \n",
+ "\n",
+ "Licensed under the MIT License.\n",
+ "\n",
+ "# Tune XGBoost with FLAML Library\n",
+ "\n",
+ "\n",
+ "## 1. Introduction\n",
+ "\n",
+ "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n",
+ "with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy \n",
+ "to use and extend, such as adding new learners. FLAML can \n",
+ "- serve as an economical AutoML engine,\n",
+ "- be used as a fast hyperparameter tuning tool, or \n",
+ "- be embedded in self-tuning software that requires low latency & resource in repetitive\n",
+ " tuning tasks.\n",
+ "\n",
+ "In this notebook, we demonstrate how to use FLAML library to tune hyperparameters of XGBoost with a regression example.\n",
+ "\n",
+ "FLAML requires `Python>=3.6`. To run this notebook example, please install flaml with the `notebook` option:\n",
+ "```bash\n",
+ "pip install flaml[notebook]\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install flaml[notebook];"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "## 2. Regression Example\n",
+ "### Load data and preprocess\n",
+ "\n",
+ "Download [houses dataset](https://www.openml.org/d/537) from OpenML. The task is to predict median price of the house in the region based on demographic composition and a state of housing market in the region."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "subslide"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "load dataset from ./openml_ds537.pkl\nDataset name: houses\nX_train.shape: (15480, 8), y_train.shape: (15480,);\nX_test.shape: (5160, 8), y_test.shape: (5160,)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from flaml.data import load_openml_dataset\n",
+ "X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id = 537, data_dir = './')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Run FLAML\n",
+ "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "''' import AutoML class from flaml package '''\n",
+ "from flaml import AutoML\n",
+ "automl = AutoML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "settings = {\n",
+ " \"time_budget\": 60, # total running time in seconds\n",
+ " \"metric\": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2']\n",
+ " \"estimator_list\": ['xgboost'], # list of ML learners; we tune xgboost in this example\n",
+ " \"task\": 'regression', # task type \n",
+ " \"log_file_name\": 'houses_experiment.log', # flaml log file\n",
+ "}"
+ ]
+ },
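A minimal sketch (not part of the notebook, whose own fit cell follows later and may pass additional arguments) of how such a settings dict is typically unpacked into the AutoML fit call:

# Hedged sketch of the usual FLAML fit invocation with the settings defined above.
automl.fit(X_train=X_train, y_train=y_train, **settings)
print('Best hyperparameters:', automl.best_config)
print('Best validation r2:', 1 - automl.best_loss)  # the minimized error metric is 1-r2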
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "[flaml.automl: 02-23 14:54:34] {853} INFO - Evaluation method: cv\n",
+ "INFO - Evaluation method: cv\n",
+ "[flaml.automl: 02-23 14:54:34] {577} INFO - Using RepeatedKFold\n",
+ "INFO - Using RepeatedKFold\n",
+ "[flaml.automl: 02-23 14:54:34] {874} INFO - Minimizing error metric: 1-r2\n",
+ "INFO - Minimizing error metric: 1-r2\n",
+ "[flaml.automl: 02-23 14:54:34] {894} INFO - List of ML learners in AutoML Run: ['xgboost']\n",
+ "INFO - List of ML learners in AutoML Run: ['xgboost']\n",
+ "[flaml.automl: 02-23 14:54:34] {953} INFO - iteration 0 current learner xgboost\n",
+ "INFO - iteration 0 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:35] {1107} INFO - at 1.3s,\tbest xgboost's error=2.1267,\tbest xgboost's error=2.1267\n",
+ "INFO - at 1.3s,\tbest xgboost's error=2.1267,\tbest xgboost's error=2.1267\n",
+ "[flaml.automl: 02-23 14:54:35] {953} INFO - iteration 1 current learner xgboost\n",
+ "INFO - iteration 1 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:35] {1107} INFO - at 1.4s,\tbest xgboost's error=2.1267,\tbest xgboost's error=2.1267\n",
+ "INFO - at 1.4s,\tbest xgboost's error=2.1267,\tbest xgboost's error=2.1267\n",
+ "[flaml.automl: 02-23 14:54:35] {953} INFO - iteration 2 current learner xgboost\n",
+ "INFO - iteration 2 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:36] {1107} INFO - at 1.5s,\tbest xgboost's error=0.4565,\tbest xgboost's error=0.4565\n",
+ "INFO - at 1.5s,\tbest xgboost's error=0.4565,\tbest xgboost's error=0.4565\n",
+ "[flaml.automl: 02-23 14:54:36] {953} INFO - iteration 3 current learner xgboost\n",
+ "INFO - iteration 3 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:36] {1107} INFO - at 1.6s,\tbest xgboost's error=0.4565,\tbest xgboost's error=0.4565\n",
+ "INFO - at 1.6s,\tbest xgboost's error=0.4565,\tbest xgboost's error=0.4565\n",
+ "[flaml.automl: 02-23 14:54:36] {953} INFO - iteration 4 current learner xgboost\n",
+ "INFO - iteration 4 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:36] {1107} INFO - at 1.9s,\tbest xgboost's error=0.2697,\tbest xgboost's error=0.2697\n",
+ "INFO - at 1.9s,\tbest xgboost's error=0.2697,\tbest xgboost's error=0.2697\n",
+ "[flaml.automl: 02-23 14:54:36] {953} INFO - iteration 5 current learner xgboost\n",
+ "INFO - iteration 5 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:36] {1107} INFO - at 2.1s,\tbest xgboost's error=0.2278,\tbest xgboost's error=0.2278\n",
+ "INFO - at 2.1s,\tbest xgboost's error=0.2278,\tbest xgboost's error=0.2278\n",
+ "[flaml.automl: 02-23 14:54:36] {953} INFO - iteration 6 current learner xgboost\n",
+ "INFO - iteration 6 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:36] {1107} INFO - at 2.2s,\tbest xgboost's error=0.2278,\tbest xgboost's error=0.2278\n",
+ "INFO - at 2.2s,\tbest xgboost's error=0.2278,\tbest xgboost's error=0.2278\n",
+ "[flaml.automl: 02-23 14:54:36] {953} INFO - iteration 7 current learner xgboost\n",
+ "INFO - iteration 7 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:36] {1107} INFO - at 2.5s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 2.5s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:36] {953} INFO - iteration 8 current learner xgboost\n",
+ "INFO - iteration 8 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:37] {1107} INFO - at 2.6s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 2.6s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:37] {953} INFO - iteration 9 current learner xgboost\n",
+ "INFO - iteration 9 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:37] {1107} INFO - at 2.8s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 2.8s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:37] {953} INFO - iteration 10 current learner xgboost\n",
+ "INFO - iteration 10 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:37] {1107} INFO - at 3.0s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 3.0s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:37] {953} INFO - iteration 11 current learner xgboost\n",
+ "INFO - iteration 11 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:38] {1107} INFO - at 3.6s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 3.6s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:38] {953} INFO - iteration 12 current learner xgboost\n",
+ "INFO - iteration 12 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:38] {1107} INFO - at 4.1s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 4.1s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:38] {953} INFO - iteration 13 current learner xgboost\n",
+ "INFO - iteration 13 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:38] {1107} INFO - at 4.2s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "INFO - at 4.2s,\tbest xgboost's error=0.2228,\tbest xgboost's error=0.2228\n",
+ "[flaml.automl: 02-23 14:54:38] {953} INFO - iteration 14 current learner xgboost\n",
+ "INFO - iteration 14 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:39] {1107} INFO - at 4.9s,\tbest xgboost's error=0.1814,\tbest xgboost's error=0.1814\n",
+ "INFO - at 4.9s,\tbest xgboost's error=0.1814,\tbest xgboost's error=0.1814\n",
+ "[flaml.automl: 02-23 14:54:39] {953} INFO - iteration 15 current learner xgboost\n",
+ "INFO - iteration 15 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:39] {1107} INFO - at 5.2s,\tbest xgboost's error=0.1814,\tbest xgboost's error=0.1814\n",
+ "INFO - at 5.2s,\tbest xgboost's error=0.1814,\tbest xgboost's error=0.1814\n",
+ "[flaml.automl: 02-23 14:54:39] {953} INFO - iteration 16 current learner xgboost\n",
+ "INFO - iteration 16 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:46] {1107} INFO - at 12.3s,\tbest xgboost's error=0.1813,\tbest xgboost's error=0.1813\n",
+ "INFO - at 12.3s,\tbest xgboost's error=0.1813,\tbest xgboost's error=0.1813\n",
+ "[flaml.automl: 02-23 14:54:46] {953} INFO - iteration 17 current learner xgboost\n",
+ "INFO - iteration 17 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:54:51] {1107} INFO - at 17.5s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "INFO - at 17.5s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "[flaml.automl: 02-23 14:54:51] {953} INFO - iteration 18 current learner xgboost\n",
+ "INFO - iteration 18 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:55:04] {1107} INFO - at 30.4s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "INFO - at 30.4s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "[flaml.automl: 02-23 14:55:04] {953} INFO - iteration 19 current learner xgboost\n",
+ "INFO - iteration 19 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:55:06] {1107} INFO - at 32.1s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "INFO - at 32.1s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "[flaml.automl: 02-23 14:55:06] {953} INFO - iteration 20 current learner xgboost\n",
+ "INFO - iteration 20 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:55:10] {1107} INFO - at 35.7s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "INFO - at 35.7s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "[flaml.automl: 02-23 14:55:10] {953} INFO - iteration 21 current learner xgboost\n",
+ "INFO - iteration 21 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:55:11] {1107} INFO - at 36.7s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "INFO - at 36.7s,\tbest xgboost's error=0.1642,\tbest xgboost's error=0.1642\n",
+ "[flaml.automl: 02-23 14:55:11] {953} INFO - iteration 22 current learner xgboost\n",
+ "INFO - iteration 22 current learner xgboost\n",
+ "[flaml.automl: 02-23 14:55:34] {1107} INFO - at 59.7s,\tbest xgboost's error=0.1601,\tbest xgboost's error=0.1601\n",
+ "INFO - at 59.7s,\tbest xgboost's error=0.1601,\tbest xgboost's error=0.1601\n",
+ "[flaml.automl: 02-23 14:55:34] {1148} INFO - selected model: