Mirror of https://github.com/microsoft/autogen.git
V0.2.2 (#19)
* v0.2.2

- separate the HPO part into the module flaml.tune
- enhanced implementation of FLOW^2, CFO and BlendSearch
- support parallel tuning using ray tune
- add support for sample_weight and generic fit arguments
- enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
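The headline change is the standalone `flaml.tune` module. As a rough sketch of the API that the tests added in this commit exercise (argument names are taken from `test/test_tune.py` below; the toy objective and search space are illustrative only, not from the commit):

    from flaml import tune

    def evaluate_config(config):
        # Toy objective: report a loss for each sampled configuration.
        loss = (config['x'] - 8) ** 2 + (config['y'] - 5) ** 2
        tune.report(loss=loss)

    analysis = tune.run(
        evaluate_config,
        config={
            'x': tune.qloguniform(lower=1, upper=100, q=1),
            'y': tune.randint(1, 10),
        },
        init_config={'x': 1, 'y': 1},  # low-cost starting point for BlendSearch/CFO
        metric='loss',
        mode='min',
        num_samples=100,
        time_budget_s=60)

Passing `use_ray=True`, as the tests below do, runs the same search in parallel through Ray Tune.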
@@ -7,58 +7,74 @@ from sklearn.datasets import load_boston, load_iris, load_wine
 from flaml import AutoML
 from flaml.data import get_output_from_log
 
-from flaml.model import BaseEstimator
-from flaml.space import ConfigSearchInfo
+from flaml.model import SKLearnEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
+from flaml import tune
 
 
-class MyRegularizedGreedyForest(BaseEstimator):
+class MyRegularizedGreedyForest(SKLearnEstimator):
 
-    # search space
-    params_configsearch_info = {
-        'max_leaf': ConfigSearchInfo(name = 'max_leaf',
-            type = int, lower = 4, init = 4, upper = 10000),
-        'n_iter': ConfigSearchInfo(name = 'n_iter', type = int, lower = 1,
-            init = 1, upper = 32768),
-        'n_tree_search': ConfigSearchInfo(name = 'n_tree_search', type = int,
-            lower = 1, init = 1, upper = 32768),
-        'opt_interval': ConfigSearchInfo(name = 'opt_interval', type = int,
-            lower = 1, init = 100, upper = 10000),
-        'learning_rate': ConfigSearchInfo(name = 'learning_rate', type = float,
-            lower = 0.01, init = 1.0, upper = 20.0),
-        'min_samples_leaf': ConfigSearchInfo(name = 'min_samples_leaf',
-            type = int, lower = 1, init = 20, upper = 20)
-    }
-
-    def __init__(self, objective_name = 'binary:logistic', n_jobs = 1,
-        max_leaf = 1000, n_iter = 1, n_tree_search = 1, opt_interval = 1,
-        learning_rate = 1.0, min_samples_leaf = 1):
-
-        self.objective_name = objective_name
+    def __init__(self, task = 'binary:logistic', n_jobs = 1, max_leaf = 4,
+        n_iter = 1, n_tree_search = 1, opt_interval = 1, learning_rate = 1.0,
+        min_samples_leaf = 1, **params):
+
+        super().__init__(task, **params)
 
-        if 'regression' in objective_name:
+        if 'regression' in task:
             self.estimator_class = RGFRegressor
         else:
             self.estimator_class = RGFClassifier
 
         # round integer hyperparameters
         self.params = {
-            "n_jobs": n_jobs,
             'max_leaf': int(round(max_leaf)),
             'n_iter': int(round(n_iter)),
             'n_tree_search': int(round(n_tree_search)),
             'opt_interval': int(round(opt_interval)),
             'learning_rate': learning_rate,
-            'min_samples_leaf': int(round(min_samples_leaf)),
-        }
+            "n_jobs": n_jobs,
+            'min_samples_leaf': int(round(min_samples_leaf))
+        }
+
+    @classmethod
+    def search_space(cls, data_size, task):
+        space = {
+            'max_leaf': {'domain': tune.qloguniform(
+                lower = 4, upper = data_size, q = 1), 'init_value': 4},
+            'n_iter': {'domain': tune.qloguniform(
+                lower = 1, upper = data_size, q = 1), 'init_value': 1},
+            'n_tree_search': {'domain': tune.qloguniform(
+                lower = 1, upper = 32768, q = 1), 'init_value': 1},
+            'opt_interval': {'domain': tune.qloguniform(
+                lower = 1, upper = 10000, q = 1), 'init_value': 100},
+            'learning_rate': {'domain': tune.loguniform(
+                lower = 0.01, upper = 20.0)},
+            'min_samples_leaf': {'domain': tune.qloguniform(
+                lower = 1, upper = 20, q = 1), 'init_value': 20},
+        }
+        return space
+
+    @classmethod
+    def size(cls, config):
+        max_leaves = int(round(config['max_leaf']))
+        n_estimators = int(round(config['n_iter']))
+        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8
+
+    @classmethod
+    def cost_relative2lgbm(cls):
+        return 1.0
 
 
-def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
+def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
+                  weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
     y_pred = estimator.predict_proba(X_test)
-    test_loss = log_loss(y_test, y_pred, labels=labels)
+    test_loss = log_loss(y_test, y_pred, labels=labels,
+                         sample_weight=weight_test)
     y_pred = estimator.predict_proba(X_train)
-    train_loss = log_loss(y_train, y_pred, labels=labels)
+    train_loss = log_loss(y_train, y_pred, labels=labels,
+                          sample_weight=weight_train)
     alpha = 0.5
     return test_loss * (1 + alpha) - alpha * train_loss, [test_loss, train_loss]
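A custom metric with the signature above (returning a scalar to minimize plus a list of auxiliary metrics to log) is plugged in by passing the function object itself as the `metric` setting; a minimal sketch, assuming the `metric=custom_metric` plumbing that `test_custom_metric` below relies on:

    from flaml import AutoML
    from sklearn.datasets import load_iris

    automl = AutoML()
    X_train, y_train = load_iris(return_X_y=True)
    automl.fit(X_train=X_train, y_train=y_train, metric=custom_metric,
               task='classification', time_budget=10)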
@@ -77,6 +93,27 @@ class TestAutoML(unittest.TestCase):
             "sample": True,  # whether to subsample training data
             "log_file_name": "test/wine.log",
             "log_training_metric": True,  # whether to log training metric
+            "n_jobs": 1,
         }
 
         '''The main flaml automl API'''
         automl.fit(X_train = X_train, y_train = y_train, **settings)
 
+    def test_ensemble(self):
+        automl = AutoML()
+        automl.add_learner(learner_name = 'RGF',
+            learner_class = MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            # "estimator_list": ['lgbm', 'xgboost'],
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+            "ensemble": True,
+            "n_jobs": 1,
+        }
+
+        '''The main flaml automl API'''
@@ -87,6 +124,7 @@ class TestAutoML(unittest.TestCase):
 
     def test_custom_metric(self):
 
+        X_train, y_train = load_iris(return_X_y=True)
         automl_experiment = AutoML()
         automl_settings = {
             "time_budget": 10,
@@ -96,9 +134,10 @@ class TestAutoML(unittest.TestCase):
             "log_file_name": "test/iris_custom.log",
             "log_training_metric": True,
             'log_type': 'all',
-            "model_history": True
+            "n_jobs": 1,
+            "model_history": True,
+            "sample_weight": np.ones(len(y_train)),
         }
-        X_train, y_train = load_iris(return_X_y=True)
         automl_experiment.fit(X_train=X_train, y_train=y_train,
                               **automl_settings)
         print(automl_experiment.classes_)
@@ -111,7 +150,7 @@ class TestAutoML(unittest.TestCase):
         automl_experiment = AutoML()
         estimator = automl_experiment.get_estimator_from_log(
             automl_settings["log_file_name"], record_id=0,
-            objective='multi')
+            task='multi')
         print(estimator)
         time_history, best_valid_loss_history, valid_loss_history, \
             config_history, train_loss_history = get_output_from_log(
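For reference, the tuple unpacked from `get_output_from_log` above is what one would use to draw a learning curve; a minimal sketch (the matplotlib plotting is an addition for illustration, not part of the test):

    import matplotlib.pyplot as plt

    # time_history and best_valid_loss_history as returned by get_output_from_log.
    plt.step(time_history, best_valid_loss_history, where='post')
    plt.xlabel('wall-clock time (s)')
    plt.ylabel('best validation loss')
    plt.show()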
@@ -127,6 +166,7 @@ class TestAutoML(unittest.TestCase):
             "task": 'classification',
             "log_file_name": "test/iris.log",
             "log_training_metric": True,
+            "n_jobs": 1,
             "model_history": True
         }
         X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
@@ -160,6 +200,7 @@ class TestAutoML(unittest.TestCase):
             "task": 'regression',
             "log_file_name": "test/boston.log",
             "log_training_metric": True,
+            "n_jobs": 1,
             "model_history": True
         }
         X_train, y_train = load_boston(return_X_y=True)
@@ -167,7 +208,7 @@ class TestAutoML(unittest.TestCase):
         automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
                               X_val=X_train[n:], y_val=y_train[n:],
                               **automl_settings)
-        assert automl_experiment.eval_method == 'holdout'
+        assert automl_experiment._state.eval_method == 'holdout'
         print(automl_experiment.predict(X_train))
         print(automl_experiment.model)
         print(automl_experiment.config_history)
@@ -185,6 +226,7 @@ class TestAutoML(unittest.TestCase):
             "task": 'classification',
             "log_file_name": "test/sparse_classification.log",
             "split_type": "uniform",
+            "n_jobs": 1,
             "model_history": True
         }
         X_train = scipy.sparse.random(1554, 21, dtype=int)
@@ -207,6 +249,7 @@ class TestAutoML(unittest.TestCase):
             "metric": 'mae',
             "task": 'regression',
             "log_file_name": "test/sparse_regression.log",
+            "n_jobs": 1,
             "model_history": True
         }
         X_train = scipy.sparse.random(300, 900, density=0.0001)
@@ -216,7 +259,7 @@ class TestAutoML(unittest.TestCase):
         automl_experiment.fit(X_train=X_train, y_train=y_train,
                               X_val=X_val, y_val=y_val,
                               **automl_settings)
-        assert automl_experiment.X_val.shape == X_val.shape
+        assert automl_experiment._state.X_val.shape == X_val.shape
         print(automl_experiment.predict(X_train))
         print(automl_experiment.model)
         print(automl_experiment.config_history)
@@ -237,6 +280,7 @@ class TestAutoML(unittest.TestCase):
             "log_file_name": "test/sparse_classification.log",
             "estimator_list": ["xgboost"],
             "log_type": "all",
+            "n_jobs": 1,
         }
         X_train = scipy.sparse.eye(900000)
         y_train = np.random.randint(2, size=900000)
@@ -259,6 +303,7 @@ class TestAutoML(unittest.TestCase):
             "log_file_name": "test/sparse_classification.log",
             "estimator_list": ["lrl1", "lrl2"],
             "log_type": "all",
+            "n_jobs": 1,
         }
         X_train = scipy.sparse.random(3000, 900, density=0.1)
         y_train = np.random.randint(2, size=3000)
@@ -279,6 +324,7 @@ class TestAutoML(unittest.TestCase):
             'eval_method': 'cv',
             "task": 'regression',
             "log_file_name": "test/sparse_regression.log",
+            "n_jobs": 1,
             "model_history": True
         }
         X_train = scipy.sparse.random(100, 100)
@@ -28,11 +28,12 @@ class TestLogging(unittest.TestCase):
         # Run a simple job.
         automl_experiment = AutoML()
         automl_settings = {
-            "time_budget": 2,
+            "time_budget": 1,
             "metric": 'mse',
             "task": 'regression',
             "log_file_name": training_log,
             "log_training_metric": True,
+            "n_jobs": 1,
             "model_history": True
         }
         X_train, y_train = load_boston(return_X_y=True)
test/test_pytorch_cifar10.py (new file, 351 lines)
@@ -0,0 +1,351 @@
import unittest
import os
import time

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_pytorch_cifar10.log'))


import numpy as np
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim
    from torch.utils.data import random_split
    import torchvision
    import torchvision.transforms as transforms


    # __net_begin__
    class Net(nn.Module):
        def __init__(self, l1=120, l2=84):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, l1)
            self.fc2 = nn.Linear(l1, l2)
            self.fc3 = nn.Linear(l2, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(-1, 16 * 5 * 5)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x
    # __net_end__
except ImportError:
    print("skip test_pytorch because torchvision cannot be imported.")


# __load_data_begin__
def load_data(data_dir="test/data"):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform)

    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform)

    return trainset, testset
# __load_data_end__


# __train_begin__
def train_cifar(config, checkpoint_dir=None, data_dir=None):
    if not "l1" in config:
        logger.warning(config)
    net = Net(2 ** config["l1"], 2 ** config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)

    # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint
    # should be restored.
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainset, testset = load_data(data_dir)

    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs])

    trainloader = torch.utils.data.DataLoader(
        train_subset,
        batch_size=int(2 ** config["batch_size"]),
        shuffle=True,
        num_workers=4)
    valloader = torch.utils.data.DataLoader(
        val_subset,
        batch_size=int(2 ** config["batch_size"]),
        shuffle=True,
        num_workers=4)

    from ray import tune

    for epoch in range(int(round(config["num_epochs"]))):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        # Here we save a checkpoint. It is automatically registered with
        # Ray Tune and will potentially be passed as the `checkpoint_dir`
        # parameter in future iterations.
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save(
                (net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")
# __train_end__


# __test_acc_begin__
def _test_accuracy(net, device="cpu"):
    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total
# __test_acc_end__


# __main_begin__
def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100,
                 gpus_per_trial=2):
    data_dir = os.path.abspath("test/data")
    load_data(data_dir)  # Download data for all trials before starting the run
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    if method in ['BlendSearch', 'BOHB', 'Optuna']:
        config = {
            "l1": tune.randint(2, 8),
            "l2": tune.randint(2, 8),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
            "batch_size": tune.randint(1, 4)  # tune.choice([2, 4, 8, 16])
        }
    else:
        config = {
            "l1": tune.randint(2, 9),
            "l2": tune.randint(2, 9),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs + 1, q=1),
            "batch_size": tune.randint(1, 5)  # tune.choice([2, 4, 8, 16])
        }
    import ray
    time_budget_s = 3600
    start_time = time.time()
    if method == 'BlendSearch':
        result = tune.run(
            ray.tune.with_parameters(train_cifar, data_dir=data_dir),
            init_config={
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            },
            metric="loss",
            mode="min",
            max_resource=max_num_epochs,
            min_resource=1,
            report_intermediate_result=True,
            resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
            config=config,
            local_dir='logs/',
            num_samples=num_samples,
            time_budget_s=time_budget_s,
            use_ray=True)
    else:
        if 'ASHA' == method:
            algo = None
        elif 'BOHB' == method:
            from ray.tune.schedulers import HyperBandForBOHB
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB()
            scheduler = HyperBandForBOHB(max_t=max_num_epochs)
        elif 'Optuna' == method:
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif 'CFO' == method:
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            }])
        elif 'Nevergrad' == method:
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        if method != 'BOHB':
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(
                max_t=max_num_epochs,
                grace_period=1)
        result = tune.run(
            tune.with_parameters(train_cifar, data_dir=data_dir),
            resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
            config=config,
            metric="loss",
            mode="min",
            num_samples=num_samples, time_budget_s=time_budget_s,
            scheduler=scheduler, search_alg=algo
        )
    ray.shutdown()

    logger.info(f"method={method}")
    logger.info(f"n_samples={num_samples}")
    logger.info(f"time={time.time()-start_time}")
    best_trial = result.get_best_trial("loss", "min", "all")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.metric_analysis["loss"]["min"]))
    logger.info("Best trial final validation accuracy: {}".format(
        best_trial.metric_analysis["accuracy"]["max"]))

    best_trained_model = Net(2 ** best_trial.config["l1"],
                             2 ** best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")

    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    test_acc = _test_accuracy(best_trained_model, device)
    logger.info("Best trial test set accuracy: {}".format(test_acc))
# __main_end__


gpus_per_trial = 0  # .5
num_samples = 500


def _test_cifar10_bs():
    cifar10_main(num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_cfo():
    cifar10_main('CFO',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_optuna():
    cifar10_main('Optuna',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_asha():
    cifar10_main('ASHA',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_bohb():
    cifar10_main('BOHB',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


def _test_cifar10_nevergrad():
    cifar10_main('Nevergrad',
                 num_samples=num_samples, gpus_per_trial=gpus_per_trial)


if __name__ == "__main__":
    unittest.main()
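One design choice in the file above is worth a note: `l1`, `l2`, and `batch_size` are searched as integer exponents and decoded with `2 ** x` inside `train_cifar`, which gives a log-scale search over powers of two (the commented-out `tune.choice([2, 4, 8, 16])` is the categorical equivalent). A small sketch of the decoding, using a hypothetical sampled configuration:

    # Hypothetical sample drawn from the config space defined in cifar10_main.
    sampled = {"l1": 3, "l2": 5, "batch_size": 2}
    l1_units = 2 ** sampled["l1"]            # width of fc1 -> fc2: 8
    l2_units = 2 ** sampled["l2"]            # width of fc2 -> fc3: 32
    batch = int(2 ** sampled["batch_size"])  # batch size: 4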
@@ -23,6 +23,8 @@ class TestTrainingLog(unittest.TestCase):
             "task": 'regression',
             "log_file_name": filename,
             "log_training_metric": True,
+            "mem_thres": 1024*1024,
+            "n_jobs": 1,
             "model_history": True
         }
         X_train, y_train = load_boston(return_X_y=True)
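The new `mem_thres` setting ties back to the `size(cls, config)` classmethod added to the custom estimator earlier: FLAML appears to compare that per-configuration memory estimate against the threshold (in bytes) and prune configurations that exceed it. A worked instance of the estimate (my arithmetic, not from the diff):

    # For max_leaf=1000 and n_iter=10, MyRegularizedGreedyForest.size gives
    # (1000*3 + 999*4 + 1.0) * 10 * 8 = 559,760 bytes, which is under the
    # 1024*1024-byte mem_thres used in this test.
    size_bytes = (1000 * 3 + (1000 - 1) * 4 + 1.0) * 10 * 8
    assert size_bytes < 1024 * 1024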
test/test_tune.py (new file, 200 lines)
@@ -0,0 +1,200 @@
import unittest
import os
import time
from sklearn.model_selection import train_test_split
import sklearn.metrics
import sklearn.datasets
try:
    from ray.tune.integration.xgboost import TuneReportCheckpointCallback
except ImportError:
    print("skip test_tune because ray tune cannot be imported.")
import xgboost as xgb

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))


def train_breast_cancer(config: dict):
    # This is a simple training function to be passed into Tune
    # Load dataset
    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.25)
    # Build input matrices for XGBoost
    train_set = xgb.DMatrix(train_x, label=train_y)
    test_set = xgb.DMatrix(test_x, label=test_y)
    # HyperOpt returns a tuple
    config = config.copy()
    config["eval_metric"] = ["logloss", "error"]
    config["objective"] = "binary:logistic"
    # Train the classifier, using the Tune callback
    xgb.train(
        config,
        train_set,
        evals=[(test_set, "eval")],
        verbose_eval=False,
        callbacks=[TuneReportCheckpointCallback(filename="model.xgb")])


def _test_xgboost(method='BlendSearch'):
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples*n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples*n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                    scheduler = None
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter,
                        grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space, local_dir='logs/',
                    num_samples=num_samples*n_cpu, time_budget_s=time_budget_s,
                    scheduler=scheduler, search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")


def test_xgboost_bs():
    _test_xgboost()


def test_xgboost_cfo():
    _test_xgboost('CFO')


def _test_xgboost_dragonfly():
    _test_xgboost('Dragonfly')


def _test_xgboost_skopt():
    _test_xgboost('SkOpt')


def _test_xgboost_nevergrad():
    _test_xgboost('Nevergrad')


def _test_xgboost_zoopt():
    _test_xgboost('ZOOpt')


def _test_xgboost_ax():
    _test_xgboost('Ax')


def __test_xgboost_hyperopt():
    _test_xgboost('HyperOpt')


def _test_xgboost_optuna():
    _test_xgboost('Optuna')


def _test_xgboost_asha():
    _test_xgboost('ASHA')


def _test_xgboost_bohb():
    _test_xgboost('BOHB')


if __name__ == "__main__":
    unittest.main()
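One detail in `_test_xgboost` above: the BlendSearch branch passes two arguments the plain Ray Tune branch cannot, `init_config` (a low-cost starting configuration) and `cat_hp_cost` (relative cost hints for the choices of a categorical hyperparameter). My reading of `cat_hp_cost={"min_child_weight": [6, 3, 2]}` is that the costs align positionally with `tune.choice([1, 2, 3])`, making `min_child_weight=1` the most expensive choice; treat that alignment as an assumption based on this test rather than documented behavior. Condensed from the call above:

    from flaml import tune

    analysis = tune.run(
        train_breast_cancer,
        config={
            'max_depth': tune.randint(1, 8),
            'min_child_weight': tune.choice([1, 2, 3]),
            'subsample': tune.uniform(0.5, 1.0),
            'eta': tune.loguniform(1e-4, 1e-1)},
        init_config={'max_depth': 1, 'min_child_weight': 3},  # cheap start
        cat_hp_cost={'min_child_weight': [6, 3, 2]},  # cost per choice, in order
        metric='eval-logloss', mode='min', num_samples=64)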
test/test_xgboost2d.py (new file, 69 lines)
@@ -0,0 +1,69 @@
import unittest

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from flaml.automl import AutoML
from flaml.model import XGBoostSklearnEstimator
from flaml import tune


# dataset = "blood-transfusion-service-center"
# dataset = "Australian"
dataset = "credit-g"
# dataset = "phoneme"
# dataset = "kc1"


class XGBoost2D(XGBoostSklearnEstimator):

    @classmethod
    def search_space(cls, data_size, task):
        upper = min(32768, int(data_size))
        return {
            'n_estimators': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                'init_value': 4,
            },
            'max_leaves': {
                'domain': tune.qloguniform(lower=4, upper=upper, q=1),
                'init_value': 4,
            },
        }


def test_simple(method=None):
    automl = AutoML()
    automl.add_learner(learner_name = 'XGBoost2D',
                       learner_class = XGBoost2D)

    automl_settings = {
        "estimator_list": ['XGBoost2D'],
        # "metric": 'accuracy',
        "task": 'classification',
        "log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
        # "model_history": True,
        # "log_training_metric": True,
        # "split_type": split_type,
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "time_budget": 3  # 6000,
    }

    X, y = fetch_openml(name=dataset, return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)


def _test_optuna():
    test_simple(method="optuna")


def test_grid():
    test_simple(method="grid")


if __name__ == "__main__":
    unittest.main()