mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
cleanup
This commit is contained in:
@@ -1,14 +0,0 @@
|
||||
FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04

# Python dependencies for running FLAML tuning on Ray inside AzureML.
# One layer instead of one RUN per package keeps the image smaller; the
# extra quoting protects the bracket expression from shell globbing.
RUN pip install azureml-core "flaml[blendsearch,ray]" ray-on-aml

# Ports published by the container — presumably the Ray dashboard (8265)
# and the Ray head/GCS port (6379); confirm against the Ray cluster config.
EXPOSE 8265
EXPOSE 6379

USER root

# `update` and `install` must share a single layer: a cached `update`
# layer can go stale and make later `install` steps fail on missing
# packages. Removing the apt lists afterwards keeps the layer small.
RUN apt-get update \
    && apt-get install -y jq rsync \
    && rm -rf /var/lib/apt/lists/*
|
||||
@@ -1,402 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
from sklearn.model_selection import train_test_split
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from flaml import AutoML
|
||||
from flaml.automl.model import LGBMEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
    """LGBM estimator whose search space starts at deliberately huge values.

    Both ``n_estimators`` and ``num_leaves`` are initialized at 32768 so the
    first sampled configurations are very expensive — this is used by the
    OOM-skip test in this file — while ``low_cost_init_value`` keeps a cheap
    fallback configuration (4) available to the searcher.
    """

    @classmethod
    def search_space(cls, **params):
        # Log-scale integer domains; init values sit at the expensive end on purpose.
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
        }
|
||||
|
||||
|
||||
class TestClassification(unittest.TestCase):
    """End-to-end AutoML classification smoke tests.

    Covers preprocessing of mixed-type DataFrames, datetime features, sparse
    inputs, parallel (ray-backed) runs, ensembling, and custom learners.
    """

    def test_preprocess(self):
        """Run AutoML on a small mixed-type frame across several estimator lists.

        ``f3`` mixes strings with floats and ``f4`` is boolean, exercising the
        internal feature preprocessing; later fits reuse the internally stored
        training data (``keep_search_state``) and the ``skip_transform`` flag.
        """
        automl = AutoML()
        X = pd.DataFrame(
            {
                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
                "f2": [3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0, 12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0],
                # deliberately mixed str/float column to exercise preprocessing
                "f3": ["a", "b", "a", "c", "c", "b", "b", "b", "b", "a", "b", 1.0, 1.0, "a"],
                "f4": [True, True, False, True, True, False, False, False, True, True, False, False, True, True],
            }
        )
        y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["xgboost", "catboost", "kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
        }
        automl.fit(X, y, **automl_settings)
        del automl

        automl = AutoML()
        automl_settings = {
            "time_budget": 6,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["catboost", "lrl2"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
        }
        automl.fit(X, y, **automl_settings)
        print(automl.feature_names_in_)
        print(automl.feature_importances_)
        del automl

        automl = AutoML()
        # Run two concurrent trials only when ray is importable.
        try:
            import ray

            n_concurrent_trials = 2
        except ImportError:
            n_concurrent_trials = 1
        automl_settings = {
            "time_budget": 2,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["lrl2", "kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            "verbose": 4,
            "ensemble": True,
            "n_concurrent_trials": n_concurrent_trials,
        }
        automl.fit(X, y, **automl_settings)
        del automl

        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["lgbm", "catboost", "kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
        }
        # keep_search_state retains the (already transformed) training data
        # on the AutoML object so it can be reused below.
        automl_settings["keep_search_state"] = True
        automl.fit(X, y, **automl_settings)
        X, y = automl._X_train_all, automl._y_train_all
        del automl

        # X, y are now preprocessed, so skip_transform=True is valid here.
        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "accuracy",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
            "skip_transform": True,
        }
        automl.fit(X, y, **automl_settings)
        del automl

        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["kneighbor"],
            "eval_method": "cv",
            "n_splits": 3,
            "metric": "roc_auc_weighted",
            "log_training_metric": True,
            # "verbose": 4,
            "ensemble": True,
            "skip_transform": True,
        }
        automl.fit(X, y, **automl_settings)
        del automl

    def test_binary(self):
        """Fit the 'binary' task on breast_cancer and run predict once."""
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "task": "binary",
            "log_file_name": "test/breast_cancer.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = load_breast_cancer(return_X_y=True)
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        _ = automl_experiment.predict(X_train)

    def test_datetime_columns(self):
        """Fit on a frame whose features are all datetime columns.

        Column "B" is constant and "year_A" shadows a derived-feature naming
        pattern; both should survive preprocessing without error.
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "log_file_name": "test/datetime_columns.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        fake_df = pd.DataFrame(
            {
                "A": [
                    datetime(1900, 2, 3),
                    datetime(1900, 3, 4),
                    datetime(1900, 3, 4),
                    datetime(1900, 3, 4),
                    datetime(1900, 7, 2),
                    datetime(1900, 8, 9),
                ],
                "B": [
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                    datetime(1900, 1, 1),
                ],
                "year_A": [
                    datetime(1900, 1, 2),
                    datetime(1900, 8, 1),
                    datetime(1900, 1, 4),
                    datetime(1900, 6, 1),
                    datetime(1900, 1, 5),
                    datetime(1900, 4, 1),
                ],
            }
        )
        y = np.array([0, 1, 0, 1, 0, 0])
        automl_experiment.fit(X_train=fake_df, y_train=y, **automl_settings)
        _ = automl_experiment.predict(fake_df)

    def test_sparse_matrix_xgboost(self):
        """Fit xgboost on a large sparse identity matrix with a callback.

        NOTE(review): the second half pip-installs xgboost==1.3.3 into the
        running environment and then upgrades it back — this mutates the
        environment and relies on network access.
        """
        automl = AutoML()
        automl_settings = {
            "time_budget": 3,
            "metric": "ap",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        import xgboost as xgb

        # Verify user-supplied callbacks are forwarded to the estimator.
        callback = xgb.callback.TrainingCallback()
        automl.fit(X_train=X_train, y_train=y_train, callbacks=[callback], **automl_settings)
        print(automl.predict(X_train))
        print(automl.model)
        print(automl.config_history)
        print(automl.best_model_for_estimator("xgboost"))
        print(automl.best_iteration)
        print(automl.best_estimator)

        # test an old version of xgboost
        import subprocess
        import sys

        subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost==1.3.3", "--user"])
        automl = AutoML()
        automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl.feature_names_in_)
        print(automl.feature_importances_)
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "xgboost", "--user"])

    def test_ray_classification(self):
        """Fit with ray (use_ray / n_concurrent_trials); skip when ray is absent.

        NOTE(review): the except clause assumes the ImportError surfaces from
        inside automl.fit when ray is unavailable — confirm against flaml.
        """
        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

        automl = AutoML()
        try:
            automl.fit(
                X_train,
                y_train,
                X_val=X_test,
                y_val=y_test,
                time_budget=10,
                task="classification",
                use_ray=True,
            )
            automl.fit(
                X_train,
                y_train,
                X_val=X_test,
                y_val=y_test,
                time_budget=10,
                task="classification",
                n_concurrent_trials=2,
                ensemble=True,
            )
        except ImportError:
            return

    def test_parallel_xgboost(self, hpo_method=None):
        """Fit xgboost with two concurrent trials on a ray object-store input.

        ``hpo_method`` is forwarded into the settings so sibling tests can
        rerun this flow with a different search strategy.
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "metric": "ap",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["xgboost"],
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 2,
            "hpo_method": hpo_method,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)
        try:
            import ray

            # Put the training matrix in the ray object store and fit on the ref.
            X_train_ref = ray.put(X_train)
            automl_experiment.fit(X_train=X_train_ref, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("xgboost"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_parallel_xgboost_others(self):
        # use random search as the hpo_method
        self.test_parallel_xgboost(hpo_method="random")

    def test_random_skip_oom(self):
        """Random search with the deliberately huge MyLargeLGBM learner.

        The oversized init config is expected to stress memory; the run should
        skip/survive rather than crash (see MyLargeLGBM above).
        """
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
        automl_settings = {
            "time_budget": 2,
            "task": "classification",
            "log_file_name": "test/sparse_classification_oom.log",
            "estimator_list": ["large_lgbm"],
            "log_type": "all",
            "n_jobs": 1,
            "hpo_method": "random",
            "n_concurrent_trials": 2,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)

        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("large_lgbm"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            print("skipping concurrency test as ray is not installed")
            return

    def test_sparse_matrix_lr(self):
        """Fit l1/l2 logistic regression on a random sparse matrix.

        The first fit passes train_time_limit=1; the second reruns with a
        larger time budget and inspects the results.
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 3,
            "metric": "f1",
            "task": "classification",
            "log_file_name": "test/sparse_classification.log",
            "estimator_list": ["lrl1", "lrl2"],
            "log_type": "all",
            "n_jobs": 1,
        }
        X_train = scipy.sparse.random(3000, 3000, density=0.1)
        y_train = np.random.randint(2, size=3000)
        automl_experiment.fit(X_train=X_train, y_train=y_train, train_time_limit=1, **automl_settings)
        automl_settings["time_budget"] = 5
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("lrl2"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, exercise only the preprocessing test.
    test = TestClassification()
    test.test_preprocess()
|
||||
@@ -1,163 +0,0 @@
|
||||
from urllib.error import URLError
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.externals._arff import ArffException
|
||||
from functools import partial
|
||||
from flaml.automl import AutoML, size
|
||||
from flaml import tune
|
||||
|
||||
dataset = "credit-g"
|
||||
|
||||
|
||||
def test_metric_constraints():
    """Fit AutoML with a prediction-time constraint, then rerun via tune.run.

    Falls back from OpenML's credit-g to sklearn's wine dataset when the
    download fails (parse error, bad value, or network failure).
    """
    # impose metric constraints via "pred_time_limit"
    automl = AutoML()

    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
        "log_file_name": f"test/constraints_{dataset}.log",
        "n_jobs": 1,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 2,
        # very tight per-sample prediction-time limit (seconds)
        "pred_time_limit": 5.1e-05,
    }

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError, URLError):
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    # Re-evaluate the best config through the trainable directly.
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)
    print("metric constraints used in automl", automl.metric_constraints)

    # Rerun the same search through the low-level tune API, reusing the
    # search space, constraints, and resources recorded on the automl object.
    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
|
||||
|
||||
|
||||
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val,
    weight_train,
    *args,
):
    """Custom FLAML metric: penalized validation log-loss plus extra metrics.

    The objective is ``(1 + alpha) * val_loss - alpha * train_loss`` with
    ``alpha = 0.5``, so overfitting (large val/train gap) is penalized. The
    second return value is a metrics_to_log dict reporting the raw validation
    loss, the val-train gap, and the per-sample prediction time.
    """
    import time

    from sklearn.metrics import log_loss

    # Time the validation-set prediction to report per-sample latency.
    tic = time.time()
    proba_val = estimator.predict_proba(X_val)
    elapsed_per_row = (time.time() - tic) / len(X_val)

    loss_on_val = log_loss(y_val, proba_val, labels=labels, sample_weight=weight_val)
    loss_on_train = log_loss(
        y_train,
        estimator.predict_proba(X_train),
        labels=labels,
        sample_weight=weight_train,
    )

    alpha = 0.5
    objective = loss_on_val * (1 + alpha) - alpha * loss_on_train
    metrics_to_log = {
        "val_loss": loss_on_val,
        "val_train_loss_gap": loss_on_val - loss_on_train,
        "pred_time": elapsed_per_row,
    }
    return objective, metrics_to_log
|
||||
|
||||
|
||||
def test_metric_constraints_custom():
    """Fit AutoML with constraints on metrics reported by ``custom_metric``.

    Constrains the ``pred_time`` and ``val_train_loss_gap`` entries of the
    metrics_to_log dict returned by the custom metric, then reruns the best
    search through the low-level tune API.
    """
    automl = AutoML()
    # When you are providing a custom metric function, you can also specify constraints
    # on one or more of the metrics reported via the second object, i.e., a metrics_to_log dictionary,
    # returned by the custom metric function.
    # For example, in the following code, we add a constraint on the `pred_time` metrics and `val_train_loss_gap` metric
    # reported in `custom_metric` defined above, respectively.
    automl_settings = {
        "estimator_list": ["xgboost"],
        "task": "classification",
        "log_file_name": f"test/constraints_custom_{dataset}.log",
        "n_jobs": 1,
        "metric": custom_metric,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 1,
        "metric_constraints": [
            ("pred_time", "<=", 5.1e-05),
            ("val_train_loss_gap", "<=", 0.05),
        ],
    }

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    # Fix: also catch URLError (network failure), matching test_metric_constraints
    # above, so offline runs fall back to load_wine instead of erroring out.
    except (ArffException, ValueError, URLError):
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    print("Best minimization objective on validation data: {0:.4g}".format(automl.best_loss))
    print(
        "pred_time of the best config on validation data: {0:.4g}".format(
            automl.metrics_for_best_config[1]["pred_time"]
        )
    )
    print(
        "val_train_loss_gap of the best config on validation data: {0:.4g}".format(
            automl.metrics_for_best_config[1]["val_train_loss_gap"]
        )
    )

    # Re-evaluate the best config through the trainable directly.
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)
    print("metric constraints in automl", automl.metric_constraints)

    # Rerun the same search through the low-level tune API, reusing the
    # search space, constraints, and resources recorded on the automl object.
    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run both constraint tests when executed as a script.
    test_metric_constraints()
    test_metric_constraints_custom()
|
||||
@@ -1,65 +0,0 @@
|
||||
import sys
|
||||
import pytest
|
||||
from flaml import AutoML, tune
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_custom_hp_nlp():
    """Fit a seq-classification transformer with pinned hyperparameters.

    Overrides the transformer search space so the model path and epoch count
    are fixed instead of searched, and disables fp16 for portability.
    """
    from test.nlp.utils import get_toy_data_seqclassification, get_automl_settings

    X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()

    automl = AutoML()

    automl_settings = get_automl_settings()
    # Fix: the original code first assigned custom_hp = None and immediately
    # overwrote it — the dead assignment is removed.
    automl_settings["custom_hp"] = {
        "transformer": {
            "model_path": {
                "domain": tune.choice(["google/electra-small-discriminator"]),
            },
            "num_train_epochs": {"domain": 3},
        }
    }
    automl_settings["fit_kwargs_by_estimator"] = {
        "transformer": {
            "output_dir": "test/data/output/",
            "fp16": False,
        }
    }
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
|
||||
def test_custom_hp():
    """Fit AutoML on iris with per-estimator hyperparameter overrides.

    Shrinks xgboost's n_estimators range, freezes rf's max_leaves
    (domain=None disables the search for that hyperparameter), and enables
    lgbm row subsampling.
    """
    from sklearn.datasets import load_iris

    features, labels = load_iris(return_X_y=True)

    xgboost_overrides = {
        "n_estimators": {
            "domain": tune.lograndint(lower=1, upper=100),
            "low_cost_init_value": 1,
        },
    }
    rf_overrides = {
        "max_leaves": {
            "domain": None,  # disable search
        },
    }
    lgbm_overrides = {
        "subsample": {
            "domain": tune.uniform(lower=0.1, upper=1.0),
            "init_value": 1.0,
        },
        "subsample_freq": {
            "domain": 1,  # subsample_freq must > 0 to enable subsample
        },
    }
    custom_hp = {
        "xgboost": xgboost_overrides,
        "rf": rf_overrides,
        "lgbm": lgbm_overrides,
    }

    automl = AutoML()
    automl.fit(features, labels, custom_hp=custom_hp, time_budget=2)
    print(automl.best_config_per_estimator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Only the non-NLP test runs as a script (the NLP one needs pytest markers).
    test_custom_hp()
|
||||
@@ -1,672 +0,0 @@
|
||||
import datetime
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from flaml import AutoML
|
||||
|
||||
from flaml.automl.task.time_series_task import TimeSeriesTask
|
||||
|
||||
|
||||
def test_forecast_automl(budget=10, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
    """End-to-end ts_forecast run on the statsmodels CO2 dataset.

    Fits via the dataframe API, checks mape <= 0.005 on the held-out horizon,
    then refits via the X_train/y_train API. Falls back to the given
    estimator list when prophet is not installed.

    NOTE(review): the mutable list default argument is shared across calls;
    harmless here because it is never mutated, but worth knowing.
    """
    # using dataframe
    import statsmodels.api as sm

    data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
    data = data.bfill().ffill().to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
    num_samples = data.shape[0]
    time_horizon = 12
    # Last 12 months are held out as the forecast target.
    split_idx = num_samples - time_horizon
    df = data[:split_idx]
    X_test = data[split_idx:]["ds"]
    y_test = data[split_idx:]["y"]
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast",  # task type
        "log_file_name": "test/CO2_forecast.log",  # flaml log file
        "eval_method": "holdout",
        "label": "y",
    }
    """The main flaml automl API"""
    try:
        import prophet

        automl.fit(dataframe=df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=df,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
    """ retrieve best config and best learner"""
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    """ compute different metric values on testing dataset"""
    from flaml.automl.ml import sklearn_metric_loss_score

    mape = sklearn_metric_loss_score("mape", y_pred, y_test)
    print("mape", "=", mape)
    assert mape <= 0.005, "the mape of flaml should be less than 0.005"
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # Same fit via the X_train/y_train API instead of a single dataframe.
    X_train = df[["ds"]]
    y_train = df["y"]
    automl = AutoML()
    try:
        # NOTE(review): assumes automl.fit raises ImportError internally when
        # prophet is missing — confirm against flaml.
        automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            X_train=X_train,
            y_train=y_train,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
|
||||
|
||||
|
||||
def test_models(budget=3):
    """Smoke-test each registered ts_forecast estimator on synthetic data.

    TFT is skipped here because it is covered by its own test.
    """
    num_points = 100
    frame = pd.DataFrame(
        {
            "A": pd.date_range(start="1900-01-01", periods=num_points, freq="D"),
        }
    )
    target = np.exp(np.random.randn(num_points))

    task = TimeSeriesTask("ts_forecast")

    for estimator_name in task.estimators.keys():
        if estimator_name == "tft":
            # TFT is covered by its own test
            continue
        automl = AutoML()
        automl.fit(
            X_train=frame[:72],  # a single column of timestamps
            y_train=target[:72],  # one value per timestamp
            estimator_list=[estimator_name],
            period=12,  # forecast horizon, e.g. 12 months
            task="ts_forecast",
            time_budget=budget,  # seconds
        )
        automl.predict(frame[72:])
|
||||
|
||||
|
||||
def test_numpy():
    """ts_forecast with plain numpy datetime64 input instead of a DataFrame.

    Second fit restricts to arima/sarimax and also checks predicting by step
    count (predict(12)) instead of by timestamps.
    """
    X_train = np.arange("2014-01", "2021-01", dtype="datetime64[M]")
    y_train = np.random.random(size=len(X_train))
    automl = AutoML()
    automl.fit(
        X_train=X_train[:72],  # a single column of timestamp
        y_train=y_train[:72],  # value for each timestamp
        period=12,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        time_budget=3,  # time budget in seconds
        log_file_name="test/ts_forecast.log",
        n_splits=3,  # number of splits
    )
    print(automl.predict(X_train[72:]))

    automl = AutoML()
    automl.fit(
        X_train=X_train[:72],  # a single column of timestamp
        y_train=y_train[:72],  # value for each timestamp
        period=12,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        time_budget=1,  # time budget in seconds
        estimator_list=["arima", "sarimax"],
        log_file_name="test/ts_forecast.log",
    )
    print(automl.predict(X_train[72:]))
    # an alternative way to specify predict steps for arima/sarimax
    print(automl.predict(12))
|
||||
|
||||
|
||||
def test_numpy_large():
    """ts_forecast on a large (70k rows) minute-frequency series.

    Verifies that raw .values arrays (timestamps and a single target column)
    are accepted for a large input.
    """
    # np, pd, and AutoML are already imported at module level — the original
    # function-local re-imports were redundant and have been removed.
    X_train = pd.date_range("2017-01-01", periods=70000, freq="T")
    y_train = pd.DataFrame(np.random.randint(6500, 7500, 70000))
    automl = AutoML()
    automl.fit(
        X_train=X_train[:-10].values,  # a single column of timestamp
        y_train=y_train[:-10].values,  # value for each timestamp
        period=10,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        time_budget=10,  # time budget in seconds
    )
|
||||
|
||||
|
||||
def load_multi_dataset():
    """Download and preprocess the NYC energy-consumption multivariate dataset.

    Returns a daily-resampled DataFrame (reset index, `timeStamp` column)
    with `temp`/`precip` gaps forward-filled and the last two rows — which
    have NaN `demand` — dropped. Requires network access.
    """
    import pandas as pd

    # pd.set_option("display.max_rows", None, "display.max_columns", None)
    df = pd.read_csv(
        "https://raw.githubusercontent.com/srivatsan88/YouTubeLI/master/dataset/nyc_energy_consumption.csv"
    )
    # preprocessing data
    df["timeStamp"] = pd.to_datetime(df["timeStamp"])
    df = df.set_index("timeStamp")
    df = df.resample("D").mean()
    # Fix: .ffill() replaces the deprecated fillna(method="ffill") and matches
    # the .bfill().ffill() style already used in test_forecast_automl above.
    df["temp"] = df["temp"].ffill()
    df["precip"] = df["precip"].ffill()
    df = df[:-2]  # last two rows are NaN for 'demand' column so remove them
    df = df.reset_index()

    return df
|
||||
|
||||
|
||||
def test_multivariate_forecast_num(budget=5, estimators_when_no_prophet=["arima", "sarimax", "holt-winters"]):
    """Multivariate ts_forecast on the NYC energy dataset (numeric regressors).

    Holds out the last 180 days; the test frame must carry the regressor
    columns (`temp`, `precip`) alongside the timestamps. Falls back to the
    given estimator list when prophet is missing.

    NOTE(review): the mutable list default argument is shared across calls;
    harmless here because it is never mutated.
    """
    df = load_multi_dataset()
    # split data into train and test
    time_horizon = 180
    num_samples = df.shape[0]
    split_idx = num_samples - time_horizon
    train_df = df[:split_idx]
    test_df = df[split_idx:]
    # test dataframe must contain values for the regressors / multivariate variables
    X_test = test_df[["timeStamp", "temp", "precip"]]
    y_test = test_df["demand"]
    # return
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast",  # task type
        "log_file_name": "test/energy_forecast_numerical.log",  # flaml log file
        "eval_method": "holdout",
        "log_type": "all",
        "label": "demand",
    }
    """The main flaml automl API"""
    try:
        import prophet

        automl.fit(dataframe=train_df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=train_df,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
    """ retrieve best config and best learner"""
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    """ compute different metric values on testing dataset"""
    from flaml.automl.ml import sklearn_metric_loss_score

    print("mape", "=", sklearn_metric_loss_score("mape", y_pred, y_test))
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # import matplotlib.pyplot as plt
    #
    # plt.figure()
    # plt.plot(X_test["timeStamp"], y_test, label="Actual Demand")
    # plt.plot(X_test["timeStamp"], y_pred, label="FLAML Forecast")
    # plt.xlabel("Date")
    # plt.ylabel("Energy Demand")
    # plt.legend()
    # plt.show()
|
||||
|
||||
|
||||
def load_multi_dataset_cat(time_horizon):
    """Variant of load_multi_dataset with engineered categorical regressors.

    Adds a `season` string column and a binary `above_monthly_avg` column,
    then splits the last `time_horizon` rows off as the test frame. The
    train frame keeps only timeStamp/demand plus the engineered features;
    the test frame additionally retains `temp` and `month`.
    """
    df = load_multi_dataset()

    df = df[["timeStamp", "demand", "temp"]]

    # feature engineering - use discrete values to denote different categories
    def season(date):
        # Map a timestamp to its (approximate) northern-hemisphere season.
        date = (date.month, date.day)
        spring = (3, 20)
        summer = (6, 21)
        fall = (9, 22)
        winter = (12, 21)
        if date < spring or date >= winter:
            return "winter"  # winter 0
        elif spring <= date < summer:
            return "spring"  # spring 1
        elif summer <= date < fall:
            return "summer"  # summer 2
        elif fall <= date < winter:
            return "fall"  # fall 3

    def get_monthly_avg(data):
        # NOTE(review): this mutates its input — it adds a "month" column to
        # the caller's df, which the later `del train_df["month"]` relies on.
        data["month"] = data["timeStamp"].dt.month
        data = data[["month", "temp"]].groupby("month")
        data = data.agg({"temp": "mean"})
        return data

    monthly_avg = get_monthly_avg(df).to_dict().get("temp")

    def above_monthly_avg(date, temp):
        # 1 if this day's temperature exceeds the month's average, else 0.
        month = date.month
        if temp > monthly_avg.get(month):
            return 1
        else:
            return 0

    df["season"] = df["timeStamp"].apply(season)
    df["above_monthly_avg"] = df.apply(lambda x: above_monthly_avg(x["timeStamp"], x["temp"]), axis=1)

    # split data into train and test
    num_samples = df.shape[0]
    split_idx = num_samples - time_horizon
    train_df = df[:split_idx]
    test_df = df[split_idx:]

    # NOTE(review): deleting columns on a slice of df triggers pandas'
    # chained-assignment warning; works today but is fragile.
    del train_df["temp"], train_df["month"]

    return train_df, test_df
|
||||
|
||||
|
||||
def test_multivariate_forecast_cat(budget=5, estimators_when_no_prophet=None):
    """Multivariate ts_forecast on energy-demand data with categorical regressors.

    Fits an AutoML forecaster (prophet when importable, otherwise the classical
    estimators), then reports holdout metrics and log history.

    Args:
        budget: Total AutoML running time in seconds.
        estimators_when_no_prophet: Estimator list used when prophet is not
            installed. Defaults to ["arima", "sarimax", "holt-winters"].
    """
    # Fix: avoid a mutable default argument; materialize the fallback here.
    if estimators_when_no_prophet is None:
        estimators_when_no_prophet = ["arima", "sarimax", "holt-winters"]
    time_horizon = 180
    train_df, test_df = load_multi_dataset_cat(time_horizon)
    # The test dataframe must contain values for the regressors / multivariate variables.
    X_test = test_df[["timeStamp", "season", "above_monthly_avg"]]
    y_test = test_df["demand"]
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast",  # task type
        "log_file_name": "test/energy_forecast_categorical.log",  # flaml log file
        "eval_method": "holdout",
        "log_type": "all",
        "label": "demand",
    }
    # The main flaml automl API: prefer prophet when it is importable.
    try:
        import prophet  # noqa: F401

        automl.fit(dataframe=train_df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=train_df,
            **settings,
            estimator_list=estimators_when_no_prophet,
            period=time_horizon,
        )
    # Retrieve best config and best learner.
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    # Pickle and save the automl object.
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    # Compute predictions of the testing dataset.
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    # Compute different metric values on the testing dataset.
    from flaml.automl.ml import sklearn_metric_loss_score

    for metric in ("mape", "rmse", "mse", "mae"):
        print(metric, "=", sklearn_metric_loss_score(metric, y_pred, y_test))
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # import matplotlib.pyplot as plt
    #
    # plt.figure()
    # plt.plot(X_test["timeStamp"], y_test, label="Actual Demand")
    # plt.plot(X_test["timeStamp"], y_pred, label="FLAML Forecast")
    # plt.xlabel("Date")
    # plt.ylabel("Energy Demand")
    # plt.legend()
    # plt.show()
|
||||
|
||||
|
||||
def test_forecast_classification(budget=5):
    """Time-series classification forecasting on the hcrystalball sales data.

    Args:
        budget: Total AutoML running time in seconds.
    """
    from hcrystalball.utils import get_sales_data

    time_horizon = 30
    df = get_sales_data(n_dates=180, n_assortments=1, n_states=1, n_stores=1)
    df = df[["Sales", "Open", "Promo", "Promo2"]]
    # feature engineering: binary label marking above-average sales
    import numpy as np

    df["above_mean_sales"] = np.where(df["Sales"] > df["Sales"].mean(), 1, 0)
    df.reset_index(inplace=True)
    train_df, test_df = df[:-time_horizon], df[-time_horizon:]
    feature_cols = ["Date", "Open", "Promo", "Promo2"]
    X_train, X_test = train_df[feature_cols], test_df[feature_cols]
    y_train, y_test = train_df["above_mean_sales"], test_df["above_mean_sales"]
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "accuracy",  # primary metric
        "task": "ts_forecast_classification",  # task type
        "log_file_name": "test/sales_classification_forecast.log",  # flaml log file
        "eval_method": "holdout",
    }
    # The main flaml automl API.
    automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)
    # Retrieve best config and best learner.
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    # Pickle and save the automl object.
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    # Compute predictions and metrics on the testing dataset.
    y_pred = automl.predict(X_test)
    from flaml.automl.ml import sklearn_metric_loss_score

    print(y_test)
    print(y_pred)
    print("accuracy", "=", 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test))
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
    # import matplotlib.pyplot as plt
    #
    # plt.title("Learning Curve")
    # plt.xlabel("Wall Clock Time (s)")
    # plt.ylabel("Validation Accuracy")
    # plt.scatter(time_history, 1 - np.array(valid_loss_history))
    # plt.step(time_history, 1 - np.array(best_valid_loss_history), where="post")
    # plt.show()
|
||||
|
||||
|
||||
def get_stalliion_data():
    """Load and feature-engineer the stallion demand dataset for panel forecasting.

    Returns:
        (data, special_days): the prepared dataframe and the list of one-hot
        holiday columns that were re-encoded into categoricals.
    """
    from pytorch_forecasting.data.examples import get_stallion_data

    data = get_stallion_data()
    # add time index - for datasets with no missing values, FLAML automates this
    data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
    data["time_idx"] -= data["time_idx"].min()
    # additional features; categories have to be strings
    data["month"] = data.date.dt.month.astype(str).astype("category")
    data["log_volume"] = np.log(data.volume + 1e-8)
    data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean")
    data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean")
    # encode the one-hot special-day columns back into a single categorical each
    special_days = [
        "easter_day",
        "good_friday",
        "new_year",
        "christmas",
        "labor_day",
        "independence_day",
        "revolution_day_memorial",
        "regional_games",
        "beer_capital",
        "music_fest",
    ]
    data[special_days] = data[special_days].apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
    return data, special_days
|
||||
|
||||
|
||||
def test_forecast_panel(budget=5):
    """Panel (grouped) time-series forecasting with the TFT estimator.

    Args:
        budget: Total AutoML running time in seconds.
    """
    data, special_days = get_stalliion_data()
    time_horizon = 6  # predict six months
    training_cutoff = data["time_idx"].max() - time_horizon
    data["time_idx"] = data["time_idx"].astype("int")
    # move the timestamp column to the front
    ts_col = data.pop("date")
    data.insert(0, "date", ts_col)
    # FLAML assumes input is not sorted, but we sort here for comparison purposes with y_test
    data = data.sort_values(["agency", "sku", "date"])
    X_train = data[lambda x: x.time_idx <= training_cutoff]
    X_test = data[lambda x: x.time_idx > training_cutoff]
    y_train = X_train.pop("volume")
    y_test = X_test.pop("volume")
    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": "mape",  # primary metric
        "task": "ts_forecast_panel",  # task type
        "log_file_name": "test/stallion_forecast.log",  # flaml log file
        "eval_method": "holdout",
    }
    fit_kwargs_by_estimator = {
        "tft": {
            "max_encoder_length": 24,
            "static_categoricals": ["agency", "sku"],
            "static_reals": ["avg_population_2017", "avg_yearly_household_income_2017"],
            "time_varying_known_categoricals": ["special_days", "month"],
            # a group of categorical variables can be treated as one variable
            "variable_groups": {"special_days": special_days},
            "time_varying_known_reals": [
                "time_idx",
                "price_regular",
                "discount_in_percent",
            ],
            "time_varying_unknown_categoricals": [],
            "time_varying_unknown_reals": [
                "volume",  # target column
                "log_volume",
                "industry_volume",
                "soda_volume",
                "avg_max_temp",
                "avg_volume_by_agency",
                "avg_volume_by_sku",
            ],
            "batch_size": 256,
            "max_epochs": 1,
            "gpu_per_trial": -1,
        }
    }
    # The main flaml automl API.
    automl.fit(
        X_train=X_train,
        y_train=y_train,
        **settings,
        period=time_horizon,
        group_ids=["agency", "sku"],
        fit_kwargs_by_estimator=fit_kwargs_by_estimator,
    )
    # Retrieve best config and best learner.
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)
    # Pickle and save the automl object.
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    # Compute predictions of the testing dataset.
    y_pred = automl.predict(X_test)
    from flaml.automl.ml import sklearn_metric_loss_score

    print(y_test)
    print(y_pred)
    print("mape", "=", sklearn_metric_loss_score("mape", y_pred, y_test))

    def smape(y_pred, y_test):
        """Symmetric MAPE in percent, rounded to two decimals."""
        import numpy as np

        y_test, y_pred = np.array(y_test), np.array(y_pred)
        return round(
            np.mean(np.abs(y_pred - y_test) / ((np.abs(y_pred) + np.abs(y_test)) / 2)) * 100,
            2,
        )

    print("smape", "=", smape(y_pred, y_test))
    # TODO: compute prediction for a specific time series
    # """compute prediction for a specific time series"""
    # a01_sku01_preds = automl.predict(X_test[(X_test["agency"] == "Agency_01") & (X_test["sku"] == "SKU_01")])
    # print("Agency01 SKU_01 predictions: ", a01_sku01_preds)
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for config in config_history:
        print(config)
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
|
||||
|
||||
|
||||
def test_cv_step():
    """Cross-validated ts_forecast with a custom cv_step_size on a synthetic sine series.

    Asserts the prediction over the validation horizon contains no NaNs.
    """
    n = 300
    time_col = "date"
    df = pd.DataFrame(
        {
            time_col: pd.date_range(start="1/1/2001", periods=n, freq="D"),
            "y": np.sin(np.linspace(start=0, stop=200, num=n)),
        }
    )

    def split_by_date(df: pd.DataFrame, dt: datetime.date):
        # normalize to midnight so the comparison includes the cutoff day
        dt = datetime.datetime(dt.year, dt.month, dt.day)
        return df[df[time_col] <= dt], df[df[time_col] > dt]

    horizon = 60
    data_end = df.date.max()
    train_end = data_end - datetime.timedelta(days=horizon)

    train_df, val_df = split_by_date(df, train_end)
    # Note: removed a redundant local `from flaml import AutoML` (the module
    # already imports AutoML) and an unused `import matplotlib.pyplot` that was
    # only needed by the commented-out plotting below.

    tgts = ["y"]
    # tgt = "SERIES_SANCTIONS"

    preds = {}
    for tgt in tgts:
        features = []  # [c for c in train_df.columns if "SERIES" not in c and c != time_col]

        automl = AutoML(time_budget=5, metric="mae", task="ts_forecast", eval_method="cv")

        automl.fit(
            dataframe=train_df[[time_col] + features + [tgt]],
            label=tgt,
            period=horizon,
            time_col=time_col,
            verbose=4,
            n_splits=5,
            cv_step_size=5,
        )

        pred = automl.predict(val_df)

        if isinstance(pred, pd.DataFrame):
            pred = pred[tgt]
        assert not np.isnan(pred.sum())

        preds[tgt] = pred
        # plt.figure(figsize=(16, 8), dpi=80)
        # plt.plot(df[time_col], df[tgt])
        # plt.plot(val_df[time_col], pred)
        # plt.legend(["actual", "predicted"])
        # plt.show()

    print("yahoo!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run a single scenario by hand; uncomment others as needed.
    # test_forecast_automl(60)
    # test_multivariate_forecast_num(5)
    # test_multivariate_forecast_cat(5)
    # test_numpy()
    # test_forecast_classification(5)
    test_forecast_panel(5)
    # test_cv_step()
|
||||
@@ -1,64 +0,0 @@
|
||||
import pytest
|
||||
from pandas import DataFrame
|
||||
from sklearn.datasets import load_iris
|
||||
import mlflow
|
||||
import mlflow.entities
|
||||
from flaml import AutoML
|
||||
|
||||
|
||||
class TestMLFlowLoggingParam:
    """Checks for the mlflow_logging flag on AutoML's constructor and fit()."""

    def test_should_start_new_run_by_default(self, automl_settings):
        """Without any flag, fit() should log child runs under the active run."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(), automl_settings)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) >= 1, "Expected at least 1 child run, got {}".format(len(child_runs))

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_init(self, automl_settings):
        """mlflow_logging=False on the constructor disables child runs."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(mlflow_logging=False), automl_settings)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) == 0, "Expected 0 child runs, got {}".format(len(child_runs))

    def test_should_not_start_new_run_when_mlflow_logging_set_to_false_in_fit(self, automl_settings):
        """mlflow_logging=False passed to fit() disables child runs."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(), automl_settings, mlflow_logging=False)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) == 0, "Expected 0 child runs, got {}".format(len(child_runs))

    def test_should_start_new_run_when_mlflow_logging_set_to_true_in_fit(self, automl_settings):
        """mlflow_logging=True on fit() overrides a False set in the constructor."""
        with mlflow.start_run():
            parent_run = mlflow.last_active_run()
            self._fit_iris(AutoML(mlflow_logging=False), automl_settings, mlflow_logging=True)

            child_runs = self._get_child_runs(parent_run)
            assert len(child_runs) >= 1, "Expected at least 1 child run, got {}".format(len(child_runs))

    @staticmethod
    def _fit_iris(automl, automl_settings, **fit_kwargs):
        # Train on iris inside the currently active mlflow run.
        X_train, y_train = load_iris(return_X_y=True)
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs, **automl_settings)

    @staticmethod
    def _get_child_runs(parent_run: mlflow.entities.Run) -> DataFrame:
        # All runs whose mlflow parent tag points at parent_run.
        experiment_id = parent_run.info.experiment_id
        return mlflow.search_runs(
            [experiment_id], filter_string="tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
        )

    @pytest.fixture(scope="class")
    def automl_settings(self):
        """Shared, deliberately tiny AutoML configuration."""
        return {
            "time_budget": 2,  # in seconds
            "metric": "accuracy",
            "task": "classification",
            "log_file_name": "iris.log",
        }
|
||||
@@ -1,534 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import load_iris, load_wine
|
||||
from flaml import AutoML
|
||||
from flaml.automl.data import get_output_from_log
|
||||
from flaml.automl.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
|
||||
from flaml import tune
|
||||
from flaml.automl.training_log import training_log_reader
|
||||
|
||||
|
||||
class MyRegularizedGreedyForest(SKLearnEstimator):
    """Custom FLAML estimator wrapping rgf-python's Regularized Greedy Forest."""

    def __init__(self, task="binary", **config):
        super().__init__(task, **config)

        if isinstance(task, str):
            from flaml.automl.task.factory import task_factory

            task = task_factory(task)

        # Import the backend lazily so rgf is only required when actually used.
        if task.is_classification():
            from rgf.sklearn import RGFClassifier

            self.estimator_class = RGFClassifier
        else:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor

    @classmethod
    def search_space(cls, data_size, task):
        """Hyperparameter search space, with tree sizes scaled to the data size."""
        n_rows = data_size[0]
        return {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=n_rows),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=n_rows),
                "init_value": 1,
            },
            "n_tree_search": {
                "domain": tune.lograndint(lower=1, upper=32768),
                "init_value": 1,
            },
            "opt_interval": {
                "domain": tune.lograndint(lower=1, upper=10000),
                "init_value": 100,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }

    @classmethod
    def size(cls, config):
        """Approximate model size in bytes for the given config."""
        leaves = int(round(config.get("max_leaf", 1)))
        trees = int(round(config.get("n_iter", 1)))
        return (leaves * 3 + (leaves - 1) * 4 + 1.0) * trees * 8

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative training cost compared with LightGBM."""
        return 1.0
|
||||
|
||||
|
||||
class MyLargeXGB(XGBoostSklearnEstimator):
    """XGBoost estimator with an intentionally huge search space (stress test)."""

    @classmethod
    def search_space(cls, **params):
        def dim(upper):
            # Log-uniform integers in [4, upper]; init at the expensive end,
            # low-cost starting point at the cheap end.
            return {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": upper,
                "low_cost_init_value": 4,
            }

        return {
            "n_estimators": dim(32768),
            "max_leaves": dim(3276),
        }
|
||||
|
||||
|
||||
class MyLargeLGBM(LGBMEstimator):
    """LightGBM estimator with an intentionally huge search space (stress test)."""

    @classmethod
    def search_space(cls, **params):
        def dim(upper):
            # Log-uniform integers in [4, upper]; init at the expensive end,
            # low-cost starting point at the cheap end.
            return {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": upper,
                "low_cost_init_value": 4,
            }

        return {
            "n_estimators": dim(32768),
            "num_leaves": dim(3276),
        }
|
||||
|
||||
|
||||
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    """Custom FLAML metric: validation log-loss regularized by the train/val gap.

    Returns:
        (loss, info): the objective ``val_loss * (1 + alpha) - alpha * train_loss``
        and a dict with the raw losses plus per-sample prediction time.
    """
    import time

    from sklearn.metrics import log_loss

    tic = time.time()
    val_proba = estimator.predict_proba(X_val)
    pred_time = (time.time() - tic) / len(X_val)
    val_loss = log_loss(y_val, val_proba, labels=labels, sample_weight=weight_val)
    train_proba = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, train_proba, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    objective = val_loss * (1 + alpha) - alpha * train_loss
    return objective, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }
|
||||
|
||||
|
||||
class TestMultiClass(unittest.TestCase):
|
||||
def test_custom_learner(self):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
|
||||
X_train, y_train = load_wine(return_X_y=True)
|
||||
settings = {
|
||||
"time_budget": 8, # total running time in seconds
|
||||
"estimator_list": ["RGF", "lgbm", "rf", "xgboost"],
|
||||
"task": "classification", # task type
|
||||
"sample": True, # whether to subsample training data
|
||||
"log_file_name": "test/wine.log",
|
||||
"log_training_metric": True, # whether to log training metric
|
||||
"n_jobs": 1,
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
# print the best model found for RGF
|
||||
print(automl.best_model_for_estimator("RGF"))
|
||||
|
||||
MyRegularizedGreedyForest.search_space = lambda data_size, task: {}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
try:
|
||||
import ray
|
||||
|
||||
del settings["time_budget"]
|
||||
settings["max_iter"] = 5
|
||||
# test the "_choice_" issue when using ray
|
||||
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=2, **settings)
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
def test_ensemble(self):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
|
||||
X_train, y_train = load_wine(return_X_y=True)
|
||||
settings = {
|
||||
"time_budget": 5, # total running time in seconds
|
||||
"estimator_list": ["rf", "xgboost", "catboost"],
|
||||
"task": "classification", # task type
|
||||
"sample": True, # whether to subsample training data
|
||||
"log_file_name": "test/wine.log",
|
||||
"log_training_metric": True, # whether to log training metric
|
||||
"ensemble": {
|
||||
"final_estimator": MyRegularizedGreedyForest(),
|
||||
"passthrough": False,
|
||||
},
|
||||
"n_jobs": 1,
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
def test_dataframe(self):
|
||||
self.test_classification(True)
|
||||
|
||||
def test_custom_metric(self):
|
||||
df, y = load_iris(return_X_y=True, as_frame=True)
|
||||
df["label"] = y
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"dataframe": df,
|
||||
"label": "label",
|
||||
"time_budget": 5,
|
||||
"eval_method": "cv",
|
||||
"metric": custom_metric,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris_custom.log",
|
||||
"log_training_metric": True,
|
||||
"log_type": "all",
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"sample_weight": np.ones(len(y)),
|
||||
"pred_time_limit": 1e-5,
|
||||
"ensemble": True,
|
||||
}
|
||||
automl.fit(**settings)
|
||||
print(automl.classes_)
|
||||
print(automl.model)
|
||||
print(automl.config_history)
|
||||
print(automl.best_model_for_estimator("rf"))
|
||||
print(automl.best_iteration)
|
||||
print(automl.best_estimator)
|
||||
automl = AutoML()
|
||||
estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=0, task="multiclass")
|
||||
print(estimator)
|
||||
(
|
||||
time_history,
|
||||
best_valid_loss_history,
|
||||
valid_loss_history,
|
||||
config_history,
|
||||
metric_history,
|
||||
) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
|
||||
print(metric_history)
|
||||
try:
|
||||
import ray
|
||||
|
||||
df = ray.put(df)
|
||||
settings["dataframe"] = df
|
||||
settings["use_ray"] = True
|
||||
del settings["time_budget"]
|
||||
settings["max_iter"] = 2
|
||||
automl.fit(**settings)
|
||||
estimator = automl.get_estimator_from_log(settings["log_file_name"], record_id=1, task="multiclass")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
def test_classification(self, as_frame=False):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 4,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
|
||||
if as_frame:
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.classes_)
|
||||
print(automl_experiment.predict(X_train)[:5])
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.best_model_for_estimator("catboost"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
del automl_settings["metric"]
|
||||
del automl_settings["model_history"]
|
||||
del automl_settings["log_training_metric"]
|
||||
automl_experiment = AutoML(task="classification")
|
||||
duration = automl_experiment.retrain_from_log(
|
||||
log_file_name=automl_settings["log_file_name"],
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
train_full=True,
|
||||
record_id=0,
|
||||
)
|
||||
print(duration)
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.predict_proba(X_train)[:5])
|
||||
|
||||
def test_micro_macro_f1(self):
|
||||
automl_experiment_micro = AutoML()
|
||||
automl_experiment_macro = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/micro_macro_f1.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment_micro.fit(X_train=X_train, y_train=y_train, metric="micro_f1", **automl_settings)
|
||||
automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric="macro_f1", **automl_settings)
|
||||
estimator = automl_experiment_macro.model
|
||||
y_pred = estimator.predict(X_train)
|
||||
y_pred_proba = estimator.predict_proba(X_train)
|
||||
from flaml.automl.ml import norm_confusion_matrix, multi_class_curves
|
||||
|
||||
print(norm_confusion_matrix(y_train, y_pred))
|
||||
from sklearn.metrics import roc_curve, precision_recall_curve
|
||||
|
||||
print(multi_class_curves(y_train, y_pred_proba, roc_curve))
|
||||
print(multi_class_curves(y_train, y_pred_proba, precision_recall_curve))
|
||||
|
||||
def test_roc_auc_ovr(self):
|
||||
automl_experiment = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovr",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_ovr.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"sample_weight": np.ones(len(y_train)),
|
||||
"eval_method": "holdout",
|
||||
"model_history": True,
|
||||
}
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_roc_auc_ovo(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovo",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_ovo.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_roc_auc_ovr_weighted(self):
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovr_weighted",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_weighted.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
def test_roc_auc_ovo_weighted(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "roc_auc_ovo_weighted",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/roc_auc_weighted.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
def test_sparse_matrix_classification(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"metric": "auto",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/sparse_classification.log",
|
||||
"split_type": "uniform",
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train = scipy.sparse.random(1554, 21, dtype=int)
|
||||
y_train = np.random.randint(3, size=1554)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.classes_)
|
||||
print(automl_experiment.predict_proba(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.best_model_for_estimator("extra_tree"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
|
||||
def _test_memory_limit(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
|
||||
automl_settings = {
|
||||
"time_budget": -1,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/classification_oom.log",
|
||||
"estimator_list": ["large_lgbm"],
|
||||
"log_type": "all",
|
||||
"hpo_method": "random",
|
||||
"free_mem_ratio": 0.2,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=True)
|
||||
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **automl_settings)
|
||||
print(automl_experiment.model)
|
||||
|
||||
def test_time_limit(self):
|
||||
automl_experiment = AutoML()
|
||||
automl_experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
|
||||
automl_experiment.add_learner(learner_name="large_xgb", learner_class=MyLargeXGB)
|
||||
automl_settings = {
|
||||
"time_budget": 0.5,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/classification_timeout.log",
|
||||
"estimator_list": ["catboost"],
|
||||
"log_type": "all",
|
||||
"hpo_method": "random",
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=True)
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.model.params)
|
||||
automl_settings["estimator_list"] = ["large_xgb"]
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.model)
|
||||
automl_settings["estimator_list"] = ["large_lgbm"]
|
||||
automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl_experiment.model)
|
||||
|
||||
def test_fit_w_starting_point(self, as_frame=True, n_concurrent_trials=1):
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"max_iter": 3,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
|
||||
if as_frame:
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
|
||||
automl_val_accuracy = 1.0 - automl.best_loss
|
||||
print("Best ML leaner:", automl.best_estimator)
|
||||
print("Best hyperparmeter config:", automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
|
||||
|
||||
starting_points = automl.best_config_per_estimator
|
||||
print("starting_points", starting_points)
|
||||
print("loss of the starting_points", automl.best_loss_per_estimator)
|
||||
settings_resume = {
|
||||
"time_budget": 2,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"log_file_name": "test/iris_resume.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"log_type": "all",
|
||||
"starting_points": starting_points,
|
||||
}
|
||||
new_automl = AutoML()
|
||||
new_automl.fit(X_train=X_train, y_train=y_train, **settings_resume)
|
||||
|
||||
new_automl_val_accuracy = 1.0 - new_automl.best_loss
|
||||
print("Best ML leaner:", new_automl.best_estimator)
|
||||
print("Best hyperparmeter config:", new_automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
|
||||
|
||||
def test_fit_w_starting_point_2(self, as_frame=True):
    """Re-run both warm-start tests with 2 concurrent trials when ray is available.

    The whole body sits in one try so that an ImportError raised either by
    ``import ray`` or from inside the re-run tests skips silently.
    """
    try:
        import ray  # probe for availability; unused beyond the import itself

        self.test_fit_w_starting_points_list(as_frame, 2)
        self.test_fit_w_starting_point(as_frame, 2)
    except ImportError:
        pass
|
||||
|
||||
def test_fit_w_starting_points_list(self, as_frame=True, n_concurrent_trials=1):
    """Fit once, then resume with per-learner *lists* of starting points read
    back from the first run's training log (one config per logged record)."""
    automl = AutoML()
    settings = {
        "max_iter": 3,
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "test/iris.log",
        "log_training_metric": True,
        "n_jobs": 1,
        "model_history": True,
    }
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    if as_frame:
        # test drop column
        X_train.columns = range(X_train.shape[1])
        X_train[X_train.shape[1]] = np.zeros(len(y_train))
    automl.fit(X_train=X_train, y_train=y_train, n_concurrent_trials=n_concurrent_trials, **settings)
    automl_val_accuracy = 1.0 - automl.best_loss
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))

    # Build {learner: [config, ...]} from the log; each config is tagged with a
    # growing FLAML_sample_size (1000, 2000, ...) to exercise sample-size resume.
    starting_points = {}
    log_file_name = settings["log_file_name"]
    with training_log_reader(log_file_name) as reader:
        sample_size = 1000
        for record in reader.records():
            config = record.config
            config["FLAML_sample_size"] = sample_size
            sample_size += 1000
            learner = record.learner
            if learner not in starting_points:
                starting_points[learner] = []
            starting_points[learner].append(config)
    # one iteration per starting point so every seed config gets evaluated
    max_iter = sum([len(s) for k, s in starting_points.items()])
    settings_resume = {
        "time_budget": 2,
        "metric": "accuracy",
        "task": "classification",
        "log_file_name": "test/iris_resume_all.log",
        "log_training_metric": True,
        "n_jobs": 1,
        "max_iter": max_iter,
        "model_history": True,
        "log_type": "all",
        "starting_points": starting_points,
        "append_log": True,
    }
    new_automl = AutoML()
    new_automl.fit(X_train=X_train, y_train=y_train, **settings_resume)

    new_automl_val_accuracy = 1.0 - new_automl.best_loss
    # print('Best ML leaner:', new_automl.best_estimator)
    # print('Best hyperparmeter config:', new_automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
    # print('Training duration of best run: {0:.4g} s'.format(new_automl_experiment.best_config_train_time))
|
||||
|
||||
|
||||
# Run the unittest test cases in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -1,45 +0,0 @@
|
||||
import nbformat
|
||||
from nbconvert.preprocessors import ExecutePreprocessor
|
||||
from nbconvert.preprocessors import CellExecutionError
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
here = os.path.abspath(os.path.dirname(__file__))  # directory containing this test file
|
||||
|
||||
|
||||
def run_notebook(input_nb, output_nb="executed_notebook.ipynb", save=False):
    """Execute a notebook from the repo's ``notebook/`` directory in-process.

    Args:
        input_nb: Notebook file name relative to ``<repo>/notebook/``.
        output_nb: File name (under this test directory) for the executed copy.
        save: When True, write the (possibly partially) executed notebook out
            even if execution raised, so the failure state can be inspected.

    Raises:
        CellExecutionError (from nbconvert) when a cell fails; the error
        propagates after the optional save in ``finally``.
    """
    # Fix: initialize nb before the try so the finally-block save cannot hit a
    # NameError (masking the real exception) when opening/reading the notebook fails.
    nb = None
    try:
        file_path = os.path.join(here, os.pardir, os.pardir, "notebook", input_nb)
        # notebooks are JSON; read them as UTF-8 explicitly
        with open(file_path, encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=4)
        ep = ExecutePreprocessor(timeout=3600, kernel_name="python3")
        ep.preprocess(nb, {"metadata": {"path": here}})
    finally:
        # Save whatever was executed (if anything was read at all), even on failure.
        if save and nb is not None:
            with open(os.path.join(here, output_nb), "w", encoding="utf-8") as f:
                nbformat.write(nb, f)
|
||||
|
||||
|
||||
# Heavy end-to-end notebook run; restricted to one CI leg (macOS + Python 3.8).
@pytest.mark.skipif(
    sys.platform != "darwin" or "3.8" not in sys.version,
    reason="Only run on macOS with Python 3.8",
)
def test_automl_classification(save=False):
    """Execute the automl_classification example notebook end to end."""
    run_notebook("automl_classification.ipynb", save=save)
|
||||
|
||||
|
||||
# Heavy end-to-end notebook run; restricted to one CI leg (macOS + Python 3.7).
@pytest.mark.skipif(
    sys.platform != "darwin" or "3.7" not in sys.version,
    reason="Only run on macOS with Python 3.7",
)
def test_zeroshot_lightgbm(save=False):
    """Execute the zeroshot_lightgbm example notebook end to end."""
    run_notebook("zeroshot_lightgbm.ipynb", save=save)
|
||||
|
||||
|
||||
# Manual entry point: executes one notebook locally, bypassing the CI skip marks.
if __name__ == "__main__":
    # test_automl_classification(save=True)
    test_zeroshot_lightgbm(save=True)
|
||||
@@ -1,181 +0,0 @@
|
||||
import sys
|
||||
from openml.exceptions import OpenMLServerException
|
||||
from requests.exceptions import ChunkedEncodingError, SSLError
|
||||
from minio.error import ServerError
|
||||
|
||||
|
||||
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
    """End-to-end AutoML classification run on the OpenML airlines dataset (id 1169).

    Args:
        budget: Time budget in seconds; bumped to a performance-check run
            (no time budget, max_iter=60) on the macOS/3.9/dataframe CI leg.
        dataset_format: "dataframe" or "array" passed to load_openml_dataset.
        hpo_method: Search algorithm name forwarded to AutoML, or None.
    """
    from flaml.automl.data import load_openml_dataset
    import urllib3

    performance_check_budget = 600
    if (
        sys.platform == "darwin"
        and budget < performance_check_budget
        and dataset_format == "dataframe"
        and "3.9" in sys.version
    ):
        budget = performance_check_budget  # revise the budget on macos
    if budget == performance_check_budget:
        # performance-check mode: unlimited time, fixed iteration count
        budget = None
        max_iter = 60
    else:
        max_iter = None
    try:
        X_train, X_test, y_train, y_test = load_openml_dataset(
            dataset_id=1169, data_dir="test/", dataset_format=dataset_format
        )
    # NOTE(review): the trailing bare Exception makes the specific entries
    # redundant — this deliberately skips the test on ANY download failure.
    except (
        OpenMLServerException,
        ChunkedEncodingError,
        urllib3.exceptions.ReadTimeoutError,
        SSLError,
        ServerError,
        Exception,
    ) as e:
        print(e)
        return
    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": budget,  # total running time in seconds
        "max_iter": max_iter,  # maximum number of iterations
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "task": "classification",  # task type
        "log_file_name": "airlines_experiment.log",  # flaml log file
        "seed": 7654321,  # random seed
        "hpo_method": hpo_method,
        "log_type": "all",
        "estimator_list": [
            "lgbm",
            "xgboost",
            "xgb_limitdepth",
            "rf",
            "extra_tree",
        ],  # list of ML learners
        "eval_method": "holdout",
    }
    """The main flaml automl API"""
    automl.fit(X_train=X_train, y_train=y_train, **settings)
    """ retrieve best config and best learner """
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
    print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
    print(automl.model.estimator)
    print(automl.best_config_per_estimator)
    print("time taken to find best model:", automl.time_to_find_best_model)
    """ pickle and save the automl object """
    import pickle

    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    """ compute predictions of testing dataset """
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)
    y_pred_proba = automl.predict_proba(X_test)[:, 1]
    """ compute different metric values on testing dataset """
    from flaml.automl.ml import sklearn_metric_loss_score

    accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
    print("accuracy", "=", accuracy)
    print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
    print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
    if budget is None:
        # only enforce accuracy in the long performance-check run
        assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
    from flaml.automl.data import get_output_from_log

    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=6)
    for config in config_history:
        print(config)
    # smoke-check a handful of public AutoML attributes
    print(automl.resource_attr)
    print(automl.max_resource)
    print(automl.min_resource)
    print(automl.feature_names_in_)
    print(automl.feature_importances_)
    if budget is not None:
        # short-budget runs additionally exercise the ensemble code path
        automl.fit(X_train=X_train, y_train=y_train, ensemble=True, **settings)
|
||||
|
||||
|
||||
def test_automl_array():
    """Same end-to-end run as test_automl, but with array-format data and BlendSearch."""
    test_automl(budget=5, dataset_format="array", hpo_method="bs")
|
||||
|
||||
|
||||
def _test_nobudget():
    """Manual-only performance run (underscore prefix hides it from pytest collection)."""
    # needs large RAM to run this test; budget=-1 triggers the no-time-budget path
    test_automl(-1)
|
||||
|
||||
|
||||
def test_mlflow():
    """AutoML + mlflow integration: log the fitted object as an sklearn model,
    reload it via pyfunc/sklearn flavors, and repeat with use_ray=True."""
    # subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
    import mlflow
    from flaml.automl.data import load_openml_task

    try:
        X_train, X_test, y_train, y_test = load_openml_task(task_id=7592, data_dir="test/")
    except (OpenMLServerException, ChunkedEncodingError, SSLError, ServerError, Exception) as e:
        # best-effort: skip on any download failure (bare Exception is deliberate)
        print(e)
        return
    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    settings = {
        "time_budget": 5,  # total running time in seconds
        "metric": "accuracy",  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
        "estimator_list": ["lgbm", "rf", "xgboost"],  # list of ML learners
        "task": "classification",  # task type
        "sample": False,  # whether to subsample training data
        "log_file_name": "adult.log",  # flaml log file
        "learner_selector": "roundrobin",
    }
    mlflow.set_experiment("flaml")
    with mlflow.start_run() as run:
        automl.fit(X_train=X_train, y_train=y_train, **settings)
        mlflow.sklearn.log_model(automl, "automl")
    # round-trip through the generic pyfunc flavor
    loaded_model = mlflow.pyfunc.load_model(f"{run.info.artifact_uri}/automl")
    print(loaded_model.predict(X_test))
    # force the memory constraint to 0 and probe the trainable directly
    automl._mem_thres = 0
    print(automl.trainable(automl.points_to_evaluate[0]))

    settings["use_ray"] = True
    try:
        with mlflow.start_run() as run:
            automl.fit(X_train=X_train, y_train=y_train, **settings)
            mlflow.sklearn.log_model(automl, "automl")
        automl = mlflow.sklearn.load_model(f"{run.info.artifact_uri}/automl")
        print(automl.predict_proba(X_test))
    except ImportError:
        # ray not installed: skip the parallel half
        pass
|
||||
|
||||
|
||||
def test_mlflow_iris():
    """Smoke test: fit AutoML on iris inside an active mlflow run."""
    from sklearn.datasets import load_iris
    import mlflow
    from flaml import AutoML

    with mlflow.start_run():
        learner = AutoML()
        config = dict(
            time_budget=2,  # in seconds
            metric="accuracy",
            task="classification",
            log_file_name="iris.log",
        )
        X_train, y_train = load_iris(return_X_y=True)
        learner.fit(X_train=X_train, y_train=y_train, **config)

    # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
|
||||
|
||||
|
||||
# Manual entry point: run the long performance-check variant locally.
if __name__ == "__main__":
    test_automl(600)
|
||||
@@ -1,118 +0,0 @@
|
||||
from flaml.tune.space import unflatten_hierarchical
|
||||
from flaml import AutoML
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
import os
|
||||
import unittest
|
||||
import logging
|
||||
import tempfile
|
||||
import io
|
||||
|
||||
|
||||
class TestLogging(unittest.TestCase):
    """Checks that FLAML's logger emits output during a run, and exercises the
    lower-level tune/BlendSearch API with the state of a fitted AutoML object."""

    def test_logging_level(self):
        from flaml import logger, logger_formatter

        with tempfile.TemporaryDirectory() as d:
            training_log = os.path.join(d, "training.log")

            # Configure logging for the FLAML logger
            # and add a handler that outputs to a buffer.
            logger.setLevel(logging.INFO)
            buf = io.StringIO()
            ch = logging.StreamHandler(buf)
            ch.setFormatter(logger_formatter)
            logger.addHandler(ch)

            # Run a simple job.
            automl = AutoML()
            automl_settings = {
                "time_budget": 1,
                "metric": "rmse",
                "task": "regression",
                "log_file_name": training_log,
                "log_training_metric": True,
                "n_jobs": 1,
                "model_history": True,
                "keep_search_state": True,  # needed later for _state access
                "learner_selector": "roundrobin",
            }
            X_train, y_train = fetch_california_housing(return_X_y=True)
            n = len(y_train) >> 1  # half for training, half for validation
            # NOTE(review): called BEFORE fit — presumably exercises the
            # unfitted accessors; confirm these are expected to be no-ops/None.
            print(automl.model, automl.classes_, automl.predict(X_train))
            automl.fit(
                X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings
            )
            # log search-related state so the buffer gets populated
            logger.info(automl.search_space)
            logger.info(automl.low_cost_partial_config)
            logger.info(automl.points_to_evaluate)
            logger.info(automl.cat_hp_cost)
            import optuna as ot

            study = ot.create_study()
            from flaml.tune.space import define_by_run_func, add_cost_to_space

            # sample a config from the hierarchical space via optuna define-by-run
            sample = define_by_run_func(study.ask(), automl.search_space)
            logger.info(sample)
            logger.info(unflatten_hierarchical(sample, automl.search_space))
            add_cost_to_space(automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost)
            logger.info(automl.search_space["ml"].categories)
            if automl.best_config:
                # evaluate the best config once through the raw trainable
                config = automl.best_config.copy()
                config["learner"] = automl.best_estimator
                automl.trainable({"ml": config})
            from flaml import tune, BlendSearch
            from flaml.automl import size
            from functools import partial

            # rebuild a BlendSearch from the fitted AutoML's search state
            low_cost_partial_config = automl.low_cost_partial_config
            search_alg = BlendSearch(
                metric="val_loss",
                mode="min",
                space=automl.search_space,
                low_cost_partial_config=low_cost_partial_config,
                points_to_evaluate=automl.points_to_evaluate,
                cat_hp_cost=automl.cat_hp_cost,
                resource_attr=automl.resource_attr,
                min_resource=automl.min_resource,
                max_resource=automl.max_resource,
                config_constraints=[
                    (
                        partial(size, automl._state.learner_classes),
                        "<=",
                        automl._mem_thres,
                    )
                ],
                metric_constraints=automl.metric_constraints,
            )
            analysis = tune.run(
                automl.trainable,
                search_alg=search_alg,  # verbose=2,
                time_budget_s=1,
                num_samples=-1,
            )
            print(min(trial.last_result["val_loss"] for trial in analysis.trials))
            # train the last sampled config directly through the internal state
            config = analysis.trials[-1].last_result["config"]["ml"]
            automl._state._train_with_config(config.pop("learner"), config)
            for _ in range(3):
                print(
                    search_alg._ls.complete_config(
                        low_cost_partial_config,
                        search_alg._ls_bound_min,
                        search_alg._ls_bound_max,
                    )
                )
            # Check if the log buffer is populated.
            self.assertTrue(len(buf.getvalue()) > 0)

            import pickle

            # pickle round-trip must reproduce identical predictions
            with open("automl.pkl", "wb") as f:
                pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
            print(automl.__version__)
            pred1 = automl.predict(X_train)
            with open("automl.pkl", "rb") as f:
                automl = pickle.load(f)
            pred2 = automl.predict(X_train)
            delta = pred1 - pred2
            assert max(delta) == 0 and min(delta) == 0
            automl.save_best_config("test/housing.json")
|
||||
@@ -1,233 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import (
|
||||
fetch_california_housing,
|
||||
)
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.automl.data import get_output_from_log
|
||||
from flaml.automl.model import XGBoostEstimator
|
||||
|
||||
|
||||
def logregobj(preds, dtrain):
    """Custom XGBoost objective for logistic loss.

    Args:
        preds: Raw (pre-sigmoid) leaf scores.
        dtrain: Training matrix exposing ``get_label()``.

    Returns:
        Tuple ``(grad, hess)`` of the log-loss gradient and hessian
        with respect to the raw scores.
    """
    y = dtrain.get_label()
    p = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight via sigmoid
    return p - y, p * (1.0 - p)
|
||||
|
||||
|
||||
class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator with logregobj as the objective function."""

    def __init__(self, **config):
        # pin the custom callable objective; all other config flows through
        super().__init__(objective=logregobj, **config)
|
||||
|
||||
|
||||
class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator with 'reg:squarederror' as the objective function."""

    def __init__(self, **config):
        # pin the built-in string objective; all other config flows through
        super().__init__(objective="reg:squarederror", **config)
|
||||
|
||||
|
||||
class TestRegression(unittest.TestCase):
    """Regression-task coverage: holdout fitting, retrain-from-log, sparse
    inputs, parallel trials, and custom XGBoost learners."""

    def test_regression(self):
        """Fit on california housing with an explicit validation split, then
        exercise retrain_from_log in its three supported variants."""
        automl = AutoML()
        automl_settings = {
            "time_budget": 2,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        n = int(len(y_train) * 9 // 10)  # 90/10 train/validation split
        automl.fit(X_train=X_train[:n], y_train=y_train[:n], X_val=X_train[n:], y_val=y_train[n:], **automl_settings)
        # providing X_val/y_val must force holdout evaluation
        assert automl._state.eval_method == "holdout"
        y_pred = automl.predict(X_train)
        print(y_pred)
        print(automl.model.estimator)
        # NOTE(review): sklearn's get_params takes a `deep` flag, so passing
        # "n_estimators" returns the FULL param dict — likely intended
        # get_params()["n_estimators"]; confirm before relying on n_iter below.
        n_iter = automl.model.estimator.get_params("n_estimators")
        print(automl.config_history)
        print(automl.best_model_for_estimator("xgboost"))
        print(automl.best_iteration)
        print(automl.best_estimator)
        print(get_output_from_log(automl_settings["log_file_name"], 1))
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=1,
        )
        # zero time budget: retrain path with no training time
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            time_budget=0,
        )
        # fresh object retrained purely from the log
        automl = AutoML()
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train[:n],
            y_train=y_train[:n],
            train_full=True,
        )
        print(automl.model.estimator)
        y_pred2 = automl.predict(X_train)
        # In some rare case, the last config is early stopped and it's the best config. But the logged config's n_estimator is not reduced.
        # NOTE(review): "n_estimator" (singular) looks like a typo for "n_estimators".
        assert n_iter != automl.model.estimator.get_params("n_estimator") or (y_pred == y_pred2).all()

    def test_sparse_matrix_regression(self):
        """Sparse CSR input with explicit validation data, then a second fit
        with custom_hp overrides restricted to catboost."""
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl = AutoML()
        settings = {
            "time_budget": 2,
            "metric": "mae",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "verbose": 0,
            "early_stop": True,
        }
        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)
        # validation data must be kept intact in the search state
        assert automl._state.X_val.shape == X_val.shape
        print(automl.predict(X_train))
        print(automl.model)
        print(automl.config_history)
        print(automl.best_model_for_estimator("rf"))
        print(automl.best_iteration)
        print(automl.best_estimator)
        print(automl.best_config)
        print(automl.best_loss)
        print(automl.best_config_train_time)

        settings.update(
            {
                "estimator_list": ["catboost"],
                "keep_search_state": False,
                "model_history": False,
                "use_best_model": False,
                "time_budget": None,
                "max_iter": 2,
                "custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
            }
        )
        automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings)

    def test_parallel(self, hpo_method=None):
        """10 concurrent trials; silently skipped when ray is unavailable."""
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 10,
            "hpo_method": hpo_method,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("xgboost"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            # ray not installed
            return

    def test_sparse_matrix_regression_holdout(self):
        """Tiny sparse input (8 rows) with sample weights and forced holdout."""
        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "eval_method": "holdout",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("rf"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_regression_xgboost(self):
        """Register the two custom XGBoost learners (callable vs string
        objective) and restrict the search to them."""
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ["my_xgb1", "my_xgb2"],
            "task": "regression",
            "log_file_name": "test/regression_xgboost.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("my_xgb2"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)
|
||||
|
||||
|
||||
def test_multioutput():
    """AutoML as the base estimator inside sklearn's multi-output wrappers."""
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    from sklearn.multioutput import MultiOutputRegressor, RegressorChain

    # create three-target regression data and split into train and test
    X, y = make_regression(n_targets=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

    # train and predict with each wrapper in turn (one fresh AutoML per wrapper)
    for wrapper_cls in (MultiOutputRegressor, RegressorChain):
        wrapped = wrapper_cls(AutoML(task="regression", time_budget=1))
        wrapped.fit(X_train, y_train)
        print(wrapped.predict(X_test))
|
||||
|
||||
|
||||
# Run the unittest test cases in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -1,271 +0,0 @@
|
||||
from flaml import AutoML
|
||||
import pandas as pd
|
||||
from sklearn.datasets import fetch_california_housing, fetch_openml
|
||||
|
||||
|
||||
class TestScore:
    """Coverage for AutoML.score and AutoML.pickle across task types
    (forecast, classification, regression, rank, label-encoded classes)."""

    def test_forecast(self, budget=5):
        """Time-series forecast on the CO2 dataset; prefers prophet, falls
        back to arima/sarimax on ImportError or prophet-v1.1 RecursionError."""
        import pickle

        # using dataframe
        import statsmodels.api as sm

        data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
        data = data.fillna(data.bfill()).to_frame().reset_index().rename(columns={"index": "ds", "co2": "y"})
        num_samples = data.shape[0]
        time_horizon = 12  # forecast the last 12 months
        split_idx = num_samples - time_horizon
        X_test = data[split_idx:]["ds"]
        y_test = data[split_idx:]["y"]

        df = data[:split_idx]
        automl = AutoML()
        settings = {
            "time_budget": budget,  # total running time in seconds
            "metric": "mape",  # primary metric
            "task": "ts_forecast",  # task type
            "log_file_name": "test/CO2_forecast.log",  # flaml log file
            "eval_method": "holdout",
            "label": "y",
        }
        """The main flaml automl API"""
        try:
            import prophet

            automl.fit(
                dataframe=df,
                estimator_list=["prophet", "arima", "sarimax"],
                **settings,
                period=time_horizon,
            )
            automl.score(X_test, y_test)
            automl.pickle("automl.pkl")
            with open("automl.pkl", "rb") as f:
                pickle.load(f)  # v1.1 of prophet raises RecursionError
        except (ImportError, RecursionError):
            # fall back to the statsmodels-based estimators only
            print("not using prophet due to ImportError or RecursionError (when unpickling in v1.1)")
            automl.fit(
                dataframe=df,
                **settings,
                estimator_list=["arima", "sarimax"],
                period=time_horizon,
            )
            automl.score(X_test, y_test)
            automl.pickle("automl.pkl")
            with open("automl.pkl", "rb") as f:
                pickle.load(f)

    def test_classification(self):
        """Score/fit/pickle cycle for every classification estimator on a tiny
        mixed-type frame (int, float, object-with-mixed-values, bool columns)."""
        X = pd.DataFrame(
            {
                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
                "f2": [3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0, 12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0],
                # deliberately mixes strings and floats in one column
                "f3": ["a", "b", "a", "c", "c", "b", "b", "b", "b", "a", "b", 1.0, 1.0, "a"],
                "f4": [
                    True,
                    True,
                    False,
                    True,
                    True,
                    False,
                    False,
                    False,
                    True,
                    True,
                    False,
                    False,
                    True,
                    True,
                ],
            }
        )
        y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])

        automl = AutoML()
        for each_estimator in [
            "catboost",
            "lrl2",
            "lrl1",
            "rf",
            "lgbm",
            "extra_tree",
            "kneighbor",
            "xgboost",
        ]:
            automl_settings = {
                "time_budget": 6,
                "task": "classification",
                "n_jobs": 1,
                "estimator_list": [each_estimator],
                "metric": "accuracy",
                "log_training_metric": True,
            }
            automl.score(X, y)  # for covering the case no estimator is trained

            automl.fit(X, y, **automl_settings)
            automl.score(X, y)
            automl.score(X, y, **{"metric": "accuracy"})

            automl.pickle("automl.pkl")

    def test_regression(self):
        """Score/pickle cycle for every regression estimator on california housing."""
        automl_experiment = AutoML()

        X_train, y_train = fetch_california_housing(return_X_y=True)
        n = int(len(y_train) * 9 // 10)  # 90/10 train/validation split

        for each_estimator in [
            "lgbm",
            "xgboost",
            "rf",
            "extra_tree",
            "catboost",
            "kneighbor",
        ]:
            automl_settings = {
                "time_budget": 2,
                "task": "regression",
                "log_file_name": "test/california.log",
                "log_training_metric": True,
                "estimator_list": [each_estimator],
                "n_jobs": 1,
                "model_history": True,
            }
            automl_experiment.fit(
                X_train=X_train[:n],
                y_train=y_train[:n],
                X_val=X_train[n:],
                y_val=y_train[n:],
                **automl_settings,
            )

            automl_experiment.score(X_train[n:], y_train[n:], **{"metric": "mse"})
            automl_experiment.pickle("automl.pkl")

    def test_rank(self):
        """Ranking task with explicit group labels; score may legitimately
        raise NotImplementedError for rank tasks."""
        from sklearn.externals._arff import ArffException

        dataset = "credit-g"

        try:
            X, y = fetch_openml(name=dataset, return_X_y=True)
            y = y.cat.codes
        except (ArffException, ValueError):
            # offline fallback dataset
            from sklearn.datasets import load_wine

            X, y = load_wine(return_X_y=True)

        import numpy as np

        automl = AutoML()
        n = 500

        for each_estimator in ["lgbm", "xgboost"]:
            automl_settings = {
                "time_budget": 2,
                "task": "rank",
                "log_file_name": "test/{}.log".format(dataset),
                "model_history": True,
                "groups": np.array([0] * 200 + [1] * 200 + [2] * 100),  # group labels
                "learner_selector": "roundrobin",
                "estimator_list": [each_estimator],
            }
            automl.fit(X[:n], y[:n], **automl_settings)
            try:
                automl.score(X[n:], y[n:])
                automl.pickle("automl.pkl")
            except NotImplementedError:
                pass

    def test_class(self):
        # to test classification task with labels need encoding
        X = pd.DataFrame(
            {
                "f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
                "f2": [3.0, 16.0, 10.0, 12.0, 3.0, 14.0, 11.0, 12.0, 5.0, 14.0, 20.0, 16.0, 15.0, 11.0],
            }
        )
        y = pd.Series(
            [
                "a",
                "b",
                "c",
                "d",
                "a",
                "b",
                "c",
                "d",
                "a",
                "b",
                "c",
                "d",
                "a",
                "b",
            ]
        )

        automl = AutoML()

        automl_settings = {
            "time_budget": 6,
            "task": "classification",
            "n_jobs": 1,
            "estimator_list": ["xgboost"],
            "metric": "accuracy",
            "log_training_metric": True,
        }

        automl.fit(X, y, **automl_settings)
        # string labels must have been encoded internally
        assert automl._label_transformer is not None
        assert automl.score(X, y) > 0
        automl.pickle("automl.pkl")
|
||||
|
||||
|
||||
# Manual entry point: run only the forecast test locally.
if __name__ == "__main__":
    test = TestScore()
    test.test_forecast()
|
||||
@@ -1,205 +0,0 @@
|
||||
from sklearn.datasets import fetch_openml
|
||||
from flaml.automl import AutoML
|
||||
from sklearn.model_selection import GroupKFold, train_test_split, KFold
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
dataset = "credit-g"  # OpenML dataset fetched by name in the tests below
|
||||
|
||||
|
||||
def _test(split_type):
    """Fit a classifier with the given split_type and print test accuracy.

    Falls back to the bundled wine dataset when the OpenML fetch fails.
    """
    from sklearn.externals._arff import ArffException

    automl = AutoML()

    automl_settings = {
        "time_budget": 2,
        # "metric": 'accuracy',
        "task": "classification",
        "log_file_name": "test/{}.log".format(dataset),
        "model_history": True,
        "log_training_metric": True,
        "split_type": split_type,
    }

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # offline fallback dataset
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    if split_type != "time":
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    else:
        # time-ordered split: keep row order, no shuffling
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

    pred = automl.predict(X_test)
    acc = accuracy_score(y_test, pred)

    print(acc)
|
||||
|
||||
|
||||
def _test_uniform():
    """Manual-only variant: uniform random split."""
    _test("uniform")
|
||||
|
||||
|
||||
def test_time():
    """Time-ordered (non-shuffled) split."""
    _test("time")
|
||||
|
||||
|
||||
def test_groups():
    """Group-aware splitting: cv and holdout with random group labels, then a
    GroupKFold splitter object, which is only legal with eval_method 'cv'."""
    from sklearn.externals._arff import ArffException

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # offline fallback dataset
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)

    import numpy as np

    automl = AutoML()
    automl_settings = {
        "time_budget": 2,
        "task": "classification",
        "log_file_name": "test/{}.log".format(dataset),
        "model_history": True,
        "eval_method": "cv",
        "groups": np.random.randint(low=0, high=10, size=len(y)),  # random group per row
        "estimator_list": ["lgbm", "rf", "xgboost", "kneighbor"],
        "learner_selector": "roundrobin",
    }
    automl.fit(X, y, **automl_settings)

    automl_settings["eval_method"] = "holdout"
    automl.fit(X, y, **automl_settings)

    # a custom splitter object must be rejected while eval_method is holdout
    automl_settings["split_type"] = GroupKFold(n_splits=3)
    try:
        automl.fit(X, y, **automl_settings)
        raise RuntimeError("GroupKFold object as split_type should fail when eval_method is holdout")
    except AssertionError:
        # eval_method must be 'auto' or 'cv' for custom data splitter.
        pass

    # with eval_method back to cv, the custom splitter is accepted
    automl_settings["eval_method"] = "cv"
    automl.fit(X, y, **automl_settings)
|
||||
|
||||
|
||||
def test_stratified_groupkfold():
|
||||
from sklearn.model_selection import StratifiedGroupKFold
|
||||
from minio.error import ServerError
|
||||
from flaml.data import load_openml_dataset
|
||||
|
||||
try:
|
||||
X_train, _, y_train, _ = load_openml_dataset(dataset_id=1169, data_dir="test/")
|
||||
except (ServerError, Exception):
|
||||
return
|
||||
splitter = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=0)
|
||||
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"time_budget": 6,
|
||||
"metric": "ap",
|
||||
"eval_method": "cv",
|
||||
"split_type": splitter,
|
||||
"groups": X_train["Airline"],
|
||||
"estimator_list": [
|
||||
"lgbm",
|
||||
"rf",
|
||||
"xgboost",
|
||||
"extra_tree",
|
||||
"xgb_limitdepth",
|
||||
"lrl1",
|
||||
],
|
||||
}
|
||||
|
||||
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
||||
|
||||
|
||||
def test_rank():
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
y = y.cat.codes
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
import numpy as np
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "rank",
|
||||
"log_file_name": "test/{}.log".format(dataset),
|
||||
"model_history": True,
|
||||
"eval_method": "cv",
|
||||
"groups": np.array([0] * 200 + [1] * 200 + [2] * 200 + [3] * 200 + [4] * 100 + [5] * 100), # group labels
|
||||
"learner_selector": "roundrobin",
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "rank",
|
||||
"metric": "ndcg@5", # 5 can be replaced by any number
|
||||
"log_file_name": "test/{}.log".format(dataset),
|
||||
"model_history": True,
|
||||
"groups": [200] * 4 + [100] * 2, # alternative way: group counts
|
||||
# "estimator_list": ['lgbm', 'xgboost'], # list of ML learners
|
||||
"learner_selector": "roundrobin",
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
|
||||
def test_object():
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
|
||||
import numpy as np
|
||||
|
||||
class TestKFold(KFold):
|
||||
def __init__(self, n_splits):
|
||||
self.n_splits = int(n_splits)
|
||||
|
||||
def split(self, X):
|
||||
rng = np.random.default_rng()
|
||||
train_num = int(len(X) * 0.8)
|
||||
for _ in range(self.n_splits):
|
||||
permu_idx = rng.permutation(len(X))
|
||||
yield permu_idx[:train_num], permu_idx[train_num:]
|
||||
|
||||
def get_n_splits(self, X=None, y=None, groups=None):
|
||||
return self.n_splits
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
"task": "classification",
|
||||
"log_file_name": "test/{}.log".format(dataset),
|
||||
"model_history": True,
|
||||
"log_training_metric": True,
|
||||
"split_type": TestKFold(5),
|
||||
}
|
||||
automl.fit(X, y, **automl_settings)
|
||||
assert automl._state.eval_method == "cv", "eval_method must be 'cv' for custom data splitter"
|
||||
|
||||
kf = TestKFold(5)
|
||||
kf.shuffle = True
|
||||
automl_settings["split_type"] = kf
|
||||
automl.fit(X, y, **automl_settings)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_groups()
|
||||
@@ -1,115 +0,0 @@
|
||||
import os
|
||||
import unittest
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.automl.training_log import training_log_reader
|
||||
|
||||
|
||||
class TestTrainingLog(unittest.TestCase):
|
||||
def test_training_log(self, path="test_training_log.log", estimator_list="auto", use_ray=False):
|
||||
with TemporaryDirectory() as d:
|
||||
filename = os.path.join(d, path)
|
||||
|
||||
# Run a simple job.
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "mse",
|
||||
"task": "regression",
|
||||
"log_file_name": filename,
|
||||
"log_training_metric": True,
|
||||
"mem_thres": 1024 * 1024,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"train_time_limit": 0.1,
|
||||
"verbose": 3,
|
||||
# "ensemble": True,
|
||||
"keep_search_state": True,
|
||||
"estimator_list": estimator_list,
|
||||
}
|
||||
X_train, y_train = fetch_california_housing(return_X_y=True)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
# Check if the training log file is populated.
|
||||
self.assertTrue(os.path.exists(filename))
|
||||
if automl.best_estimator:
|
||||
estimator, config = automl.best_estimator, automl.best_config
|
||||
model0 = automl.best_model_for_estimator(estimator)
|
||||
print(model0.params)
|
||||
if "n_estimators" in config:
|
||||
assert model0.params["n_estimators"] == config["n_estimators"]
|
||||
|
||||
# train on full data with no time limit
|
||||
automl._state.time_budget = -1
|
||||
model, _ = automl._state._train_with_config(estimator, config)
|
||||
|
||||
# assuming estimator & config are saved and loaded as follows
|
||||
automl = AutoML()
|
||||
automl.fit(
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
max_iter=1,
|
||||
task="regression",
|
||||
estimator_list=[estimator],
|
||||
n_jobs=1,
|
||||
starting_points={estimator: config},
|
||||
use_ray=use_ray,
|
||||
)
|
||||
print(automl.best_config)
|
||||
# then the fitted model should be equivalent to model
|
||||
assert (
|
||||
str(model.estimator) == str(automl.model.estimator)
|
||||
or estimator == "xgboost"
|
||||
and str(model.estimator.get_dump()) == str(automl.model.estimator.get_dump())
|
||||
or estimator == "catboost"
|
||||
and str(model.estimator.get_all_params()) == str(automl.model.estimator.get_all_params())
|
||||
)
|
||||
automl.fit(
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
max_iter=1,
|
||||
task="regression",
|
||||
estimator_list=[estimator],
|
||||
n_jobs=1,
|
||||
starting_points={estimator: {}},
|
||||
)
|
||||
print(automl.best_config)
|
||||
|
||||
with training_log_reader(filename) as reader:
|
||||
count = 0
|
||||
for record in reader.records():
|
||||
print(record)
|
||||
count += 1
|
||||
self.assertGreater(count, 0)
|
||||
|
||||
automl_settings["log_file_name"] = ""
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
if automl._selected:
|
||||
automl._selected.update(None, 0)
|
||||
automl = AutoML()
|
||||
automl.fit(X_train=X_train, y_train=y_train, max_iter=0, task="regression")
|
||||
|
||||
def test_illfilename(self):
|
||||
try:
|
||||
self.test_training_log("/")
|
||||
except IsADirectoryError:
|
||||
print("IsADirectoryError happens as expected in linux.")
|
||||
except PermissionError:
|
||||
print("PermissionError happens as expected in windows.")
|
||||
|
||||
def test_each_estimator(self):
|
||||
try:
|
||||
import ray
|
||||
|
||||
ray.shutdown()
|
||||
ray.init()
|
||||
use_ray = True
|
||||
except ImportError:
|
||||
use_ray = False
|
||||
self.test_training_log(estimator_list=["xgboost"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["catboost"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["extra_tree"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["rf"], use_ray=use_ray)
|
||||
self.test_training_log(estimator_list=["lgbm"], use_ray=use_ray)
|
||||
@@ -1,212 +0,0 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from sklearn.datasets import load_iris
|
||||
from flaml import AutoML
|
||||
from flaml.automl.model import LGBMEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
class TestWarmStart(unittest.TestCase):
|
||||
def test_fit_w_freezinghp_starting_point(self, as_frame=True):
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 1,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"estimator_list": ["lgbm"],
|
||||
"log_file_name": "test/iris.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
}
|
||||
X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
|
||||
if as_frame:
|
||||
# test drop column
|
||||
X_train.columns = range(X_train.shape[1])
|
||||
X_train[X_train.shape[1]] = np.zeros(len(y_train))
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
automl_val_accuracy = 1.0 - automl.best_loss
|
||||
print("Best ML leaner:", automl.best_estimator)
|
||||
print("Best hyperparmeter config:", automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
|
||||
# 1. Get starting points from previous experiments.
|
||||
starting_points = automl.best_config_per_estimator
|
||||
print("starting_points", starting_points)
|
||||
print("loss of the starting_points", automl.best_loss_per_estimator)
|
||||
starting_point = starting_points["lgbm"]
|
||||
hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"]
|
||||
|
||||
# 2. Constrct a new class:
|
||||
# a. write the hps you want to freeze as hps with constant 'domain';
|
||||
# b. specify the new search space of the other hps accrodingly.
|
||||
|
||||
class MyPartiallyFreezedLargeLGBM(LGBMEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, **params):
|
||||
# (1) Get the hps in the original search space
|
||||
space = LGBMEstimator.search_space(**params)
|
||||
# (2) Set up the fixed value from hps from the starting point
|
||||
for hp_name in hps_to_freeze:
|
||||
# if an hp is specifed to be freezed, use tine value provided in the starting_point
|
||||
# otherwise use the setting from the original search space
|
||||
if hp_name in starting_point:
|
||||
space[hp_name] = {"domain": starting_point[hp_name]}
|
||||
# (3.1) Configure the search space for hps that are in the original search space
|
||||
# but you want to change something, for example the range.
|
||||
revised_hps_to_search = {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=10, upper=32768),
|
||||
"init_value": starting_point.get("n_estimators") or space["n_estimators"].get("init_value", 10),
|
||||
"low_cost_init_value": space["n_estimators"].get("low_cost_init_value", 10),
|
||||
},
|
||||
"num_leaves": {
|
||||
"domain": tune.lograndint(lower=10, upper=3276),
|
||||
"init_value": starting_point.get("num_leaves") or space["num_leaves"].get("init_value", 10),
|
||||
"low_cost_init_value": space["num_leaves"].get("low_cost_init_value", 10),
|
||||
},
|
||||
# (3.2) Add a new hp which is not in the original search space
|
||||
"subsample": {
|
||||
"domain": tune.uniform(lower=0.1, upper=1.0),
|
||||
"init_value": 0.1,
|
||||
},
|
||||
}
|
||||
space.update(revised_hps_to_search)
|
||||
return space
|
||||
|
||||
new_estimator_name = "large_lgbm"
|
||||
new_automl = AutoML()
|
||||
new_automl.add_learner(learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM)
|
||||
|
||||
automl_settings_resume = {
|
||||
"time_budget": 3,
|
||||
"metric": "accuracy",
|
||||
"task": "classification",
|
||||
"estimator_list": [new_estimator_name],
|
||||
"log_file_name": "test/iris_resume.log",
|
||||
"log_training_metric": True,
|
||||
"n_jobs": 1,
|
||||
"model_history": True,
|
||||
"log_type": "all",
|
||||
"starting_points": {new_estimator_name: starting_point},
|
||||
}
|
||||
|
||||
new_automl.fit(X_train=X_train, y_train=y_train, **automl_settings_resume)
|
||||
|
||||
new_automl_val_accuracy = 1.0 - new_automl.best_loss
|
||||
print("Best ML leaner:", new_automl.best_estimator)
|
||||
print("Best hyperparmeter config:", new_automl.best_config)
|
||||
print("Best accuracy on validation data: {0:.4g}".format(new_automl_val_accuracy))
|
||||
print("Training duration of best run: {0:.4g} s".format(new_automl.best_config_train_time))
|
||||
|
||||
def test_nobudget(self):
|
||||
automl = AutoML()
|
||||
X_train, y_train = load_iris(return_X_y=True)
|
||||
automl.fit(X_train, y_train)
|
||||
print(automl.best_config_per_estimator)
|
||||
|
||||
def test_FLAML_sample_size_in_starting_points(self):
|
||||
from openml.exceptions import OpenMLServerException
|
||||
from requests.exceptions import ChunkedEncodingError, SSLError
|
||||
from minio.error import ServerError
|
||||
from flaml.automl.data import load_openml_dataset
|
||||
from flaml import AutoML
|
||||
|
||||
try:
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
|
||||
except (OpenMLServerException, ChunkedEncodingError, SSLError, ServerError, Exception):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X_train, y_train = load_wine(return_X_y=True)
|
||||
|
||||
automl_settings = {
|
||||
"time_budget": 3,
|
||||
"task": "classification",
|
||||
}
|
||||
|
||||
automl1 = AutoML()
|
||||
print(len(y_train))
|
||||
automl1.fit(X_train, y_train, **automl_settings)
|
||||
print("automl1.best_config_per_estimator", automl1.best_config_per_estimator)
|
||||
|
||||
automl_settings["starting_points"] = automl1.best_config_per_estimator
|
||||
automl2 = AutoML()
|
||||
automl2.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
automl_settings["starting_points"] = {
|
||||
"xgboost": {
|
||||
"n_estimators": 4,
|
||||
"max_leaves": 4,
|
||||
"min_child_weight": 0.26208115308159446,
|
||||
"learning_rate": 0.25912534572860507,
|
||||
"subsample": 0.9266743941610592,
|
||||
"colsample_bylevel": 1.0,
|
||||
"colsample_bytree": 1.0,
|
||||
"reg_alpha": 0.0013933617380144255,
|
||||
"reg_lambda": 0.18096917948292954,
|
||||
"FLAML_sample_size": 20000,
|
||||
},
|
||||
"xgb_limitdepth": None,
|
||||
"lrl1": None,
|
||||
}
|
||||
from flaml import tune
|
||||
|
||||
automl_settings["custom_hp"] = {
|
||||
"xgboost": {
|
||||
"n_estimators": {
|
||||
"domain": tune.choice([10, 20]),
|
||||
},
|
||||
}
|
||||
}
|
||||
automl2 = AutoML()
|
||||
automl2.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
try:
|
||||
import ray
|
||||
|
||||
automl_settings["n_concurrent_trials"] = 2
|
||||
except ImportError:
|
||||
automl_settings["n_concurrent_trials"] = 1
|
||||
# setting different FLAML_sample_size
|
||||
automl_settings["starting_points"] = {
|
||||
"catboost": {
|
||||
"early_stopping_rounds": 10,
|
||||
"learning_rate": 0.09999999999999996,
|
||||
"n_estimators": 1,
|
||||
"FLAML_sample_size": 10000,
|
||||
},
|
||||
"xgboost": {
|
||||
"n_estimators": 4,
|
||||
"max_leaves": 4,
|
||||
"min_child_weight": 0.26208115308159446,
|
||||
"learning_rate": 0.25912534572860507,
|
||||
"subsample": 0.9266743941610592,
|
||||
"colsample_bylevel": 1.0,
|
||||
"colsample_bytree": 1.0,
|
||||
"reg_alpha": 0.0013933617380144255,
|
||||
"reg_lambda": 0.18096917948292954,
|
||||
"FLAML_sample_size": 20000,
|
||||
},
|
||||
"xgb_limitdepth": None,
|
||||
"lrl1": None,
|
||||
}
|
||||
automl3 = AutoML()
|
||||
automl3.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
automl_settings["sample"] = False
|
||||
automl4 = AutoML()
|
||||
try:
|
||||
automl4.fit(
|
||||
X_train,
|
||||
y_train,
|
||||
**automl_settings,
|
||||
)
|
||||
raise RuntimeError(
|
||||
"When sample=False and starting_points contain FLAML_sample_size, AssertionError is expected but not raised."
|
||||
)
|
||||
except AssertionError:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,98 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from flaml.automl import AutoML
|
||||
from flaml.automl.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
dataset = "credit-g"
|
||||
|
||||
|
||||
class XGBoost2D(XGBoostSklearnEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, data_size, task):
|
||||
upper = min(32768, int(data_size[0]))
|
||||
return {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"max_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_simple(method=None):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
|
||||
|
||||
automl_settings = {
|
||||
"estimator_list": ["XGBoost2D"],
|
||||
"task": "classification",
|
||||
"log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
|
||||
"n_jobs": 1,
|
||||
"hpo_method": method,
|
||||
"log_type": "all",
|
||||
"retrain_full": "budget",
|
||||
"keep_search_state": True,
|
||||
"time_budget": 1,
|
||||
}
|
||||
from sklearn.externals._arff import ArffException
|
||||
|
||||
try:
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
except (ArffException, ValueError):
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
print(automl.estimator_list)
|
||||
print(automl.search_space)
|
||||
print(automl.points_to_evaluate)
|
||||
if not automl.best_config:
|
||||
return
|
||||
config = automl.best_config.copy()
|
||||
config["learner"] = automl.best_estimator
|
||||
automl.trainable(config)
|
||||
from flaml import tune
|
||||
from flaml.automl import size
|
||||
from functools import partial
|
||||
|
||||
analysis = tune.run(
|
||||
automl.trainable,
|
||||
automl.search_space,
|
||||
metric="val_loss",
|
||||
mode="min",
|
||||
low_cost_partial_config=automl.low_cost_partial_config,
|
||||
points_to_evaluate=automl.points_to_evaluate,
|
||||
cat_hp_cost=automl.cat_hp_cost,
|
||||
resource_attr=automl.resource_attr,
|
||||
min_resource=automl.min_resource,
|
||||
max_resource=automl.max_resource,
|
||||
time_budget_s=automl._state.time_budget,
|
||||
config_constraints=[(partial(size, automl._state.learner_classes), "<=", automl._mem_thres)],
|
||||
metric_constraints=automl.metric_constraints,
|
||||
num_samples=5,
|
||||
)
|
||||
print(analysis.trials[-1])
|
||||
|
||||
|
||||
def test_optuna():
|
||||
test_simple(method="optuna")
|
||||
|
||||
|
||||
def test_random():
|
||||
test_simple(method="random")
|
||||
|
||||
|
||||
def test_grid():
|
||||
test_simple(method="grid")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,71 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from sklearn.datasets import fetch_openml
|
||||
from sklearn.model_selection import train_test_split
|
||||
from flaml.automl import AutoML
|
||||
from flaml.automl.model import XGBoostSklearnEstimator
|
||||
from flaml import tune
|
||||
|
||||
|
||||
dataset = "credit-g"
|
||||
|
||||
|
||||
class XGBoost2D(XGBoostSklearnEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, data_size, task):
|
||||
upper = min(32768, int(data_size))
|
||||
return {
|
||||
"n_estimators": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"init_value": 4,
|
||||
},
|
||||
"max_leaves": {
|
||||
"domain": tune.lograndint(lower=4, upper=upper),
|
||||
"init_value": 4,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _test_simple(method=None, size_ratio=1.0):
|
||||
automl = AutoML()
|
||||
automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
|
||||
|
||||
X, y = fetch_openml(name=dataset, return_X_y=True)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
|
||||
|
||||
final_size = int(len(y_train) * size_ratio)
|
||||
X_train = X_train[:final_size]
|
||||
y_train = y_train[:final_size]
|
||||
automl_settings = {
|
||||
"estimator_list": ["XGBoost2D"],
|
||||
# "metric": 'accuracy',
|
||||
"task": "classification",
|
||||
"log_file_name": f"test/xgboost2d_{dataset}_{method}_{final_size}.log",
|
||||
# "log_training_metric": True,
|
||||
# "split_type": split_type,
|
||||
"n_jobs": 1,
|
||||
"hpo_method": method,
|
||||
"log_type": "all",
|
||||
"time_budget": 3600,
|
||||
}
|
||||
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
|
||||
|
||||
|
||||
def _test_grid_1():
|
||||
_test_simple(method="grid", size_ratio=1.0 / 3.0)
|
||||
|
||||
|
||||
def _test_grid_2():
|
||||
_test_simple(method="grid", size_ratio=2.0 / 3.0)
|
||||
|
||||
|
||||
def _test_grid_4():
|
||||
_test_simple(method="grid", size_ratio=0.5)
|
||||
|
||||
|
||||
def _test_grid_3():
|
||||
_test_simple(method="grid", size_ratio=1.0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,13 +0,0 @@
|
||||
Dataset,NumberOfInstances,NumberOfFeatures,NumberOfClasses,PercentageOfNumericFeatures
|
||||
2dplanes,36691,10,0,1.0
|
||||
adult,43957,14,2,0.42857142857142855
|
||||
Airlines,485444,7,2,0.42857142857142855
|
||||
Albert,382716,78,2,0.3333333333333333
|
||||
Amazon_employee_access,29492,9,2,0.0
|
||||
bng_breastTumor,104976,9,0,0.1111111111111111
|
||||
bng_pbc,900000,18,0,0.5555555555555556
|
||||
car,1555,6,4,0.0
|
||||
connect-4,60801,42,3,0.0
|
||||
dilbert,9000,2000,5,1.0
|
||||
Dionis,374569,60,355,1.0
|
||||
poker,922509,10,0,1.0
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 16, "max_features": 1.0, "max_leaves": 54}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 2047, "max_features": 1.0, "max_leaves": 8194, "criterion": "gini", "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 1733, "max_features": 0.3841826938360253, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 812, "max_features": 1.0, "max_leaves": 1474, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 859, "max_features": 1.0, "max_leaves": 967, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 90, "max_features": 1.0, "max_leaves": 1301, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 1211, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 333, "max_features": 1.0, "max_leaves": 201, "criterion": "gini"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 229, "max_features": 0.5372053700721111, "max_leaves": 11150, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 346, "max_features": 1.0, "max_leaves": 1007, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "extra_tree", "hyperparameters": {"n_estimators": 1416, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 830258}}
|
||||
@@ -1,142 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946503,{'_modeljson': 'et/2dplanes.json'}
|
||||
2dplanes,0,regression,0.945047,{'_modeljson': 'et/adult.json'}
|
||||
2dplanes,0,regression,0.933571,{'_modeljson': 'et/Airlines.json'}
|
||||
2dplanes,0,regression,0.919021,{'_modeljson': 'et/Albert.json'}
|
||||
2dplanes,0,regression,0.944532,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.94471,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.914912,{'_modeljson': 'et/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.946045,{'_modeljson': 'et/car.json'}
|
||||
2dplanes,0,regression,0.935777,{'_modeljson': 'et/connect-4.json'}
|
||||
2dplanes,0,regression,0.91501,{'_modeljson': 'et/default.json'}
|
||||
2dplanes,0,regression,0.94497,{'_modeljson': 'et/dilbert.json'}
|
||||
2dplanes,0,regression,0.914907,{'_modeljson': 'et/poker.json'}
|
||||
adult,0,binary,0.902771,{'_modeljson': 'et/2dplanes.json'}
|
||||
adult,0,binary,0.919086,{'_modeljson': 'et/adult.json'}
|
||||
adult,0,binary,0.906742,{'_modeljson': 'et/Airlines.json'}
|
||||
adult,0,binary,0.897039,{'_modeljson': 'et/Albert.json'}
|
||||
adult,0,binary,0.919317,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.918404,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
adult,0,binary,0.895193,{'_modeljson': 'et/bng_pbc.json'}
|
||||
adult,0,binary,0.912965,{'_modeljson': 'et/car.json'}
|
||||
adult,0,binary,0.904228,{'_modeljson': 'et/connect-4.json'}
|
||||
adult,0,binary,0.893933,{'_modeljson': 'et/default.json'}
|
||||
adult,0,binary,0.918539,{'_modeljson': 'et/dilbert.json'}
|
||||
adult,0,binary,0.895813,{'_modeljson': 'et/poker.json'}
|
||||
Airlines,0,binary,0.683928,{'_modeljson': 'et/2dplanes.json'}
|
||||
Airlines,0,binary,0.709673,{'_modeljson': 'et/adult.json'}
|
||||
Airlines,0,binary,0.724391,{'_modeljson': 'et/Airlines.json'}
|
||||
Airlines,0,binary,0.707411,{'_modeljson': 'et/Albert.json'}
|
||||
Airlines,0,binary,0.713548,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.712774,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.708477,{'_modeljson': 'et/bng_pbc.json'}
|
||||
Airlines,0,binary,0.695604,{'_modeljson': 'et/car.json'}
|
||||
Airlines,0,binary,0.719631,{'_modeljson': 'et/connect-4.json'}
|
||||
Airlines,0,binary,0.619025,{'_modeljson': 'et/default.json'}
|
||||
Airlines,0,binary,0.710038,{'_modeljson': 'et/dilbert.json'}
|
||||
Airlines,0,binary,0.708628,{'_modeljson': 'et/poker.json'}
|
||||
Albert,0,binary,0.707126,{'_modeljson': 'et/2dplanes.json'}
|
||||
Albert,0,binary,0.727819,{'_modeljson': 'et/adult.json'}
|
||||
Albert,0,binary,0.733953,{'_modeljson': 'et/Airlines.json'}
|
||||
Albert,0,binary,0.739138,{'_modeljson': 'et/Albert.json'}
|
||||
Albert,0,binary,0.729251,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.728612,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.736396,{'_modeljson': 'et/bng_pbc.json'}
|
||||
Albert,0,binary,0.719311,{'_modeljson': 'et/car.json'}
|
||||
Albert,0,binary,0.735032,{'_modeljson': 'et/connect-4.json'}
|
||||
Albert,0,binary,0.725017,{'_modeljson': 'et/default.json'}
|
||||
Albert,0,binary,0.728108,{'_modeljson': 'et/dilbert.json'}
|
||||
Albert,0,binary,0.736668,{'_modeljson': 'et/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.708259,{'_modeljson': 'et/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.872603,{'_modeljson': 'et/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.839293,{'_modeljson': 'et/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.834606,{'_modeljson': 'et/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.873141,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.860569,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.834654,{'_modeljson': 'et/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.81679,{'_modeljson': 'et/car.json'}
|
||||
Amazon_employee_access,0,binary,0.831975,{'_modeljson': 'et/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.839651,{'_modeljson': 'et/default.json'}
|
||||
Amazon_employee_access,0,binary,0.868815,{'_modeljson': 'et/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.841461,{'_modeljson': 'et/poker.json'}
|
||||
bng_breastTumor,0,regression,0.137191,{'_modeljson': 'et/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.181002,{'_modeljson': 'et/adult.json'}
|
||||
bng_breastTumor,0,regression,0.163121,{'_modeljson': 'et/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.116596,{'_modeljson': 'et/Albert.json'}
|
||||
bng_breastTumor,0,regression,0.181745,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.180948,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.0784668,{'_modeljson': 'et/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.168552,{'_modeljson': 'et/car.json'}
|
||||
bng_breastTumor,0,regression,0.165576,{'_modeljson': 'et/connect-4.json'}
|
||||
bng_breastTumor,0,regression,-0.28734,{'_modeljson': 'et/default.json'}
|
||||
bng_breastTumor,0,regression,0.1822,{'_modeljson': 'et/dilbert.json'}
|
||||
bng_breastTumor,0,regression,0.0780929,{'_modeljson': 'et/poker.json'}
|
||||
bng_pbc,0,regression,0.332032,{'_modeljson': 'et/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.3879,{'_modeljson': 'et/adult.json'}
|
||||
bng_pbc,0,regression,0.411442,{'_modeljson': 'et/Airlines.json'}
|
||||
bng_pbc,0,regression,0.400094,{'_modeljson': 'et/Albert.json'}
|
||||
bng_pbc,0,regression,0.394067,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.391695,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.421267,{'_modeljson': 'et/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.361909,{'_modeljson': 'et/car.json'}
|
||||
bng_pbc,0,regression,0.402332,{'_modeljson': 'et/connect-4.json'}
|
||||
bng_pbc,0,regression,0.418622,{'_modeljson': 'et/default.json'}
|
||||
bng_pbc,0,regression,0.388768,{'_modeljson': 'et/dilbert.json'}
|
||||
bng_pbc,0,regression,0.421152,{'_modeljson': 'et/poker.json'}
|
||||
car,0,multiclass,-0.0815482,{'_modeljson': 'et/2dplanes.json'}
|
||||
car,0,multiclass,-0.218552,{'_modeljson': 'et/adult.json'}
|
||||
car,0,multiclass,-0.0474428,{'_modeljson': 'et/Airlines.json'}
|
||||
car,0,multiclass,-0.108586,{'_modeljson': 'et/Albert.json'}
|
||||
car,0,multiclass,-0.218073,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.0397411,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.0485655,{'_modeljson': 'et/bng_pbc.json'}
|
||||
car,0,multiclass,-0.0524496,{'_modeljson': 'et/car.json'}
|
||||
car,0,multiclass,-0.0690461,{'_modeljson': 'et/connect-4.json'}
|
||||
car,0,multiclass,-0.111939,{'_modeljson': 'et/default.json'}
|
||||
car,0,multiclass,-0.218153,{'_modeljson': 'et/dilbert.json'}
|
||||
car,0,multiclass,-0.0502018,{'_modeljson': 'et/poker.json'}
|
||||
connect-4,0,multiclass,-0.706448,{'_modeljson': 'et/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.54998,{'_modeljson': 'et/adult.json'}
|
||||
connect-4,0,multiclass,-0.495074,{'_modeljson': 'et/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.468797,{'_modeljson': 'et/Albert.json'}
|
||||
connect-4,0,multiclass,-0.528177,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.545043,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.57415,{'_modeljson': 'et/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.639965,{'_modeljson': 'et/car.json'}
|
||||
connect-4,0,multiclass,-0.459906,{'_modeljson': 'et/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.540561,{'_modeljson': 'et/default.json'}
|
||||
connect-4,0,multiclass,-0.547218,{'_modeljson': 'et/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.573145,{'_modeljson': 'et/poker.json'}
|
||||
dilbert,0,multiclass,-0.626964,{'_modeljson': 'et/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.230603,{'_modeljson': 'et/adult.json'}
|
||||
dilbert,0,multiclass,-0.246071,{'_modeljson': 'et/Airlines.json'}
|
||||
dilbert,0,multiclass,-0.237068,{'_modeljson': 'et/Albert.json'}
|
||||
dilbert,0,multiclass,-0.230785,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
dilbert,0,multiclass,-0.253409,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
dilbert,0,multiclass,-0.247331,{'_modeljson': 'et/bng_pbc.json'}
|
||||
dilbert,0,multiclass,-0.383859,{'_modeljson': 'et/car.json'}
|
||||
dilbert,0,multiclass,-0.234819,{'_modeljson': 'et/connect-4.json'}
|
||||
dilbert,0,multiclass,-0.308227,{'_modeljson': 'et/default.json'}
|
||||
dilbert,0,multiclass,-0.231163,{'_modeljson': 'et/dilbert.json'}
|
||||
dilbert,0,multiclass,-0.245383,{'_modeljson': 'et/poker.json'}
|
||||
Dionis,0,multiclass,-3.354,{'_modeljson': 'et/2dplanes.json'}
|
||||
Dionis,0,multiclass,-1.56815,{'_modeljson': 'et/adult.json'}
|
||||
Dionis,0,multiclass,-0.758098,{'_modeljson': 'et/Airlines.json'}
|
||||
Dionis,0,multiclass,-1.36204,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
Dionis,0,multiclass,-1.40398,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
Dionis,0,multiclass,-2.44773,{'_modeljson': 'et/car.json'}
|
||||
Dionis,0,multiclass,-0.759589,{'_modeljson': 'et/connect-4.json'}
|
||||
Dionis,0,multiclass,-0.789821,{'_modeljson': 'et/default.json'}
|
||||
Dionis,0,multiclass,-1.54593,{'_modeljson': 'et/dilbert.json'}
|
||||
poker,0,regression,0.103608,{'_modeljson': 'et/2dplanes.json'}
|
||||
poker,0,regression,0.314258,{'_modeljson': 'et/adult.json'}
|
||||
poker,0,regression,0.531285,{'_modeljson': 'et/Airlines.json'}
|
||||
poker,0,regression,0.30208,{'_modeljson': 'et/Albert.json'}
|
||||
poker,0,regression,0.358474,{'_modeljson': 'et/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.344292,{'_modeljson': 'et/bng_breastTumor.json'}
|
||||
poker,0,regression,0.663188,{'_modeljson': 'et/bng_pbc.json'}
|
||||
poker,0,regression,0.180103,{'_modeljson': 'et/car.json'}
|
||||
poker,0,regression,0.394291,{'_modeljson': 'et/connect-4.json'}
|
||||
poker,0,regression,0.753355,{'_modeljson': 'et/default.json'}
|
||||
poker,0,regression,0.317809,{'_modeljson': 'et/dilbert.json'}
|
||||
poker,0,regression,0.663812,{'_modeljson': 'et/poker.json'}
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 103, "num_leaves": 33, "min_child_samples": 4, "learning_rate": 0.05800185361316003, "log_max_bin": 6, "colsample_bytree": 1.0, "reg_alpha": 1.5987124004961213, "reg_lambda": 10.56445079499673}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 733, "num_leaves": 11, "min_child_samples": 94, "learning_rate": 0.06276798296942972, "log_max_bin": 6, "colsample_bytree": 0.6341928918435795, "reg_alpha": 0.5811038918218691, "reg_lambda": 43.304997517523944}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 2541, "num_leaves": 1667, "min_child_samples": 29, "learning_rate": 0.0016660662914022302, "log_max_bin": 8, "colsample_bytree": 0.5157078343718623, "reg_alpha": 0.045792841240713165, "reg_lambda": 0.0012362651138125363, "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 12659, "num_leaves": 566, "min_child_samples": 51, "learning_rate": 0.0017248557932071625, "log_max_bin": 10, "colsample_bytree": 0.35373661752616337, "reg_alpha": 0.004824272162679245, "reg_lambda": 8.51563063056529, "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 198, "num_leaves": 6241, "min_child_samples": 3, "learning_rate": 0.003807690748728824, "log_max_bin": 10, "colsample_bytree": 0.3192882305722113, "reg_alpha": 0.024630507311503163, "reg_lambda": 0.06738306675149014}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 362, "num_leaves": 1208, "min_child_samples": 8, "learning_rate": 0.02070742242160566, "log_max_bin": 4, "colsample_bytree": 0.37915528071680865, "reg_alpha": 0.002982599447751338, "reg_lambda": 1.136605174453919, "FLAML_sample_size": 337147}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 11842, "num_leaves": 31, "min_child_samples": 3, "learning_rate": 0.0015861878568503534, "log_max_bin": 8, "colsample_bytree": 0.3814347840573729, "reg_alpha": 0.0009765625, "reg_lambda": 0.011319689446351965}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 644, "num_leaves": 40, "min_child_samples": 38, "learning_rate": 0.06007328261566753, "log_max_bin": 5, "colsample_bytree": 0.6950692048656423, "reg_alpha": 0.0009765625, "reg_lambda": 9.849318389111616, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 27202, "num_leaves": 848, "min_child_samples": 2, "learning_rate": 0.0019296395751528979, "log_max_bin": 5, "colsample_bytree": 0.7328229531785452, "reg_alpha": 6.112225454676263, "reg_lambda": 0.08606162543586986, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 311, "num_leaves": 4, "min_child_samples": 5, "learning_rate": 0.5547292134798673, "log_max_bin": 3, "colsample_bytree": 0.9917614238487915, "reg_alpha": 0.0009765625, "reg_lambda": 0.0019177370889840813}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 3726, "num_leaves": 155, "min_child_samples": 4, "learning_rate": 0.040941607728296484, "log_max_bin": 5, "colsample_bytree": 0.5326256194627191, "reg_alpha": 0.7408711930398492, "reg_lambda": 0.5467731065349226}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 7325, "num_leaves": 15, "min_child_samples": 6, "learning_rate": 0.009932524214971736, "log_max_bin": 6, "colsample_bytree": 0.8592091503131608, "reg_alpha": 0.0009997224940106115, "reg_lambda": 0.04069855891326503}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "lgbm", "hyperparameters": {"n_estimators": 32767, "num_leaves": 372, "min_child_samples": 4, "learning_rate": 0.03517259015200922, "log_max_bin": 5, "colsample_bytree": 1.0, "reg_alpha": 0.02271142170225636, "reg_lambda": 0.001963791798843179, "FLAML_sample_size": 830258}}
|
||||
@@ -1,167 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946366,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
2dplanes,0,regression,0.907774,{'_modeljson': 'lgbm/adult.json'}
|
||||
2dplanes,0,regression,0.901643,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
2dplanes,0,regression,0.915098,{'_modeljson': 'lgbm/Albert.json'}
|
||||
2dplanes,0,regression,0.302328,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.94523,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.945698,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.946194,{'_modeljson': 'lgbm/car.json'}
|
||||
2dplanes,0,regression,0.945549,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
2dplanes,0,regression,0.946232,{'_modeljson': 'lgbm/default.json'}
|
||||
2dplanes,0,regression,0.945594,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
2dplanes,0,regression,0.836996,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
2dplanes,0,regression,0.917152,{'_modeljson': 'lgbm/poker.json'}
|
||||
adult,0,binary,0.927203,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
adult,0,binary,0.932072,{'_modeljson': 'lgbm/adult.json'}
|
||||
adult,0,binary,0.926563,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
adult,0,binary,0.928604,{'_modeljson': 'lgbm/Albert.json'}
|
||||
adult,0,binary,0.911171,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.930645,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
adult,0,binary,0.928603,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
adult,0,binary,0.915825,{'_modeljson': 'lgbm/car.json'}
|
||||
adult,0,binary,0.919499,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
adult,0,binary,0.930109,{'_modeljson': 'lgbm/default.json'}
|
||||
adult,0,binary,0.932453,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
adult,0,binary,0.921959,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
adult,0,binary,0.910763,{'_modeljson': 'lgbm/poker.json'}
|
||||
Airlines,0,binary,0.705404,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Airlines,0,binary,0.714521,{'_modeljson': 'lgbm/adult.json'}
|
||||
Airlines,0,binary,0.732288,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
Airlines,0,binary,0.710273,{'_modeljson': 'lgbm/Albert.json'}
|
||||
Airlines,0,binary,0.707107,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.718682,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.724703,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
Airlines,0,binary,0.690574,{'_modeljson': 'lgbm/car.json'}
|
||||
Airlines,0,binary,0.725808,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
Airlines,0,binary,0.710419,{'_modeljson': 'lgbm/default.json'}
|
||||
Airlines,0,binary,0.710419,{'_modeljson': 'lgbm/default.json'}
|
||||
Airlines,0,binary,0.718609,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
Airlines,0,binary,0.716213,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
Airlines,0,binary,0.654868,{'_modeljson': 'lgbm/poker.json'}
|
||||
Albert,0,binary,0.744825,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Albert,0,binary,0.758979,{'_modeljson': 'lgbm/adult.json'}
|
||||
Albert,0,binary,0.758364,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
Albert,0,binary,0.770923,{'_modeljson': 'lgbm/Albert.json'}
|
||||
Albert,0,binary,0.745091,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.754523,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
Albert,0,binary,0.759939,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.765119,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
Albert,0,binary,0.745067,{'_modeljson': 'lgbm/car.json'}
|
||||
Albert,0,binary,0.762311,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
Albert,0,binary,0.753181,{'_modeljson': 'lgbm/default.json'}
|
||||
Albert,0,binary,0.753181,{'_modeljson': 'lgbm/default.json'}
|
||||
Albert,0,binary,0.760248,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
Albert,0,binary,0.758111,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
Albert,0,binary,0.761768,{'_modeljson': 'lgbm/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.811238,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.867285,{'_modeljson': 'lgbm/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.8888,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.881302,{'_modeljson': 'lgbm/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.891085,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.816736,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
Amazon_employee_access,0,binary,0.861187,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.848348,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.760891,{'_modeljson': 'lgbm/car.json'}
|
||||
Amazon_employee_access,0,binary,0.872951,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851183,{'_modeljson': 'lgbm/default.json'}
|
||||
Amazon_employee_access,0,binary,0.851183,{'_modeljson': 'lgbm/default.json'}
|
||||
Amazon_employee_access,0,binary,0.851173,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.843577,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
Amazon_employee_access,0,binary,0.866543,{'_modeljson': 'lgbm/poker.json'}
|
||||
bng_breastTumor,0,regression,0.186246,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.181787,{'_modeljson': 'lgbm/adult.json'}
|
||||
bng_breastTumor,0,regression,0.177175,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.169053,{'_modeljson': 'lgbm/Albert.json'}
|
||||
bng_breastTumor,0,regression,0.0734972,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.192189,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
bng_breastTumor,0,regression,0.195887,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.144786,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.168074,{'_modeljson': 'lgbm/car.json'}
|
||||
bng_breastTumor,0,regression,0.159819,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.192813,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_breastTumor,0,regression,0.192813,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_breastTumor,0,regression,0.193994,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
bng_breastTumor,0,regression,0.162977,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
bng_breastTumor,0,regression,-0.0283641,{'_modeljson': 'lgbm/poker.json'}
|
||||
bng_pbc,0,regression,0.415569,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.421659,{'_modeljson': 'lgbm/adult.json'}
|
||||
bng_pbc,0,regression,0.433399,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
bng_pbc,0,regression,0.429397,{'_modeljson': 'lgbm/Albert.json'}
|
||||
bng_pbc,0,regression,0.218693,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.426949,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
bng_pbc,0,regression,0.444361,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.459898,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.404274,{'_modeljson': 'lgbm/car.json'}
|
||||
bng_pbc,0,regression,0.453742,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
bng_pbc,0,regression,0.425581,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_pbc,0,regression,0.425581,{'_modeljson': 'lgbm/default.json'}
|
||||
bng_pbc,0,regression,0.440833,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
bng_pbc,0,regression,0.42319,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
bng_pbc,0,regression,0.440263,{'_modeljson': 'lgbm/poker.json'}
|
||||
car,0,multiclass,-0.126115,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
car,0,multiclass,-0.20528,{'_modeljson': 'lgbm/adult.json'}
|
||||
car,0,multiclass,-0.189212,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
car,0,multiclass,-0.233147,{'_modeljson': 'lgbm/Albert.json'}
|
||||
car,0,multiclass,-0.598807,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.119622,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
car,0,multiclass,-0.0372956,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.179642,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
car,0,multiclass,-0.000121047,{'_modeljson': 'lgbm/car.json'}
|
||||
car,0,multiclass,-0.050453,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
car,0,multiclass,-0.00234879,{'_modeljson': 'lgbm/default.json'}
|
||||
car,0,multiclass,-0.00234879,{'_modeljson': 'lgbm/default.json'}
|
||||
car,0,multiclass,-0.000295737,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
car,0,multiclass,-0.297016,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
car,0,multiclass,-0.00178529,{'_modeljson': 'lgbm/poker.json'}
|
||||
connect-4,0,multiclass,-0.527657,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.462894,{'_modeljson': 'lgbm/adult.json'}
|
||||
connect-4,0,multiclass,-0.449048,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.393871,{'_modeljson': 'lgbm/Albert.json'}
|
||||
connect-4,0,multiclass,-0.73746,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.485399,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
connect-4,0,multiclass,-0.393378,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.388117,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.484577,{'_modeljson': 'lgbm/car.json'}
|
||||
connect-4,0,multiclass,-0.32741,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.482328,{'_modeljson': 'lgbm/default.json'}
|
||||
connect-4,0,multiclass,-0.482328,{'_modeljson': 'lgbm/default.json'}
|
||||
connect-4,0,multiclass,-0.413426,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.438676,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
connect-4,0,multiclass,-0.489035,{'_modeljson': 'lgbm/poker.json'}
|
||||
dilbert,0,multiclass,-0.134669,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.0405039,{'_modeljson': 'lgbm/adult.json'}
|
||||
dilbert,0,multiclass,-0.0888238,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
dilbert,0,multiclass,-0.0618876,{'_modeljson': 'lgbm/Albert.json'}
|
||||
dilbert,0,multiclass,-0.0653412,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
dilbert,0,multiclass,-0.0484292,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
dilbert,0,multiclass,-0.126248,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
dilbert,0,multiclass,-0.0473867,{'_modeljson': 'lgbm/car.json'}
|
||||
dilbert,0,multiclass,-0.0759236,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
dilbert,0,multiclass,-0.0490604,{'_modeljson': 'lgbm/default.json'}
|
||||
dilbert,0,multiclass,-0.0490604,{'_modeljson': 'lgbm/default.json'}
|
||||
dilbert,0,multiclass,-0.034108,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
dilbert,0,multiclass,-0.0661046,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
dilbert,0,multiclass,-0.0744684,{'_modeljson': 'lgbm/poker.json'}
|
||||
Dionis,0,multiclass,-0.395452,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
Dionis,0,multiclass,-1.40235,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
Dionis,0,multiclass,-0.306241,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
Dionis,0,multiclass,-33.7902,{'_modeljson': 'lgbm/car.json'}
|
||||
Dionis,0,multiclass,-27.9456,{'_modeljson': 'lgbm/default.json'}
|
||||
Dionis,0,multiclass,-28.095,{'_modeljson': 'lgbm/default.json'}
|
||||
Dionis,0,multiclass,-0.318142,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
poker,0,regression,0.203695,{'_modeljson': 'lgbm/2dplanes.json'}
|
||||
poker,0,regression,0.424513,{'_modeljson': 'lgbm/adult.json'}
|
||||
poker,0,regression,0.490528,{'_modeljson': 'lgbm/Airlines.json'}
|
||||
poker,0,regression,0.767652,{'_modeljson': 'lgbm/Albert.json'}
|
||||
poker,0,regression,0.0592655,{'_modeljson': 'lgbm/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.393168,{'_modeljson': 'lgbm/APSFailure.json'}
|
||||
poker,0,regression,0.614152,{'_modeljson': 'lgbm/bng_breastTumor.json'}
|
||||
poker,0,regression,0.854134,{'_modeljson': 'lgbm/bng_pbc.json'}
|
||||
poker,0,regression,0.197075,{'_modeljson': 'lgbm/car.json'}
|
||||
poker,0,regression,0.879695,{'_modeljson': 'lgbm/connect-4.json'}
|
||||
poker,0,regression,0.284102,{'_modeljson': 'lgbm/default.json'}
|
||||
poker,0,regression,0.284102,{'_modeljson': 'lgbm/default.json'}
|
||||
poker,0,regression,0.433648,{'_modeljson': 'lgbm/dilbert.json'}
|
||||
poker,0,regression,0.657666,{'_modeljson': 'lgbm/Dionis.json'}
|
||||
poker,0,regression,0.940835,{'_modeljson': 'lgbm/poker.json'}
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 38, "max_features": 1.0, "max_leaves": 58}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 418, "max_features": 0.5303485415288045, "max_leaves": 6452, "criterion": "entropy", "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 0.10091610074262287, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 501, "max_features": 0.24484242524861066, "max_leaves": 1156, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 510, "max_features": 0.12094682590862652, "max_leaves": 32767, "criterion": "entropy", "FLAML_sample_size": 337147}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 1212, "max_features": 0.3129111648657632, "max_leaves": 779, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 288, "max_features": 0.6436380990499977, "max_leaves": 1823, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 0.3158919059422144, "max_leaves": 32767, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 792, "max_features": 1.0, "max_leaves": 67, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 1907, "max_features": 0.3728618389498168, "max_leaves": 11731, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 350, "max_features": 0.748250835121453, "max_leaves": 433, "criterion": "entropy"}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "rf", "hyperparameters": {"n_estimators": 2047, "max_features": 1.0, "max_leaves": 32767, "FLAML_sample_size": 830258}}
|
||||
@@ -1,145 +0,0 @@
|
||||
task,fold,type,result,metric,params,info
|
||||
2dplanes,0,regression,0.946488,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
2dplanes,0,regression,0.936392,r2,{'_modeljson': 'rf/adult.json'},
|
||||
2dplanes,0,regression,0.940486,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
2dplanes,0,regression,0.924025,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
2dplanes,0,regression,0.911362,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
2dplanes,0,regression,0.944353,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
2dplanes,0,regression,0.932343,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
2dplanes,0,regression,0.946423,r2,{'_modeljson': 'rf/car.json'},
|
||||
2dplanes,0,regression,0.937309,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
2dplanes,0,regression,0.930126,r2,{'_modeljson': 'rf/default.json'},
|
||||
2dplanes,0,regression,0.945707,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
2dplanes,0,regression,0.923313,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
2dplanes,0,regression,0.930579,r2,{'_modeljson': 'rf/poker.json'},
|
||||
adult,0,binary,0.912946,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
adult,0,binary,0.91978,auc,{'_modeljson': 'rf/adult.json'},
|
||||
adult,0,binary,0.910127,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
adult,0,binary,0.910553,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
adult,0,binary,0.919662,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
adult,0,binary,0.915769,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
adult,0,binary,0.91003,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
adult,0,binary,0.914697,auc,{'_modeljson': 'rf/car.json'},
|
||||
adult,0,binary,0.911118,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
adult,0,binary,0.907368,auc,{'_modeljson': 'rf/default.json'},
|
||||
adult,0,binary,0.919216,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
adult,0,binary,0.910528,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
adult,0,binary,0.904508,auc,{'_modeljson': 'rf/poker.json'},
|
||||
Airlines,0,binary,0.687817,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Airlines,0,binary,0.712804,auc,{'_modeljson': 'rf/adult.json'},
|
||||
Airlines,0,binary,0.727357,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
Airlines,0,binary,0.705541,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
Airlines,0,binary,0.71012,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
Airlines,0,binary,0.722532,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Airlines,0,binary,0.709287,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
Airlines,0,binary,0.688678,auc,{'_modeljson': 'rf/car.json'},
|
||||
Airlines,0,binary,0.725288,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
Airlines,0,binary,0.657276,auc,{'_modeljson': 'rf/default.json'},
|
||||
Airlines,0,binary,0.708515,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
Airlines,0,binary,0.705826,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
Airlines,0,binary,0.699484,auc,{'_modeljson': 'rf/poker.json'},
|
||||
Albert,0,binary,0.712348,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Albert,0,binary,0.72836,auc,{'_modeljson': 'rf/adult.json'},
|
||||
Albert,0,binary,0.734105,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
Albert,0,binary,0.737119,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
Albert,0,binary,0.729216,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
Albert,0,binary,0.731546,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Albert,0,binary,0.734847,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
Albert,0,binary,0.713965,auc,{'_modeljson': 'rf/car.json'},
|
||||
Albert,0,binary,0.735372,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
Albert,0,binary,0.728232,auc,{'_modeljson': 'rf/default.json'},
|
||||
Albert,0,binary,0.726823,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
Albert,0,binary,0.735994,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
Amazon_employee_access,0,binary,0.728779,auc,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Amazon_employee_access,0,binary,0.87801,auc,{'_modeljson': 'rf/adult.json'},
|
||||
Amazon_employee_access,0,binary,0.88085,auc,{'_modeljson': 'rf/Airlines.json'},
|
||||
Amazon_employee_access,0,binary,0.881869,auc,{'_modeljson': 'rf/Albert.json'},
|
||||
Amazon_employee_access,0,binary,0.881463,auc,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
Amazon_employee_access,0,binary,0.882723,auc,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Amazon_employee_access,0,binary,0.88299,auc,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
Amazon_employee_access,0,binary,0.808575,auc,{'_modeljson': 'rf/car.json'},
|
||||
Amazon_employee_access,0,binary,0.881209,auc,{'_modeljson': 'rf/connect-4.json'},
|
||||
Amazon_employee_access,0,binary,0.877507,auc,{'_modeljson': 'rf/default.json'},
|
||||
Amazon_employee_access,0,binary,0.875146,auc,{'_modeljson': 'rf/dilbert.json'},
|
||||
Amazon_employee_access,0,binary,0.878121,auc,{'_modeljson': 'rf/Dionis.json'},
|
||||
Amazon_employee_access,0,binary,0.886312,auc,{'_modeljson': 'rf/poker.json'},
|
||||
bng_breastTumor,0,regression,0.153657,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
bng_breastTumor,0,regression,0.156403,r2,{'_modeljson': 'rf/adult.json'},
|
||||
bng_breastTumor,0,regression,0.174569,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
bng_breastTumor,0,regression,0.0441869,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
bng_breastTumor,0,regression,0.157992,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
bng_breastTumor,0,regression,0.186635,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
bng_breastTumor,0,regression,0.0527547,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
bng_breastTumor,0,regression,0.158852,r2,{'_modeljson': 'rf/car.json'},
|
||||
bng_breastTumor,0,regression,0.150611,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
bng_breastTumor,0,regression,-0.02142,r2,{'_modeljson': 'rf/default.json'},
|
||||
bng_breastTumor,0,regression,0.183562,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
bng_breastTumor,0,regression,0.0414589,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
bng_breastTumor,0,regression,0.00390625,r2,{'_modeljson': 'rf/poker.json'},
|
||||
bng_pbc,0,regression,0.344043,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
bng_pbc,0,regression,0.402376,r2,{'_modeljson': 'rf/adult.json'},
|
||||
bng_pbc,0,regression,0.423262,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
bng_pbc,0,regression,0.386142,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
bng_pbc,0,regression,0.403857,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
bng_pbc,0,regression,0.413944,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
bng_pbc,0,regression,0.43206,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
bng_pbc,0,regression,0.348594,r2,{'_modeljson': 'rf/car.json'},
|
||||
bng_pbc,0,regression,0.427588,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
bng_pbc,0,regression,0.415337,r2,{'_modeljson': 'rf/default.json'},
|
||||
bng_pbc,0,regression,0.393936,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
bng_pbc,0,regression,0.415246,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
car,0,multiclass,-0.0575382,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
car,0,multiclass,-0.155878,neg_logloss,{'_modeljson': 'rf/adult.json'},
|
||||
car,0,multiclass,-0.0691041,neg_logloss,{'_modeljson': 'rf/Airlines.json'},
|
||||
car,0,multiclass,-0.156607,neg_logloss,{'_modeljson': 'rf/Albert.json'},
|
||||
car,0,multiclass,-0.156968,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
car,0,multiclass,-0.0692317,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
car,0,multiclass,-0.159856,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
car,0,multiclass,-0.046769,neg_logloss,{'_modeljson': 'rf/car.json'},
|
||||
car,0,multiclass,-0.0981933,neg_logloss,{'_modeljson': 'rf/connect-4.json'},
|
||||
car,0,multiclass,-0.0971712,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
car,0,multiclass,-0.0564843,neg_logloss,{'_modeljson': 'rf/dilbert.json'},
|
||||
car,0,multiclass,-0.157771,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
car,0,multiclass,-0.0511764,neg_logloss,{'_modeljson': 'rf/poker.json'},
|
||||
connect-4,0,multiclass,-0.725888,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
connect-4,0,multiclass,-0.576056,neg_logloss,{'_modeljson': 'rf/adult.json'},
|
||||
connect-4,0,multiclass,-0.48458,neg_logloss,{'_modeljson': 'rf/Airlines.json'},
|
||||
connect-4,0,multiclass,-0.505598,neg_logloss,{'_modeljson': 'rf/Albert.json'},
|
||||
connect-4,0,multiclass,-0.568184,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
connect-4,0,multiclass,-0.537511,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
connect-4,0,multiclass,-0.479022,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
connect-4,0,multiclass,-0.713123,neg_logloss,{'_modeljson': 'rf/car.json'},
|
||||
connect-4,0,multiclass,-0.475306,neg_logloss,{'_modeljson': 'rf/connect-4.json'},
|
||||
connect-4,0,multiclass,-0.518061,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
connect-4,0,multiclass,-0.599112,neg_logloss,{'_modeljson': 'rf/dilbert.json'},
|
||||
connect-4,0,multiclass,-0.503642,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
connect-4,0,multiclass,-0.57852,neg_logloss,{'_modeljson': 'rf/poker.json'},
|
||||
dilbert,0,multiclass,-0.557959,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
dilbert,0,multiclass,-0.294462,neg_logloss,{'_modeljson': 'rf/adult.json'},
|
||||
dilbert,0,multiclass,-0.293928,neg_logloss,{'_modeljson': 'rf/Airlines.json'},
|
||||
dilbert,0,multiclass,-0.299661,neg_logloss,{'_modeljson': 'rf/Albert.json'},
|
||||
dilbert,0,multiclass,-0.294668,neg_logloss,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
dilbert,0,multiclass,-0.314706,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
dilbert,0,multiclass,-0.313807,neg_logloss,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
dilbert,0,multiclass,-0.51482,neg_logloss,{'_modeljson': 'rf/car.json'},
|
||||
dilbert,0,multiclass,-0.293982,neg_logloss,{'_modeljson': 'rf/connect-4.json'},
|
||||
dilbert,0,multiclass,-0.343209,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
dilbert,0,multiclass,-0.2945,neg_logloss,{'_modeljson': 'rf/dilbert.json'},
|
||||
dilbert,0,multiclass,-0.298305,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
Dionis,0,multiclass,-3.55264,neg_logloss,{'_modeljson': 'rf/2dplanes.json'},
|
||||
Dionis,0,multiclass,-1.07117,neg_logloss,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
Dionis,0,multiclass,-0.784388,neg_logloss,{'_modeljson': 'rf/default.json'},
|
||||
Dionis,0,multiclass,-0.580332,neg_logloss,{'_modeljson': 'rf/Dionis.json'},
|
||||
poker,0,regression,0.125176,r2,{'_modeljson': 'rf/2dplanes.json'},
|
||||
poker,0,regression,0.148019,r2,{'_modeljson': 'rf/adult.json'},
|
||||
poker,0,regression,0.322507,r2,{'_modeljson': 'rf/Airlines.json'},
|
||||
poker,0,regression,0.172264,r2,{'_modeljson': 'rf/Albert.json'},
|
||||
poker,0,regression,0.113673,r2,{'_modeljson': 'rf/Amazon_employee_access.json'},
|
||||
poker,0,regression,0.243427,r2,{'_modeljson': 'rf/bng_breastTumor.json'},
|
||||
poker,0,regression,0.379662,r2,{'_modeljson': 'rf/bng_pbc.json'},
|
||||
poker,0,regression,0.133342,r2,{'_modeljson': 'rf/car.json'},
|
||||
poker,0,regression,0.296597,r2,{'_modeljson': 'rf/connect-4.json'},
|
||||
poker,0,regression,0.608532,r2,{'_modeljson': 'rf/default.json'},
|
||||
poker,0,regression,0.192625,r2,{'_modeljson': 'rf/dilbert.json'},
|
||||
poker,0,regression,0.172139,r2,{'_modeljson': 'rf/Dionis.json'},
|
||||
poker,0,regression,0.528869,r2,{'_modeljson': 'rf/poker.json'},
|
||||
|
@@ -1,221 +0,0 @@
|
||||
import sys
|
||||
import pickle
|
||||
from sklearn.datasets import load_iris, fetch_california_housing, load_breast_cancer
|
||||
from sklearn.model_selection import train_test_split
|
||||
import pandas as pd
|
||||
from flaml import AutoML
|
||||
from flaml.default import (
|
||||
preprocess_and_suggest_hyperparams,
|
||||
suggest_hyperparams,
|
||||
suggest_learner,
|
||||
)
|
||||
from flaml.default import portfolio, regret
|
||||
|
||||
|
||||
def test_greedy_feedback(path="test/default", strategy="greedy-feedback"):
|
||||
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
# portfolio.main()
|
||||
# sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
# portfolio.main()
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
|
||||
|
||||
def test_build_portfolio(path="test/default", strategy="greedy"):
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task binary --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task multiclass --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task regression --estimator lgbm xgboost xgb_limitdepth rf extra_tree --strategy {strategy}".split()
|
||||
portfolio.main()
|
||||
|
||||
|
||||
def test_iris(as_frame=True):
    """Fit AutoML on iris with data-driven starting points.

    Runs two fits: one with the default "data" starting points and one
    pointing at the local portfolio under test/default.
    """
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    automl = AutoML()
    common_settings = dict(
        time_budget=2,
        metric="accuracy",
        task="classification",
        log_file_name="test/iris.log",
        n_jobs=1,
    )
    for starting_points in ("data", "data:test/default"):
        automl.fit(X_train, y_train, starting_points=starting_points, **common_settings)
|
||||
|
||||
|
||||
def test_housing(as_frame=True):
    """Run a zero-iteration AutoML fit on california housing.

    max_iter=0 means no search is performed; only the data-driven
    starting configuration path is exercised.
    """
    X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=as_frame)
    automl = AutoML()
    automl.fit(
        X_train,
        y_train,
        time_budget=2,
        task="regression",
        estimator_list=["xgboost", "lgbm"],
        log_file_name="test/housing.log",
        n_jobs=1,
        starting_points="data",
        max_iter=0,
    )
|
||||
|
||||
|
||||
def test_regret():
    """Compute the binary-task regret table for the lgbm benchmark results."""
    # regret.main() parses sys.argv; hand it an explicit argument vector.
    sys.argv = [
        "regret.py",
        "--result_csv", "test/default/lgbm/results.csv",
        "--task_type", "binary",
        "--output", "test/default/lgbm/binary_regret.csv",
    ]
    regret.main()
|
||||
|
||||
|
||||
def test_suggest_classification():
    """Exercise zero-shot hyperparameter suggestion for classification.

    Covers suggest_hyperparams, preprocess_and_suggest_hyperparams and
    suggest_learner on breast-cancer (binary) and iris (multiclass) data,
    and round-trips predictions through the returned feature and label
    transformers.
    """
    location = "test/default"
    X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
    suggested = suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
    print(suggested)
    suggested = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
    print(suggested)
    suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
    print(suggested)

    X, y = load_iris(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    (
        hyperparams,
        estimator_class,
        X,
        y,
        feature_transformer,
        label_transformer,
    ) = preprocess_and_suggest_hyperparams("classification", X_train, y_train, "lgbm", location=location)
    # persist the fitted feature transformer so it can be reloaded elsewhere
    with open("test/default/feature_transformer", "wb") as f:
        pickle.dump(feature_transformer, f, pickle.HIGHEST_PROTOCOL)
    model = estimator_class(**hyperparams)  # estimator_class is LGBMClassifier
    model.fit(X, y)
    # apply the same preprocessing to the held-out split before predicting,
    # then map the encoded predictions back to the original label space
    X_test = feature_transformer.transform(X_test)
    y_pred = label_transformer.inverse_transform(pd.Series(model.predict(X_test).astype(int)))
    print(y_pred)
    suggested = suggest_hyperparams("classification", X_train, y_train, "xgboost", location=location)
    print(suggested)
    suggested = preprocess_and_suggest_hyperparams(
        "classification", X_train, y_train, "xgb_limitdepth", location=location
    )
    print(suggested)
    suggested = suggest_hyperparams("classification", X_train, y_train, "xgb_limitdepth", location=location)
    # NOTE(review): presumably selects the better-suited learner of the two
    # candidates from the portfolio — confirm against flaml.default docs
    suggested = suggest_learner(
        "classification",
        X_train,
        y_train,
        estimator_list=["xgboost", "xgb_limitdepth"],
        location=location,
    )
    print(suggested)
|
||||
|
||||
|
||||
def test_suggest_regression():
    """Exercise zero-shot hyperparameter suggestion for regression learners."""
    location = "test/default"
    X_train, y_train = fetch_california_housing(return_X_y=True, as_frame=True)
    # same data, three suggestion calls differing in API entry and estimator
    for suggest_fn, estimator in (
        (suggest_hyperparams, "lgbm"),
        (preprocess_and_suggest_hyperparams, "xgboost"),
        (suggest_hyperparams, "xgb_limitdepth"),
    ):
        print(suggest_fn("regression", X_train, y_train, estimator, location=location))
    print(suggest_learner("regression", X_train, y_train, location=location))
|
||||
|
||||
|
||||
def test_rf():
    """Smoke-test the zero-shot RandomForest classifier and regressor."""
    from flaml.default import RandomForestRegressor, RandomForestClassifier

    # classifier on breast cancer, fitted on a small slice
    X_cls, y_cls = load_breast_cancer(return_X_y=True, as_frame=True)
    clf = RandomForestClassifier()
    clf.fit(X_cls[:100], y_cls[:100])
    clf.predict(X_cls)
    clf.predict_proba(X_cls)
    print(clf)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = RandomForestRegressor(default_location="test/default")
    reg.fit(X_reg[:100], y_reg[:100])
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_extratrees():
    """Smoke-test the zero-shot ExtraTrees classifier and regressor."""
    from flaml.default import ExtraTreesRegressor, ExtraTreesClassifier

    # classifier on iris, fitted on a small slice
    X_cls, y_cls = load_iris(return_X_y=True, as_frame=True)
    clf = ExtraTreesClassifier()
    clf.fit(X_cls[:100], y_cls[:100])
    clf.predict(X_cls)
    clf.predict_proba(X_cls)
    print(clf)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = ExtraTreesRegressor(default_location="test/default")
    reg.fit(X_reg[:100], y_reg[:100])
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_lgbm():
    """Smoke-test the zero-shot LGBM classifier and regressor."""
    from flaml.default import LGBMRegressor, LGBMClassifier

    # classifier on breast cancer; also request per-feature contributions
    X_cls, y_cls = load_breast_cancer(return_X_y=True, as_frame=True)
    clf = LGBMClassifier(n_jobs=1)
    clf.fit(X_cls, y_cls)
    clf.predict(X_cls, pred_contrib=True)
    clf.predict_proba(X_cls)
    print(clf.get_params())
    print(clf)
    print(clf.classes_)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = LGBMRegressor(default_location="test/default")
    reg.fit(X_reg, y_reg)
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_xgboost():
    """Smoke-test the zero-shot XGBoost classifier and regressor."""
    from flaml.default import XGBRegressor, XGBClassifier

    # classifier on breast cancer with max_depth=0, fitted on a small slice
    X_cls, y_cls = load_breast_cancer(return_X_y=True, as_frame=True)
    clf = XGBClassifier(max_depth=0)
    clf.fit(X_cls[:100], y_cls[:100])
    clf.predict(X_cls)
    clf.predict_proba(X_cls)
    print(clf)
    print(clf.classes_)

    # regressor on california housing, defaults loaded from a local portfolio
    X_reg, y_reg = fetch_california_housing(return_X_y=True, as_frame=True)
    reg = XGBRegressor(default_location="test/default")
    reg.fit(X_reg[:100], y_reg[:100])
    reg.predict(X_reg)
    print(reg)
|
||||
|
||||
|
||||
def test_nobudget():
    """Verify zero-shot starting configs survive a fit with no time budget."""
    X_train, y_train = load_breast_cancer(return_X_y=True, as_frame=True)
    automl = AutoML()
    estimators = ["lgbm", "extra_tree", "rf"]
    automl.fit(
        X_train[:20],
        y_train[:20],
        estimator_list=estimators,
        max_iter=12,
        starting_points="data",
        log_file_name="test/default/no_budget.txt",
        log_type="all",
    )
    automl.fit(X_train[:20], y_train[:20], estimator_list=estimators)
    # a zero-shot config outside the search space must not degenerate
    # to the low-cost init config
    assert automl.best_config_per_estimator["extra_tree"]["n_estimators"] > 4
    # the zero-shot config {} must be left unmodified
    assert "criterion" not in automl.best_config_per_estimator["rf"]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, rebuild the portfolios shipped under flaml/default.
    test_build_portfolio("flaml/default")
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 2704, "max_depth": 2, "min_child_weight": 0.23751738294732322, "learning_rate": 0.019828117294812268, "subsample": 0.8798706041292946, "colsample_bylevel": 0.978891799553329, "colsample_bytree": 1.0, "reg_alpha": 0.3023181744217667, "reg_lambda": 101.10719177747677}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3573, "max_depth": 13, "min_child_weight": 2.921657581984971, "learning_rate": 0.00699976723859477, "subsample": 0.6110504706508572, "colsample_bylevel": 0.9998661537469163, "colsample_bytree": 0.5457693412489456, "reg_alpha": 0.05315763138176945, "reg_lambda": 23.067599600958623, "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3526, "max_depth": 13, "min_child_weight": 0.0994486725676356, "learning_rate": 0.0009765625, "subsample": 0.46123759274652554, "colsample_bylevel": 1.0, "colsample_bytree": 0.4498813776397717, "reg_alpha": 0.002599398546499414, "reg_lambda": 0.028336396854402753}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 5457, "max_depth": 6, "min_child_weight": 0.19978269031877885, "learning_rate": 0.003906732665632749, "subsample": 0.8207785234496902, "colsample_bylevel": 0.8438751931476698, "colsample_bytree": 0.42202862997585794, "reg_alpha": 0.017372558844968737, "reg_lambda": 0.03977802121721031}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 7782, "max_depth": 7, "min_child_weight": 0.3794874452608909, "learning_rate": 0.006733035771172325, "subsample": 1.0, "colsample_bylevel": 1.0, "colsample_bytree": 0.5611305922560855, "reg_alpha": 8.203853065625196, "reg_lambda": 56.48543538808782, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 1013, "max_depth": 15, "min_child_weight": 57.33124114425335, "learning_rate": 0.009706354607542536, "subsample": 1.0, "colsample_bylevel": 0.7925997002174675, "colsample_bytree": 0.874062117666267, "reg_alpha": 0.7965442116152655, "reg_lambda": 2.769937488341342, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 624, "max_depth": 3, "min_child_weight": 0.0017043575728019624, "learning_rate": 0.8481863978692453, "subsample": 0.9897901748446495, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0009765625, "reg_lambda": 0.008686469265798288}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 1499, "max_depth": 11, "min_child_weight": 0.07563529776156448, "learning_rate": 0.039042609221240955, "subsample": 0.7832981935783824, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0009765625, "reg_lambda": 23.513066752844153}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 405, "max_depth": 4, "min_child_weight": 0.2264977130755997, "learning_rate": 0.3390883186947167, "subsample": 0.8078627200173096, "colsample_bylevel": 0.8570282862730856, "colsample_bytree": 0.8280063772581445, "reg_alpha": 0.007634576038353066, "reg_lambda": 1.7101180066063097}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgb_limitdepth", "hyperparameters": {"n_estimators": 3234, "max_depth": 13, "min_child_weight": 0.07784911437942721, "learning_rate": 0.0565426521738442, "subsample": 1.0, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.007928962402687697, "reg_lambda": 3.881249823648859, "FLAML_sample_size": 830258}}
|
||||
@@ -1,116 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946567,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
2dplanes,0,regression,0.94503,{'_modeljson': 'xgblimit/adult.json'}
|
||||
2dplanes,0,regression,0.945074,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
2dplanes,0,regression,0.806694,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.945799,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.944103,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.945327,{'_modeljson': 'xgblimit/car.json'}
|
||||
2dplanes,0,regression,0.923926,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
2dplanes,0,regression,0.944454,{'_modeljson': 'xgblimit/default.json'}
|
||||
2dplanes,0,regression,0.945212,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
2dplanes,0,regression,0.910852,{'_modeljson': 'xgblimit/poker.json'}
|
||||
adult,0,binary,0.923082,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
adult,0,binary,0.932355,{'_modeljson': 'xgblimit/adult.json'}
|
||||
adult,0,binary,0.928373,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
adult,0,binary,0.927574,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.929427,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
adult,0,binary,0.92204,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
adult,0,binary,0.721115,{'_modeljson': 'xgblimit/car.json'}
|
||||
adult,0,binary,0.921465,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
adult,0,binary,0.931234,{'_modeljson': 'xgblimit/default.json'}
|
||||
adult,0,binary,0.927801,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
adult,0,binary,0.916878,{'_modeljson': 'xgblimit/poker.json'}
|
||||
Airlines,0,binary,0.699604,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
Airlines,0,binary,0.711053,{'_modeljson': 'xgblimit/adult.json'}
|
||||
Airlines,0,binary,0.732443,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
Airlines,0,binary,0.72875,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.725056,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.730476,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
Airlines,0,binary,0.71788,{'_modeljson': 'xgblimit/car.json'}
|
||||
Airlines,0,binary,0.72604,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
Airlines,0,binary,0.719845,{'_modeljson': 'xgblimit/default.json'}
|
||||
Airlines,0,binary,0.719302,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
Airlines,0,binary,0.684382,{'_modeljson': 'xgblimit/poker.json'}
|
||||
Albert,0,binary,0.743682,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
Albert,0,binary,0.759246,{'_modeljson': 'xgblimit/adult.json'}
|
||||
Albert,0,binary,0.766177,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
Albert,0,binary,0.74969,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.766961,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.764534,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
Albert,0,binary,0.753311,{'_modeljson': 'xgblimit/car.json'}
|
||||
Albert,0,binary,0.765229,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
Albert,0,binary,0.757802,{'_modeljson': 'xgblimit/default.json'}
|
||||
Albert,0,binary,0.7596,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
Albert,0,binary,0.761456,{'_modeljson': 'xgblimit/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.759779,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.876747,{'_modeljson': 'xgblimit/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.864954,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.894651,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.845645,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.789099,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.550859,{'_modeljson': 'xgblimit/car.json'}
|
||||
Amazon_employee_access,0,binary,0.870599,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgblimit/default.json'}
|
||||
Amazon_employee_access,0,binary,0.86385,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.864415,{'_modeljson': 'xgblimit/poker.json'}
|
||||
bng_breastTumor,0,regression,0.163382,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.1789,{'_modeljson': 'xgblimit/adult.json'}
|
||||
bng_breastTumor,0,regression,0.188483,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.159704,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.1953,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.191805,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.12139,{'_modeljson': 'xgblimit/car.json'}
|
||||
bng_breastTumor,0,regression,0.163165,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgblimit/default.json'}
|
||||
bng_breastTumor,0,regression,0.183899,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
bng_breastTumor,0,regression,0.108646,{'_modeljson': 'xgblimit/poker.json'}
|
||||
bng_pbc,0,regression,0.384556,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.42041,{'_modeljson': 'xgblimit/adult.json'}
|
||||
bng_pbc,0,regression,0.449808,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
bng_pbc,0,regression,0.409944,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.439854,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.457955,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.418702,{'_modeljson': 'xgblimit/car.json'}
|
||||
bng_pbc,0,regression,0.455731,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
bng_pbc,0,regression,0.436902,{'_modeljson': 'xgblimit/default.json'}
|
||||
bng_pbc,0,regression,0.423052,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
bng_pbc,0,regression,0.447478,{'_modeljson': 'xgblimit/poker.json'}
|
||||
car,0,multiclass,-0.18106,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
car,0,multiclass,-0.170386,{'_modeljson': 'xgblimit/adult.json'}
|
||||
car,0,multiclass,-0.169973,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
car,0,multiclass,-0.498314,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.230405,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.330863,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
car,0,multiclass,-8.16E-05,{'_modeljson': 'xgblimit/car.json'}
|
||||
car,0,multiclass,-0.0239037,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
car,0,multiclass,-0.010029,{'_modeljson': 'xgblimit/default.json'}
|
||||
car,0,multiclass,-0.00720156,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
car,0,multiclass,-0.00360416,{'_modeljson': 'xgblimit/poker.json'}
|
||||
connect-4,0,multiclass,-0.597091,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.484427,{'_modeljson': 'xgblimit/adult.json'}
|
||||
connect-4,0,multiclass,-0.387769,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.553347,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.425107,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.441974,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.410519,{'_modeljson': 'xgblimit/car.json'}
|
||||
connect-4,0,multiclass,-0.342773,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgblimit/default.json'}
|
||||
connect-4,0,multiclass,-0.416631,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.466644,{'_modeljson': 'xgblimit/poker.json'}
|
||||
dilbert,0,multiclass,-0.189149,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.184569,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
dilbert,0,multiclass,-0.0485906,{'_modeljson': 'xgblimit/car.json'}
|
||||
dilbert,0,multiclass,-0.0643938,{'_modeljson': 'xgblimit/default.json'}
|
||||
dilbert,0,multiclass,-0.0425865,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
poker,0,regression,0.194424,{'_modeljson': 'xgblimit/2dplanes.json'}
|
||||
poker,0,regression,0.443714,{'_modeljson': 'xgblimit/adult.json'}
|
||||
poker,0,regression,0.837273,{'_modeljson': 'xgblimit/Airlines.json'}
|
||||
poker,0,regression,0.354783,{'_modeljson': 'xgblimit/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.749681,{'_modeljson': 'xgblimit/bng_breastTumor.json'}
|
||||
poker,0,regression,0.782336,{'_modeljson': 'xgblimit/bng_pbc.json'}
|
||||
poker,0,regression,0.640848,{'_modeljson': 'xgblimit/car.json'}
|
||||
poker,0,regression,0.924649,{'_modeljson': 'xgblimit/connect-4.json'}
|
||||
poker,0,regression,0.635679,{'_modeljson': 'xgblimit/default.json'}
|
||||
poker,0,regression,0.672338,{'_modeljson': 'xgblimit/dilbert.json'}
|
||||
poker,0,regression,0.92563,{'_modeljson': 'xgblimit/poker.json'}
|
||||
|
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6705, "max_leaves": 24, "min_child_weight": 58.562722088466444, "learning_rate": 0.0009765625, "subsample": 0.8993009465247683, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.2679275019160531, "reg_lambda": 91.95034898844547}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 17309, "max_leaves": 1146, "min_child_weight": 0.0193980002033358, "learning_rate": 0.0009765625, "subsample": 0.4169778612218198, "colsample_bylevel": 1.0, "colsample_bytree": 0.5504959296065052, "reg_alpha": 0.00505548829948545, "reg_lambda": 21.287234956122028, "FLAML_sample_size": 436899}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6357, "max_leaves": 206, "min_child_weight": 1.9495322566288034, "learning_rate": 0.0068766724195393905, "subsample": 0.9451618245005704, "colsample_bylevel": 0.9030482524943064, "colsample_bytree": 0.9278972006416252, "reg_alpha": 0.01857648400903689, "reg_lambda": 6.021166480604588, "FLAML_sample_size": 344444}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 591, "max_leaves": 16651, "min_child_weight": 0.03356567864689129, "learning_rate": 0.002595066436678338, "subsample": 0.9114132805513452, "colsample_bylevel": 0.9503441844594458, "colsample_bytree": 0.5703338448066768, "reg_alpha": 0.010405212349127894, "reg_lambda": 0.05352660657433639}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 23282, "max_leaves": 19, "min_child_weight": 0.02198438885474473, "learning_rate": 0.001700636796132106, "subsample": 1.0, "colsample_bylevel": 0.8954745234489918, "colsample_bytree": 0.22331977285961732, "reg_alpha": 0.4115502489939291, "reg_lambda": 0.015523027968801352}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 4038, "max_leaves": 89, "min_child_weight": 0.23500921146599626, "learning_rate": 0.0039779941096963365, "subsample": 0.9421092355451888, "colsample_bylevel": 0.7772326835688742, "colsample_bytree": 0.6864341727912397, "reg_alpha": 4.8782018848557, "reg_lambda": 0.7531969031616396, "FLAML_sample_size": 94478}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 32767, "max_leaves": 623, "min_child_weight": 0.03783048691639616, "learning_rate": 0.0021758863899615554, "subsample": 0.9086242379539484, "colsample_bylevel": 0.5880499360809446, "colsample_bytree": 1.0, "reg_alpha": 0.0037398450188259108, "reg_lambda": 16.894310259361305, "FLAML_sample_size": 810000}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 765, "max_leaves": 6, "min_child_weight": 0.001, "learning_rate": 1.0, "subsample": 0.9833803894285497, "colsample_bylevel": 1.0, "colsample_bytree": 1.0, "reg_alpha": 0.0012553728257619922, "reg_lambda": 0.03280542610559108}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6458, "max_leaves": 196, "min_child_weight": 0.020541449256787844, "learning_rate": 0.0067240405208345, "subsample": 0.5764514509827234, "colsample_bylevel": 1.0, "colsample_bytree": 0.9478632468968712, "reg_alpha": 0.08196899811780128, "reg_lambda": 1.3914579996946315}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 5739, "max_leaves": 5, "min_child_weight": 0.1359602026207002, "learning_rate": 0.14496176867613397, "subsample": 0.864897070662231, "colsample_bylevel": 0.01, "colsample_bytree": 0.9394057513384305, "reg_alpha": 0.001103317921178771, "reg_lambda": 0.1655504349283218}}
|
||||
@@ -1 +0,0 @@
|
||||
{"class": "xgboost", "hyperparameters": {"n_estimators": 6866, "max_leaves": 238, "min_child_weight": 0.1000665069590469, "learning_rate": 0.05522440252112267, "subsample": 0.9621433799637473, "colsample_bylevel": 0.8366787895853636, "colsample_bytree": 1.0, "reg_alpha": 0.002455941636379231, "reg_lambda": 0.02487031358204277, "FLAML_sample_size": 830258}}
|
||||
@@ -1,222 +0,0 @@
|
||||
task,fold,type,result,params
|
||||
2dplanes,0,regression,0.946474,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
2dplanes,0,regression,0.849793,{'_modeljson': 'xgb/adult.json'}
|
||||
2dplanes,0,regression,0.940611,{'_modeljson': 'xgb/Albert.json'}
|
||||
2dplanes,0,regression,0.68908,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
2dplanes,0,regression,0.945551,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
2dplanes,0,regression,0.929904,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
2dplanes,0,regression,0.944099,{'_modeljson': 'xgb/car.json'}
|
||||
2dplanes,0,regression,0.938336,{'_modeljson': 'xgb/connect-4.json'}
|
||||
2dplanes,0,regression,0.944454,{'_modeljson': 'xgb/default.json'}
|
||||
2dplanes,0,regression,0.945477,{'_modeljson': 'xgb/dilbert.json'}
|
||||
2dplanes,0,regression,0.91563,{'_modeljson': 'xgb/poker.json'}
|
||||
dilbert,0,multiclass,-0.362419,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
dilbert,0,multiclass,-0.515024,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
dilbert,0,multiclass,-0.158604,{'_modeljson': 'xgb/car.json'}
|
||||
dilbert,0,multiclass,-0.0643938,{'_modeljson': 'xgb/default.json'}
|
||||
dilbert,0,multiclass,-0.0383872,{'_modeljson': 'xgb/dilbert.json'}
|
||||
dilbert,0,multiclass,-0.0611286,{'_modeljson': 'xgb/poker.json'}
|
||||
poker,0,regression,0.20821,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
poker,0,regression,0.206438,{'_modeljson': 'xgb/adult.json'}
|
||||
poker,0,regression,0.815665,{'_modeljson': 'xgb/Airlines.json'}
|
||||
poker,0,regression,0.857257,{'_modeljson': 'xgb/Albert.json'}
|
||||
poker,0,regression,0.362568,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
poker,0,regression,0.559622,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
poker,0,regression,0.922282,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
poker,0,regression,0.846139,{'_modeljson': 'xgb/car.json'}
|
||||
poker,0,regression,0.891631,{'_modeljson': 'xgb/connect-4.json'}
|
||||
poker,0,regression,0.635679,{'_modeljson': 'xgb/default.json'}
|
||||
poker,0,regression,0.377996,{'_modeljson': 'xgb/dilbert.json'}
|
||||
poker,0,regression,0.935986,{'_modeljson': 'xgb/poker.json'}
|
||||
adult,0,binary,0.918094,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
adult,0,binary,0.932468,{'_modeljson': 'xgb/adult.json'}
|
||||
adult,0,binary,0.92673,{'_modeljson': 'xgb/Airlines.json'}
|
||||
adult,0,binary,0.922077,{'_modeljson': 'xgb/Albert.json'}
|
||||
adult,0,binary,0.920837,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.92964,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
adult,0,binary,0.916531,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
adult,0,binary,0.884114,{'_modeljson': 'xgb/car.json'}
|
||||
adult,0,binary,0.917887,{'_modeljson': 'xgb/connect-4.json'}
|
||||
adult,0,binary,0.931234,{'_modeljson': 'xgb/default.json'}
|
||||
adult,0,binary,0.928861,{'_modeljson': 'xgb/dilbert.json'}
|
||||
adult,0,binary,0.909018,{'_modeljson': 'xgb/poker.json'}
|
||||
Airlines,0,binary,0.703353,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Airlines,0,binary,0.696962,{'_modeljson': 'xgb/adult.json'}
|
||||
Airlines,0,binary,0.73153,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Airlines,0,binary,0.731577,{'_modeljson': 'xgb/Albert.json'}
|
||||
Airlines,0,binary,0.725394,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.722896,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.716839,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Airlines,0,binary,0.715654,{'_modeljson': 'xgb/car.json'}
|
||||
Airlines,0,binary,0.73107,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Airlines,0,binary,0.719845,{'_modeljson': 'xgb/default.json'}
|
||||
Airlines,0,binary,0.71873,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Airlines,0,binary,0.676427,{'_modeljson': 'xgb/poker.json'}
|
||||
Albert,0,binary,0.742648,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Albert,0,binary,0.758723,{'_modeljson': 'xgb/adult.json'}
|
||||
Albert,0,binary,0.763066,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Albert,0,binary,0.768073,{'_modeljson': 'xgb/Albert.json'}
|
||||
Albert,0,binary,0.74349,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.764,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.767514,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Albert,0,binary,0.743392,{'_modeljson': 'xgb/car.json'}
|
||||
Albert,0,binary,0.766006,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Albert,0,binary,0.757802,{'_modeljson': 'xgb/default.json'}
|
||||
Albert,0,binary,0.746511,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Albert,0,binary,0.761985,{'_modeljson': 'xgb/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.727287,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.855441,{'_modeljson': 'xgb/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.85984,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.873629,{'_modeljson': 'xgb/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.897708,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.862679,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.872059,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.657192,{'_modeljson': 'xgb/car.json'}
|
||||
Amazon_employee_access,0,binary,0.877547,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgb/default.json'}
|
||||
Amazon_employee_access,0,binary,0.853361,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.859734,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_breastTumor,0,regression,0.184421,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.163226,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_breastTumor,0,regression,0.18037,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.177238,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_breastTumor,0,regression,-0.118976,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.195539,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.106337,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.149326,{'_modeljson': 'xgb/car.json'}
|
||||
bng_breastTumor,0,regression,0.161193,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgb/default.json'}
|
||||
bng_breastTumor,0,regression,0.186499,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_breastTumor,0,regression,-0.032219,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_pbc,0,regression,0.411719,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.409769,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_pbc,0,regression,0.450806,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_pbc,0,regression,0.458384,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_pbc,0,regression,0.236669,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.441873,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.462226,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.431868,{'_modeljson': 'xgb/car.json'}
|
||||
bng_pbc,0,regression,0.45678,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_pbc,0,regression,0.436902,{'_modeljson': 'xgb/default.json'}
|
||||
bng_pbc,0,regression,0.418839,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_pbc,0,regression,0.448148,{'_modeljson': 'xgb/poker.json'}
|
||||
car,0,multiclass,-0.38726,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
car,0,multiclass,-0.22547,{'_modeljson': 'xgb/adult.json'}
|
||||
car,0,multiclass,-0.208402,{'_modeljson': 'xgb/Airlines.json'}
|
||||
car,0,multiclass,-0.0256159,{'_modeljson': 'xgb/Albert.json'}
|
||||
car,0,multiclass,-0.627705,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.166328,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.0201057,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
car,0,multiclass,-8.45E-05,{'_modeljson': 'xgb/car.json'}
|
||||
car,0,multiclass,-0.0129025,{'_modeljson': 'xgb/connect-4.json'}
|
||||
car,0,multiclass,-0.010029,{'_modeljson': 'xgb/default.json'}
|
||||
car,0,multiclass,-0.00218674,{'_modeljson': 'xgb/dilbert.json'}
|
||||
car,0,multiclass,-0.00426392,{'_modeljson': 'xgb/poker.json'}
|
||||
connect-4,0,multiclass,-0.578339,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.489378,{'_modeljson': 'xgb/adult.json'}
|
||||
connect-4,0,multiclass,-0.406886,{'_modeljson': 'xgb/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.332411,{'_modeljson': 'xgb/Albert.json'}
|
||||
connect-4,0,multiclass,-0.636516,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.425947,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.354612,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.452201,{'_modeljson': 'xgb/car.json'}
|
||||
connect-4,0,multiclass,-0.338363,{'_modeljson': 'xgb/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgb/default.json'}
|
||||
connect-4,0,multiclass,-0.497404,{'_modeljson': 'xgb/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.592309,{'_modeljson': 'xgb/poker.json'}
|
||||
adult,0,binary,0.918094,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
adult,0,binary,0.932468,{'_modeljson': 'xgb/adult.json'}
|
||||
adult,0,binary,0.92673,{'_modeljson': 'xgb/Airlines.json'}
|
||||
adult,0,binary,0.922077,{'_modeljson': 'xgb/Albert.json'}
|
||||
adult,0,binary,0.920837,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
adult,0,binary,0.92964,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
adult,0,binary,0.916531,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
adult,0,binary,0.884114,{'_modeljson': 'xgb/car.json'}
|
||||
adult,0,binary,0.917887,{'_modeljson': 'xgb/connect-4.json'}
|
||||
adult,0,binary,0.931234,{'_modeljson': 'xgb/default.json'}
|
||||
adult,0,binary,0.928861,{'_modeljson': 'xgb/dilbert.json'}
|
||||
adult,0,binary,0.909018,{'_modeljson': 'xgb/poker.json'}
|
||||
Airlines,0,binary,0.703353,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Airlines,0,binary,0.696962,{'_modeljson': 'xgb/adult.json'}
|
||||
Airlines,0,binary,0.73153,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Airlines,0,binary,0.731577,{'_modeljson': 'xgb/Albert.json'}
|
||||
Airlines,0,binary,0.725394,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Airlines,0,binary,0.722896,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Airlines,0,binary,0.716839,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Airlines,0,binary,0.715654,{'_modeljson': 'xgb/car.json'}
|
||||
Airlines,0,binary,0.73107,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Airlines,0,binary,0.719845,{'_modeljson': 'xgb/default.json'}
|
||||
Airlines,0,binary,0.71873,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Airlines,0,binary,0.676427,{'_modeljson': 'xgb/poker.json'}
|
||||
Albert,0,binary,0.742648,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Albert,0,binary,0.758723,{'_modeljson': 'xgb/adult.json'}
|
||||
Albert,0,binary,0.763066,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Albert,0,binary,0.768073,{'_modeljson': 'xgb/Albert.json'}
|
||||
Albert,0,binary,0.74349,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Albert,0,binary,0.764,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Albert,0,binary,0.767514,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Albert,0,binary,0.743392,{'_modeljson': 'xgb/car.json'}
|
||||
Albert,0,binary,0.766006,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Albert,0,binary,0.757802,{'_modeljson': 'xgb/default.json'}
|
||||
Albert,0,binary,0.746511,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Albert,0,binary,0.761985,{'_modeljson': 'xgb/poker.json'}
|
||||
Amazon_employee_access,0,binary,0.727287,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
Amazon_employee_access,0,binary,0.855441,{'_modeljson': 'xgb/adult.json'}
|
||||
Amazon_employee_access,0,binary,0.85984,{'_modeljson': 'xgb/Airlines.json'}
|
||||
Amazon_employee_access,0,binary,0.873629,{'_modeljson': 'xgb/Albert.json'}
|
||||
Amazon_employee_access,0,binary,0.897708,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
Amazon_employee_access,0,binary,0.862679,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
Amazon_employee_access,0,binary,0.872059,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
Amazon_employee_access,0,binary,0.657192,{'_modeljson': 'xgb/car.json'}
|
||||
Amazon_employee_access,0,binary,0.877547,{'_modeljson': 'xgb/connect-4.json'}
|
||||
Amazon_employee_access,0,binary,0.851702,{'_modeljson': 'xgb/default.json'}
|
||||
Amazon_employee_access,0,binary,0.853361,{'_modeljson': 'xgb/dilbert.json'}
|
||||
Amazon_employee_access,0,binary,0.859734,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_breastTumor,0,regression,0.184421,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_breastTumor,0,regression,0.163226,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_breastTumor,0,regression,0.18037,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_breastTumor,0,regression,0.177238,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_breastTumor,0,regression,-0.118976,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_breastTumor,0,regression,0.195539,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_breastTumor,0,regression,0.106337,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_breastTumor,0,regression,0.149326,{'_modeljson': 'xgb/car.json'}
|
||||
bng_breastTumor,0,regression,0.161193,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_breastTumor,0,regression,0.186541,{'_modeljson': 'xgb/default.json'}
|
||||
bng_breastTumor,0,regression,0.186499,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_breastTumor,0,regression,-0.032219,{'_modeljson': 'xgb/poker.json'}
|
||||
bng_pbc,0,regression,0.411719,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
bng_pbc,0,regression,0.409769,{'_modeljson': 'xgb/adult.json'}
|
||||
bng_pbc,0,regression,0.450806,{'_modeljson': 'xgb/Airlines.json'}
|
||||
bng_pbc,0,regression,0.458384,{'_modeljson': 'xgb/Albert.json'}
|
||||
bng_pbc,0,regression,0.236669,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
bng_pbc,0,regression,0.441873,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
bng_pbc,0,regression,0.462226,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
bng_pbc,0,regression,0.431868,{'_modeljson': 'xgb/car.json'}
|
||||
bng_pbc,0,regression,0.45678,{'_modeljson': 'xgb/connect-4.json'}
|
||||
bng_pbc,0,regression,0.436902,{'_modeljson': 'xgb/default.json'}
|
||||
bng_pbc,0,regression,0.418839,{'_modeljson': 'xgb/dilbert.json'}
|
||||
bng_pbc,0,regression,0.448148,{'_modeljson': 'xgb/poker.json'}
|
||||
car,0,multiclass,-0.38726,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
car,0,multiclass,-0.22547,{'_modeljson': 'xgb/adult.json'}
|
||||
car,0,multiclass,-0.208402,{'_modeljson': 'xgb/Airlines.json'}
|
||||
car,0,multiclass,-0.0256159,{'_modeljson': 'xgb/Albert.json'}
|
||||
car,0,multiclass,-0.627705,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
car,0,multiclass,-0.166328,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
car,0,multiclass,-0.0201057,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
car,0,multiclass,-8.45E-05,{'_modeljson': 'xgb/car.json'}
|
||||
car,0,multiclass,-0.0129025,{'_modeljson': 'xgb/connect-4.json'}
|
||||
car,0,multiclass,-0.010029,{'_modeljson': 'xgb/default.json'}
|
||||
car,0,multiclass,-0.00218674,{'_modeljson': 'xgb/dilbert.json'}
|
||||
car,0,multiclass,-0.00426392,{'_modeljson': 'xgb/poker.json'}
|
||||
connect-4,0,multiclass,-0.578339,{'_modeljson': 'xgb/2dplanes.json'}
|
||||
connect-4,0,multiclass,-0.489378,{'_modeljson': 'xgb/adult.json'}
|
||||
connect-4,0,multiclass,-0.406886,{'_modeljson': 'xgb/Airlines.json'}
|
||||
connect-4,0,multiclass,-0.332411,{'_modeljson': 'xgb/Albert.json'}
|
||||
connect-4,0,multiclass,-0.636516,{'_modeljson': 'xgb/Amazon_employee_access.json'}
|
||||
connect-4,0,multiclass,-0.425947,{'_modeljson': 'xgb/bng_breastTumor.json'}
|
||||
connect-4,0,multiclass,-0.354612,{'_modeljson': 'xgb/bng_pbc.json'}
|
||||
connect-4,0,multiclass,-0.452201,{'_modeljson': 'xgb/car.json'}
|
||||
connect-4,0,multiclass,-0.338363,{'_modeljson': 'xgb/connect-4.json'}
|
||||
connect-4,0,multiclass,-0.430665,{'_modeljson': 'xgb/default.json'}
|
||||
connect-4,0,multiclass,-0.497404,{'_modeljson': 'xgb/dilbert.json'}
|
||||
connect-4,0,multiclass,-0.592309,{'_modeljson': 'xgb/poker.json'}
|
||||
|
@@ -1,14 +0,0 @@
|
||||
from flaml.automl.data import load_openml_dataset
|
||||
from flaml.default import LGBMRegressor
|
||||
from flaml.automl.ml import sklearn_metric_loss_score
|
||||
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir="./")
|
||||
lgbm = LGBMRegressor()
|
||||
|
||||
hyperparams, estimator_name, X_transformed, y_transformed = lgbm.suggest_hyperparams(X_train, y_train)
|
||||
print(hyperparams)
|
||||
|
||||
lgbm.fit(X_train, y_train)
|
||||
y_pred = lgbm.predict(X_test)
|
||||
print("flamlized lgbm r2 =", 1 - sklearn_metric_loss_score("r2", y_pred, y_test))
|
||||
print(lgbm)
|
||||
@@ -1,13 +0,0 @@
|
||||
from flaml.automl.data import load_openml_dataset
|
||||
from flaml.default import XGBClassifier
|
||||
from flaml.automl.ml import sklearn_metric_loss_score
|
||||
|
||||
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
|
||||
xgb = XGBClassifier()
|
||||
xgb.fit(X_train, y_train)
|
||||
y_pred = xgb.predict(X_test)
|
||||
print(
|
||||
"flamlized xgb accuracy =",
|
||||
1 - sklearn_metric_loss_score("accuracy", y_pred, y_test),
|
||||
)
|
||||
print(xgb)
|
||||
@@ -1,8 +0,0 @@
|
||||
def test_load_args_sub():
|
||||
from flaml.automl.nlp.huggingface.training_args import TrainingArgumentsForAuto
|
||||
|
||||
TrainingArgumentsForAuto.load_args_from_console()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_load_args_sub()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user