first
test/automl/test_lexiflow.py (Normal file, 40 lines)
@@ -0,0 +1,40 @@
+from flaml import AutoML
+from flaml.data import load_openml_dataset
+
+
+def _test_lexiflow():
+    X_train, X_test, y_train, y_test = load_openml_dataset(
+        dataset_id=179, data_dir="test/data"
+    )
+
+    lexico_objectives = {}
+    lexico_objectives["metrics"] = ["val_loss", "pred_time"]
+    lexico_objectives["tolerances"] = {"val_loss": 0.01, "pred_time": 0.0}
+    lexico_objectives["targets"] = {"val_loss": 0.0, "pred_time": 0.0}
+    lexico_objectives["modes"] = ["min", "min"]
+
+    automl = AutoML()
+    settings = {
+        "time_budget": 100,
+        "lexico_objectives": lexico_objectives,
+        "estimator_list": ["xgboost"],
+        "use_ray": True,
+        "task": "classification",
+        "max_iter": 10000000,
+        "train_time_limit": 60,
+        "verbose": 0,
+        "eval_method": "holdout",
+        "mem_thres": 128 * (1024**3),
+        "seed": 1,
+    }
+    automl.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test, **settings)
+    print(automl.predict(X_train))
+    print(automl.model)
+    print(automl.config_history)
+    print(automl.best_iteration)
+    print(automl.best_estimator)
+
+
+if __name__ == "__main__":
+    _test_lexiflow()
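A note on the file above: the four `lexico_objectives` keys are parallel, with `metrics` listed in priority order. Restated as a single literal for readability, with per-key comments giving my reading of each field (the comments are not text from the commit):

    lexico_objectives = {
        "metrics": ["val_loss", "pred_time"],  # objectives in priority order
        "tolerances": {"val_loss": 0.01, "pred_time": 0.0},  # slack allowed on a higher-priority metric
        "targets": {"val_loss": 0.0, "pred_time": 0.0},  # values treated as good enough
        "modes": ["min", "min"],  # optimization direction per metric
    }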
@@ -20,7 +20,7 @@ def main():
     logger.info(" ".join(f"{k}={v}" for k, v in vars(args).items()))

-    data_path = os.path.join(args.data, 'data.csv')
+    data_path = os.path.join(args.data, "data.csv")
     df = pd.read_csv(data_path)

     train_df, test_df = train_test_split(
@@ -19,7 +19,7 @@ environment:
   os: Linux

 command: >-
-  python data_prep.py
+  python data_prep.py
   --data {inputs.data}
   --test_train_ratio {inputs.test_train_ratio}
   --train_data {outputs.train_data}
@@ -83,10 +83,10 @@ def build_and_submit_aml_pipeline(config):
     ################################################
     # load component functions
     ################################################
-    data_prep_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                              / "data_prep/data_prep.yaml")
-    train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                          / "train/train.yaml")
+    data_prep_component = Component.from_yaml(
+        ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml"
+    )
+    train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "train/train.yaml")

     ################################################
     # build pipeline
@@ -14,16 +14,19 @@ def remote_run():
     ################################################
     # connect to your Azure ML workspace
     ################################################
-    ws = Workspace(subscription_id=args.subscription_id,
-                   resource_group=args.resource_group,
-                   workspace_name=args.workspace)
+    ws = Workspace(
+        subscription_id=args.subscription_id,
+        resource_group=args.resource_group,
+        workspace_name=args.workspace,
+    )

     ################################################
     # load component functions
     ################################################

-    pipeline_tuning_func = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                               / "tuner/component_spec.yaml")
+    pipeline_tuning_func = Component.from_yaml(
+        ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml"
+    )

     ################################################
     # build pipeline
@@ -44,6 +47,7 @@ def remote_run():
 def local_run():
     logger.info("Run tuner locally.")
     from tuner import tuner_func
+
     tuner_func.tune_pipeline(concurrent_run=2)

@@ -52,15 +56,18 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_mutually_exclusive_group(required=False)
     parser.add_argument(
-        "--subscription_id", type=str, help="your_subscription_id", required=False,
+        "--subscription_id",
+        type=str,
+        help="your_subscription_id",
+        required=False,
     )
     parser.add_argument(
-        "--resource_group", type=str, help="your_resource_group", required=False)
-    parser.add_argument(
-        "--workspace", type=str, help="your_workspace", required=False)
+        "--resource_group", type=str, help="your_resource_group", required=False
+    )
+    parser.add_argument("--workspace", type=str, help="your_workspace", required=False)

-    parser.add_argument('--remote', dest='remote', action='store_true')
-    parser.add_argument('--local', dest='remote', action='store_false')
+    parser.add_argument("--remote", dest="remote", action="store_true")
+    parser.add_argument("--local", dest="remote", action="store_false")
     parser.set_defaults(remote=True)
     args = parser.parse_args()

@@ -5,7 +5,7 @@ import pandas as pd
 from azureml.core import Run


-class LightGBMCallbackHandler():
+class LightGBMCallbackHandler:
     def __init__(self):
         pass

@@ -24,16 +24,22 @@ class LightGBMCallbackHandler():
 def main(args):
     """Main function of the script."""

-    train_path = os.path.join(args.train_data, 'data.csv')
+    train_path = os.path.join(args.train_data, "data.csv")
     print("training_path:", train_path)

-    test_path = os.path.join(args.test_data, 'data.csv')
+    test_path = os.path.join(args.test_data, "data.csv")

     train_set = lgb.Dataset(train_path)
     test_set = lgb.Dataset(test_path)
     callbacks_handler = LightGBMCallbackHandler()
-    config = {"header": True, "objective": "binary", "label_column": 30, "metric": "binary_error",
-              "n_estimators": args.n_estimators, "learning_rate": args.learning_rate}
+    config = {
+        "header": True,
+        "objective": "binary",
+        "label_column": 30,
+        "metric": "binary_error",
+        "n_estimators": args.n_estimators,
+        "learning_rate": args.learning_rate,
+    }
     gbm = lgb.train(
         config,
         train_set,
@@ -44,9 +50,9 @@ def main(args):
         ],
     )

-    print('Saving model...')
+    print("Saving model...")
     # save model to file
-    gbm.save_model(os.path.join(args.model, 'model.txt'))
+    gbm.save_model(os.path.join(args.model, "model.txt"))


 if __name__ == "__main__":
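For context on the two `train.py` hunks above: the `config` dict feeds LightGBM's plain training API. A minimal self-contained sketch of that flow, using synthetic data and a hypothetical output path rather than the component's actual CSV inputs:

    import lightgbm as lgb
    import numpy as np

    # synthetic stand-in for the component's CSV inputs
    X = np.random.rand(200, 30)
    y = (X[:, 0] > 0.5).astype(int)
    train_set = lgb.Dataset(X, label=y)

    params = {"objective": "binary", "metric": "binary_error", "learning_rate": 0.1}
    booster = lgb.train(params, train_set, num_boost_round=10)
    booster.save_model("model.txt")  # hypothetical path; the component joins args.model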
@@ -4,9 +4,9 @@ name: classifier
 version: 0.0.1
 display_name: Train lgbm classifier
 inputs:
-  train_data:
+  train_data:
     type: path
-  test_data:
+  test_data:
     type: path
   learning_rate:
     type: float
@@ -20,8 +20,8 @@ environment:
   conda_dependencies_file: env.yaml
   os: Linux
 command: >-
-  python train.py
-  --train_data {inputs.train_data}
+  python train.py
+  --train_data {inputs.train_data}
   --test_data {inputs.test_data}
   --learning_rate {inputs.learning_rate}
   --n_estimators {inputs.n_estimators}
@@ -9,4 +9,4 @@ environment:
   conda_dependencies_file: env.yaml
   os: Linux
 command: >-
-  python tuner/tuner_func.py
+  python tuner/tuner_func.py
@@ -8,8 +8,7 @@ logger = logging.getLogger(__name__)


 def run_with_config(config: dict):
-    """Run the pipeline with a given config dict
-    """
+    """Run the pipeline with a given config dict"""

     # pass the hyperparameters to AzureML jobs by overwriting the config file.
     overrides = [f"{key}={value}" for key, value in config.items()]
@@ -24,25 +23,25 @@ def run_with_config(config: dict):
     while not stop:
         # get status
         status = run._core_run.get_status()
-        print(f'status: {status}')
+        print(f"status: {status}")

         # get metrics
         metrics = run._core_run.get_metrics(recursive=True)
         if metrics:
             run_metrics = list(metrics.values())

-            new_metric = run_metrics[0]['eval_binary_error']
+            new_metric = run_metrics[0]["eval_binary_error"]

             if type(new_metric) == list:
                 new_metric = new_metric[-1]

-            print(f'eval_binary_error: {new_metric}')
+            print(f"eval_binary_error: {new_metric}")

             tune.report(eval_binary_error=new_metric)

         time.sleep(5)

-        if status == 'FAILED' or status == 'Completed':
+        if status == "FAILED" or status == "Completed":
             stop = True

     print("The run is terminated.")
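The hunk above is a poll-and-report bridge: it repeatedly asks the AzureML run for its status and metrics, and forwards the newest `eval_binary_error` to the tuner. A condensed sketch of the same pattern, with `get_status`/`get_metrics` as hypothetical stand-ins for the `run._core_run` calls used in the diff:

    import time

    from flaml import tune


    def poll_and_report(run, interval=5):
        """Forward the latest eval_binary_error from a remote run to the tuner."""
        while True:
            status = run.get_status()  # stand-in for run._core_run.get_status()
            metrics = run.get_metrics()  # stand-in for run._core_run.get_metrics(recursive=True)
            if metrics and "eval_binary_error" in metrics:
                value = metrics["eval_binary_error"]
                if isinstance(value, list):  # AzureML can return a metric history list
                    value = value[-1]
                tune.report(eval_binary_error=value)
            time.sleep(interval)
            if status in ("FAILED", "Completed"):
                break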
test/tune/test_lexiflow.py (Normal file, 128 lines)
@@ -0,0 +1,128 @@
+import torch
+import thop
+import torch.nn as nn
+from flaml import tune
+import torch.nn.functional as F
+import torchvision
+import numpy as np
+import time
+from ray import tune as raytune
+
+DEVICE = torch.device("cpu")
+BATCHSIZE = 128
+N_TRAIN_EXAMPLES = BATCHSIZE * 30
+N_VALID_EXAMPLES = BATCHSIZE * 10
+
+
+def _test_lexiflow():
+    train_dataset = torchvision.datasets.FashionMNIST(
+        "test/data",
+        train=True,
+        download=True,
+        transform=torchvision.transforms.ToTensor(),
+    )
+
+    train_loader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),
+        batch_size=BATCHSIZE,
+        shuffle=True,
+    )
+
+    val_dataset = torchvision.datasets.FashionMNIST(
+        "test/data", train=False, transform=torchvision.transforms.ToTensor()
+    )
+
+    val_loader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),
+        batch_size=BATCHSIZE,
+        shuffle=True,
+    )
+
+    def define_model(configuration):
+        n_layers = configuration["n_layers"]
+        layers = []
+        in_features = 28 * 28
+        for i in range(n_layers):
+            out_features = configuration["n_units_l{}".format(i)]
+            layers.append(nn.Linear(in_features, out_features))
+            layers.append(nn.ReLU())
+            p = configuration["dropout_{}".format(i)]
+            layers.append(nn.Dropout(p))
+            in_features = out_features
+        layers.append(nn.Linear(in_features, 10))
+        layers.append(nn.LogSoftmax(dim=1))
+        return nn.Sequential(*layers)
+
+    def train_model(model, optimizer, train_loader):
+        model.train()
+        for batch_idx, (data, target) in enumerate(train_loader):
+            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+            optimizer.zero_grad()
+            F.nll_loss(model(data), target).backward()
+            optimizer.step()
+
+    def eval_model(model, valid_loader):
+        model.eval()
+        correct = 0
+        with torch.no_grad():
+            for batch_idx, (data, target) in enumerate(valid_loader):
+                data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+                pred = model(data).argmax(dim=1, keepdim=True)
+                correct += pred.eq(target.view_as(pred)).sum().item()
+
+        accuracy = correct / N_VALID_EXAMPLES
+        flops, params = thop.profile(
+            model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
+        )
+        return np.log2(flops), 1 - accuracy, params
+
+    def evaluate_function(configuration):
+        model = define_model(configuration).to(DEVICE)
+        optimizer = torch.optim.Adam(model.parameters(), configuration["lr"])
+        n_epoch = configuration["n_epoch"]
+        for epoch in range(n_epoch):
+            train_model(model, optimizer, train_loader)
+        flops, error_rate, params = eval_model(model, val_loader)
+        return {"error_rate": error_rate, "flops": flops, "params": params}
+
+    lexico_objectives = {}
+    lexico_objectives["metrics"] = ["error_rate", "flops"]
+    lexico_objectives["tolerances"] = {"error_rate": 0.02, "flops": 0.0}
+    lexico_objectives["targets"] = {"error_rate": 0.0, "flops": 0.0}
+    lexico_objectives["modes"] = ["min", "min"]
+
+    search_space = {
+        "n_layers": raytune.randint(lower=1, upper=3),
+        "n_units_l0": raytune.randint(lower=4, upper=128),
+        "n_units_l1": raytune.randint(lower=4, upper=128),
+        "n_units_l2": raytune.randint(lower=4, upper=128),
+        "dropout_0": raytune.uniform(lower=0.2, upper=0.5),
+        "dropout_1": raytune.uniform(lower=0.2, upper=0.5),
+        "dropout_2": raytune.uniform(lower=0.2, upper=0.5),
+        "lr": raytune.loguniform(lower=1e-5, upper=1e-1),
+        "n_epoch": raytune.randint(lower=1, upper=20),
+    }
+
+    low_cost_partial_config = {
+        "n_layers": 1,
+        "n_units_l0": 4,
+        "n_units_l1": 4,
+        "n_units_l2": 4,
+        "n_epoch": 1,
+    }
+
+    analysis = tune.run(
+        evaluate_function,
+        num_samples=100000000,
+        time_budget_s=100,
+        config=search_space,
+        use_ray=False,
+        lexico_objectives=lexico_objectives,
+        low_cost_partial_config=low_cost_partial_config,
+    )
+    result = analysis.best_result
+    print(result)
+
+
+if __name__ == "__main__":
+    _test_lexiflow()
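Taken together with the AutoML test above, this file exercises the two entry points the commit targets: `AutoML.fit(..., lexico_objectives=...)` and `tune.run(..., lexico_objectives=...)`. A minimal toy invocation of the tuner path, with a synthetic objective and the `lexico_objectives` keyword assumed to behave as in the test above:

    from flaml import tune


    def toy_objective(config):
        x = config["x"]
        # two minimized objectives: a loss and a cost proxy
        return {"loss": (x - 2) ** 2, "cost": abs(x)}


    lexico_objectives = {
        "metrics": ["loss", "cost"],
        "tolerances": {"loss": 0.1, "cost": 0.0},
        "targets": {"loss": 0.0, "cost": 0.0},
        "modes": ["min", "min"],
    }

    analysis = tune.run(
        toy_objective,
        config={"x": tune.uniform(-5, 5)},
        lexico_objectives=lexico_objectives,
        num_samples=50,
        use_ray=False,
    )
    print(analysis.best_result)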