mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
Merge branch 'microsoft:main' into main
This commit is contained in:
@@ -89,7 +89,12 @@ class SearchState:
|
||||
renamed_type = list(
|
||||
inspect.signature(domain_one_dim.is_valid).parameters.values()
|
||||
)[0].annotation
|
||||
type_match = renamed_type == Any or isinstance(value_one_dim, renamed_type)
|
||||
type_match = (
|
||||
renamed_type == Any
|
||||
or isinstance(value_one_dim, renamed_type)
|
||||
or isinstance(value_one_dim, int)
|
||||
and renamed_type is float
|
||||
)
|
||||
if not (type_match and domain_one_dim.is_valid(value_one_dim)):
|
||||
return False
|
||||
elif value_one_dim != domain_one_dim:
|
||||
@@ -380,6 +385,15 @@ class AutoMLState:
|
||||
tune.report(**result)
|
||||
return result
|
||||
|
||||
def sanitize(self, config: dict) -> dict:
    """Make a config ready for passing to estimator.

    Args:
        config: A config dict. When it contains an "ml" entry, that nested
            dict is used as the starting point instead of the top-level one.

    Returns:
        A shallow copy of the selected dict without the "FLAML_sample_size"
        and "learner" keys. The dict passed in is left unmodified.
    """
    # Work on a copy so the caller's config keeps all of its keys.
    config = config.get("ml", config).copy()
    # Remove the keys that must not reach the estimator; pop() with a
    # default tolerates their absence without a separate membership test.
    for key in ("FLAML_sample_size", "learner"):
        config.pop(key, None)
    return config
|
||||
|
||||
def _train_with_config(
|
||||
self,
|
||||
estimator,
|
||||
@@ -390,11 +404,7 @@ class AutoMLState:
|
||||
sample_size = config_w_resource.get(
|
||||
"FLAML_sample_size", len(self.y_train_all)
|
||||
)
|
||||
config = config_w_resource.get("ml", config_w_resource).copy()
|
||||
if "FLAML_sample_size" in config:
|
||||
del config["FLAML_sample_size"]
|
||||
if "learner" in config:
|
||||
del config["learner"]
|
||||
config = self.sanitize(config_w_resource)
|
||||
|
||||
this_estimator_kwargs = self.fit_kwargs_by_estimator.get(
|
||||
estimator
|
||||
@@ -1498,6 +1508,10 @@ class AutoML(BaseEstimator):
|
||||
):
|
||||
"""Retrain from log file.
|
||||
|
||||
This function is intended to retrain the logged configurations.
|
||||
NOTE: In rare cases, the last config is stopped early to meet time_budget and is also the best config.
|
||||
In that case, the logged config's ITER_HP (e.g., n_estimators) is not reduced.
|
||||
|
||||
Args:
|
||||
log_file_name: A string of the log file name.
|
||||
X_train: A numpy array or dataframe of training data in shape n*m.
|
||||
@@ -3171,6 +3185,7 @@ class AutoML(BaseEstimator):
|
||||
# Add a checkpoint for the current best config to the log.
|
||||
if self._training_log:
|
||||
self._training_log.checkpoint()
|
||||
self._state.time_from_start = time.time() - self._start_time_flag
|
||||
if self._best_estimator:
|
||||
self._selected = self._search_states[self._best_estimator]
|
||||
self.modelcount = sum(
|
||||
@@ -3194,7 +3209,7 @@ class AutoML(BaseEstimator):
|
||||
x[1].learner_class(
|
||||
task=self._state.task,
|
||||
n_jobs=self._state.n_jobs,
|
||||
**x[1].best_config,
|
||||
**self._state.sanitize(x[1].best_config),
|
||||
),
|
||||
)
|
||||
for x in search_states[:2]
|
||||
@@ -3205,13 +3220,15 @@ class AutoML(BaseEstimator):
|
||||
x[1].learner_class(
|
||||
task=self._state.task,
|
||||
n_jobs=self._state.n_jobs,
|
||||
**x[1].best_config,
|
||||
**self._state.sanitize(x[1].best_config),
|
||||
),
|
||||
)
|
||||
for x in search_states[2:]
|
||||
if x[1].best_loss < 4 * self._selected.best_loss
|
||||
]
|
||||
logger.info(estimators)
|
||||
logger.info(
|
||||
[(estimator[0], estimator[1].params) for estimator in estimators]
|
||||
)
|
||||
if len(estimators) > 1:
|
||||
if self._state.task in CLASSIFICATION:
|
||||
from sklearn.ensemble import StackingClassifier as Stacker
|
||||
|
||||
@@ -948,7 +948,7 @@ class LGBMEstimator(BaseEstimator):
|
||||
"low_cost_init_value": 4,
|
||||
},
|
||||
"min_child_samples": {
|
||||
"domain": tune.lograndint(lower=2, upper=2 ** 7 + 1),
|
||||
"domain": tune.lograndint(lower=2, upper=2**7 + 1),
|
||||
"init_value": 20,
|
||||
},
|
||||
"learning_rate": {
|
||||
@@ -1047,7 +1047,6 @@ class LGBMEstimator(BaseEstimator):
|
||||
self.params[self.ITER_HP] = 1
|
||||
self._t1 = self._fit(X_train, y_train, **kwargs)
|
||||
if budget is not None and self._t1 >= budget or n_iter == 1:
|
||||
# self.params[self.ITER_HP] = n_iter
|
||||
return self._t1
|
||||
mem1 = psutil.virtual_memory().available if psutil is not None else 1
|
||||
self._mem1 = mem0 - mem1
|
||||
@@ -1168,7 +1167,7 @@ class XGBoostEstimator(SKLearnEstimator):
|
||||
},
|
||||
"min_child_weight": {
|
||||
"domain": tune.loguniform(lower=0.001, upper=128),
|
||||
"init_value": 1,
|
||||
"init_value": 1.0,
|
||||
},
|
||||
"learning_rate": {
|
||||
"domain": tune.loguniform(lower=1 / 1024, upper=1.0),
|
||||
@@ -1797,17 +1796,17 @@ class ARIMA(Prophet):
|
||||
def search_space(cls, **params):
|
||||
space = {
|
||||
"p": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"d": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"q": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
@@ -1884,32 +1883,32 @@ class SARIMAX(ARIMA):
|
||||
def search_space(cls, **params):
|
||||
space = {
|
||||
"p": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"d": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 2,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"q": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"P": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"D": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
"Q": {
|
||||
"domain": tune.quniform(lower=0, upper=10, q=1),
|
||||
"domain": tune.qrandint(lower=0, upper=10, q=1),
|
||||
"init_value": 1,
|
||||
"low_cost_init_value": 0,
|
||||
},
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "1.0.5"
|
||||
__version__ = "1.0.7"
|
||||
|
||||
@@ -131,7 +131,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install flaml[notebook,ts_forecast]"
|
||||
"%pip install flaml[notebook,ts_forecast]\n",
|
||||
"# avoid version 1.0.2 to 1.0.5 for this notebook due to a bug for arima and sarimax's init config"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -256,6 +256,7 @@ class TestClassification(unittest.TestCase):
|
||||
time_budget=10,
|
||||
task="classification",
|
||||
n_concurrent_trials=2,
|
||||
ensemble=True,
|
||||
)
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import sys
|
||||
from openml.exceptions import OpenMLServerException
|
||||
from requests.exceptions import ChunkedEncodingError
|
||||
from requests.exceptions import ChunkedEncodingError, SSLError
|
||||
|
||||
|
||||
def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
|
||||
@@ -23,6 +23,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
|
||||
OpenMLServerException,
|
||||
ChunkedEncodingError,
|
||||
urllib3.exceptions.ReadTimeoutError,
|
||||
SSLError,
|
||||
) as e:
|
||||
print(e)
|
||||
return
|
||||
@@ -110,7 +111,7 @@ def test_mlflow():
|
||||
X_train, X_test, y_train, y_test = load_openml_task(
|
||||
task_id=7592, data_dir="test/"
|
||||
)
|
||||
except (OpenMLServerException, ChunkedEncodingError) as e:
|
||||
except (OpenMLServerException, ChunkedEncodingError, SSLError) as e:
|
||||
print(e)
|
||||
return
|
||||
""" import AutoML class from flaml package """
|
||||
|
||||
@@ -56,6 +56,7 @@ class TestRegression(unittest.TestCase):
|
||||
y_pred = automl.predict(X_train)
|
||||
print(y_pred)
|
||||
print(automl.model.estimator)
|
||||
n_iter = automl.model.estimator.get_params("n_estimators")
|
||||
print(automl.config_history)
|
||||
print(automl.best_model_for_estimator("xgboost"))
|
||||
print(automl.best_iteration)
|
||||
@@ -86,7 +87,11 @@ class TestRegression(unittest.TestCase):
|
||||
)
|
||||
print(automl.model.estimator)
|
||||
y_pred2 = automl.predict(X_train)
|
||||
assert (y_pred == y_pred2).all()
|
||||
# In rare cases, the last config is stopped early and is also the best config, but the logged config's n_estimators is not reduced.
|
||||
assert (
|
||||
n_iter != automl.model.estimator.get_params("n_estimator")
|
||||
or (y_pred == y_pred2).all()
|
||||
)
|
||||
|
||||
def test_sparse_matrix_regression(self):
|
||||
X_train = scipy.sparse.random(300, 900, density=0.0001)
|
||||
|
||||
@@ -12,6 +12,7 @@ from flaml import AutoVW
|
||||
import string
|
||||
import os
|
||||
import openml
|
||||
from requests.exceptions import SSLError
|
||||
|
||||
VW_DS_DIR = "test/data/"
|
||||
NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
|
||||
@@ -96,10 +97,14 @@ def shuffle_data(X, y, seed):
|
||||
def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR):
|
||||
success = False
|
||||
print("-----getting oml dataset-------", did)
|
||||
ds = openml.datasets.get_dataset(did)
|
||||
target_attribute = ds.default_target_attribute
|
||||
# if target_attribute is None and did in OML_target_attribute_dict:
|
||||
# target_attribute = OML_target_attribute_dict[did]
|
||||
try:
|
||||
ds = openml.datasets.get_dataset(did)
|
||||
target_attribute = ds.default_target_attribute
|
||||
# if target_attribute is None and did in OML_target_attribute_dict:
|
||||
# target_attribute = OML_target_attribute_dict[did]
|
||||
except (SSLError) as e:
|
||||
print(e)
|
||||
return
|
||||
|
||||
print("target=ds.default_target_attribute", target_attribute)
|
||||
data = ds.get_data(target=target_attribute, dataset_format="array")
|
||||
|
||||
Reference in New Issue
Block a user