From ca9f9054e73fb51f36ed5c22c7c66b115d9b59eb Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Fri, 12 Aug 2022 13:55:17 -0700 Subject: [PATCH] categorical choice can be ordered or unordered (#677) * categorical choice can be ordered or unordered * ordered -> order * move choice into utils * version comparison * packaging -> setuptools * import version * version_parse * test order for choice --- flaml/default/portfolio.py | 2 +- flaml/default/suggest.py | 12 ++++++++++-- flaml/model.py | 1 - flaml/tune/__init__.py | 5 +++-- flaml/tune/space.py | 19 +++++++++++-------- flaml/tune/utils.py | 28 ++++++++++++++++++++++++++++ flaml/version.py | 2 +- test/tune/test_sample.py | 4 +++- 8 files changed, 57 insertions(+), 16 deletions(-) create mode 100644 flaml/tune/utils.py diff --git a/flaml/default/portfolio.py b/flaml/default/portfolio.py index b25642bfb..527dae05e 100644 --- a/flaml/default/portfolio.py +++ b/flaml/default/portfolio.py @@ -6,6 +6,7 @@ import json from sklearn.preprocessing import RobustScaler from flaml.default import greedy from flaml.default.regret import load_result, build_regret +from flaml.version import __version__ regret_bound = 0.01 @@ -113,7 +114,6 @@ def serialize(configs, regret, meta_features, output_file, config_path): ) portfolio = [load_json(config_path.joinpath(m + ".json")) for m in configs] regret = regret.loc[configs] - from flaml import __version__ meta_predictor = { "version": __version__, diff --git a/flaml/default/suggest.py b/flaml/default/suggest.py index 50c8503e7..aa22f0e0a 100644 --- a/flaml/default/suggest.py +++ b/flaml/default/suggest.py @@ -5,12 +5,17 @@ import pathlib import json from flaml.data import CLASSIFICATION, DataTransformer from flaml.ml import get_estimator_class, get_classification_objective +from flaml.version import __version__ LOCATION = pathlib.Path(__file__).parent.resolve() logger = logging.getLogger(__name__) CONFIG_PREDICTORS = {} +def version_parse(version): + return tuple(map(int, (version.split(".")))) + + def meta_feature(task, X_train, y_train, meta_feature_names): this_feature = [] n_row = X_train.shape[0] @@ -72,11 +77,14 @@ def suggest_config(task, X, y, estimator_or_predictor, location=None, k=None): if isinstance(estimator_or_predictor, str) else estimator_or_predictor ) - from flaml import __version__ older_version = "1.0.2" # TODO: update older_version when the newer code can no longer handle the older version json file - assert __version__ >= predictor["version"] >= older_version + assert ( + version_parse(__version__) + >= version_parse(predictor["version"]) + >= version_parse(older_version) + ) prep = predictor["preprocessing"] feature = meta_feature( task, X_train=X, y_train=y, meta_feature_names=predictor["meta_feature_names"] diff --git a/flaml/model.py b/flaml/model.py index 8e31e4178..4a7825f1a 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -32,7 +32,6 @@ from .data import ( TOKENCLASSIFICATION, SUMMARIZATION, NLG_TASKS, - MULTICHOICECLASSIFICATION, ) try: diff --git a/flaml/tune/__init__.py b/flaml/tune/__init__.py index 7c441c32a..3d6c89101 100644 --- a/flaml/tune/__init__.py +++ b/flaml/tune/__init__.py @@ -5,7 +5,6 @@ try: from ray.tune import ( uniform, quniform, - choice, randint, qrandint, randn, @@ -14,12 +13,12 @@ try: qloguniform, lograndint, qlograndint, + sample, ) except (ImportError, AssertionError): from .sample import ( uniform, quniform, - choice, randint, qrandint, randn, @@ -29,7 +28,9 @@ except (ImportError, AssertionError): lograndint, qlograndint, ) + from . import sample from .tune import run, report, INCUMBENT_RESULT from .sample import polynomial_expansion_set from .sample import PolynomialExpansionSet, Categorical, Float from .trial import Trial +from .utils import choice diff --git a/flaml/tune/space.py b/flaml/tune/space.py index a6b4a4861..7e2bf4de3 100644 --- a/flaml/tune/space.py +++ b/flaml/tune/space.py @@ -225,15 +225,18 @@ def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict): domain.choice_cost = cost[ind] domain.const = [domain.const[i] for i in ind] domain.ordered = True - elif all( - isinstance(x, int) or isinstance(x, float) for x in domain.categories - ): - # sort the choices by value - ind = np.argsort(domain.categories) - domain.categories = [domain.categories[i] for i in ind] - domain.ordered = True else: - domain.ordered = False + ordered = getattr(domain, "ordered", None) + if ordered is None: + # automatically decide whether to order the choices based on the value type + domain.ordered = ordered = all( + isinstance(x, (int, float)) for x in domain.categories + ) + if ordered: + # sort the choices by value + ind = np.argsort(domain.categories) + domain.categories = [domain.categories[i] for i in ind] + if low_cost and low_cost not in domain.categories: assert isinstance( low_cost, list diff --git a/flaml/tune/utils.py b/flaml/tune/utils.py new file mode 100644 index 000000000..53dfba3a7 --- /dev/null +++ b/flaml/tune/utils.py @@ -0,0 +1,28 @@ +from typing import Sequence + +try: + from ray import __version__ as ray_version + + assert ray_version >= "1.10.0" + from ray.tune import sample +except (ImportError, AssertionError): + from . import sample + + +def choice(categories: Sequence, order=None): + """Sample a categorical value. + Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from + ``np.random.choice([1, 2])`` + + Args: + categories (Sequence): Sequence of categories to sample from. + order (bool): Whether the categories have an order. If None, will be decided autoamtically: + Numerical categories have an order, while string categories do not. + """ + domain = sample.Categorical(categories).uniform() + domain.ordered = ( + order + if order is not None + else all(isinstance(x, (int, float)) for x in categories) + ) + return domain diff --git a/flaml/version.py b/flaml/version.py index 39e0411d5..9fd0f8dd6 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "1.0.9" +__version__ = "1.0.10" diff --git a/test/tune/test_sample.py b/test/tune/test_sample.py index c74f10d65..d06a12541 100644 --- a/test/tune/test_sample.py +++ b/test/tune/test_sample.py @@ -4,7 +4,6 @@ from flaml.tune.sample import ( Domain, uniform, quniform, - choice, randint, qrandint, randn, @@ -14,6 +13,7 @@ from flaml.tune.sample import ( lograndint, qlograndint, ) +from flaml.tune import choice def test_sampler(): @@ -22,6 +22,8 @@ def test_sampler(): print(qrandn(2, 10, 2).sample(size=2)) c = choice([1, 2]) print(c.domain_str, len(c), c.is_valid(3)) + c = choice([1, 2], order=False) + print(c.domain_str, len(c), c.ordered) i = randint(1, 10) print(i.domain_str, i.is_valid(10)) d = Domain()