diff --git a/README.md b/README.md
index 03324c237..30dc270b4 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ # A Fast Library for Automated Machine Learning & Tuning

-
+

diff --git a/flaml/automl.py b/flaml/automl.py index 6f0ef0d68..9426f944c 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -470,7 +470,7 @@ class AutoML(BaseEstimator): 'classification', 'regression', 'ts_forecast', 'rank', 'seq-classification', 'seq-regression', 'summarization'. n_jobs: An integer of the number of threads for training. - log_file_name: A string of the log file name. To disable logging, + log_file_name: A string of the log file name | default="". To disable logging, set it to be an empty string "". estimator_list: A list of strings for estimator names, or 'auto' e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']``` @@ -714,13 +714,11 @@ class AutoML(BaseEstimator): """Time taken to find best model in seconds.""" return self.__dict__.get("_time_taken_best_iter") - def predict( - self, X_test: Union[np.array, pd.DataFrame, List[str], List[List[str]]] - ): + def predict(self, X: Union[np.array, pd.DataFrame, List[str], List[List[str]]]): """Predict label from features. Args: - X_test: A numpy array of featurized instances, shape n * m, + X: A numpy array of featurized instances, shape n * m, or for 'ts_forecast' task: a pandas dataframe with the first column containing timestamp values (datetime type) or an integer n for @@ -748,8 +746,8 @@ class AutoML(BaseEstimator): "No estimator is trained. Please run fit with enough budget." ) return None - X_test = self._preprocess(X_test) - y_pred = estimator.predict(X_test) + X = self._preprocess(X) + y_pred = estimator.predict(X) if ( isinstance(y_pred, np.ndarray) and y_pred.ndim > 1 @@ -763,12 +761,12 @@ class AutoML(BaseEstimator): else: return y_pred - def predict_proba(self, X_test): + def predict_proba(self, X): """Predict the probability of each class from features, only works for classification problems. Args: - X_test: A numpy array of featurized instances, shape n * m. + X: A numpy array of featurized instances, shape n * m. Returns: A numpy array of shape n * c. c is the # classes. Each element at @@ -780,8 +778,8 @@ class AutoML(BaseEstimator): "No estimator is trained. Please run fit with enough budget." ) return None - X_test = self._preprocess(X_test) - proba = self._trained_estimator.predict_proba(X_test) + X = self._preprocess(X) + proba = self._trained_estimator.predict_proba(X) return proba def _preprocess(self, X): @@ -1804,7 +1802,7 @@ class AutoML(BaseEstimator): 'classification', 'regression', 'ts_forecast', 'rank', 'seq-classification', 'seq-regression', 'summarization' n_jobs: An integer of the number of threads for training. - log_file_name: A string of the log file name. To disable logging, + log_file_name: A string of the log file name | default="". To disable logging, set it to be an empty string "". estimator_list: A list of strings for estimator names, or 'auto' e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']``` @@ -2001,7 +1999,7 @@ class AutoML(BaseEstimator): old_level = logger.getEffectiveLevel() self.verbose = verbose logger.setLevel(50 - verbose * 10) - if (not mlflow or not mlflow.active_run()) and not logger.handlers: + if not logger.handlers: # Add the console handler. 
_ch = logging.StreamHandler() _ch.setFormatter(logger_formatter) @@ -2315,7 +2313,7 @@ class AutoML(BaseEstimator): ), key=lambda x: x.last_result["wall_clock_time"], ) - for _track_iter, trial in enumerate(trials): + for self._track_iter, trial in enumerate(trials): result = trial.last_result better = False if result: @@ -2326,20 +2324,20 @@ class AutoML(BaseEstimator): wall_time = result.get("wall_clock_time") if wall_time is not None: self._state.time_from_start = wall_time + self._iter_per_learner[estimator] += 1 if search_state.sample_size == self._state.data_size[0]: - self._iter_per_learner[estimator] += 1 if not self._fullsize_reached: self._fullsize_reached = True if search_state.best_loss < self._state.best_loss: self._state.best_loss = search_state.best_loss self._best_estimator = estimator - self._config_history[_track_iter] = ( + self._config_history[self._track_iter] = ( self._best_estimator, config, self._time_taken_best_iter, ) self._trained_estimator = search_state.trained_estimator - self._best_iteration = _track_iter + self._best_iteration = self._track_iter self._time_taken_best_iter = self._state.time_from_start better = True self._search_states[estimator].best_config = config @@ -2360,7 +2358,7 @@ class AutoML(BaseEstimator): ) if mlflow is not None and mlflow.active_run(): with mlflow.start_run(nested=True): - mlflow.log_metric("iter_counter", self._iter_per_learner[estimator]) + mlflow.log_metric("iter_counter", self._track_iter) if "intermediate_results" in search_state.metric_for_logging: for each_entry in search_state.metric_for_logging[ "intermediate_results" @@ -2558,8 +2556,9 @@ class AutoML(BaseEstimator): self._state.time_from_start = wall_time # logger.info(f"{self._search_states[estimator].sample_size}, {data_size}") if search_state.sample_size == self._state.data_size[0]: - self._iter_per_learner[estimator] += 1 + self._iter_per_learner_fullsize[estimator] += 1 self._fullsize_reached = True + self._iter_per_learner[estimator] += 1 if search_state.best_loss < self._state.best_loss: best_config_sig = estimator + search_state.get_hist_config_sig( self.data_size_full, search_state.best_config @@ -2681,6 +2680,7 @@ class AutoML(BaseEstimator): self._config_history = {} self._max_iter_per_learner = 10000 self._iter_per_learner = dict([(e, 0) for e in self.estimator_list]) + self._iter_per_learner_fullsize = dict([(e, 0) for e in self.estimator_list]) self._fullsize_reached = False self._trained_estimator = None self._best_estimator = None @@ -2849,7 +2849,8 @@ class AutoML(BaseEstimator): if ( self._search_states[estimator].time2eval_best > self._state.time_budget - self._state.time_from_start - or self._iter_per_learner[estimator] >= self._max_iter_per_learner + or self._iter_per_learner_fullsize[estimator] + >= self._max_iter_per_learner ): inv.append(0) continue diff --git a/flaml/model.py b/flaml/model.py index d5f57bda9..763b36892 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -16,6 +16,8 @@ from sklearn.dummy import DummyClassifier, DummyRegressor from scipy.sparse import issparse import logging import shutil +from pandas import DataFrame, Series, to_datetime +import sys from . 
import tune from .data import ( group_counts, @@ -31,10 +33,6 @@ from .data import ( MULTICHOICECLASSIFICATION, ) -import pandas as pd -from pandas import DataFrame, Series -import sys - try: import psutil except ImportError: @@ -199,32 +197,32 @@ class BaseEstimator: train_time = self._fit(X_train, y_train, **kwargs) return train_time - def predict(self, X_test): + def predict(self, X): """Predict label from features. Args: - X_test: A numpy array or a dataframe of featurized instances, shape n*m. + X: A numpy array or a dataframe of featurized instances, shape n*m. Returns: A numpy array of shape n*1. Each element is the label for a instance. """ if self._model is not None: - X_test = self._preprocess(X_test) - return self._model.predict(X_test) + X = self._preprocess(X) + return self._model.predict(X) else: logger.warning( "Estimator is not fit yet. Please run fit() before predict()." ) - return np.ones(X_test.shape[0]) + return np.ones(X.shape[0]) - def predict_proba(self, X_test): + def predict_proba(self, X): """Predict the probability of each class from features. Only works for classification problems Args: - X_test: A numpy array of featurized instances, shape n*m. + X: A numpy array of featurized instances, shape n*m. Returns: A numpy array of shape n*c. c is the # classes. @@ -233,8 +231,8 @@ class BaseEstimator: """ assert self._task in CLASSIFICATION, "predict_proba() only for classification." - X_test = self._preprocess(X_test) - return self._model.predict_proba(X_test) + X = self._preprocess(X) + return self._model.predict_proba(X) def cleanup(self): del self._model @@ -380,7 +378,11 @@ class TransformersEstimator(BaseEstimator): if is_str or is_list_of_str: return tokenize_text( - X=X, Y=y, task=self._task, custom_hpo_args=self.custom_hpo_args + X=X, + Y=y, + task=self._task, + custom_hpo_args=self.custom_hpo_args, + tokenizer=self._tokenizer, ) else: return X, None @@ -400,9 +402,8 @@ class TransformersEstimator(BaseEstimator): transformers.logging.set_verbosity_error() - from transformers import EarlyStoppingCallback + from transformers import TrainerCallback from transformers.trainer_utils import set_seed - from transformers import AutoTokenizer from datasets import Dataset from .nlp.utils import ( @@ -422,10 +423,11 @@ class TransformersEstimator(BaseEstimator): # else: from .nlp.huggingface.trainer import TrainerForAuto from .nlp.huggingface.data_collator import DataCollatorForAuto + from .nlp.utils import get_auto_tokenizer this_params = self.params - class EarlyStoppingCallbackForAuto(EarlyStoppingCallback): + class EarlyStoppingCallbackForAuto(TrainerCallback): def on_train_begin(self, args, state, control, **callback_kwargs): self.train_begin_time = time.time() @@ -459,6 +461,10 @@ class TransformersEstimator(BaseEstimator): set_seed(self.params.get("seed", self._TrainingArguments.seed)) self._init_hpo_args(kwargs) + self._tokenizer = get_auto_tokenizer( + self.custom_hpo_args.model_path, self._task + ) + self._metric = kwargs["metric"] self.use_ray = kwargs.get("use_ray") @@ -477,12 +483,6 @@ class TransformersEstimator(BaseEstimator): TransformersEstimator._join(self._X_train, self._y_train) ) - # TODO: set a breakpoint here, observe the resulting train_dataset, - # compare it with the output of the tokenized results in your transformer example - # for example, if your task is MULTIPLECHOICE, you need to compare train_dataset with - # the output of https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329 - # make 
sure they are the same - if X_val is not None: if (self._task not in NLG_TASKS) and (self._task != TOKENCLASSIFICATION): self._X_val, _ = self._preprocess(X=X_val, **kwargs) @@ -495,13 +495,7 @@ class TransformersEstimator(BaseEstimator): else: eval_dataset = None - tokenizer = AutoTokenizer.from_pretrained( - self.custom_hpo_args.model_path, use_fast=True - ) - self._tokenizer = tokenizer - num_labels = get_num_labels(self._task, self._y_train) - training_args_config, per_model_config = separate_config( self.params, self._task ) @@ -538,6 +532,7 @@ class TransformersEstimator(BaseEstimator): eval_steps=ckpt_freq, evaluate_during_training=True, save_steps=ckpt_freq, + logging_steps=ckpt_freq, save_total_limit=0, metric_for_best_model="loss", fp16=self.custom_hpo_args.fp16, @@ -553,6 +548,7 @@ class TransformersEstimator(BaseEstimator): do_eval=True, per_device_eval_batch_size=1, eval_steps=ckpt_freq, + logging_steps=ckpt_freq, evaluation_strategy=IntervalStrategy.STEPS, save_steps=ckpt_freq, save_total_limit=0, @@ -566,9 +562,9 @@ class TransformersEstimator(BaseEstimator): model_init=partial(self._model_init, num_labels, per_model_config), train_dataset=train_dataset, eval_dataset=eval_dataset, - tokenizer=tokenizer, + tokenizer=self._tokenizer, data_collator=DataCollatorForAuto( - tokenizer=tokenizer, + tokenizer=self._tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None, ) if self._task == MULTICHOICECLASSIFICATION @@ -599,6 +595,13 @@ class TransformersEstimator(BaseEstimator): num_labels=self._num_labels, per_model_config=self._per_model_config, ) + if hasattr(self._trainer, "intermediate_results"): + self.intermediate_results = [ + x[1] + for x in sorted( + self._trainer.intermediate_results.items(), key=lambda x: x[0] + ) + ] self._trainer = None def _delete_one_ckpt(self, ckpt_location): @@ -634,8 +637,8 @@ class TransformersEstimator(BaseEstimator): f"{PREFIX_CHECKPOINT_DIR}-{best_ckpt_global_step}", ) self.params[self.ITER_HP] = best_ckpt_global_step - print(trainer.state.global_step) - print(trainer.ckpt_to_global_step) + logger.debug(trainer.state.global_step) + logger.debug(trainer.ckpt_to_global_step) return best_ckpt def _compute_metrics_by_dataset_name(self, eval_pred): @@ -663,13 +666,13 @@ class TransformersEstimator(BaseEstimator): if self._task == TOKENCLASSIFICATION else np.argmax(predictions, axis=1) ) - return { - "val_loss": metric_loss_score( + metric_dict = { + "automl_metric": metric_loss_score( metric_name=self._metric, y_predict=predictions, y_true=labels ) } else: - agg_metric, metric_dict = self._metric( + loss, metric_dict = self._metric( X_test=self._X_val, y_test=self._y_val, estimator=self, @@ -677,14 +680,11 @@ class TransformersEstimator(BaseEstimator): X_train=self._X_train, y_train=self._y_train, ) - if not hasattr(self, "intermediate_results"): - self.intermediate_results = [] - self.intermediate_results.append(metric_dict) - return metric_dict + metric_dict["automl_metric"] = loss + return metric_dict def _init_model_for_predict(self, X_test): from datasets import Dataset - from transformers import AutoTokenizer from .nlp.huggingface.trainer import TrainerForAuto from .nlp.huggingface.data_collator import DataCollatorForPredict @@ -695,14 +695,11 @@ class TransformersEstimator(BaseEstimator): output_dir=self.custom_hpo_args.output_dir, **self._training_args_config, ) - tokenizer = AutoTokenizer.from_pretrained( - self.custom_hpo_args.model_path, use_fast=True - ) self._trainer = TrainerForAuto( model=self._model, args=training_args, 
data_collator=DataCollatorForPredict( - tokenizer=tokenizer, + tokenizer=self._tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None, ) if self._task == MULTICHOICECLASSIFICATION @@ -711,18 +708,18 @@ class TransformersEstimator(BaseEstimator): ) return test_dataset, training_args - def predict_proba(self, X_test): + def predict_proba(self, X): assert ( self._task in CLASSIFICATION ), "predict_proba() only for classification tasks." - test_dataset, _ = self._init_model_for_predict(X_test) + test_dataset, _ = self._init_model_for_predict(X) predictions = self._trainer.predict(test_dataset) self._trainer = None return predictions.predictions - def predict(self, X_test): - test_dataset, training_args = self._init_model_for_predict(X_test) + def predict(self, X): + test_dataset, training_args = self._init_model_for_predict(X) if self._task not in NLG_TASKS: predictions = self._trainer.predict(test_dataset) else: @@ -738,9 +735,6 @@ class TransformersEstimator(BaseEstimator): return predictions.predictions.reshape((len(predictions.predictions),)) elif self._task == TOKENCLASSIFICATION: return np.argmax(predictions.predictions, axis=2) - # TODO: elif self._task == your task, return the corresponding prediction - # e.g., if your task == QUESTIONANSWERING, you need to return the answer instead - # of the index elif self._task == SUMMARIZATION: if isinstance(predictions.predictions, tuple): predictions = np.argmax(predictions.predictions[0], axis=2) @@ -1114,12 +1108,12 @@ class XGBoostEstimator(SKLearnEstimator): train_time = time.time() - start_time return train_time - def predict(self, X_test): + def predict(self, X): import xgboost as xgb - if not issparse(X_test): - X_test = self._preprocess(X_test) - dtest = xgb.DMatrix(X_test) + if not issparse(X): + X = self._preprocess(X) + dtest = xgb.DMatrix(X) return super().predict(dtest) @classmethod @@ -1604,22 +1598,22 @@ class Prophet(SKLearnEstimator): self._model = model return train_time - def predict(self, X_test): - if isinstance(X_test, int): + def predict(self, X): + if isinstance(X, int): raise ValueError( "predict() with steps is only supported for arima/sarimax." " For Prophet, pass a dataframe with the first column containing" " the timestamp values." ) if self._model is not None: - X_test = self._preprocess(X_test) - forecast = self._model.predict(X_test) + X = self._preprocess(X) + forecast = self._model.predict(X) return forecast["yhat"] else: logger.warning( "Estimator is not fit yet. Please run fit() before predict()." 
) - return np.ones(X_test.shape[0]) + return np.ones(X.shape[0]) class ARIMA(Prophet): @@ -1648,7 +1642,7 @@ class ARIMA(Prophet): def _join(self, X_train, y_train): train_df = super()._join(X_train, y_train) - train_df.index = pd.to_datetime(train_df[TS_TIMESTAMP_COL]) + train_df.index = to_datetime(train_df[TS_TIMESTAMP_COL]) train_df = train_df.drop(TS_TIMESTAMP_COL, axis=1) return train_df @@ -1684,30 +1678,30 @@ class ARIMA(Prophet): self._model = model return train_time - def predict(self, X_test): + def predict(self, X): if self._model is not None: - if isinstance(X_test, int): - forecast = self._model.forecast(steps=X_test) - elif isinstance(X_test, DataFrame): - start = X_test[TS_TIMESTAMP_COL].iloc[0] - end = X_test[TS_TIMESTAMP_COL].iloc[-1] - if len(X_test.columns) > 1: - X_test = self._preprocess(X_test.drop(columns=TS_TIMESTAMP_COL)) - regressors = list(X_test) - print(start, end, X_test.shape) + if isinstance(X, int): + forecast = self._model.forecast(steps=X) + elif isinstance(X, DataFrame): + start = X[TS_TIMESTAMP_COL].iloc[0] + end = X[TS_TIMESTAMP_COL].iloc[-1] + if len(X.columns) > 1: + X = self._preprocess(X.drop(columns=TS_TIMESTAMP_COL)) + regressors = list(X) + print(start, end, X.shape) forecast = self._model.predict( - start=start, end=end, exog=X_test[regressors] + start=start, end=end, exog=X[regressors] ) else: forecast = self._model.predict(start=start, end=end) else: raise ValueError( - "X_test needs to be either a pandas Dataframe with dates as the first column" + "X needs to be either a pandas Dataframe with dates as the first column" " or an int number of periods for predict()." ) return forecast else: - return np.ones(X_test if isinstance(X_test, int) else X_test.shape[0]) + return np.ones(X if isinstance(X, int) else X.shape[0]) class SARIMAX(ARIMA): @@ -1829,7 +1823,7 @@ class TS_SKLearn_Regressor(SKLearnEstimator): cols = list(X) if len(cols) == 1: ds_col = cols[0] - X = pd.DataFrame(index=X[ds_col]) + X = DataFrame(index=X[ds_col]) elif len(cols) > 1: ds_col = cols[0] exog_cols = cols[1:] @@ -1879,42 +1873,40 @@ class TS_SKLearn_Regressor(SKLearnEstimator): train_time = time.time() - current_time return train_time - def predict(self, X_test): + def predict(self, X): if self._model is not None: - X_test = self.transform_X(X_test) - X_test = self._preprocess(X_test) + X = self.transform_X(X) + X = self._preprocess(X) if isinstance(self._model, list): assert len(self._model) == len( - X_test - ), "Model is optimized for horizon, length of X_test must be equal to `period`." + X + ), "Model is optimized for horizon, length of X must be equal to `period`." preds = [] for i in range(1, len(self._model) + 1): ( X_pred, _, ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format( - X_test.iloc[:i, :] + X.iloc[:i, :] ) preds.append(self._model[i - 1].predict(X_pred)[-1]) - forecast = pd.DataFrame( + forecast = DataFrame( data=np.asarray(preds).reshape(-1, 1), columns=[self.hcrystaball_model.name], - index=X_test.index, + index=X.index, ) else: ( X_pred, _, - ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format( - X_test - ) + ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X) forecast = self._model.predict(X_pred) return forecast else: logger.warning( "Estimator is not fit yet. Please run fit() before predict()." 
) - return np.ones(X_test.shape[0]) + return np.ones(X.shape[0]) class LGBM_TS_Regressor(TS_SKLearn_Regressor): diff --git a/flaml/nlp/README.md b/flaml/nlp/README.md index 071632481..b0f336f97 100644 --- a/flaml/nlp/README.md +++ b/flaml/nlp/README.md @@ -1,65 +1,17 @@ -# Hyperparameter Optimization for Huggingface Transformers +# AutoML for NLP -Fine-tuning pre-trained language models based on the transformers library. +This directory contains utility functions used by AutoNLP. Currently we support four NLP tasks: sequence classification, sequence regression, multiple choice and summarization. -An example: +Please refer to this [link](https://microsoft.github.io/FLAML/docs/Examples/AutoML-NLP) for examples. -```python -from flaml import AutoML -import pandas as pd -train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3) -dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3) -test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3) +# Troubleshooting fine-tuning HPO for pre-trained language models -custom_sent_keys = ["#1 String", "#2 String"] -label_key = "Quality" +The frequent updates of transformers may lead to fluctuations in the results of tuning. To help users quickly troubleshoot the result of AutoNLP when a tuning failure occurs (e.g., failing to reproduce previous results), we have provided the following jupyter notebook: -X_train = train_dataset[custom_sent_keys] -y_train = train_dataset[label_key] +* [Troubleshooting HPO for fine-tuning pre-trained language models](https://github.com/microsoft/FLAML/blob/main/notebook/research/acl2021.ipynb) -X_val = dev_dataset[custom_sent_keys] -y_val = dev_dataset[label_key] - -X_test = test_dataset[custom_sent_keys] - -automl = AutoML() - -automl_settings = { - "gpu_per_trial": 0, # use a value larger than 0 for GPU training - "max_iter": 10, - "time_budget": 300, - "task": "seq-classification", - "metric": "accuracy", -} - -automl_settings["custom_hpo_args"] = { - "model_path": "google/electra-small-discriminator", - "output_dir": "data/output/", - "ckpt_per_epoch": 1, -} - -automl.fit( - X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings -) -automl.predict(X_test) - -``` - -The current use cases that are supported: - -1. A simplified version of fine-tuning the GLUE dataset using HuggingFace; -2. For selecting better search space for fine-tuning the GLUE dataset; -3. Use the search algorithms in flaml for more efficient fine-tuning of HuggingFace. - -The use cases that can be supported in future: - -1. HPO fine-tuning for text generation; -2. HPO fine-tuning for question answering. - -## Troubleshooting fine-tuning HPO for pre-trained language models - -To reproduce the results for our ACL2021 paper: +Our findings on troubleshooting fine-tuning the Electra and RoBERTa model for the GLUE dataset can be seen in the following paper published in ACL 2021: * [An Empirical Study on Hyperparameter Optimization for Fine-Tuning Pre-trained Language Models](https://arxiv.org/abs/2106.09204). Xueqing Liu, Chi Wang. ACL-IJCNLP 2021. 
@@ -72,4 +24,4 @@ To reproduce the results for our ACL2021 paper: } ``` -Please refer to the following jupyter notebook: [Troubleshooting HPO for fine-tuning pre-trained language models](https://github.com/microsoft/FLAML/blob/main/notebook/research/acl2021.ipynb) \ No newline at end of file + diff --git a/flaml/nlp/huggingface/trainer.py b/flaml/nlp/huggingface/trainer.py index 2bd81bf22..4534f4c63 100644 --- a/flaml/nlp/huggingface/trainer.py +++ b/flaml/nlp/huggingface/trainer.py @@ -44,6 +44,19 @@ class TrainerForAuto(Seq2SeqTrainer): model, inputs, prediction_loss_only, ignore_keys ) + def log(self, logs) -> None: + if getattr(self, "_is_seq2seq", None): + super().log(logs) + else: + super(Seq2SeqTrainer, self).log(logs) + if not hasattr(self, "intermediate_results"): + self.intermediate_results = {} + + epoch_num = logs.get("epoch", None) + if epoch_num: + self.intermediate_results.setdefault(epoch_num, {}) + self.intermediate_results[epoch_num].update(logs) + def evaluate( self, eval_dataset=None, @@ -74,10 +87,6 @@ class TrainerForAuto(Seq2SeqTrainer): ignore_keys, metric_key_prefix, ) - # if metrics: - # for key in list(metrics.keys()): - # if key.startswith("eval_"): - # metrics[key[5:]] = metrics.pop(key) if hasattr(self, "ckpt_to_global_step"): self.ckpt_to_global_step[ckpt_dir] = self.state.global_step if metrics: diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 144a1e560..22bd25faa 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -30,34 +30,61 @@ def load_default_huggingface_metric_for_task(task): global tokenized_column_names -def tokenize_text(X, Y=None, task=None, custom_hpo_args=None): +def get_auto_tokenizer(model_path, task): + from transformers import AutoTokenizer + + if task == SUMMARIZATION: + return AutoTokenizer.from_pretrained( + model_path, # 'roberta-base' + cache_dir=None, + use_fast=True, + revision="main", + use_auth_token=None, + ) + else: + return AutoTokenizer.from_pretrained(model_path, use_fast=True) + + +def tokenize_text(X, Y=None, task=None, custom_hpo_args=None, tokenizer=None): if task in (SEQCLASSIFICATION, SEQREGRESSION): - X_tokenized, _ = tokenize_onedataframe( - X, this_tokenizer=None, task=task, custom_hpo_args=custom_hpo_args + X_tokenized = tokenize_onedataframe( + X, + tokenizer=tokenizer, + task=task, + custom_hpo_args=custom_hpo_args, + prefix_str="", ) return X_tokenized, None elif task == TOKENCLASSIFICATION: - return tokenize_text_tokclassification(X, Y, custom_hpo_args) + return tokenize_text_tokclassification( + X, Y, tokenizer=tokenizer, custom_hpo_args=custom_hpo_args + ) elif task in NLG_TASKS: - return tokenize_seq2seq(X, Y, task=task, custom_hpo_args=custom_hpo_args) + return tokenize_seq2seq( + X, Y, tokenizer=tokenizer, task=task, custom_hpo_args=custom_hpo_args + ) elif task == MULTICHOICECLASSIFICATION: - return tokenize_text_multiplechoice(X, custom_hpo_args) + return tokenize_text_multiplechoice( + X, tokenizer=tokenizer, custom_hpo_args=custom_hpo_args + ) -def tokenize_seq2seq(X, Y, task=None, custom_hpo_args=None): - model_inputs, tokenizer = tokenize_onedataframe( +def tokenize_seq2seq(X, Y, tokenizer, task=None, custom_hpo_args=None): + model_inputs = tokenize_onedataframe( X, - this_tokenizer=None, + tokenizer=tokenizer, task=task, custom_hpo_args=custom_hpo_args, + prefix_str="summarize: ", ) labels = None if Y is not None: - labels, _ = tokenize_onedataframe( + labels = tokenize_onedataframe( Y.to_frame(), - this_tokenizer=tokenizer, + tokenizer=tokenizer, task=task, 
custom_hpo_args=custom_hpo_args, + prefix_str="", ) labels["label"] = [ [(each_l if each_l != tokenizer.pad_token_id else -100) for each_l in label] @@ -70,7 +97,7 @@ def tokenize_seq2seq(X, Y, task=None, custom_hpo_args=None): def tokenize_and_align_labels( - examples, tokenizer, custom_hpo_args, X_sent_key, Y_sent_key=None + examples, tokenizer, custom_hpo_args=None, X_sent_key=None, Y_sent_key=None ): global tokenized_column_names @@ -115,14 +142,10 @@ def tokenize_and_align_labels( return tokenized_input_and_labels -def tokenize_text_tokclassification(X, Y, custom_hpo_args): - from transformers import AutoTokenizer +def tokenize_text_tokclassification(X, Y, tokenizer, custom_hpo_args=None): import pandas as pd global tokenized_column_names - this_tokenizer = AutoTokenizer.from_pretrained( - custom_hpo_args.model_path, use_fast=True - ) if Y is not None: X_and_Y = pd.concat([X, Y.to_frame()], axis=1) X_key = list(X.keys())[0] @@ -130,7 +153,7 @@ def tokenize_text_tokclassification(X, Y, custom_hpo_args): X_and_Y_tokenized = X_and_Y.apply( lambda x: tokenize_and_align_labels( x, - tokenizer=this_tokenizer, + tokenizer=tokenizer, custom_hpo_args=custom_hpo_args, X_sent_key=X_key, Y_sent_key=Y_key, @@ -150,7 +173,7 @@ def tokenize_text_tokclassification(X, Y, custom_hpo_args): d = X.apply( lambda x: tokenize_and_align_labels( x, - tokenizer=this_tokenizer, + tokenizer=tokenizer, custom_hpo_args=custom_hpo_args, X_sent_key=X_key, Y_sent_key=None, @@ -167,37 +190,21 @@ def tokenize_text_tokclassification(X, Y, custom_hpo_args): def tokenize_onedataframe( X, - this_tokenizer=None, + tokenizer, task=None, custom_hpo_args=None, + prefix_str=None, ): - from transformers import AutoTokenizer import pandas global tokenized_column_names - if this_tokenizer: - with this_tokenizer.as_target_tokenizer(): - d = X.apply( - lambda x: tokenize_row( - x, - this_tokenizer, - prefix=("",) if task is SUMMARIZATION else None, - task=task, - custom_hpo_args=custom_hpo_args, - ), - axis=1, - result_type="expand", - ) - else: - this_tokenizer = AutoTokenizer.from_pretrained( - custom_hpo_args.model_path, use_fast=True - ) + with tokenizer.as_target_tokenizer(): d = X.apply( lambda x: tokenize_row( x, - this_tokenizer, - prefix=("summarize: ",) if task is SUMMARIZATION else None, + tokenizer, + prefix=(prefix_str,) if task is SUMMARIZATION else None, task=task, custom_hpo_args=custom_hpo_args, ), @@ -206,7 +213,7 @@ def tokenize_onedataframe( ) X_tokenized = pandas.DataFrame(columns=tokenized_column_names) X_tokenized[tokenized_column_names] = d - return X_tokenized, this_tokenizer + return X_tokenized def postprocess_text(preds, labels): @@ -223,9 +230,7 @@ def postprocess_text(preds, labels): return preds, labels -def tokenize_row( - this_row, this_tokenizer, prefix=None, task=None, custom_hpo_args=None -): +def tokenize_row(this_row, tokenizer, prefix=None, task=None, custom_hpo_args=None): global tokenized_column_names assert ( "max_seq_length" in custom_hpo_args.__dict__ @@ -234,7 +239,7 @@ def tokenize_row( if prefix: this_row = tuple(["".join(x) for x in zip(prefix, this_row)]) - tokenized_example = this_tokenizer( + tokenized_example = tokenizer( *tuple(this_row), padding="max_length", max_length=custom_hpo_args.max_seq_length, @@ -246,22 +251,14 @@ def tokenize_row( return [tokenized_example[x] for x in tokenized_column_names] -def tokenize_text_multiplechoice(X, custom_hpo_args): - from transformers import AutoTokenizer +def tokenize_text_multiplechoice(X, tokenizer, custom_hpo_args=None): import pandas 
global tokenized_column_names - this_tokenizer = AutoTokenizer.from_pretrained( - custom_hpo_args.model_path, # 'roberta-base' - cache_dir=None, - use_fast=True, - revision="main", - use_auth_token=None, - ) t = X[["sent1", "sent2", "ending0", "ending1", "ending2", "ending3"]] d = t.apply( - lambda x: tokenize_swag(x, this_tokenizer, custom_hpo_args), + lambda x: tokenize_swag(x, tokenizer, custom_hpo_args), axis=1, result_type="expand", ) @@ -272,7 +269,7 @@ def tokenize_text_multiplechoice(X, custom_hpo_args): return output, None -def tokenize_swag(this_row, this_tokenizer, custom_hpo_args): +def tokenize_swag(this_row, tokenizer, custom_hpo_args=None): global tokenized_column_names first_sentences = [[this_row["sent1"]] * 4] @@ -289,7 +286,7 @@ def tokenize_swag(this_row, this_tokenizer, custom_hpo_args): # From 2 dimension to 1 dimension array first_sentences = list(chain(*first_sentences)) - tokenized_example = this_tokenizer( + tokenized_example = tokenizer( *tuple([first_sentences, second_sentences]), truncation=True, max_length=custom_hpo_args.max_seq_length, @@ -411,10 +408,7 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None): ) from ..data import SEQCLASSIFICATION, SEQREGRESSION, TOKENCLASSIFICATION - this_model_type = AutoConfig.from_pretrained(checkpoint_path).model_type - this_vocab_size = AutoConfig.from_pretrained(checkpoint_path).vocab_size - - def get_this_model(task): + def get_this_model(task, model_config): from transformers import AutoModelForSequenceClassification from transformers import AutoModelForSeq2SeqLM from transformers import AutoModelForMultipleChoice @@ -463,28 +457,34 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None): model_config = AutoConfig.from_pretrained(checkpoint_path) return model_config + current_config = AutoConfig.from_pretrained(checkpoint_path) + this_model_type, this_vocab_size = ( + current_config.model_type, + current_config.vocab_size, + ) + if task == SEQCLASSIFICATION: - num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels + num_labels_old = current_config.num_labels if is_pretrained_model_in_classification_head_list(this_model_type): model_config_num_labels = num_labels_old else: model_config_num_labels = num_labels - model_config = _set_model_config(checkpoint_path) + new_config = _set_model_config(checkpoint_path) if is_pretrained_model_in_classification_head_list(this_model_type): if num_labels != num_labels_old: - this_model = get_this_model(task) - model_config.num_labels = num_labels + this_model = get_this_model(task, new_config) + new_config.num_labels = num_labels this_model.num_labels = num_labels this_model.classifier = ( AutoSeqClassificationHead.from_model_type_and_config( - this_model_type, model_config + this_model_type, new_config ) ) else: - this_model = get_this_model(task) + this_model = get_this_model(task, new_config) else: - this_model = get_this_model(task) + this_model = get_this_model(task, new_config) this_model.resize_token_embeddings(this_vocab_size) return this_model else: @@ -493,7 +493,7 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None): elif task == TOKENCLASSIFICATION: model_config_num_labels = num_labels model_config = _set_model_config(checkpoint_path) - this_model = get_this_model(task) + this_model = get_this_model(task, model_config) return this_model diff --git a/flaml/searcher/blendsearch.py b/flaml/searcher/blendsearch.py index 171c3bfd0..494000199 100644 --- a/flaml/searcher/blendsearch.py +++ 
b/flaml/searcher/blendsearch.py @@ -100,7 +100,7 @@ class BlendSearch(Searcher): needed for a config. It is used to skip configs which do not fit in memory. metric_constraints: A list of metric constraints to be satisfied. - E.g., `['precision', '>=', 0.9]`. + E.g., `['precision', '>=', 0.9]`. The sign can be ">=" or "<=". seed: An integer of the random seed. experimental: A bool of whether to use experimental features. """ @@ -155,7 +155,7 @@ class BlendSearch(Searcher): from functools import partial gs_space = partial(define_by_run_func, space=space) - evaluated_rewards = None # not supproted by define-by-run + evaluated_rewards = None # not supported by define-by-run else: gs_space = space gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32) @@ -748,6 +748,10 @@ class BlendSearch(Searcher): and value > threshold or sign == ">=" and value < threshold + or sign == ">" + and value <= threshold + or sign == "<" + and value > threshold ): self._result[config_signature] = { self._metric: np.inf * self._ls.metric_op, diff --git a/flaml/searcher/suggestion.py b/flaml/searcher/suggestion.py index 69c27cc73..59e50c2d4 100644 --- a/flaml/searcher/suggestion.py +++ b/flaml/searcher/suggestion.py @@ -123,7 +123,7 @@ class Searcher: mod in ["min", "max", "obs"] for mod in mode ), "All of mode must be 'min' or 'max' or 'obs'!" else: - raise ValueError("Mode most either be a list or string") + raise ValueError("Mode must either be a list or string") def set_search_properties( self, metric: Optional[str], mode: Optional[str], config: Dict diff --git a/flaml/searcher/variant_generator.py b/flaml/searcher/variant_generator.py index 76b18d88e..cd9eb56bc 100644 --- a/flaml/searcher/variant_generator.py +++ b/flaml/searcher/variant_generator.py @@ -22,6 +22,11 @@ import numpy import random from ..tune.sample import Categorical, Domain, RandomState +try: + from ray.tune.sample import Domain as RayDomain +except ImportError: + RayDomain = Domain + logger = logging.getLogger(__name__) @@ -192,10 +197,10 @@ def _resolve_domain_vars( ) except RecursiveDependencyError as e: error = e - except Exception: - raise ValueError( - "Failed to evaluate expression: {}: {}".format(path, domain) - ) + # except Exception: + # raise ValueError( + # "Failed to evaluate expression: {}: {}".format(path, domain) + # ) else: assign_value(spec, path, value) resolved[path] = value @@ -243,7 +248,7 @@ def _is_resolved(v) -> bool: def _try_resolve(v) -> Tuple[bool, Any]: - if isinstance(v, Domain): + if isinstance(v, (Domain, RayDomain)): # Domain to sample from return False, v elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v: diff --git a/flaml/tune/space.py b/flaml/tune/space.py index b8122e43c..fbca06a1b 100644 --- a/flaml/tune/space.py +++ b/flaml/tune/space.py @@ -61,7 +61,15 @@ def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, elif isinstance(sampler, sample.Uniform): if quantize: trial.suggest_float(key, domain.lower, domain.upper, step=quantize) - trial.suggest_float(key, domain.lower, domain.upper) + else: + trial.suggest_float(key, domain.lower, domain.upper) + else: + raise ValueError( + "Optuna search does not support parameters of type " + "`{}` with samplers of type `{}`".format( + type(domain).__name__, type(domain.sampler).__name__ + ) + ) elif isinstance(domain, sample.Integer): if isinstance(sampler, sample.LogUniform): trial.suggest_int( @@ -144,6 +152,8 @@ def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]: key = key[:-8] domain = 
space.get(key) if domain is not None: + if isinstance(domain, dict): + value, domain = unflatten_hierarchical(value, domain) subspace[key] = domain if isinstance(domain, sample.Domain): sampler = domain.sampler @@ -404,8 +414,8 @@ def denormalize( elif str(sampler) == "Normal": # denormalization for 'Normal' config_denorm[key] = value * sampler.sd + sampler.mean - else: - config_denorm[key] = value + # else: + # config_denorm[key] = value # Handle quantized if quantize is not None: config_denorm[key] = ( @@ -419,6 +429,14 @@ def denormalize( return config_denorm +def equal(config, const) -> bool: + if config == const: + return True + if not isinstance(config, Dict) or not isinstance(const, Dict): + return False + return all(equal(config[key], value) for key, value in const.items()) + + def indexof(domain: Dict, config: Dict) -> int: """Find the index of config in domain.categories.""" index = config.get("_choice_") @@ -435,8 +453,7 @@ def indexof(domain: Dict, config: Dict) -> int: # print(cat.keys()) if not set(config.keys()).issubset(set(cat.keys())): continue - # print(domain.const[i]) - if all(config[key] == value for key, value in domain.const[i].items()): + if equal(config, domain.const[i]): # assumption: the concatenation of constants is a unique identifier return i return None diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index 5a064cfaf..396f83a2e 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -221,7 +221,7 @@ def run( used, otherwise no scheduler will be used. When set 'flaml', an authentic scheduler implemented in FLAML will be used. It does not require users to report intermediate results in evaluation_function. - Find more details abuot this scheduler in this paper + Find more details about this scheduler in this paper https://arxiv.org/pdf/1911.04706.pdf). When set 'asha', the input for arguments "resource_attr", "min_resource", "max_resource" and "reduction_factor" will be passed @@ -262,7 +262,7 @@ def run( needed for a config. It is used to skip configs which do not fit in memory. metric_constraints: A list of metric constraints to be satisfied. - e.g., `['precision', '>=', 0.9]`. + e.g., `['precision', '>=', 0.9]`. The sign can be ">=" or "<=". max_failure: int | the maximal consecutive number of failures to sample a trial before the tuning is terminated. use_ray: A boolean of whether to use ray as the backend. diff --git a/flaml/version.py b/flaml/version.py index e94731c0f..f8c6ac7fe 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "0.9.4" +__version__ = "0.9.5" diff --git a/notebook/automl_classification.ipynb b/notebook/automl_classification.ipynb index fc7a95bca..650786ee1 100644 --- a/notebook/automl_classification.ipynb +++ b/notebook/automl_classification.ipynb @@ -18,7 +18,7 @@ "## 1. Introduction\n", "\n", "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n", - "with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy to use and extend, such as adding new learners. FLAML can \n", + "with low computational cost. It is fast and economical. The simple and lightweight design makes it easy to use and extend, such as adding new learners. 
FLAML can \n", "- serve as an economical AutoML engine,\n", "- be used as a fast hyperparameter tuning tool, or \n", "- be embedded in self-tuning software that requires low latency & resource in repetitive\n", diff --git a/notebook/automl_lightgbm.ipynb b/notebook/automl_lightgbm.ipynb index 529d20bf0..a661f1000 100644 --- a/notebook/automl_lightgbm.ipynb +++ b/notebook/automl_lightgbm.ipynb @@ -2,6 +2,11 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "source": [ "Copyright (c) Microsoft Corporation. All rights reserved. \n", "\n", @@ -13,7 +18,7 @@ "## 1. Introduction\n", "\n", "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n", - "with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy \n", + "with low computational cost. It is fast and economical. The simple and lightweight design makes it easy \n", "to use and extend, such as adding new learners. FLAML can \n", "- serve as an economical AutoML engine,\n", "- be used as a fast hyperparameter tuning tool, or \n", @@ -26,47 +31,44 @@ "```bash\n", "pip install flaml[notebook]\n", "```" - ], - "metadata": { - "slideshow": { - "slide_type": "slide" - } - } + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": {}, + "outputs": [], "source": [ "!pip install flaml[notebook];" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "source": [ "## 2. Regression Example\n", "### Load data and preprocess\n", "\n", "Download [houses dataset](https://www.openml.org/d/537) from OpenML. The task is to predict median price of the house in the region based on demographic composition and a state of housing market in the region." - ], - "metadata": { - "slideshow": { - "slide_type": "slide" - } - } + ] }, { "cell_type": "code", "execution_count": 1, - "source": [ - "from flaml.data import load_openml_dataset\n", - "X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir='./')" - ], + "metadata": { + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "load dataset from ./openml_ds537.pkl\n", "Dataset name: houses\n", @@ -75,44 +77,48 @@ ] } ], - "metadata": { - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - } + "source": [ + "from flaml.data import load_openml_dataset\n", + "X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=537, data_dir='./')" + ] }, { "cell_type": "markdown", - "source": [ - "### Run FLAML\n", - "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. " - ], "metadata": { "slideshow": { "slide_type": "slide" } - } + }, + "source": [ + "### Run FLAML\n", + "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. 
" + ] }, { "cell_type": "code", "execution_count": 2, - "source": [ - "''' import AutoML class from flaml package '''\n", - "from flaml import AutoML\n", - "automl = AutoML()" - ], - "outputs": [], "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] - } + }, + "outputs": [], + "source": [ + "''' import AutoML class from flaml package '''\n", + "from flaml import AutoML\n", + "automl = AutoML()" + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], "source": [ "settings = {\n", " \"time_budget\": 240, # total running time in seconds\n", @@ -122,25 +128,21 @@ " \"log_file_name\": 'houses_experiment.log', # flaml log file\n", " \"seed\": 7654321, # random seed\n", "}" - ], - "outputs": [], - "metadata": { - "slideshow": { - "slide_type": "slide" - } - } + ] }, { "cell_type": "code", "execution_count": 4, - "source": [ - "'''The main flaml automl API'''\n", - "automl.fit(X_train=X_train, y_train=y_train, **settings)" - ], + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "[flaml.automl: 09-29 23:10:08] {1446} INFO - Data split method: uniform\n", "[flaml.automl: 09-29 23:10:08] {1450} INFO - Evaluation method: cv\n", @@ -215,37 +217,35 @@ ] } ], + "source": [ + "'''The main flaml automl API'''\n", + "automl.fit(X_train=X_train, y_train=y_train, **settings)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Best model and metric" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] - } - }, - { - "cell_type": "markdown", - "source": [ - "### Best model and metric" - ], - "metadata": { - "slideshow": { - "slide_type": "slide" - } - } - }, - { - "cell_type": "code", - "execution_count": 5, - "source": [ - "''' retrieve best config'''\n", - "print('Best hyperparmeter config:', automl.best_config)\n", - "print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n", - "print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))" - ], + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Best hyperparmeter config: {'n_estimators': 363, 'num_leaves': 216, 'min_child_samples': 42, 'learning_rate': 0.09100963138990395, 'log_max_bin': 8, 'colsample_bytree': 0.8025848209352517, 'reg_alpha': 0.001113000336715291, 'reg_lambda': 76.50614276906414}\n", "Best r2 on validation data: 0.8436\n", @@ -253,22 +253,23 @@ ] } ], - "metadata": { - "slideshow": { - "slide_type": "slide" - }, - "tags": [] - } + "source": [ + "''' retrieve best config'''\n", + "print('Best hyperparmeter config:', automl.best_config)\n", + "print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n", + "print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))" + ] }, { "cell_type": "code", "execution_count": 6, - "source": [ - "automl.model.estimator" - ], + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "LGBMRegressor(colsample_bytree=0.8025848209352517,\n", @@ -278,36 +279,31 @@ " verbose=-1)" ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], - "metadata": { - "slideshow": { - "slide_type": 
"slide" - } - } + "source": [ + "automl.model.estimator" + ] }, { "cell_type": "code", "execution_count": 7, - "source": [ - "import matplotlib.pyplot as plt\n", - "plt.barh(automl.model.estimator.feature_name_, automl.model.estimator.feature_importances_)" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "" ] }, + "execution_count": 7, "metadata": {}, - "execution_count": 7 + "output_type": "execute_result" }, { - "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdUAAAD4CAYAAAC6/HyrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAfTklEQVR4nO3de3hdVZ3/8feHtLTcTIEiTyxIADtcW0IbkPvgDRX9IUi1CgMF52eHy6Diw2gVn7HgOAJlRkRRqDNIueqvXISnCJUfWOgPKSWhbdICBaRVqQiCEi4VhPb7+2Ov0E0ml3OSnZyTk8/rec6Tvddee63vOrvNN2vtnRNFBGZmZjZwm1U6ADMzs1rhpGpmZlYQJ1UzM7OCOKmamZkVxEnVzMysIKMqHYANrvHjx0djY2OlwzAzG1ZaW1ufj4gdyj3PSbXGNTY20tLSUukwzMyGFUm/7c95Xv41MzMriJOqmZlZQZxUzczMCuKkamZmVhAnVTMzs4I4qZqZmRXESdXMzKwgTqpmZmYF8Yc/1Lj2dR00zrq90mGYmfVo7QUfq3QIhfFM1czMrCBOqmZmZgVxUjUzMyuIk6qZmVlBnFTNzMwK4qRqZmZWECfVHEmvDEKbx0ialbaPlbR3P9pYJKm56NjMzKxYTqqDLCJui4gL0u6xQNlJ1czMhgcn1W4oM0fSSkntkqan8iPTrPFGSY9Juk6S0rGjU1mrpEslLUjlp0j6gaRDgGOAOZKWS9o9PwOVNF7S2rS9haSfSnpU0i3AFrnYjpL0gKSHJc2XtPXQvjtmZtYTf6JS9z4JNAH7AeOBhyTdl47tD+wD/AG4HzhUUgtwBXBERKyRdEPXBiPi15JuAxZExI0AKR9353RgfUTsJWky8HCqPx74BvDBiHhV0leBLwPn50+WNBOYCVD3jh36+RaYmVm5PFPt3mHADRGxISKeBe4FDkjHlkbE0xGxEVgONAJ7Ak9FxJpU538k1TIdAVwLEBFtQFsqP4hs+fh+ScuBGcAuXU+OiLkR0RwRzXVb1g8wFDMzK5VnquV7Pbe9gYG9h2+y6QebsSXUF3BXRHx2AH2amdkg8Uy1e4uB6ZLqJO1ANnNc2kv91cBukhrT/vQe6r0MbJPbXwtMTdvTcuX3AScASNoXmJzKl5AtN78nHdtK0t+VMB4zMxsCTqrdu4VsyXUFcA/wlYj4Y0+VI+KvwBnAnZJayZJnRzdVfwr8i6RlknYHLgZOl7SM7N5tpx8BW0t6lOx+aWvq50/AKcANktqAB8iWns3MrAooIiodQ02QtHVEvJKeBr4MeCIivlvpuMY0TIyGGZdUOgwzsx5V459+k9QaEWV/PoBnqsX5fHp4aBVQT/Y0sJmZjSB+UKkgaVZa8ZmpmZlVjmeqZmZmBXFSNTMzK4iTqpmZWUF8T7XGTZpQT0sVPllnZlaLPFM1MzMriJOqmZlZQZxUzczMCuKkamZmVhA/qFTj2td10Djr9kqHYWY2pCr10YeeqZqZmRXESdXMzKwgTqpmZmYFcVI1MzMriJOqmZlZQZxUzczMCuKkWgZJr/RxfJykM3L775J0Y9puknR0P/qcLemc8qM1M7Oh5qRarHHAW0k1Iv4QEdPSbhNQdlI1M7Phw0m1HyRtLeluSQ9Lapf0iXToAmB3ScslzZHUKGmlpM2B84Hp6dj0rjPQVK8xbZ8r6XFJ/w/YI1dnd0l3SmqVtFjSnkM2aDMz65M/Ual/XgOOi4iXJI0Hlki6DZgF7BsRTQCdSTIi/ibpX4HmiPjndGx2dw1Lmgp8hmxmOwp4GGhNh+cCp0XEE5LeC/wQeH83bcwEZgLUvWOHIsZrZmYlcFLtHwH/LukIYCMwAdixoLYPB26JiPUAKVkjaWvgEGC+pM66Y7prICLmkiVgxjRMjILiMjOzPjip9s+JwA7A1Ih4Q9JaYGyZbbzJ25ff+zp/M+DFzlmwmZlVH99T7Z964LmUUN8H7JLKXwa26eGcrsfWAlMAJE0Bdk3l9wHHStpC0jbA/wKIiJeANZI+lc6RpP2KG5KZmQ2Uk2r/XAc0S2oHTgYeA4iIF4D700NHc7qc8ytg784HlYCbgO0krQL+GXg8tfEw8DNgBXAH8FCujROBf5S0AlgFfAIzM6saivAtt1o2pmFiNMy4pNJhmJkNqYH+6TdJrRHRXO55nqmamZkVxEnVzMysIE6qZmZmBXFSNTMzK4h/T7XGTZpQT8sAb9ibmVlpPFM1MzMriJOqmZlZQZxUzczMCuKkamZmVhA/qFTj2td10Djr9kqHYVa1BvrJO2Z5nqmamZkVxEnVzMysIE6qZmZmBXFSNTMzK4iTqpmZWUGcVM3MzAoyIpKqpEZJKyvQ7ytl1p8t6ZxuyisSv5mZlWdEJFUzM7OhMJKSap2kH0taJemXkraQ1CRpiaQ2SbdI2hZA0iJJzWl7vKS1aXsfSUslLU/nTEzl/5Arv0JSXWenkr4taUXqZ8dU1ijpntTG3ZLe3TVYSVPTeSuAM3Pl3cZgZmaVN5KS6kTgsojYB3gROB64GvhqREwG2oFv9tHGacD3IqIJaAaelrQXMB04NJVvAE5M9bcClkTEfsB9wOdT+feBeanf64BLu+nrJ8BZ6dxeY+h6oqSZkloktWxY39HHkMzMrCgjKamuiYjlabsV2B0YFxH3prJ5wBF9tPEA8HVJXwV2iYi/Ah8ApgIPSVqe9ndL9f8GLMj12Zi2DwauT9vXAIflO5E0LsV2X65ObzG8TUTMjYjmiGiu27K+jyGZmVlRRlJSfT23vQEY10vdN9n03oztLIyI64FjgL8Cv5D0fkBks86m9NojImanU96IiMj1OeDPWu4hBjMzqwIjKal21QH8RdLhaf8koHPWupZs9gkwrfMESbsBT0XEpcCtwGTgbmCapHemOttJ2qWPvn8NfCZtnwgszh+MiBeBFyUdlqvTWwxmZlYFRnJSBZgBzJHUBjQB56fyi4HTJS0DxufqfxpYmZZ59wWujohHgG8Av0zt3AU
09NHvWcCpqf5JwBe7qXMqcFnqS73FUPJozcxsUGnT6qTVojENE6NhxiWVDsOsavlPv1l3JLVGRHO55430maqZmVlhnFTNzMwK4qRqZmZWECdVMzOzggz49yatuk2aUE+LH8QwMxsSnqmamZkVxEnVzMysIE6qZmZmBXFSNTMzK4gfVKpx7es6aJx1e6XDMLNhyp84VR7PVM3MzAripGpmZlYQJ1UzM7OCOKmamZkVxEnVzMysIE6qZmZmBXFSHQSSGiWtLKHOCbn9ZkmXDn50ZmY2WJxUK6cReCupRkRLRHyhcuGYmdlAjcikmmaJj0m6TtKjkm6UtKWkD0haJqld0pWSxqT6ayVdlMqXSnpPKr9K0rRcu6/00NdiSQ+n1yHp0AXA4ZKWSzpb0pGSFqRztpP0c0ltkpZImpzKZ6e4Fkl6SpKTsJlZFRmRSTXZA/hhROwFvAR8GbgKmB4Rk8g+ber0XP2OVP4D4JIy+nkO+FBETAGmA51LvLOAxRHRFBHf7XLOecCyiJgMfB24OndsT+DDwIHANyWN7tqhpJmSWiS1bFjfUUaoZmY2ECM5qf4+Iu5P29cCHwDWRMTjqWwecESu/g25rweX0c9o4MeS2oH5wN4lnHMYcA1ARNwDbC/pHenY7RHxekQ8T5awd+x6ckTMjYjmiGiu27K+jFDNzGwgRvJn/0aX/ReB7Uus37n9JukHE0mbAZt3c97ZwLPAfqnua/0JNuf13PYGRvY1NDOrKiN5pvpuSZ0zzhOAFqCx834pcBJwb67+9NzXB9L2WmBq2j6GbFbaVT3wTERsTG3WpfKXgW16iG0xcCKApCOB5yPipZJGZWZmFTOSZzmrgTMlXQk8AnwBWALMlzQKeAi4PFd/W0ltZDPFz6ayHwO3SloB3Am82k0/PwRuknRylzptwIZ07lXAstw5s4ErU3/rgRkDG6qZmQ0FRXRdBa19khqBBRGxb4n11wLN6T7msDKmYWI0zCjnuSozs01G6p9+k9QaEc3lnjeSl3/NzMwKNSKXfyNiLVDSLDXVbxy0YMzMrGZ4pmpmZlYQJ1UzM7OCOKmamZkVZETeUx1JJk2op2WEPr1nZjbUPFM1MzMriJOqmZlZQZxUzczMCuKkamZmVhA/qFTj2td10Djr9kqHYVb1RurH8VmxPFM1MzMriJOqmZlZQZxUzczMCuKkamZmVhAnVTMzs4I4qZqZmRWk6pKqpHGSzuijTqOkE0poq1HSyl6OnyLpB/2Js4jzzcystlRdUgXGAb0mVaAR6DOpVook//6vmdkIVI1J9QJgd0nLJc1Jr5WS2iVNz9U5PNU5O81IF0t6OL0OKaO/nSUtkvSEpG92Fkr6B0lLUx9XSKpL5adKelzSUuDQXP2rJF0u6UHgIklNkpZIapN0i6RtU72eyhdJ+q6kFkmPSjpA0s0prn9LdbaSdLukFek9mY6ZmVWNakyqs4DfREQTsARoAvYDPgjMkdSQ6iyOiKaI+C7wHPChiJgCTAcuLaO/A4HjgcnApyQ1S9ortXNoimMDcGLq+zyyZHoYsHeXtnYCDomILwNXA1+NiMlAO9CZsHsqB/hbRDQDlwO3AmcC+wKnSNoe+Ajwh4jYLyL2Be7sbkCSZqbk3LJhfUcZb4WZmQ1EtS9THgbcEBEbgGcl3QscALzUpd5o4AeSOhPg35XRx10R8QKApJtTn28CU4GHJAFsQZa43wssiog/pfo/69LX/IjYIKkeGBcR96byecD8nspz59+WvrYDqyLimdTPU8DOqfw/JF0ILIiIxd0NKCLmAnMBxjRMjDLeCzMzG4BqT6qlOht4lmxGuxnwWhnndk06AQiYFxFfyx+QdGwfbb1aRr/deT193Zjb7twfFRGPS5oCHA38m6S7I+L8AfZpZmYFqcbl35eBbdL2YmC6pDpJOwBHAEu71AGoB56JiI3ASUBdGf19SNJ2krYAjgXuB+4Gpkl6J0A6vgvwIPD3kraXNBr4VHcNRkQH8BdJh6eik4B7eyovNVBJ7wLWR8S1wBxgShnjNDOzQVZ1M9WIeEHS/elXYe4A2oAVZDPIr0TEHyW9AGyQtAK4CvghcJOkk8nuM5YzY1wK3ER2P/TaiGgBkPQN4JeSNgPeAM6MiCWSZgMPAC8Cy3tpdwZwuaQtgaeAU/soL8UksvvKG1NMp5dxrpmZDTJF+JZbLRvTMDEaZlxS6TDMqp7/9JvlSWpND46WpRqXf83MzIalqlv+HQySPgxc2KV4TUQcV4l4zMysNo2IpBoRC4GFlY7DzMxqm5d/zczMCjIiZqoj2aQJ9bT4AQwzsyHhmaqZmVlBnFTNzMwK4qRqZmZWECdVMzOzgvhBpRrXvq6Dxlm3VzoMM0v8yU21zTNVMzOzgjipmpmZFcRJ1czMrCBOqmZmZgVxUjUzMyuIk6qZmVlBnFTNzMwKUtNJVdI4SWf0UadR0gkltNUoaWVx0ZmZWa2p6aQKjAN6TapAI9BnUi2HJH+ohpnZCFTrSfUCYHdJyyXNSa+VktolTc/VOTzVOTvNSBdLeji9DimlI0mnSLpN0j3A3ZK2k/RzSW2SlkianOr1VD5b0rzU928lfVLSRSnWOyWNTvUukPRIOv/iHmKZKalFUsuG9R0DfQ/NzKxEtT6jmgXsGxFNko4HTgP2A8YDD0m6L9U5JyI+DiBpS+BDEfGapInADUBzif1NASZHxJ8lfR9YFhHHSno/cDXQBJzXQznA7sD7gL2BB4DjI+Irkm4BPiZpMXAcsGdEhKRx3QUREXOBuQBjGiZGqW+WmZkNTK3PVPMOA26IiA0R8SxwL3BAN/VGAz+W1A7MJ0twpborIv6c6+8agIi4B9he0jt6KQe4IyLeANqBOuDOVN5OtkzdAbwG/LekTwLry4jNzMwG2UhKqqU6G3iWbEbbDGxexrmvDrDv1wEiYiPwRkR0zjI3AqMi4k3gQOBG4ONsSrpmZlYFaj2pvgxsk7YXA9Ml1UnaATgCWNqlDkA98ExKbCeRzRj7YzFwIoCkI4HnI+KlXsr7JGlroD4ifkGW/PfrZ2xmZjYIavqeakS8IOn+9KswdwBtwAoggK9ExB8lvQBskLQCuAr4IXCTpJPJZoL9nX3OBq6U1Ea2TDujj/JSbAPcKmksIODL/YzNzMwGgTatMFotGtMwMRpmXFLpMMws8d9THR4ktUZEqQ+pvqXWl3/NzMyGTE0v/w4GSR8GLuxSvCYijqtEPGZmVj2cVMsUEQuBhZWOw8zMqo+Tao2bNKGeFt/DMTMbEr6namZmVhAnVTMzs4I4qZqZmRXESdXMzKwgflCpxrWv66Bx1u2VDsOsZvjDG6w3nqmamZkVxEnVzMysIE6qZmZmBXFSNTMzK4iTqpmZWUGcVM3MzAripGpmZlaQPpOqpEZJKwcrAEm/Hqy2Byo/dknNki6tdExmZla9Kv7hDxFxSKVjKEVEtAAtlY7DzMyqV6nLv3WSfixplaRfStpCUpOkJZLaJN0iaVsASYskNaft8ZLWpu19JC2VtDydMzGVv5K+HpnOvVHSY5Kuk6R07OhU1irpUk
kLegpU0mxJ8yQtlvRbSZ+UdJGkdkl3Shqd6k2VdG9qc6Gkhlz5CkkrgDNz7R7Z2a+kAyU9IGmZpF9L2iOVnyLp5tTPE5Iu6u1NlfQjSS3pfT0vV97teCVtJenK9D4uk/SJHtqdmdpt2bC+o7cQzMysQKUm1YnAZRGxD/AicDxwNfDViJgMtAPf7KON04DvRUQT0Aw83U2d/YEvAXsDuwGHShoLXAF8NCKmAjuUEO/uwPuBY4BrgV9FxCTgr8DHUmL9PjAttXkl8O107k+AsyJiv17afww4PCL2B/4V+PfcsSZgOjAJmC5p517aOTcimoHJwN9LmtzHeM8F7omIA4H3AXMkbdW10YiYGxHNEdFct2V9L92bmVmRSl3+XRMRy9N2K1nSGhcR96ayecD8Ptp4ADhX0k7AzRHxRDd1lkbE0wCSlgONwCvAUxGxJtW5AZjZR193RMQbktqBOuDOVN6e2twD2Be4K02G64BnJI1L47ov1b8G+Gg37dcD89JsO4DRuWN3R0RHGsMjwC7A73uI89OSZpJdhwayHyY262W8RwHHSDon7Y8F3g082vvbYWZmQ6HUpPp6bnsDMK6Xum+yaQY8trMwIq6X9CDwMeAXkv4pIu7po5/+3vN9PfW5UdIbERGpfGNqU8CqiDg4f1JKqqX4Ftns9zhJjcCirn0nPY5B0q7AOcABEfEXSVeRe796IOD4iFhdYpxmZjaE+vsrNR3AXyQdnvZPAjpnrWuBqWl7WucJknYjm4FdCtxKtuRZitXAbil5Qba0OlCrgR0kHZxiGy1pn4h4EXhR0mGp3ok9nF8PrEvbp/QzhncArwIdknZk04y4t/EuBM7K3Wvev599m5nZIBjI76nOILun10Z2H/H8VH4xcLqkZcD4XP1PAyvTsu6+ZPdk+xQRfwXOAO6U1Aq8TJbU+y0i/kaW8C9MDyQtBzqfQj4VuCzFqR6auAj4Thpjv2bTEbECWEZ2f/Z64P5U3tt4v0W21NwmaVXaNzOzKqFNK6PVS9LWEfFKmqFdBjwREd+tdFyDpcjxjmmYGA0zLik2QLMRzH9PdWSQ1JoeJC3LcPlEpc+nmeMqsqXXKyocz2AbaeM1M6sJFf/wh1KkWdrbZmqSTgW+2KXq/RFxJlUmPaA1pkvxSRHR3l397sZrZmbVb1gk1e5ExE/Ifqe06kXEeysdg5mZDb7hsvxrZmZW9YbtTNVKM2lCPS1+sMLMbEh4pmpmZlYQJ1UzM7OCOKmamZkVxEnVzMysIH5Qqca1r+ugcdbtlQ7DzKqYPyWqOJ6pmpmZFcRJ1czMrCBOqmZmZgVxUjUzMyuIk6qZmVlBnFTNzMwK4qRqZmZWkJpNqpIWSWpO27+QNK7Atk+TdHJR7ZmZWW0YER/+EBFHF9ze5UW2Z2ZmtaGqZqqSGiU9JukqSY9Luk7SByXdL+kJSQdK2krSlZKWSlom6RPp3C0k/VTSo5JuAbbItbtW0vi0/XNJrZJWSZqZq/OKpG9LWiFpiaQde4lztqRz0vYiSRemeB6XdHgqr5N0saSVktoknZXKP5Dibk/jGJOL8TuSlktqkTRF0kJJv5F0Wq7vf5H0UGrzvB7im5naaNmwvmMAV8TMzMpRVUk1eQ/wH8Ce6XUCcBhwDvB14Fzgnog4EHgfMEfSVsDpwPqI2Av4JjC1h/Y/FxFTgWbgC5K2T+VbAUsiYj/gPuDzZcQ8KsXzpdQ3wEygEWiKiMnAdZLGAlcB0yNiEtlKwem5dn4XEU3A4lRvGnAQcB6ApKOAicCBQBMwVdIRXYOJiLkR0RwRzXVb1pcxDDMzG4hqTKprIqI9IjYCq4C7IyKAdrIkdRQwS9JyYBEwFng3cARwLUBEtAFtPbT/BUkrgCXAzmRJCuBvwIK03Zr6KtXN3Zz3QeCKiHgzxfRnYI80vsdTnXkp7k63pa/twIMR8XJE/Al4Pd0TPiq9lgEPk/3QMREzM6sK1XhP9fXc9sbc/kayeDcAx0fE6vxJkvpsWNKRZMnu4IhYL2kRWVIGeCMlb1If5bw3nTGWe15P7eTH3bk/ChDwnYi4YgB9mJnZIKnGmWpfFgJnKWVRSfun8vvIloqRtC8wuZtz64G/pIS6J9nS6mC5C/gnSaNSTNsBq4FGSe9JdU4C7i2jzYXA5yRtndqcIOmdBcZsZmYDMByT6reA0UCbpFVpH+BHwNaSHgXOJ1uK7epOYFSqcwHZEvBg+S/gdynOFcAJEfEacCowX1I72Qy05CeJI+KXwPXAA+n8G4FtCo/czMz6RZtWPK0WjWmYGA0zLql0GGZWxfz3VP8nSa0R0VzuecNxpmpmZlaVqvFBpaoh6VzgU12K50fEtysRj5mZVTcn1V6k5OkEamZmJXFSrXGTJtTT4vslZmZDwvdUzczMCuKkamZmVhAnVTMzs4I4qZqZmRXESdXMzKwgTqpmZmYFcVI1MzMriJOqmZlZQZxUzczMCuK/UlPjJL1M9ndca8144PlKBzFIanVstTouqN2x1eq4oO+x7RIRO5TbqD+msPat7s+fL6p2klpqcVxQu2Or1XFB7Y6tVscFgzc2L/+amZkVxEnVzMysIE6qtW9upQMYJLU6LqjdsdXquKB2x1ar44JBGpsfVDIzMyuIZ6pmZmYFcVI1MzMriJNqjZL0EUmrJT0paVal4ymFpJ0l/UrSI5JWSfpiKt9O0l2Snkhft03lknRpGmObpCm5tmak+k9ImlGpMeVJqpO0TNKCtL+rpAdT/D+TtHkqH5P2n0zHG3NtfC2Vr5b04cqM5O0kjZN0o6THJD0q6eBauGaSzk7/DldKukHS2OF6zSRdKek5SStzZYVdI0lTJbWncy6VpAqOa076t9gm6RZJ43LHur0WPX2/7Ol69yoi/KqxF1AH/AbYDdgcWAHsXem4Soi7AZiStrcBHgf2Bi4CZqXyWcCFafto4A5AwEHAg6l8O+Cp9HXbtL1tFYzvy8D1wIK0/3+Az6Tty4HT0/YZwOVp+zPAz9L23ulajgF2Tde4rgrGNQ/432l7c2DccL9mwARgDbBF7lqdMlyvGXAEMAVYmSsr7BoBS1NdpXM/WsFxHQWMStsX5sbV7bWgl++XPV3vXmOq1D9avwb1H9rBwMLc/teAr1U6rn6M41bgQ2SfCNWQyhrIPtAC4Args7n6q9PxzwJX5MrfVq9CY9kJuBt4P7AgffN5Pvef/61rBiwEDk7bo1I9db2O+XoVHFc9WfJRl/Jhfc3IkurvUwIZla7Zh4fzNQMauySfQq5ROvZYrvxt9YZ6XF2OHQdcl7a7vRb08P2yt/+jvb28/FubOr8hdHo6lQ0baflsf+BBYMeIeCYd+iOwY9ruaZzVOP5LgK8AG9P+9sCLEfFm2s/H+Fb86XhHql+N49oV+BPwk7S0/V+StmKYX7OIWAdcDPwOeIbsGrRSG9esU1HXaELa7lpeDT5HNnOG8sfV2//RHjmpWtWRtDVwE/CliHgpfyyyHxmH1e+BSfo48FxEtFY6lkEwimz57UcRsT/wKtlS4luG6TXbFvgE2Q8N7wK2Aj5S0aAG0XC8R
n2RdC7wJnDdUPbrpFqb1gE75/Z3SmVVT9JosoR6XUTcnIqfldSQjjcAz6XynsZZbeM/FDhG0lrgp2RLwN8Dxknq/PztfIxvxZ+O1wMvUH3jguyn96cj4sG0fyNZkh3u1+yDwJqI+FNEvAHcTHYda+GadSrqGq1L213LK0bSKcDHgRPTDwxQ/rheoOfr3SMn1dr0EDAxPbm2OdmDE7dVOKY+pScG/xt4NCL+M3foNqDzScMZZPdaO8tPTk8rHgR0pOWshcBRkrZNM46jUllFRMTXImKniGgkuxb3RMSJwK+Aaala13F1jndaqh+p/DPpSdNdgYlkD4hUTET8Efi9pD1S0QeARxjm14xs2fcgSVumf5ed4xr21yynkGuUjr0k6aD0Xp2ca2vISfoI2a2WYyJife5QT9ei2++X6fr1dL17Vokb5n4N/ovsCb7HyZ5qO7fS8ZQY82FkS1BtwPL0Oprs3sbdwBPA/wW2S/UFXJbG2A4059r6HPBkep1a6bHl4jqSTU//7pb+Uz8JzAfGpPKxaf/JdHy33PnnpvGuZoiesCxhTE1AS7puPyd7MnTYXzPgPOAxYCVwDdlTo8PymgE3kN0bfoNsdeEfi7xGQHN6n34D/IAuD64N8bieJLtH2vk95PK+rgU9fL/s6Xr39vLHFJqZmRXEy79mZmYFcVI1MzMriJOqmZlZQZxUzczMCuKkamZmVhAnVTMzs4I4qZqZmRXk/wOt+9wyU2lLXwAAAABJRU5ErkJggg==", "text/plain": [ @@ -316,40 +312,44 @@ }, "metadata": { "needs_background": "light" - } + }, + "output_type": "display_data" } ], - "metadata": {} + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.barh(automl.model.estimator.feature_name_, automl.model.estimator.feature_importances_)" + ] }, { "cell_type": "code", "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], "source": [ "''' pickle and save the automl object '''\n", "import pickle\n", "with open('automl.pkl', 'wb') as f:\n", " pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)" - ], - "outputs": [], - "metadata": { - "slideshow": { - "slide_type": "slide" - } - } + ] }, { "cell_type": "code", "execution_count": 9, - "source": [ - "''' compute predictions of testing dataset ''' \n", - "y_pred = automl.predict(X_test)\n", - "print('Predicted labels', y_pred)\n", - "print('True labels', y_test)" - ], + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Predicted labels [143391.65036598 245535.13731975 153171.44071644 ... 
184354.52735665\n", " 235510.49470402 282617.22858849]\n", @@ -368,27 +368,26 @@ ] } ], + "source": [ + "''' compute predictions of testing dataset ''' \n", + "y_pred = automl.predict(X_test)\n", + "print('Predicted labels', y_pred)\n", + "print('True labels', y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] - } - }, - { - "cell_type": "code", - "execution_count": 10, - "source": [ - "''' compute different metric values on testing dataset'''\n", - "from flaml.ml import sklearn_metric_loss_score\n", - "print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n", - "print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n", - "print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))" - ], + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "r2 = 0.8505434326525669\n", "mse = 1975592613.1389656\n", @@ -396,28 +395,27 @@ ] } ], - "metadata": { - "slideshow": { - "slide_type": "slide" - }, - "tags": [] - } + "source": [ + "''' compute different metric values on testing dataset'''\n", + "from flaml.ml import sklearn_metric_loss_score\n", + "print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n", + "print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n", + "print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))" + ] }, { "cell_type": "code", "execution_count": 11, - "source": [ - "from flaml.data import get_output_from_log\n", - "time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n", - " get_output_from_log(filename=settings['log_file_name'], time_budget=60)\n", - "\n", - "for config in config_history:\n", - " print(config)" - ], + "metadata": { + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "{'Current Learner': 'lgbm', 'Current Sample': 15480, 'Current Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0}}\n", "{'Current Learner': 'lgbm', 'Current Sample': 15480, 'Current Hyper-parameters': {'n_estimators': 4, 'num_leaves': 12, 'min_child_samples': 15, 'learning_rate': 0.2284139062380884, 'log_max_bin': 9, 'colsample_bytree': 1.0, 'reg_alpha': 0.0014700173967242716, 'reg_lambda': 7.624911621832711}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 4, 'num_leaves': 12, 'min_child_samples': 15, 'learning_rate': 0.2284139062380884, 'log_max_bin': 9, 'colsample_bytree': 1.0, 'reg_alpha': 0.0014700173967242716, 'reg_lambda': 7.624911621832711}}\n", @@ -430,16 +428,37 @@ ] } ], - "metadata": { - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - } + "source": [ + "from flaml.data import get_output_from_log\n", + "time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \\\n", + " get_output_from_log(filename=settings['log_file_name'], time_budget=60)\n", + "\n", + "for config in config_history:\n", + " print(config)" + ] }, { "cell_type": "code", "execution_count": 12, + 
"metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAc/UlEQVR4nO3df7xVdZ3v8debI8rx55FABw4glEj5oyTJruWUOjmgU4JpjnpnbtotdCadRr2UNGam15tF2bXHJR10TO2qiIaIRjFO/qj8BSgqguHgj4SDP1DEX51E4DN/rHVosd1ns4Gz9t5nr/fz8diPs9d3fddan73E/dnf73et71JEYGZmxdWn3gGYmVl9ORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBWQWS/lLS0nrHYZYnJwJrWJKek/SZesYQEb+NiFF57V/SWEm/kfSmpFWS7pV0TF7HMyvHicAKTVJLHY99PHAzcB0wBNgTOB/43FbsS5L8/7NtFf/DsV5HUh9J50p6WtKrkmZI6p9Zf7OkFyW9nv7a3i+z7hpJl0uaI+lt4PC05fG/JD2ebnOTpH5p/cMkrchs323ddP3XJb0gaaWkL0sKSXuX+QwCLgUuioirIuL1iNgQEfdGxFfSOhdI+v+ZbYan+9suXb5H0sWS7gP+CEyStKDkOGdJmp2+30HSDyQ9L+klSVdIat3G/xzWBJwIrDc6E5gAfBoYDLwGTM2s/yUwEtgDeAS4vmT7k4GLgV2A36VlJwDjgBHAh4FTKhy/bF1J44Czgc8AewOHVdjHKGAocEuFOtX4e2AiyWe5AhglaWRm/cnADen7S4B9gAPT+NpJWiBWcE4E1hudDvxLRKyIiHeAC4Dju34pR8TVEfFmZt1HJO2W2f62iLgv/QX+p7TsxxGxMiJWA7eTfFl2p7u6JwA/jYjFEfHH9NjdeV/694VqP3Q3rkmPty4iXgduA04CSBPCB4HZaQtkInBWRKyOiDeB/wOcuI3HtybgRGC90V7ArZLWSFoDPAmsB/aU1CLpkrTb6A3guXSbAZntl5fZ54uZ938Edq5w/O7qDi7Zd7njdHk1/TuoQp1qlB7jBtJEQNIamJUmpYHAjsDDmfP2q7TcCs6JwHqj5cBREdGWefWLiA6SL7/xJN0zuwHD022U2T6vKXdfIBn07TK0Qt2lJJ/juAp13ib58u7yF2XqlH6WO4GBkg4kSQhd3UKvAJ3AfplztltEVEp4VhBOBNbo+krql3ltR9IXfrGkvQAkDZQ0Pq2/C/AOyS/uHUm6P2plBnCqpA9J2hH4VncVI5n//WzgW5JOlbRrOgh+qKRpabVHgU9JGpZ2bU3eXAAR8S7JlUhTgP4kiYGI2ABcCfxI0h4Aktoljd3qT2tNw4nAGt0ckl+yXa8LgMuA2cC/S3oTeBD4eFr/OuAPQAewJF1XExHxS+DHwN3Assyx3+mm/i3A3wJfAlYCLwH/m6Sfn4i4E7gJeBx4GLijylBuIGkR3RwR6zLl3+iKK+02+w+SQWsrOPnBNGb5kPQh4Algh5IvZLOG4haBWQ+SdGx6vf7uwPeA250ErNE5EZj1rNOAl4GnSa5k+of6hmO2ee4aMjMrOLcIzMwKbrt6B7ClBgwYEMOHD693GGZmvcrDDz/8SkSUvYGw1yWC4cOHs2DBgs1XNDOzjST9obt17hoyMys4JwIzs4JzIjAzKzgnAjOzgnMiMDMruF531ZCZWdHMWtjBlLlLWbmmk8FtrUwaO4oJo9t7bP9OBGZmDWzWwg4mz1xE57vrAehY08nkmYsAeiwZOBEUXN6/NMxs20yZu3RjEujS+e56psxd6kRg264WvzTMbNusXNO5ReVbw4mgwLr7pfH1Wx7nxnnP1ykqM8vq29KHtes3vKd8cFtrjx3DVw0VWHe/KMr9ozOz+hjav5U+2rSstW8Lk8b23MPl3CIosMFtrXSUSQbtba3cdNohdYjIzMrxVUNNohEHZSeNHbXJGAH0/C8NM9t2E0a35/p94URQA406KNt17K/f8jhr12+gvUESlJnVlhNBDTT6oOwOffswelibu4PMCsqDxTXQ6IOy+w7alfEHuhVgVlRuEdSAB2XNrJG5RVADk8aOorVvyyZlHpQ1s0bhFkENeFDWzBqZE0GNTBjdvnFg2N1BZtZIcu0akjRO0lJJyySdW2b9MEl3S1oo6XFJR+cZj5mZvVduiUBSCzAVOArYFzhJ0r4l1c4DZkTEaOBE4Cd5xWNmZuXl2SI4GFgWEc9ExFpgOjC+pE4Au6bvdwNW5hiPmZmVkWciaAeWZ5ZXpGVZFwB/J2kFMAc4s9yOJE2UtEDSglWrVuURq5lZYdX78tGTgGsiYghwNPAzSe+JKSKmRcSYiBgzcODAmgdpZtbM8kwEHcDQzPKQtCzrfwIzACLiAaAfMCDHmMzMrESeiWA+MFLSCEnbkwwGzy6p8zzwVwCSPkSSCNz3Y2ZWQ7ndRxAR6ySdAcwFWoCrI2KxpAuBBRExGzgHuFLSWSQDx6dEROQV07ZoxGmkzcx6Qq43lEXEHJJB4GzZ+Zn3S4BP5hlDT2jUaaTNzHqC7yyuQk9NI73khTfYd9Cum69oZlZD9b5qqFfoqWmkPd2zmTUitwiq4GmkzayZuUVAMgbwyUvuYsS5v+CTl9zFrIWbXuXqaaTNrJkVvkVQzUCwp5E2s2ZW+ESwJQPBfravmTWjwncNbclAsAd7zawZFb5F4IFgMyu6wrcIPBBsZkVX+BaBB4LNrOgKnwjAzxM2s2IrfNeQmVnRORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRVcrtNQSxoHXAa0AFdFxCUl638EHJ4u7gjsERFtecbUZdbCDqbMXcrKNZ0MbmulX98+DNh5h1oc2sysoeSWCCS1AFOBI4EVwHxJsyNiSVediDgrU/9MYHRe8WTNWtjB5JmLNj60vmNNJ31UiyObmTWePLuGDgaWRcQzEbEWmA6Mr1D/JODGHOPZaMrcpRuTQJcNActXl3+QvZlZM8szEbQDyzPLK9Ky95C0FzACuKub9RMlLZC0YNWqVdsc2MoyD6sHWLt+wzbv28yst2mUweITgVsiYn25lRExLSLGRMSYgQMHbvPBBre1li1v76bczKyZ5ZkIOoChmeUhaVk5J1KjbiGASWNH0dq3ZZOy1r4tTBo7qlYhmJk1jDwTwXxgpKQRkrYn+bKfXVpJ0geB3YEHcoxlExNGt/Pdzx/A9i3Jx29va+W7nz+ACaPL9lyZmTW13K4aioh1ks4A5pJcPnp1RCyWdCGwICK6ksKJwPSIiLxiKWfC6HZunPc8ADeddkgtD21m1lByvY8gIuYAc0rKzi9ZviDP
GMzMrLJGGSw2M7M6cSIwMys4JwIzs4JzIjAzKzgnAjOzgnMiMDMrOCcCM7OCcyIwMys4JwIzs4JzIjAzKzgnAjOzgnMiMDMrOCcCM7OCcyIwMys4JwIzs4KrmAgk7SrpA2XKP5xfSGZmVkvdJgJJJwC/B34uabGkj2VWX5N3YGZmVhuVWgTfBA6KiAOBU4GfSTo2XafcIzMzs5qo9KjKloh4ASAi5kk6HLhD0lCgps8XNjOz/FRqEbyZHR9Ik8JhwHhgv5zjMjOzGqnUIvgHSrqAIuJNSeOAE3KNyszMaqbbFkFEPAY8K+nukvJ3I+L63CMzM7OaqHj5aESsBzZI2q1G8ZiZWY1V6hrq8hawSNKdwNtdhRHxT7lFZWZmNVNNIpiZvszMrAltNhFExLVbu/N0YPkyoAW4KiIuKVPnBOACkktSH4uIk7f2eGZmtuWqaRFsFUktwFTgSGAFMF/S7IhYkqkzEpgMfDIiXpO0R17xmJlZeXlOOncwsCwinomItcB0knsQsr4CTI2I1wAi4uUc4zEzszLyTATtwPLM8oq0LGsfYB9J90l6MO1Keg9JEyUtkLRg1apVOYVrZlZMm+0akrQPMAnYK1s/Io7ooeOPJLljeQjwG0kHRMSabKWImAZMAxgzZoyntzAz60HVjBHcDFwBXAms34J9dwBDM8tD0rKsFcBDEfEuyc1rT5EkhvlbcBwzM9sG1SSCdRFx+Vbsez4wUtIIkgRwIlB6RdAs4CTgp5IGkHQVPbMVxzIzs61UzRjB7ZL+UdIgSf27XpvbKCLWAWcAc4EngRkRsVjShZKOSavNBV6VtAS4G5gUEa9u5WcxM7OtUE2L4Ivp30mZsgDev7kNI2IOMKek7PzM+wDOTl9mZlYH1dxQNqIWgZiZWX1Uc9VQX5IpqT+VFt0D/Gs6wGtmZr1cNV1DlwN9gZ+ky3+fln05r6DMzKx2qkkEH4uIj2SW75L0WF4BmZlZbVVz1dD67CMrJb2fLbufwMzMGlg1LYJJwN2SniF5dOVewKm5RmVmZjVTzVVDv05nCR2VFi2NiHfyDcvMzGql20Qg6YiIuEvS50tW7S2JiPDDaszMmkClFsGngbuAz5VZF/ipZWZmTaHbRBAR307fXhgRz2bXpfMHmZlZE6jmqqGflym7pacDMTOz+qg0RvBBYD9gt5Jxgl2BfnkHZmZmtVFpjGAU8FmgjU3HCd4kecSkmZk1gUpjBLcBt0k6JCIeqGFMZmZWQ9XcULZQ0ldJuok2dglFxJdyi8rMzGqmmsHinwF/AYwF7iV55OSbeQZlZma1U00i2DsivgW8HRHXAn8DfDzfsMzMrFaq6Rrqeu7AGkn7Ay8Ce+QXUn5mLexgytylrFzTyeC2Vvr17cOAnXeod1hmZnVVTSKYJml34FvAbGBn4PzKmzSeWQs7mDxzEZ3vJhOndqzppI/qHJSZWQOoZtK5q9K391LFc4ob1ZS5SzcmgS4bApav7qxTRGZmjaHSDWUVHygfEZf2fDj5Wbmm/Bf+2vUbahyJmVljqdQi2CX9Owr4GEm3ECQ3l83LM6g8DG5rpaNMMmhva61DNGZmjaPbq4Yi4jsR8R2Sy0U/GhHnRMQ5wEHAsFoF2FMmjR1Fa9+WTcpa+7YwaeyobrYwMyuGagaL9wTWZpbXpmW9yoTR7QB8/ZbHWbt+A+1trUwaO2pjuZlZUVWTCK4D5km6NV2eAFyTW0Q5mjC6nRvnPQ/ATacdUudozMwaw2ZvKIuIi0meUfxa+jo1Ir5bzc4ljZO0VNIySeeWWX+KpFWSHk1fX97SD2BmZtum0lVDu0bEG5L6A8+lr651/SNidaUdS2oBpgJHAiuA+ZJmR8SSkqo3RcQZWxm/mZlto0pdQzeQTEP9MMmjKbsoXd7cPQUHA8si4hkASdOB8UBpIjAzszqqNA31Z9O/W/tYynZgeWZ5BeXnKDpO0qeAp4CzImJ5aQVJE4GJAMOG9boLlszMGlqlrqGPVtowIh7pgePfDtwYEe9IOg24FjiizLGmAdMAxowZE6Xrzcxs61XqGvphhXVBmS/sEh3A0MzykLTszzuJeDWzeBXw/c3s08zMelilrqHDt3Hf84GRkkaQJIATgZOzFSQNiogX0sVjgCe38ZhmZraFqrmPgHT66X3Z9All11XaJiLWSToDmAu0AFdHxGJJFwILImI28E+SjgHWAauBU7bqU5iZ2VbbbCKQ9G3gMJJEMAc4CvgdyY1mFUXEnHSbbNn5mfeTgclbFLGZmfWoap5QdjzwV8CLEXEq8BFgt1yjMjOzmqkmEXRGxAZgnaRdgZfZdBDYzMx6sWrGCBZIagOuJLm57C3ggVyjMjOzmql0H8FU4IaI+Me06ApJvwJ2jYjHaxKdmZnlrlKL4CngB5IGATNIbvxaWJuwzMysVio9mOayiDgE+DTwKnC1pN9L+rakfWoWoZmZ5aqaaaj/EBHfi4jRwEkkzyPwjV9mZk1is4lA0naSPifpeuCXwFLg87lHZmZmNVFpsPhIkhbA0SQPq58OTIyIt2sUm5mZ1UClweLJJM8kOCciXqtRPGZmVmOVJp3b3OyiZmbWBKq5s9jMzJqYE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRWcE4GZWcHlmggkjZO0VNIySedWqHecpJA0Js94zMzsvXJLBJJagKnAUcC+wEmS9i1Tbxfga8BDecViZmbdy7NFcDCwLCKeiYi1JI+6HF+m3kXA94A/5RiLmZl1I89E0A4szyyvSMs2kvRRYGhE/KLSjiRNlLRA0oJVq1b1fKRmZgVWt8FiSX2AS4FzNlc3IqZFxJiIGDNw4MD8gzMzK5A8E0EHMDSzPCQt67ILsD9wj6TngP8GzPaAsZlZbeWZCOYDIyWNkLQ9cCIwu2tlRLweEQMiYnhEDAceBI6JiAU5xmRmZiVySwQRsQ44A5gLPAnMiIjFki6UdExexzUzsy2zXZ47j4g5wJySsvO7qXtYnrGYmVl5vrPYzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgck0EksZJWippmaRzy6w/XdIiSY9K+p2kffOMx8zM3iu3RCCpBZgKHAXsC5xU5ov+hog4ICIOBL4PXJpXPGZmVl6eLYKDgWUR8UxErAWmA+OzFSLijcziTkDkGI+ZmZWxXY77bgeWZ5ZXAB8vrSTpq8DZwPbAEeV2JGkiMBFg2LBhPR6omVmR1X2wOCKmRsQHgG8A53VTZ1pEjImIMQMHDqxtgGZmTS7PRNABDM0sD0nLujMdmJBjPGZmVkaeiWA+MFLSCEnbAycCs7MVJI3MLP4N8J85xmNmZmXkNkYQEesknQHMBVqAqyNisaQLgQU
RMRs4Q9JngHeB14Av5hWPmZmVl+dgMRExB5hTUnZ+5v3X8jy+mZltXt0Hi83MrL6cCMzMCs6JwMys4JwIzMwKLtfB4kYxa2EHU+YuZeWaTvq29GFo/9Z6h2Rm1jCavkUwa2EHk2cuomNNJwGsXb+BZ195m1kLK93bZmZWHE2fCKbMXUrnu+s3KdsQSbmZmRUgEaxc07lF5WZmRdP0iWBwW/nxgO7KzcyKpukTwaSxo2jt27JJWWvfFiaNHVWniMzMGkvTXzU0YXQ7wMarhga3tTJp7KiN5WZmRdf0iQCSZOAvfjOz8pq+a8jMzCpzIjAzKzgnAjOzgnMiMDMrOCcCM7OCU0TUO4YtImkV8Icqqw8AXskxnJ7Wm+LtTbFC74q3N8UKvSveIse6V0QMLLei1yWCLSFpQUSMqXcc1epN8famWKF3xdubYoXeFa9jLc9dQ2ZmBedEYGZWcM2eCKbVO4At1Jvi7U2xQu+KtzfFCr0rXsdaRlOPEZiZ2eY1e4vAzMw2w4nAzKzgmjYRSBonaamkZZLOrXc8lUh6TtIiSY9KWlDveEpJulrSy5KeyJT1l3SnpP9M/+5ezxi7dBPrBZI60vP7qKSj6xljlqShku6WtETSYklfS8sb7vxWiLXhzq+kfpLmSXosjfU7afkISQ+l3ws3Sdq+3rFCxXivkfRs5twemMvxm3GMQFIL8BRwJLACmA+cFBFL6hpYNyQ9B4yJiIa80UXSp4C3gOsiYv+07PvA6oi4JE20u0fEN+oZZxpXuVgvAN6KiB/UM7ZyJA0CBkXEI5J2AR4GJgCn0GDnt0KsJ9Bg51eSgJ0i4i1JfYHfAV8DzgZmRsR0SVcAj0XE5fWMFSrGezpwR0Tckufxm7VFcDCwLCKeiYi1wHRgfJ1j6rUi4jfA6pLi8cC16ftrSb4Q6q6bWBtWRLwQEY+k798EngTaacDzWyHWhhOJt9LFvukrgCOAri/VhjivUDHemmjWRNAOLM8sr6BB/8GmAvh3SQ9LmljvYKq0Z0S8kL5/EdiznsFU4QxJj6ddR3XvZilH0nBgNPAQDX5+S2KFBjy/klokPQq8DNwJPA2siYh1aZWG+l4ojTcius7txem5/ZGkHfI4drMmgt7m0Ij4KHAU8NW0e6PXiKR/sZH7GC8HPgAcCLwA/LC+4byXpJ2BnwP/HBFvZNc12vktE2tDnt+IWB8RBwJDSHoJPljnkCoqjVfS/sBkkrg/BvQHcukebNZE0AEMzSwPScsaUkR0pH9fBm4l+Ufb6F5K+4y7+o5frnM83YqIl9L/yTYAV9Jg5zftE/45cH1EzEyLG/L8lou10c9vRKwB7gYOAdokdT2ityG/FzLxjku74yIi3gF+Sk7ntlkTwXxgZHqFwPbAicDsOsdUlqSd0oE3JO0E/DXwROWtGsJs4Ivp+y8Ct9Uxloq6vlBTx9JA5zcdJPw34MmIuDSzquHOb3exNuL5lTRQUlv6vpXkwpEnSb5gj0+rNcR5hW7j/X3mx4BIxjNyObdNedUQQHoJ2/8FWoCrI+LiOodUlqT3k7QCALYDbmi0WCXdCBxGMi3uS8C3gVnADGAYybTgJ0RE3Qdpu4n1MJJuiwCeA07L9L/XlaRDgd8Ci4ANafE3SfreG+r8Voj1JBrs/Er6MMlgcAvJD94ZEXFh+v/bdJJuloXA36W/tuuqQrx3AQMBAY8Cp2cGlXvu+M2aCMzMrDrN2jVkZmZVciIwMys4JwIzs4JzIjAzKzgnAjOzgnMisIaS3kb/z5nluZKuyiz/UNLZFba/RtLx6ft7JL3n4d+S+kq6JJ3Z8xFJD0g6Kl33nKQBWxH3xuN2s35qOnvkEkmdmdkkj5c0p+sa8p4kaZCkOyqs317SbzI3WFlBORFYo7kP+ASApD4k9wPsl1n/CeD+bTzGRcAgYP90ao8JwC7buM+KIuKr6fQBRwNPR8SB6euWiDg6vZu0p51NcqdvdzGtBX4N/G0Ox7ZexInAGs39JFMBQJIAngDelLR7OuHWh4BHJJ0vab6kJyRNS++83CxJOwJfAc7supEonSJhRpm6Z6f7f6KklfI/0knAHpP0szLbXZS2EFqqjOk5SQMkDZf0+3TbpyRdL+kzku5LWy8Hp/V3Sid3mydpoaTuZtY9DvhVus1+af1H09hHpnVmAf+9mjiteblJaA0lIlZKWidpGMmv/wdIZog8BHgdWBQRayX9v4i4ECD9Mv4scHsVh9gbeL50YrdSkg4CTgU+TnJX50OS7gXWAucBn4iIVyT1L9luCknr4tTYurs19wa+AHyJZKqUk4FDgWNI7uKdAPwLcFdEfCntUpon6T8i4u1MHCOA1zJ3zZ4OXBYR16fTrnQlqSdIJjSzAnOLwBrR/SRJoCsRPJBZvi+tc7iSJ00tIpljfr9yO9oGhwK3RsTb6S39M4G/TI91c9dDhEqmffgWsFtEnL6VSQDg2YhYlE7gthj4dbqvRcDwtM5fA+cqmbL4HqAfyVQUWYOAVZnlB4BvSvoGsFdEdKbxrwfWds13ZcXkRGCNqGuc4ACSX6wPkrQIPgHcL6kf8BPg+Ig4gKQfvF+V+14GDJO0a49HnfyCP6i0lbCFsvPebMgsb+DPLXgBx2XGGYZFxJMl++kkc04i4gaSVkUnMEfSEZm6OwB/2oaYrZdzIrBGdD9JV8/qdHrj1UAbSTK4nz9/wb2iZG78bq/WKRURfySZQfOytIuka+bHL5RU/S0wQdKOSmaFPTYtuwv4gqT3pdtmv/R/BVwC/CLnX9hzgTO7xkUkjS5T5yn+3ILomtzwmYj4McmMmx9Oy98HvBIR7+YYrzU4JwJrRItIrhZ6sKTs9Yh4Jb3C5kqS1sJckl/iW+I8km6TJUoecn8HUPowmEeAa4B5JDOBXhURCyNiMXAxcK+kx4BLS7a7OY1ttpLphPNwEcmjDB+XtDhd3kQ6XvC0pL3TohOAJ9LupP2B69Lyw4Ff5BSn9RKefdSsSUk6FjgoIs6rUGcmcG5EPFW7yKzR+KohsyYVEbd2dWGVk3aNzXISMLcIzMwKzmMEZmYF50RgZlZwTgRmZgXnRGBmVnBOBGZmBfdf82rdfzULAWwAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "import numpy as np\n", "\n", @@ -449,132 +468,115 @@ "plt.scatter(time_history, 1 - np.array(valid_loss_history))\n", "plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')\n", "plt.show()" - ], - "outputs": [ - { - "output_type": "display_data", - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAc/UlEQVR4nO3df7xVdZ3v8debI8rx55FABw4glEj5oyTJruWUOjmgU4JpjnpnbtotdCadRr2UNGam15tF2bXHJR10TO2qiIaIRjFO/qj8BSgqguHgj4SDP1DEX51E4DN/rHVosd1ns4Gz9t5nr/fz8diPs9d3fddan73E/dnf73et71JEYGZmxdWn3gGYmVl9ORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBWQWS/lLS0nrHYZYnJwJrWJKek/SZesYQEb+NiFF57V/SWEm/kfSmpFWS7pV0TF7HMyvHicAKTVJLHY99PHAzcB0wBNgTOB/43FbsS5L8/7NtFf/DsV5HUh9J50p6WtKrkmZI6p9Zf7OkFyW9nv7a3i+z7hpJl0uaI+lt4PC05fG/JD2ebnOTpH5p/cMkrchs323ddP3XJb0gaaWkL0sKSXuX+QwCLgUuioirIuL1iNgQEfdGxFfSOhdI+v+ZbYan+9suXb5H0sWS7gP+CEyStKDkOGdJmp2+30HSDyQ9L+klSVdIat3G/xzWBJwIrDc6E5gAfBoYDLwGTM2s/yUwEtgDeAS4vmT7k4GLgV2A36VlJwDjgBHAh4FTKhy/bF1J44Czgc8AewOHVdjHKGAocEuFOtX4e2AiyWe5AhglaWRm/cnADen7S4B9gAPT+NpJWiBWcE4E1hudDvxLRKyIiHeAC4Dju34pR8TVEfFmZt1HJO2W2f62iLgv/QX+p7TsxxGxMiJWA7eTfFl2p7u6JwA/jYjFEfHH9NjdeV/694VqP3Q3rkmPty4iXgduA04CSBPCB4HZaQtkInBWRKyOiDeB/wOcuI3HtybgRGC90V7ArZLWSFoDPAmsB/aU1CLpkrTb6A3guXSbAZntl5fZ54uZ938Edq5w/O7qDi7Zd7njdHk1/TuoQp1qlB7jBtJEQNIamJUmpYHAjsDDmfP2q7TcCs6JwHqj5cBREdGWefWLiA6SL7/xJN0zuwHD022U2T6vKXdfIBn07TK0Qt2lJJ/juAp13ib58u7yF2XqlH6WO4GBkg4kSQhd3UKvAJ3AfplztltEVEp4VhBOBNbo+krql3ltR9IXfrGkvQAkDZQ0Pq2/C/AOyS/uHUm6P2plBnCqpA9J2hH4VncVI5n//WzgW5JOlbRrOgh+qKRpabVHgU9JGpZ2bU3eXAAR8S7JlUhTgP4kiYGI2ABcCfxI0h4Aktoljd3qT2tNw4nAGt0ckl+yXa8LgMuA2cC/S3oTeBD4eFr/OuAPQAewJF1XExHxS+DHwN3Assyx3+mm/i3A3wJfAlYCLwH/m6Sfn4i4E7gJeBx4GLijylBuIGkR3RwR6zLl3+iKK+02+w+SQWsrOPnBNGb5kPQh4Algh5IvZLOG4haBWQ+SdGx6vf7uwPeA250ErNE5EZj1rNOAl4GnSa5k+of6hmO2ee4aMjMrOLcIzMwKbrt6B7ClBgwYEMOHD693GGZmvcrDDz/8SkSUvYGw1yWC4cOHs2DBgs1XNDOzjST9obt17hoyMys4JwIzs4JzIjAzKzgnAjOzgnMiMDMruF531ZCZWdHMWtjBlLlLWbmmk8FtrUwaO4oJo9t7bP9OBGZmDWzWwg4mz1xE57vrAehY08nkmYsAeiwZOBEUXN6/NMxs20yZu3RjEujS+e56psxd6kRg264WvzTMbNusXNO5ReVbw4mgwLr7pfH1Wx7nxnnP1ykqM8vq29KHtes3vKd8cFtrjx3DVw0VWHe/KMr9ozOz+hjav5U+2rSstW8Lk8b23MPl3CIosMFtrXSUSQbtba3cdNohdYjIzMrxVUNNohEHZSeNHbXJGAH0/C8NM9t2E0a35/p94URQA406KNt17K/f8jhr12+gvUESlJnVlhNBDTT6oOwOffswelibu4PMCsqDxTXQ6IOy+w7alfEHuhVgVlRuEdSAB2XNrJG5RVADk8aOorVvyyZlHpQ1s0bhFkENeFDWzBqZE0GNTBjdvnFg2N1BZtZIcu0akjRO0lJJyySdW2b9MEl3S1oo6XFJR+cZj5mZvVduiUBSCzAVOArYFzhJ0r4l1c4DZkTEaOBE4Cd5xWNmZuXl2SI4GFgWEc9ExFpgOjC+pE4Au6bvdwNW5hiPmZmVkWciaAeWZ5ZXpGVZFwB/J2kFMAc4s9yOJE2UtEDSglWrVuURq5lZYdX78tGTgGsiYghwNPAzSe+JKSKmRcSYiBgzcODAmgdpZtbM8kwEHcDQzPKQtCzrfwIzACLiAaAfMCDHmMzMrESeiWA+MFLSCEnbkwwGzy6p8zzwVwCSPkSSCNz3Y2ZWQ7ndRxAR6ySdAcwFWoCrI2KxpAuBBRExGzgHuFLSWSQDx6dEROQV07ZoxGmkzcx6Qq43lEXEHJJB4GzZ+Zn3S4BP5hlDT2jUaaTNzHqC7yyuQk9NI73khTfYd9Cum69oZlZD9b5qqFfoqWmkPd2zmTUitwiq4GmkzayZuUVAMgbwyUvuYsS5v+CTl9zFrIWbXuXqaaTNrJkVvkVQzUCwp5E2s2ZW+ESwJQPBfravmTWjwncNbclAsAd7zawZFb5F4IFgMyu6wrcIPBBsZkVX+BaBB4LNrOgKnwjAzxM2s2IrfNeQmVnRORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRVcrtNQSxoHXAa0AFdFxCUl638EHJ4u7gjsERFtecbUZdbCDqbMXcrKNZ0MbmulX98+DNh5h1oc2sysoeSWCCS1AFOBI4EVwHxJsyNiSVediDgrU/9MYHRe8WTNWtjB5JmLNj60vmNNJ31UiyObmTWePLuGDgaWRcQzEbEWmA6Mr1D/JODGHOPZaMrcpRuTQJcNActXl3+QvZlZM8szEbQDyzPLK9Ky95C0FzACuKub9RMlLZ
C0YNWqVdsc2MoyD6sHWLt+wzbv28yst2mUweITgVsiYn25lRExLSLGRMSYgQMHbvPBBre1li1v76bczKyZ5ZkIOoChmeUhaVk5J1KjbiGASWNH0dq3ZZOy1r4tTBo7qlYhmJk1jDwTwXxgpKQRkrYn+bKfXVpJ0geB3YEHcoxlExNGt/Pdzx/A9i3Jx29va+W7nz+ACaPL9lyZmTW13K4aioh1ks4A5pJcPnp1RCyWdCGwICK6ksKJwPSIiLxiKWfC6HZunPc8ADeddkgtD21m1lByvY8gIuYAc0rKzi9ZviDPGMzMrLJGGSw2M7M6cSIwMys4JwIzs4JzIjAzKzgnAjOzgnMiMDMrOCcCM7OCcyIwMys4JwIzs4JzIjAzKzgnAjOzgnMiMDMrOCcCM7OCcyIwMys4JwIzs4KrmAgk7SrpA2XKP5xfSGZmVkvdJgJJJwC/B34uabGkj2VWX5N3YGZmVhuVWgTfBA6KiAOBU4GfSTo2XafcIzMzs5qo9KjKloh4ASAi5kk6HLhD0lCgps8XNjOz/FRqEbyZHR9Ik8JhwHhgv5zjMjOzGqnUIvgHSrqAIuJNSeOAE3KNyszMaqbbFkFEPAY8K+nukvJ3I+L63CMzM7OaqHj5aESsBzZI2q1G8ZiZWY1V6hrq8hawSNKdwNtdhRHxT7lFZWZmNVNNIpiZvszMrAltNhFExLVbu/N0YPkyoAW4KiIuKVPnBOACkktSH4uIk7f2eGZmtuWqaRFsFUktwFTgSGAFMF/S7IhYkqkzEpgMfDIiXpO0R17xmJlZeXlOOncwsCwinomItcB0knsQsr4CTI2I1wAi4uUc4zEzszLyTATtwPLM8oq0LGsfYB9J90l6MO1Keg9JEyUtkLRg1apVOYVrZlZMm+0akrQPMAnYK1s/Io7ooeOPJLljeQjwG0kHRMSabKWImAZMAxgzZoyntzAz60HVjBHcDFwBXAms34J9dwBDM8tD0rKsFcBDEfEuyc1rT5EkhvlbcBwzM9sG1SSCdRFx+Vbsez4wUtIIkgRwIlB6RdAs4CTgp5IGkHQVPbMVxzIzs61UzRjB7ZL+UdIgSf27XpvbKCLWAWcAc4EngRkRsVjShZKOSavNBV6VtAS4G5gUEa9u5WcxM7OtUE2L4Ivp30mZsgDev7kNI2IOMKek7PzM+wDOTl9mZlYH1dxQNqIWgZiZWX1Uc9VQX5IpqT+VFt0D/Gs6wGtmZr1cNV1DlwN9gZ+ky3+fln05r6DMzKx2qkkEH4uIj2SW75L0WF4BmZlZbVVz1dD67CMrJb2fLbufwMzMGlg1LYJJwN2SniF5dOVewKm5RmVmZjVTzVVDv05nCR2VFi2NiHfyDcvMzGql20Qg6YiIuEvS50tW7S2JiPDDaszMmkClFsGngbuAz5VZF/ipZWZmTaHbRBAR307fXhgRz2bXpfMHmZlZE6jmqqGflym7pacDMTOz+qg0RvBBYD9gt5Jxgl2BfnkHZmZmtVFpjGAU8FmgjU3HCd4kecSkmZk1gUpjBLcBt0k6JCIeqGFMZmZWQ9XcULZQ0ldJuok2dglFxJdyi8rMzGqmmsHinwF/AYwF7iV55OSbeQZlZma1U00i2DsivgW8HRHXAn8DfDzfsMzMrFaq6Rrqeu7AGkn7Ay8Ce+QXUn5mLexgytylrFzTyeC2Vvr17cOAnXeod1hmZnVVTSKYJml34FvAbGBn4PzKmzSeWQs7mDxzEZ3vJhOndqzppI/qHJSZWQOoZtK5q9K391LFc4ob1ZS5SzcmgS4bApav7qxTRGZmjaHSDWUVHygfEZf2fDj5Wbmm/Bf+2vUbahyJmVljqdQi2CX9Owr4GEm3ECQ3l83LM6g8DG5rpaNMMmhva61DNGZmjaPbq4Yi4jsR8R2Sy0U/GhHnRMQ5wEHAsFoF2FMmjR1Fa9+WTcpa+7YwaeyobrYwMyuGagaL9wTWZpbXpmW9yoTR7QB8/ZbHWbt+A+1trUwaO2pjuZlZUVWTCK4D5km6NV2eAFyTW0Q5mjC6nRvnPQ/ATacdUudozMwaw2ZvKIuIi0meUfxa+jo1Ir5bzc4ljZO0VNIySeeWWX+KpFWSHk1fX97SD2BmZtum0lVDu0bEG5L6A8+lr651/SNidaUdS2oBpgJHAiuA+ZJmR8SSkqo3RcQZWxm/mZlto0pdQzeQTEP9MMmjKbsoXd7cPQUHA8si4hkASdOB8UBpIjAzszqqNA31Z9O/W/tYynZgeWZ5BeXnKDpO0qeAp4CzImJ5aQVJE4GJAMOG9boLlszMGlqlrqGPVtowIh7pgePfDtwYEe9IOg24FjiizLGmAdMAxowZE6Xrzcxs61XqGvphhXVBmS/sEh3A0MzykLTszzuJeDWzeBXw/c3s08zMelilrqHDt3Hf84GRkkaQJIATgZOzFSQNiogX0sVjgCe38ZhmZraFqrmPgHT66X3Z9All11XaJiLWSToDmAu0AFdHxGJJFwILImI28E+SjgHWAauBU7bqU5iZ2VbbbCKQ9G3gMJJEMAc4CvgdyY1mFUXEnHSbbNn5mfeTgclbFLGZmfWoap5QdjzwV8CLEXEq8BFgt1yjMjOzmqkmEXRGxAZgnaRdgZfZdBDYzMx6sWrGCBZIagOuJLm57C3ggVyjMjOzmql0H8FU4IaI+Me06ApJvwJ2jYjHaxKdmZnlrlKL4CngB5IGATNIbvxaWJuwzMysVio9mOayiDgE+DTwKnC1pN9L+rakfWoWoZmZ5aqaaaj/EBHfi4jRwEkkzyPwjV9mZk1is4lA0naSPifpeuCXwFLg87lHZmZmNVFpsPhIkhbA0SQPq58OTIyIt2sUm5mZ1UClweLJJM8kOCciXqtRPGZmVmOVJp3b3OyiZmbWBKq5s9jMzJqYE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRWcE4GZWcE5EZiZFZwTgZlZwTkRmJkVnBOBmVnBORGYmRWcE4GZWcHlmggkjZO0VNIySedWqHecpJA0Js94zMzsvXJLBJJagKnAUcC+wEmS9i1Tbxfga8BDecViZmbdy7NFcDCwLCKeiYi1JI+6HF+m3kXA94A/5RiLmZl1I89E0A4szyyvSMs2kvRRYGhE/KLSjiRNlLRA0oJVq1b1fKRmZgVWt8FiSX2AS4FzNlc3IqZFxJiIGDNw4MD8gzMzK5A8E0EHMDSzPCQt67ILsD9wj6TngP8GzPaAsZlZbeWZCOYDIyWNkLQ9cCIwu2tlRLweEQMiYnhEDAceBI6JiAU5xmRmZiVySwQRsQ44A5gLPAnMiIjFki6UdExexzUzsy2zXZ47j4g5wJySsvO7qXtYnrGYmVl5vrPYzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgnAjMzArOicDMrOCcCMzMCs6JwMys4JwIzMwKzonAzKzgck0EksZJWippmaRzy
6w/XdIiSY9K+p2kffOMx8zM3iu3RCCpBZgKHAXsC5xU5ov+hog4ICIOBL4PXJpXPGZmVl6eLYKDgWUR8UxErAWmA+OzFSLijcziTkDkGI+ZmZWxXY77bgeWZ5ZXAB8vrSTpq8DZwPbAEeV2JGkiMBFg2LBhPR6omVmR1X2wOCKmRsQHgG8A53VTZ1pEjImIMQMHDqxtgGZmTS7PRNABDM0sD0nLujMdmJBjPGZmVkaeiWA+MFLSCEnbAycCs7MVJI3MLP4N8J85xmNmZmXkNkYQEesknQHMBVqAqyNisaQLgQURMRs4Q9JngHeB14Av5hWPmZmVl+dgMRExB5hTUnZ+5v3X8jy+mZltXt0Hi83MrL6cCMzMCs6JwMys4JwIzMwKLtfB4kYxa2EHU+YuZeWaTvq29GFo/9Z6h2Rm1jCavkUwa2EHk2cuomNNJwGsXb+BZ195m1kLK93bZmZWHE2fCKbMXUrnu+s3KdsQSbmZmRUgEaxc07lF5WZmRdP0iWBwW/nxgO7KzcyKpukTwaSxo2jt27JJWWvfFiaNHVWniMzMGkvTXzU0YXQ7wMarhga3tTJp7KiN5WZmRdf0iQCSZOAvfjOz8pq+a8jMzCpzIjAzKzgnAjOzgnMiMDMrOCcCM7OCU0TUO4YtImkV8Icqqw8AXskxnJ7Wm+LtTbFC74q3N8UKvSveIse6V0QMLLei1yWCLSFpQUSMqXcc1epN8famWKF3xdubYoXeFa9jLc9dQ2ZmBedEYGZWcM2eCKbVO4At1Jvi7U2xQu+KtzfFCr0rXsdaRlOPEZiZ2eY1e4vAzMw2w4nAzKzgmjYRSBonaamkZZLOrXc8lUh6TtIiSY9KWlDveEpJulrSy5KeyJT1l3SnpP9M/+5ezxi7dBPrBZI60vP7qKSj6xljlqShku6WtETSYklfS8sb7vxWiLXhzq+kfpLmSXosjfU7afkISQ+l3ws3Sdq+3rFCxXivkfRs5twemMvxm3GMQFIL8BRwJLACmA+cFBFL6hpYNyQ9B4yJiIa80UXSp4C3gOsiYv+07PvA6oi4JE20u0fEN+oZZxpXuVgvAN6KiB/UM7ZyJA0CBkXEI5J2AR4GJgCn0GDnt0KsJ9Bg51eSgJ0i4i1JfYHfAV8DzgZmRsR0SVcAj0XE5fWMFSrGezpwR0Tckufxm7VFcDCwLCKeiYi1wHRgfJ1j6rUi4jfA6pLi8cC16ftrSb4Q6q6bWBtWRLwQEY+k798EngTaacDzWyHWhhOJt9LFvukrgCOAri/VhjivUDHemmjWRNAOLM8sr6BB/8GmAvh3SQ9LmljvYKq0Z0S8kL5/EdiznsFU4QxJj6ddR3XvZilH0nBgNPAQDX5+S2KFBjy/klokPQq8DNwJPA2siYh1aZWG+l4ojTcius7txem5/ZGkHfI4drMmgt7m0Ij4KHAU8NW0e6PXiKR/sZH7GC8HPgAcCLwA/LC+4byXpJ2BnwP/HBFvZNc12vktE2tDnt+IWB8RBwJDSHoJPljnkCoqjVfS/sBkkrg/BvQHcukebNZE0AEMzSwPScsaUkR0pH9fBm4l+Ufb6F5K+4y7+o5frnM83YqIl9L/yTYAV9Jg5zftE/45cH1EzEyLG/L8lou10c9vRKwB7gYOAdokdT2ityG/FzLxjku74yIi3gF+Sk7ntlkTwXxgZHqFwPbAicDsOsdUlqSd0oE3JO0E/DXwROWtGsJs4Ivp+y8Ct9Uxloq6vlBTx9JA5zcdJPw34MmIuDSzquHOb3exNuL5lTRQUlv6vpXkwpEnSb5gj0+rNcR5hW7j/X3mx4BIxjNyObdNedUQQHoJ2/8FWoCrI+LiOodUlqT3k7QCALYDbmi0WCXdCBxGMi3uS8C3gVnADGAYybTgJ0RE3Qdpu4n1MJJuiwCeA07L9L/XlaRDgd8Ci4ANafE3SfreG+r8Voj1JBrs/Er6MMlgcAvJD94ZEXFh+v/bdJJuloXA36W/tuuqQrx3AQMBAY8Cp2cGlXvu+M2aCMzMrDrN2jVkZmZVciIwMys4JwIzs4JzIjAzKzgnAjOzgnMisIaS3kb/z5nluZKuyiz/UNLZFba/RtLx6ft7JL3n4d+S+kq6JJ3Z8xFJD0g6Kl33nKQBWxH3xuN2s35qOnvkEkmdmdkkj5c0p+sa8p4kaZCkOyqs317SbzI3WFlBORFYo7kP+ASApD4k9wPsl1n/CeD+bTzGRcAgYP90ao8JwC7buM+KIuKr6fQBRwNPR8SB6euWiDg6vZu0p51NcqdvdzGtBX4N/G0Ox7ZexInAGs39JFMBQJIAngDelLR7OuHWh4BHJJ0vab6kJyRNS++83CxJOwJfAc7supEonSJhRpm6Z6f7f6KklfI/0knAHpP0szLbXZS2EFqqjOk5SQMkDZf0+3TbpyRdL+kzku5LWy8Hp/V3Sid3mydpoaTuZtY9DvhVus1+af1H09hHpnVmAf+9mjiteblJaA0lIlZKWidpGMmv/wdIZog8BHgdWBQRayX9v4i4ECD9Mv4scHsVh9gbeL50YrdSkg4CTgU+TnJX50OS7gXWAucBn4iIVyT1L9luCknr4tTYurs19wa+AHyJZKqUk4FDgWNI7uKdAPwLcFdEfCntUpon6T8i4u1MHCOA1zJ3zZ4OXBYR16fTrnQlqSdIJjSzAnOLwBrR/SRJoCsRPJBZvi+tc7iSJ00tIpljfr9yO9oGhwK3RsTb6S39M4G/TI91c9dDhEqmffgWsFtEnL6VSQDg2YhYlE7gthj4dbqvRcDwtM5fA+cqmbL4HqAfyVQUWYOAVZnlB4BvSvoGsFdEdKbxrwfWds13ZcXkRGCNqGuc4ACSX6wPkrQIPgHcL6kf8BPg+Ig4gKQfvF+V+14GDJO0a49HnfyCP6i0lbCFsvPebMgsb+DPLXgBx2XGGYZFxJMl++kkc04i4gaSVkUnMEfSEZm6OwB/2oaYrZdzIrBGdD9JV8/qdHrj1UAbSTK4nz9/wb2iZG78bq/WKRURfySZQfOytIuka+bHL5RU/S0wQdKOSmaFPTYtuwv4gqT3pdtmv/R/BVwC/CLnX9hzgTO7xkUkjS5T5yn+3ILomtzwmYj4McmMmx9Oy98HvBIR7+YYrzU4JwJrRItIrhZ6sKTs9Yh4Jb3C5kqS1sJckl/iW+I8km6TJUoecn8HUPowmEeAa4B5JDOBXhURCyNiMXAxcK+kx4BLS7a7OY1ttpLphPNwEcmjDB+XtDhd3kQ6XvC0pL3TohOAJ9LupP2B69Lyw4Ff5BSn9RKefdSsSUk6FjgoIs6rUGcmcG5EPFW7yKzR+KohsyYVEbd2dWGVk3aNzXISMLcIzMwKzmMEZmYF50RgZlZwTgRmZgXnRGBmVnBOBGZmBfdf82rdfzULAWwAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - } - } - ], - "metadata": { - "slideshow": { - "slide_type": "slide" - } - } + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 3. Comparison with alternatives\n", "\n", "### FLAML's accuracy" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 13, - "source": [ - "print('flaml (4min) r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))" - ], + "metadata": { + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "flaml (4min) r2 = 0.8505434326525669\n" ] } ], - "metadata": { - "tags": [] - } + "source": [ + "print('flaml (4min) r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Default LightGBM" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 14, + "metadata": {}, + "outputs": [], "source": [ "from lightgbm import LGBMRegressor\n", "lgbm = LGBMRegressor()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 15, - "source": [ - "lgbm.fit(X_train, y_train)" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "LGBMRegressor()" ] }, + "execution_count": 15, "metadata": {}, - "execution_count": 15 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "lgbm.fit(X_train, y_train)" + ] }, { "cell_type": "code", "execution_count": 16, - "source": [ - "y_pred = lgbm.predict(X_test)\n", - "from flaml.ml import sklearn_metric_loss_score\n", - "print('default lgbm r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))" - ], + "metadata": { + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "default lgbm r2 = 0.8296179648694404\n" ] } ], - "metadata": { - "tags": [] - } + "source": [ + "y_pred = lgbm.predict(X_test)\n", + "from flaml.ml import sklearn_metric_loss_score\n", + "print('default lgbm r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Optuna LightGBM Tuner" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 17, + "metadata": {}, + "outputs": [], "source": [ "# !pip install optuna==2.8.0;" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 18, + "metadata": {}, + "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "train_x, val_x, train_y, val_y = train_test_split(X_train, y_train, test_size=0.1)\n", @@ -586,21 +588,20 @@ " \"metric\": \"regression\",\n", " \"verbosity\": -1,\n", "}" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 19, - "source": [ - "%%time\n", - "model = lgb.train(params, dtrain, valid_sets=[dtrain, dval], verbose_eval=10000) \n" - ], + "metadata": { + "tags": [ + "outputPrepend" + ] + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "\u001b[32m[I 2021-09-29 23:14:13,542]\u001b[0m A new study created in memory with name: no-name-c5c149a5-8d21-451d-8907-5246d780db77\u001b[0m\n", "feature_fraction, val_score: 2237193094.198954: 14%|#4 | 1/7 [00:01<00:09, 1.51s/it]\u001b[32m[I 2021-09-29 23:14:15,079]\u001b[0m Trial 0 finished with value: 2237193094.1989536 and parameters: {'feature_fraction': 0.4}. 
Best is trial 0 with value: 2237193094.1989536.\u001b[0m\n", @@ -680,71 +681,72 @@ ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "CPU times: user 5min 14s, sys: 16.8 s, total: 5min 31s\n", "Wall time: 5min 32s\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "\n" ] } ], - "metadata": { - "tags": [ - "outputPrepend" - ] - } + "source": [ + "%%time\n", + "model = lgb.train(params, dtrain, valid_sets=[dtrain, dval], verbose_eval=10000) \n" + ] }, { "cell_type": "markdown", - "source": [], - "metadata": {} + "metadata": {}, + "source": [] }, { "cell_type": "code", "execution_count": 20, - "source": [ - "y_pred = model.predict(X_test)\n", - "from flaml.ml import sklearn_metric_loss_score\n", - "print('Optuna LightGBM Tuner r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))" - ], + "metadata": { + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Optuna LightGBM Tuner r2 = 0.8444445782478855\n" ] } ], - "metadata": { - "tags": [] - } + "source": [ + "y_pred = model.predict(X_test)\n", + "from flaml.ml import sklearn_metric_loss_score\n", + "print('Optuna LightGBM Tuner r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 4. Add a customized LightGBM learner in FLAML\n", "The native API of LightGBM allows one to specify a custom objective function in the model constructor. You can easily enable it by adding a customized LightGBM learner in FLAML. In the following example, we show how to add such a customized LightGBM learner with a custom objective function." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Create a customized LightGBM learner with a custom objective function" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 21, + "metadata": {}, + "outputs": [], "source": [ "import numpy as np \n", "\n", @@ -778,36 +780,25 @@ "\n", " def __init__(self, **config):\n", " super().__init__(objective=my_loss_obj, **config)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Add the customized learner in FLAML" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 22, - "source": [ - "automl = AutoML()\n", - "automl.add_learner(learner_name='my_lgbm', learner_class=MyLGBM)\n", - "settings = {\n", - " \"time_budget\": 150, # total running time in seconds\n", - " \"metric\": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2']\n", - " \"estimator_list\": ['my_lgbm',], # list of ML learners; we tune lightgbm in this example\n", - " \"task\": 'regression', # task type \n", - " \"log_file_name\": 'houses_experiment_my_lgbm.log', # flaml log file\n", - "}\n", - "automl.fit(X_train=X_train, y_train=y_train, **settings)" - ], + "metadata": { + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "[flaml.automl: 09-29 23:19:46] {1446} INFO - Data split method: uniform\n", "[flaml.automl: 09-29 23:19:46] {1450} INFO - Evaluation method: cv\n", @@ -905,31 +896,29 @@ ] } ], - "metadata": { - "tags": [] - } + "source": [ + "automl = AutoML()\n", + "automl.add_learner(learner_name='my_lgbm', learner_class=MyLGBM)\n", + "settings = {\n", + " \"time_budget\": 150, # total running time in seconds\n", + " \"metric\": 'r2', # primary metrics for regression can be 
chosen from: ['mae','mse','r2']\n", + " \"estimator_list\": ['my_lgbm',], # list of ML learners; we tune lightgbm in this example\n", + " \"task\": 'regression', # task type \n", + " \"log_file_name\": 'houses_experiment_my_lgbm.log', # flaml log file\n", + "}\n", + "automl.fit(X_train=X_train, y_train=y_train, **settings)" + ] }, { "cell_type": "code", "execution_count": 23, - "source": [ - "print('Best hyperparmeter config:', automl.best_config)\n", - "print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n", - "print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))\n", - "\n", - "y_pred = automl.predict(X_test)\n", - "print('Predicted labels', y_pred)\n", - "print('True labels', y_test)\n", - "\n", - "from flaml.ml import sklearn_metric_loss_score\n", - "print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n", - "print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n", - "print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))" - ], + "metadata": { + "tags": [] + }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Best hyperparmeter config: {'n_estimators': 95, 'num_leaves': 221, 'min_child_samples': 10, 'learning_rate': 0.4130805075333343, 'log_max_bin': 10, 'colsample_bytree': 0.8422311526890249, 'reg_alpha': 0.007704104902643932, 'reg_lambda': 0.003151767359549649}\n", "Best r2 on validation data: 0.8368\n", @@ -954,9 +943,20 @@ ] } ], - "metadata": { - "tags": [] - } + "source": [ + "print('Best hyperparmeter config:', automl.best_config)\n", + "print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))\n", + "print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))\n", + "\n", + "y_pred = automl.predict(X_test)\n", + "print('Predicted labels', y_pred)\n", + "print('True labels', y_test)\n", + "\n", + "from flaml.ml import sklearn_metric_loss_score\n", + "print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))\n", + "print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))\n", + "print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))" + ] } ], "metadata": { @@ -964,22 +964,22 @@ "hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544" }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.8.0 64-bit ('blend': conda)" + "display_name": "Python 3.8.0 64-bit ('blend': conda)", + "name": "python3" }, "language_info": { - "name": "python", - "version": "3.8.0", - "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, - "pygments_lexer": "ipython3", + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", "nbconvert_exporter": "python", - "file_extension": ".py" + "pygments_lexer": "ipython3", + "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/notebook/automl_time_series_forecast.ipynb b/notebook/automl_time_series_forecast.ipynb index 14e6f327f..078841ded 100644 --- a/notebook/automl_time_series_forecast.ipynb +++ b/notebook/automl_time_series_forecast.ipynb @@ -13,7 +13,7 @@ "source": [ "## 1. Introduction\n", "\n", - "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy to use and extend, such as adding new learners. 
FLAML can\n", + "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models with low computational cost. It is fast and economical. The simple and lightweight design makes it easy to use and extend, such as adding new learners. FLAML can\n", "\n", " - serve as an economical AutoML engine,\n", " - be used as a fast hyperparameter tuning tool, or\n", diff --git a/notebook/automl_xgboost.ipynb b/notebook/automl_xgboost.ipynb index d714b39ef..e5b41f167 100644 --- a/notebook/automl_xgboost.ipynb +++ b/notebook/automl_xgboost.ipynb @@ -18,7 +18,7 @@ "## 1. Introduction\n", "\n", "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n", - "with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy \n", + "with low computational cost. It is fast and economical. The simple and lightweight design makes it easy \n", "to use and extend, such as adding new learners. FLAML can \n", "- serve as an economical AutoML engine,\n", "- be used as a fast hyperparameter tuning tool, or \n", diff --git a/notebook/integrate_azureml.ipynb b/notebook/integrate_azureml.ipynb index f66e96de1..938fbb8d2 100644 --- a/notebook/integrate_azureml.ipynb +++ b/notebook/integrate_azureml.ipynb @@ -18,7 +18,7 @@ "## 1. Introduction\n", "\n", "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n", - "with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy \n", + "with low computational cost. It is fast and economical. The simple and lightweight design makes it easy \n", "to use and extend, such as adding new learners. FLAML can \n", "- serve as an economical AutoML engine,\n", "- be used as a fast hyperparameter tuning tool, or \n", diff --git a/notebook/integrate_sklearn.ipynb b/notebook/integrate_sklearn.ipynb index 469321540..0d85bb083 100644 --- a/notebook/integrate_sklearn.ipynb +++ b/notebook/integrate_sklearn.ipynb @@ -2,33 +2,34 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "Copyright (c) 2021. All rights reserved.\n", "\n", "Contributed by: @bnriiitb\n", "\n", "Licensed under the MIT License." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# Using AutoML in Sklearn Pipeline\n", "\n", "This tutorial will help you understand how FLAML's AutoML can be used as a transformer in the Sklearn pipeline." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "\n", "## 1.Introduction\n", "\n", "### 1.1 FLAML - Fast and Lightweight AutoML\n", "\n", - "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models with low computational cost. It is fast and cheap. The simple and lightweight design makes it easy to use and extend, such as adding new learners. \n", + "FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models with low computational cost. It is fast and economical. The simple and lightweight design makes it easy to use and extend, such as adding new learners. 
\n", "\n", "FLAML can \n", "- serve as an economical AutoML engine,\n", @@ -42,11 +43,11 @@ "```bash\n", "pip install flaml[notebook]\n", "```" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 1.2 Why are pipelines a silver bullet?\n", "\n", @@ -62,47 +63,42 @@ "* Allow hyperparameter tuning across the estimators\n", "* Easier to share and collaborate with multiple users (bug fixes, enhancements etc)\n", "* Enforce the implementation and order of steps" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### As FLAML's AutoML module can be used a transformer in the Sklearn's pipeline we can get all the benefits of pipeline and thereby write extremley clean, and resuable code." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 44, + "metadata": {}, + "outputs": [], "source": [ "!pip install flaml[notebook];" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 2. Classification Example\n", "### Load data and preprocess\n", "\n", "Download [Airlines dataset](https://www.openml.org/d/1169) from OpenML. The task is to predict whether a given flight will be delayed, given the information of the scheduled departure." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 4, - "source": [ - "from flaml.data import load_openml_dataset\n", - "X_train, X_test, y_train, y_test = load_openml_dataset(\n", - " dataset_id=1169, data_dir='./', random_state=1234, dataset_format='array')" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "load dataset from ./openml_ds1169.pkl\n", "Dataset name: airlines\n", @@ -111,38 +107,62 @@ ] } ], - "metadata": {} + "source": [ + "from flaml.data import load_openml_dataset\n", + "X_train, X_test, y_train, y_test = load_openml_dataset(\n", + " dataset_id=1169, data_dir='./', random_state=1234, dataset_format='array')" + ] }, { "cell_type": "code", "execution_count": 5, - "source": [ - "X_train[0]" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "array([ 12., 2648., 4., 15., 4., 450., 67.], dtype=float32)" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "X_train[0]" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 3. Create a Pipeline" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('imputuer', SimpleImputer()),\n",
+       "                ('standardizer', StandardScaler()),\n",
+       "                ('automl', )])
SimpleImputer()
StandardScaler()
" + ], + "text/plain": [ + "Pipeline(steps=[('imputuer', SimpleImputer()),\n", + " ('standardizer', StandardScaler()),\n", + " ('automl', )])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import sklearn\n", "from sklearn import set_config\n", @@ -163,39 +183,21 @@ " (\"automl\", automl)\n", "])\n", "automl_pipeline" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Pipeline(steps=[('imputuer', SimpleImputer()),\n", - " ('standardizer', StandardScaler()),\n", - " ('automl', )])" - ], - "text/html": [ - "
Pipeline(steps=[('imputuer', SimpleImputer()),\n",
-       "                ('standardizer', StandardScaler()),\n",
-       "                ('automl', )])
SimpleImputer()
StandardScaler()
" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Run FLAML\n", "In the FLAML automl run configuration, users can specify the task type, time budget, error metric, learner list, whether to subsample, resampling strategy type, and so on. All these arguments have default values which will be used if users do not provide them. For example, the default ML learners of FLAML are `['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree', 'lrl1']`. " - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": {}, + "outputs": [], "source": [ "settings = {\n", " \"time_budget\": 60, # total running time in seconds\n", @@ -204,24 +206,16 @@ " \"estimator_list\":['xgboost','catboost','lgbm'],\n", " \"log_file_name\": 'airlines_experiment.log', # flaml log file\n", "}" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 8, - "source": [ - "automl_pipeline.fit(X_train, y_train, \n", - " automl__time_budget=settings['time_budget'],\n", - " automl__metric=settings['metric'],\n", - " automl__estimator_list=settings['estimator_list'],\n", - " automl__log_training_metric=True)" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "[flaml.automl: 08-22 21:32:13] {1130} INFO - Evaluation method: holdout\n", "[flaml.automl: 08-22 21:32:14] {624} INFO - Using StratifiedKFold\n", @@ -389,28 +383,47 @@ ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - "Pipeline(steps=[('imputuer', SimpleImputer()),\n", - " ('standardizer', StandardScaler()),\n", - " ('automl', )])" - ], "text/html": [ "
Pipeline(steps=[('imputuer', SimpleImputer()),\n",
        "                ('standardizer', StandardScaler()),\n",
        "                ('automl', )])
SimpleImputer()
StandardScaler()
" + ], + "text/plain": [ + "Pipeline(steps=[('imputuer', SimpleImputer()),\n", + " ('standardizer', StandardScaler()),\n", + " ('automl', )])" ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "automl_pipeline.fit(X_train, y_train, \n", + " automl__time_budget=settings['time_budget'],\n", + " automl__metric=settings['metric'],\n", + " automl__estimator_list=settings['estimator_list'],\n", + " automl__log_training_metric=True)" + ] }, { "cell_type": "code", "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best ML leaner: xgboost\n", + "Best hyperparmeter config: {'n_estimators': 63, 'max_leaves': 1797, 'min_child_weight': 0.07275175679381725, 'learning_rate': 0.06234183309508761, 'subsample': 0.9814772488195874, 'colsample_bylevel': 0.810466508891351, 'colsample_bytree': 0.8005378817953572, 'reg_alpha': 0.5768305704485758, 'reg_lambda': 6.867180836557797, 'FLAML_sample_size': 364083}\n", + "Best accuracy on validation data: 0.6721\n", + "Training duration of best run: 15.45 s\n" + ] + } + ], "source": [ "# Get the automl object from the pipeline\n", "automl = automl_pipeline.steps[2][1]\n", @@ -420,75 +433,55 @@ "print('Best hyperparmeter config:', automl.best_config)\n", "print('Best accuracy on validation data: {0:.4g}'.format(1-automl.best_loss))\n", "print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Best ML leaner: xgboost\n", - "Best hyperparmeter config: {'n_estimators': 63, 'max_leaves': 1797, 'min_child_weight': 0.07275175679381725, 'learning_rate': 0.06234183309508761, 'subsample': 0.9814772488195874, 'colsample_bylevel': 0.810466508891351, 'colsample_bytree': 0.8005378817953572, 'reg_alpha': 0.5768305704485758, 'reg_lambda': 6.867180836557797, 'FLAML_sample_size': 364083}\n", - "Best accuracy on validation data: 0.6721\n", - "Training duration of best run: 15.45 s\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 10, - "source": [ - "automl.model" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "automl.model" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 4. Persist the model binary file" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 11, + "metadata": {}, + "outputs": [], "source": [ "# Persist the automl object as pickle file\n", "import pickle\n", "with open('automl.pkl', 'wb') as f:\n", " pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "# Performance inference on the testing dataset\n", - "y_pred = automl_pipeline.predict(X_test)\n", - "print('Predicted labels', y_pred)\n", - "print('True labels', y_test)\n", - "y_pred_proba = automl_pipeline.predict_proba(X_test)[:,1]\n", - "print('Predicted probas ',y_pred_proba[:5])" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Predicted labels [0 1 1 ... 0 1 0]\n", "True labels [0 0 0 ... 
1 0 1]\n", @@ -496,13 +489,23 @@ ] } ], - "metadata": {} + "source": [ + "# Performance inference on the testing dataset\n", + "y_pred = automl_pipeline.predict(X_test)\n", + "print('Predicted labels', y_pred)\n", + "print('True labels', y_test)\n", + "y_pred_proba = automl_pipeline.predict_proba(X_test)[:,1]\n", + "print('Predicted probas ',y_pred_proba[:5])" + ] } ], "metadata": { + "interpreter": { + "hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544" + }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.8.0 64-bit ('blend': conda)" + "display_name": "Python 3.8.0 64-bit ('blend': conda)", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -515,11 +518,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" - }, - "interpreter": { - "hash": "0cfea3304185a9579d09e0953576b57c8581e46e6ebc6dfeb681bc5a511f7544" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py index 0227b878a..7f2b11ee8 100644 --- a/test/nlp/test_autohf.py +++ b/test/nlp/test_autohf.py @@ -11,14 +11,12 @@ def test_hf_data(): from datasets import load_dataset try: - train_dataset = ( - load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4] - ) + train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas() dev_dataset = ( - load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4] + load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[:4] ) test_dataset = ( - load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4] + load_dataset("glue", "mrpc", split="test[2%:3%]").to_pandas().iloc[:4] ) except requests.exceptions.ConnectionError: return @@ -39,7 +37,7 @@ def test_hf_data(): automl_settings = { "gpu_per_trial": 0, "max_iter": 3, - "time_budget": 5, + "time_budget": 10, "task": "seq-classification", "metric": "accuracy", "log_file_name": "seqclass.log", diff --git a/test/nlp/test_autohf_custom_metric.py b/test/nlp/test_autohf_custom_metric.py index 63461ea87..613857bd7 100644 --- a/test/nlp/test_autohf_custom_metric.py +++ b/test/nlp/test_autohf_custom_metric.py @@ -17,7 +17,6 @@ def custom_metric( ): from datasets import Dataset from flaml.model import TransformersEstimator - from flaml.nlp.utils import load_default_huggingface_metric_for_task if estimator._trainer is None: estimator._init_model_for_predict(X_test) @@ -32,13 +31,12 @@ def custom_metric( X_test, _ = estimator._preprocess(X_test) eval_dataset = Dataset.from_pandas(X_test) - estimator_metric_cache = estimator._metric - estimator._metric = load_default_huggingface_metric_for_task(estimator._task) - + estimator_metric_backup = estimator._metric + estimator._metric = "rmse" metrics = trainer.evaluate(eval_dataset) - estimator._metric = estimator_metric_cache + estimator._metric = estimator_metric_backup - return metrics["eval_val_loss"], metrics + return metrics.pop("eval_automl_metric"), metrics @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os") diff --git a/test/nlp/test_autohf_regression.py b/test/nlp/test_autohf_regression.py index 7edc01751..08b9ab203 100644 --- a/test/nlp/test_autohf_regression.py +++ b/test/nlp/test_autohf_regression.py @@ -13,11 +13,9 @@ def test_regression(): from datasets import load_dataset try: - train_dataset = ( - load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20] - ) + train_dataset = load_dataset("glue", "stsb", split="train[:2%]").to_pandas() 
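+        # dev below uses the disjoint train[2%:3%] slice, truncated to 32 rows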
dev_dataset = ( - load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20] + load_dataset("glue", "stsb", split="train[2%:3%]").to_pandas().iloc[:32] ) except requests.exceptions.ConnectionError: return @@ -50,9 +48,12 @@ def test_regression(): "fp16": False, } + ray.shutdown() + ray.init() automl.fit( X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings ) + automl.predict(X_val) if __name__ == "__main__": diff --git a/test/test_model.py b/test/test_model.py new file mode 100644 index 000000000..a8e43f87f --- /dev/null +++ b/test/test_model.py @@ -0,0 +1,117 @@ +from sklearn.datasets import make_classification +import numpy as np +from pandas import DataFrame +from datetime import datetime +from flaml.model import ( + KNeighborsEstimator, + LRL2Classifier, + BaseEstimator, + LGBMEstimator, + CatBoostEstimator, + XGBoostEstimator, + RandomForestEstimator, + Prophet, + ARIMA, + LGBM_TS_Regressor, +) + + +def test_lrl2(): + BaseEstimator.search_space(1, "") + X, y = make_classification(100000, 1000) + print("start") + lr = LRL2Classifier() + lr.predict(X) + lr.fit(X, y, budget=1e-5) + + +def test_prep(): + X = np.array( + list( + zip( + [ + 3.0, + 16.0, + 10.0, + 12.0, + 3.0, + 14.0, + 11.0, + 12.0, + 5.0, + 14.0, + 20.0, + 16.0, + 15.0, + 11.0, + ], + [ + "a", + "b", + "a", + "c", + "c", + "b", + "b", + "b", + "b", + "a", + "b", + 1.0, + 1.0, + "a", + ], + ) + ), + dtype=object, + ) + y = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) + lr = LRL2Classifier() + lr.fit(X, y) + lr.predict(X) + lgbm = LGBMEstimator(n_estimators=4) + lgbm.fit(X, y) + cat = CatBoostEstimator(n_estimators=4) + cat.fit(X, y) + knn = KNeighborsEstimator(task="regression") + knn.fit(X, y) + xgb = XGBoostEstimator(n_estimators=4, max_leaves=4) + xgb.fit(X, y) + xgb.predict(X) + rf = RandomForestEstimator(task="regression", n_estimators=4, criterion="gini") + rf.fit(X, y) + + prophet = Prophet() + try: + prophet.predict(4) + except ValueError: + # predict() with steps is only supported for arima/sarimax. + pass + prophet.predict(X) + + arima = ARIMA() + arima.predict(X) + arima._model = False + try: + arima.predict(X) + except ValueError: + # X_test needs to be either a pandas Dataframe with dates as the first column or an int number of periods for predict(). 
+ pass + + lgbm = LGBM_TS_Regressor(optimize_for_horizon=True, lags=1) + X = DataFrame( + { + "A": [ + datetime(1900, 2, 3), + datetime(1900, 3, 4), + datetime(1900, 3, 4), + datetime(1900, 3, 4), + datetime(1900, 7, 2), + datetime(1900, 8, 9), + ], + } + ) + y = np.array([0, 1, 0, 1, 0, 0]) + lgbm.predict(X[:2]) + lgbm.fit(X, y, period=2) + lgbm.predict(X[:2]) diff --git a/test/tune/test_constraints.py b/test/tune/test_constraints.py new file mode 100644 index 000000000..d4c1095e0 --- /dev/null +++ b/test/tune/test_constraints.py @@ -0,0 +1,25 @@ +def test_config_constraint(): + from flaml import tune + + # Test dict return value + def evaluate_config_dict(config): + metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"] + return {"metric": metric} + + def config_constraint(config): + if config["y"] >= config["x"]: + return 1 + else: + return 0 + + tune.run( + evaluate_config_dict, + config={ + "x": tune.qloguniform(lower=1, upper=100000, q=1), + "y": tune.qrandint(lower=2, upper=100000, q=2), + }, + config_constraints=[(config_constraint, ">", 0.5)], + metric="metric", + mode="max", + num_samples=100, + ) diff --git a/test/tune/test_searcher.py b/test/tune/test_searcher.py index 52d741d52..9a1e96e80 100644 --- a/test/tune/test_searcher.py +++ b/test/tune/test_searcher.py @@ -1,3 +1,4 @@ +from time import sleep import numpy as np try: @@ -12,22 +13,38 @@ except (ImportError, AssertionError): use_ray = False -from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter -from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch - def define_search_space(trial): trial.suggest_float("a", 6, 8) trial.suggest_float("b", 1e-4, 1e-2, log=True) +def long_define_search_space(trial): + sleep(1) + return 3 + + +def wrong_define_search_space(trial): + return {1: 1} + + def test_searcher(): + from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter + from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch + from flaml.tune import sample as flamlsample + searcher = Searcher() + try: + searcher = Searcher(metric=1, mode=1) + except ValueError: + # Mode must either be a list or string + pass searcher = Searcher(metric=["m1", "m2"], mode=["max", "min"]) searcher.set_search_properties(None, None, None) searcher.suggest = searcher.on_pause = searcher.on_unpause = lambda _: {} searcher.on_trial_complete = lambda trial_id, result, error: None searcher = ConcurrencyLimiter(searcher, max_concurrent=2, batch=True) + searcher.on_trial_complete("t0") searcher.suggest("t1") searcher.suggest("t2") searcher.on_pause("t1") @@ -43,6 +60,12 @@ def test_searcher(): "a": optuna.distributions.UniformDistribution(6, 8), "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2), } + searcher = OptunaSearch(["a", config["a"]], metric="m", mode="max") + try: + searcher.suggest("t0") + except ValueError: + # not enough values to unpack (expected 3, got 1) + pass searcher = OptunaSearch( config, points_to_evaluate=[{"a": 6, "b": 1e-3}], @@ -50,14 +73,105 @@ def test_searcher(): metric="m", mode="max", ) + try: + searcher.add_evaluated_point({}, None, error=True) + except ValueError: + # nconsistent parameters set() and distributions {'b', 'a'}. 
+ pass + try: + searcher.add_evaluated_point({"a", 1, "b", 0.01}, None, pruned=True) + except AttributeError: + # 'set' object has no attribute 'keys' + pass + try: + searcher.add_evaluated_point( + {"a": 1, "b": 0.01}, None, intermediate_values=[0.1] + ) + except ValueError: + # `value` is supposed to be set for a complete trial. + pass + try: + searcher = OptunaSearch(config, points_to_evaluate=1) + except TypeError: + # points_to_evaluate expected to be a list, got + pass + try: + searcher = OptunaSearch(config, points_to_evaluate=[1]) + except TypeError: + # points_to_evaluate expected to include list or dict + pass + try: + searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1}]) + except ValueError: + # Dim of point {'a': 1} and parameter_names {'a': UniformDistribution(high=8.0, low=6.0), 'b': LogUniformDistribution(high=0.01, low=0.0001)} do not match. + pass + try: + searcher = OptunaSearch( + config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=1 + ) + except TypeError: + # valuated_rewards expected to be a list, got . + pass + try: + searcher = OptunaSearch( + config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=[1, 2] + ) + except ValueError: + # Dim of evaluated_rewards [1, 2] and points_to_evaluate [{'a': 1, 'b': 0.01}] do not match. + pass config = {"a": sample.uniform(6, 8), "b": sample.loguniform(1e-4, 1e-2)} - # searcher = OptunaSearch( - # config, - # points_to_evaluate=[{"a": 6, "b": 1e-3}], - # evaluated_rewards=[{"m": 2}], - # metric="m", - # mode="max", - # ) + OptunaSearch.convert_search_space({"a": 1}) + try: + OptunaSearch.convert_search_space({"a": {"grid_search": [1, 2]}}) + except ValueError: + # Grid search parameters cannot be automatically converted to an Optuna search space. + pass + OptunaSearch.convert_search_space({"a": flamlsample.quniform(1, 3, 1)}) + try: + searcher = OptunaSearch( + config, + points_to_evaluate=[{"a": 6, "b": 1e-3}], + evaluated_rewards=[{"m": 2}], + metric="m", + mode="max", + ) + except ValueError: + # Optuna search does not support parameters of type `Float` with samplers of type `_Uniform` + pass + searcher = OptunaSearch(long_define_search_space, metric="m", mode="min") + try: + searcher.suggest("t0") + except TypeError: + # The return value of the define-by-run function passed in the `space` argument should be either None or a `dict` with `str` keys. + pass + searcher = OptunaSearch(wrong_define_search_space, metric="m", mode="min") + try: + searcher.suggest("t0") + except TypeError: + # At least one of the keys in the dict returned by the define-by-run function passed in the `space` argument was not a `str`. + pass + searcher = OptunaSearch(metric="m", mode="min") + try: + searcher.suggest("t0") + except RuntimeError: + # Trying to sample a configuration from OptunaSearch, but no search space has been defined. + pass + try: + searcher.add_evaluated_point({}, 1) + except RuntimeError: + # Trying to sample a configuration from OptunaSearch, but no search space has been defined. + pass + searcher = OptunaSearch(define_search_space) + try: + searcher.suggest("t0") + except RuntimeError: + # Trying to sample a configuration from OptunaSearch, but the `metric` (None) or `mode` (None) parameters have not been set. + pass + try: + searcher.add_evaluated_point({}, 1) + except RuntimeError: + # Trying to sample a configuration from OptunaSearch, but the `metric` (None) or `mode` (None) parameters have not been set. 
+ pass searcher = OptunaSearch( define_search_space, points_to_evaluate=[{"a": 6, "b": 1e-3}], @@ -166,3 +280,13 @@ def test_searcher(): from flaml import tune tune.run(lambda x: 1, config={}, use_ray=use_ray) + + +def test_no_optuna(): + import subprocess + import sys + + subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "optuna"]) + import flaml.searcher.suggestion + + subprocess.check_call([sys.executable, "-m", "pip", "install", "optuna==2.8.0"]) diff --git a/test/tune/test_space.py b/test/tune/test_space.py new file mode 100644 index 000000000..f0c018b63 --- /dev/null +++ b/test/tune/test_space.py @@ -0,0 +1,115 @@ +from flaml import BlendSearch, CFO, tune + + +def test_define_by_run(): + from flaml.tune.space import ( + unflatten_hierarchical, + normalize, + indexof, + complete_config, + ) + + space = { + # Sample a float uniformly between -5.0 and -1.0 + "uniform": tune.uniform(-5, -1), + # Sample a float uniformly between 3.2 and 5.4, + # rounding to increments of 0.2 + "quniform": tune.quniform(3.2, 5.4, 0.2), + # Sample a float uniformly between 0.0001 and 0.01, while + # sampling in log space + "loguniform": tune.loguniform(1e-4, 1e-2), + # Sample a float uniformly between 0.0001 and 0.1, while + # sampling in log space and rounding to increments of 0.00005 + "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5), + # Sample a random float from a normal distribution with + # mean=10 and sd=2 + # "randn": tune.randn(10, 2), + # Sample a random float from a normal distribution with + # mean=10 and sd=2, rounding to increments of 0.2 + # "qrandn": tune.qrandn(10, 2, 0.2), + # Sample a integer uniformly between -9 (inclusive) and 15 (exclusive) + "randint": tune.randint(-9, 15), + # Sample a random uniformly between -21 (inclusive) and 12 (inclusive (!)) + # rounding to increments of 3 (includes 12) + "qrandint": tune.qrandint(-21, 12, 3), + # Sample a integer uniformly between 1 (inclusive) and 10 (exclusive), + # while sampling in log space + "lograndint": tune.lograndint(1, 10), + # Sample a integer uniformly between 2 (inclusive) and 10 (inclusive (!)), + # while sampling in log space and rounding to increments of 2 + "qlograndint": tune.qlograndint(2, 10, 2), + # Sample an option uniformly from the specified choices + "choice": tune.choice(["a", "b", "c"]), + "const": 5, + } + choice = {"nested": space} + bs = BlendSearch( + space={"c": tune.choice([choice])}, + low_cost_partial_config={"c": choice}, + metric="metric", + mode="max", + ) + print(indexof(bs._gs.space["c"], choice)) + print(indexof(bs._gs.space["c"], {"nested": {"const": 1}})) + config = bs._gs.suggest("t1") + print(config) + config = unflatten_hierarchical(config, bs._gs.space)[0] + print(config) + print(normalize({"c": [choice]}, bs._gs.space, config, {}, False)) + space["randn"] = tune.randn(10, 2) + cfo = CFO( + space={"c": tune.choice([0, choice])}, + metric="metric", + mode="max", + ) + for i in range(5): + cfo.suggest(f"t{i}") + # print(normalize(config, bs._gs.space, config, {}, False)) + print(complete_config({}, cfo._ls.space, cfo._ls)) + + +def test_grid(): + from flaml.searcher.variant_generator import ( + generate_variants, + grid_search, + TuneError, + has_unresolved_values, + ) + from flaml.tune import sample + + space = { + "activation": grid_search(["relu", "tanh"]), + "learning_rate": grid_search([1e-3, 1e-4, 1e-5]), + "c": sample.choice([2, 3]), + } + for _, generated in generate_variants({"config": space}): + config = generated["config"] + print(config) + for _, generated in 
generate_variants({"config": space}, True): + config = generated["config"] + print(config) + space = { + "activation": grid_search([{"c": sample.choice([2, 3])}]), + "learning_rate": grid_search([1e-3, 1e-4, 1e-5]), + } + try: + for _, generated in generate_variants({"config": space}, True): + config = generated["config"] + print(config) + except ValueError: + # The variable `('config', 'activation', 'c')` could not be unambiguously resolved to a single value. + pass + space = { + "c": sample.choice([{"c1": sample.choice([1, 2])}]), + "a": sample.randint(1, 10), + "b": sample.choice([sample.uniform(10, 20), sample.choice([1, 2])]), + } + for _, generated in generate_variants({"config": space}): + config = generated["config"] + print(config) + space = {"a": grid_search(3)} + try: + print(has_unresolved_values(space)) + except TuneError: + # Grid search expected list of values, got: 3 + pass diff --git a/website/docs/Contribute.md b/website/docs/Contribute.md index d805f84d5..c97724f51 100644 --- a/website/docs/Contribute.md +++ b/website/docs/Contribute.md @@ -80,8 +80,8 @@ nvm install --lts Then: ```console -npm install --global yarn -pip install pydoc-markdown +npm install --global yarn # skip if you use the dev container we provided +pip install pydoc-markdown # skip if you use the dev container we provided cd website yarn install --frozen-lockfile pydoc-markdown diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md index 32c759bc5..79c394791 100644 --- a/website/docs/Use-Cases/Task-Oriented-AutoML.md +++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md @@ -2,7 +2,7 @@ ## Overview -`flaml.AutoML` is a class for task-oriented AutoML. It can be used as a scikit-learn style estimator with the standard `fit` and `predict` functions. The minimal inputs from users are the training data and the task type. +[`flaml.AutoML`](../reference/automl#automl-objects) is a class for task-oriented AutoML. It can be used as a scikit-learn style estimator with the standard `fit` and `predict` functions. The minimal inputs from users are the training data and the task type. * Training data: - numpy array. When the input data are stored in numpy array, they are passed to `fit()` as `X_train` and `y_train`. @@ -122,7 +122,7 @@ The estimator list can contain one or more estimator names, each corresponding t To tune a custom estimator that is not built-in, you need to: -1. Build a custom estimator by inheritting `flaml.model.BaseEstimator` or a derived class. +1. Build a custom estimator by inheritting [`flaml.model.BaseEstimator`](../reference/model#baseestimator-objects) or a derived class. For example, if you have a estimator class with scikit-learn style `fit()` and `predict()` functions, you only need to set `self.estimator_class` to be that class in your constructor. ```python @@ -163,7 +163,7 @@ class MyRegularizedGreedyForest(SKLearnEstimator): return space ``` -In the constructor, we set `self.estimator_class` as `RGFClassifier` or `RGFRegressor` according to the task type. If the estimator you want to tune does not have a scikit-learn style `fit()` and `predict()` API, you can override the `fit()` and `predict()` function of `flaml.model.BaseEstimator`, like [XGBoostEstimator](https://github.com/microsoft/FLAML/blob/59083fbdcb95c15819a0063a355969203022271c/flaml/model.py#L511). +In the constructor, we set `self.estimator_class` as `RGFClassifier` or `RGFRegressor` according to the task type. 
If the estimator you want to tune does not have a scikit-learn style `fit()` and `predict()` API, you can override the `fit()` and `predict()` function of `flaml.model.BaseEstimator`, like [XGBoostEstimator](../reference/model#xgboostestimator-objects). 2. Give the custom estimator a name and add it in AutoML. E.g., @@ -377,7 +377,7 @@ print(automl.mdoel) # ``` -`flaml.model.LGBMEstimator` is a wrapper class for LightGBM models. To access the underlying model, use the `estimator` property of the `flaml.model.LGBMEstimator` instance. +[`flaml.model.LGBMEstimator`](../reference/model#lgbmestimator-objects) is a wrapper class for LightGBM models. To access the underlying model, use the `estimator` property of the `flaml.model.LGBMEstimator` instance. ```python print(automl.model.estimator) diff --git a/website/docs/Use-Cases/Tune-User-Defined-Function.md b/website/docs/Use-Cases/Tune-User-Defined-Function.md index d6d13d7d3..65b72357b 100644 --- a/website/docs/Use-Cases/Tune-User-Defined-Function.md +++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md @@ -1,6 +1,6 @@ # Tune User Defined Function -`flaml.tune` is a module for economical hyperparameter tuning. It is used internally by `flaml.AutoML`. It can also be used to directly tune a user-defined function (UDF), which is not limited to machine learning model training. You can use `flaml.tune` instead of `flaml.AutoML` if one of the following is true: +[`flaml.tune`](../reference/tune/tune) is a module for economical hyperparameter tuning. It is used internally by `flaml.AutoML`. It can also be used to directly tune a user-defined function (UDF), which is not limited to machine learning model training. You can use `flaml.tune` instead of `flaml.AutoML` if one of the following is true: 1. Your machine learning task is not one of the built-in tasks from `flaml.AutoML`. 1. Your input cannot be represented as X_train + y_train or dataframe + label. @@ -75,8 +75,8 @@ config_search_space = { "y": tune.randint(lower=1, upper=100000) } -# provide the search space to flaml.tune -flaml.tune.run(..., config=config_search_space, ...) +# provide the search space to tune.run +tune.run(..., config=config_search_space, ...) ``` #### More details about the search space domain @@ -121,9 +121,9 @@ config = { # while sampling in log space "lograndint": tune.lograndint(1, 10), - # Sample a integer uniformly between 1 (inclusive) and 10 (inclusive (!)), + # Sample a integer uniformly between 2 (inclusive) and 10 (inclusive (!)), # while sampling in log space and rounding to increments of 2 - "qlograndint": tune.qlograndint(1, 10, 2), + "qlograndint": tune.qlograndint(2, 10, 2), # Sample an option uniformly from the specified choices "choice": tune.choice(["a", "b", "c"]), @@ -170,7 +170,7 @@ Optionally, you can provide a list of config constraints to be satisfied through ### Put together -After the aforementioned key steps, one is ready to perform a tuning task by calling `flaml.tune.run()`. Below is a quick sequential tuning example using the pre-defined search space `config_search_space` and a minimization (`mode='min'`) objective for the `score` metric evaluated in `evaluate_config`, using the default serach algorithm in flaml. The time budget is 10 seconds (`time_budget_s=10`). +After the aforementioned key steps, one is ready to perform a tuning task by calling [`flaml.tune.run()`](../reference/tune/tune#run). 
Below is a quick sequential tuning example using the pre-defined search space `config_search_space` and a minimization (`mode='min'`) objective for the `score` metric evaluated in `evaluate_config`, using the default search algorithm in flaml. The time budget is 10 seconds (`time_budget_s=10`). ```python # require: pip install flaml[blendsearch] analysis = tune.run( @@ -209,7 +209,7 @@ There are several advanced tuning options worth mentioning. ### More constraints on the tuning -A user can specify constraints on the configurations to be satisfied via the argument `config_constraints`. The `config_constraints` receives a list of such constraints to be satisfied. Specifically, each constraint is a tuple that consists of (1) a function that takes a configuration as input and returns a numerical value; (2) an operation chosen from "<=" or ">"; (3) a numerical threshold. +A user can specify constraints on the configurations to be satisfied via the argument `config_constraints`. The `config_constraints` receives a list of such constraints to be satisfied. Specifically, each constraint is a tuple that consists of (1) a function that takes a configuration as input and returns a numerical value; (2) an operation chosen from "<=", ">=", "<" or ">"; (3) a numerical threshold. In the following code example, we constrain the output of `area`, which takes a configuration as input and outputs a numerical value, to be no larger than 1000. @@ -222,7 +222,7 @@ flaml.tune.run(evaluation_function=evaluate_config, mode="min", config_constraints=[(area, "<=", 1000)], ...) ``` - You can also specify a list of metric constraints to be satisfied via the argument `metric_constraints`. Each element in the `metric_constraints` list is a tuple that consists of (1) a string specifying the name of the metric (the metric name must be defined and returned in the user-defined `evaluation_function`); (2) an operation chosen from "<=" or ">"; (3) a numerical threshold. + You can also specify a list of metric constraints to be satisfied via the argument `metric_constraints`. Each element in the `metric_constraints` list is a tuple that consists of (1) a string specifying the name of the metric (the metric name must be defined and returned in the user-defined `evaluation_function`); (2) an operation chosen from "<=" or ">="; (3) a numerical threshold. In the following code example, we constrain the metric `score` to be no larger than 0.4.
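A minimal sketch of the `metric_constraints` usage described above (the docs page's own snippet falls outside this hunk's context and is not reproduced here): the search space is reused from `test/tune/test_constraints.py`, while the `val_loss`/`score` metric names and the toy formulas are illustrative assumptions, not taken from the docs.

```python
# require: pip install flaml[blendsearch]
from flaml import tune


def evaluate_config(config):
    # Toy evaluation function: "val_loss" is the metric being minimized,
    # "score" is an extra reported metric that is only constrained.
    # (Both the metric names and the formulas are illustrative.)
    val_loss = (round(config["x"]) - 85000) ** 2
    score = config["x"] / (config["x"] + config["y"])
    return {"val_loss": val_loss, "score": score}


analysis = tune.run(
    evaluate_config,
    config={
        "x": tune.qloguniform(lower=1, upper=100000, q=1),
        "y": tune.qrandint(lower=2, upper=100000, q=2),
    },
    metric="val_loss",
    mode="min",
    # keep the reported metric "score" no larger than 0.4
    metric_constraints=[("score", "<=", 0.4)],
    num_samples=100,
)
print(analysis.best_config)
```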
diff --git a/website/static/img/FLAML.png b/website/static/img/FLAML.png deleted file mode 100644 index 4f518680b..000000000 Binary files a/website/static/img/FLAML.png and /dev/null differ diff --git a/website/static/img/auto.svg b/website/static/img/auto.svg index ed27ac7d0..b2c9290ec 100644 --- a/website/static/img/auto.svg +++ b/website/static/img/auto.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/website/static/img/extend.svg b/website/static/img/extend.svg index 572c99569..8829d2d13 100644 --- a/website/static/img/extend.svg +++ b/website/static/img/extend.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/website/static/img/fast.svg b/website/static/img/fast.svg index ad5fd96b0..8abfa7dfa 100644 --- a/website/static/img/fast.svg +++ b/website/static/img/fast.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/website/static/img/flaml.svg b/website/static/img/flaml.svg new file mode 100644 index 000000000..5ae22b683 --- /dev/null +++ b/website/static/img/flaml.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/website/static/img/flaml_logo.ico b/website/static/img/flaml_logo.ico index e353db7c5..48b68de75 100644 Binary files a/website/static/img/flaml_logo.ico and b/website/static/img/flaml_logo.ico differ diff --git a/website/static/img/flaml_logo_fill.svg b/website/static/img/flaml_logo_fill.svg index 302ace08a..de9a08280 100644 --- a/website/static/img/flaml_logo_fill.svg +++ b/website/static/img/flaml_logo_fill.svg @@ -1 +1,28 @@ - \ No newline at end of file + + + + + + + + + + + + diff --git a/website/yarn.lock b/website/yarn.lock index c45143f0e..dfa2fe717 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -3923,9 +3923,9 @@ flux@^4.0.1: fbjs "^3.0.1" follow-redirects@^1.0.0, follow-redirects@^1.14.0: - version "1.14.6" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.6.tgz#8cfb281bbc035b3c067d6cd975b0f6ade6e855cd" - integrity sha512-fhUl5EwSJbbl8AR+uYL2KQDxLkdSjZGR36xy46AO7cOMTrCMON6Sa28FmAnC2tRTDbd/Uuzz3aJBv7EBN7JH8A== + version "1.14.7" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.7.tgz#2004c02eb9436eee9a21446a6477debf17e81685" + integrity sha512-+hbxoLbFMbRKDwohX8GkTataGqO6Jb7jGwpAlwgy2bIz25XtRm7KEzJM76R1WiNT5SwZkX4Y75SwBolkpmE7iQ== fork-ts-checker-webpack-plugin@^6.0.5: version "6.5.0" @@ -4671,9 +4671,9 @@ is-ci@^2.0.0: ci-info "^2.0.0" is-core-module@^2.2.0: - version "2.8.0" - resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.8.0.tgz#0321336c3d0925e497fd97f5d95cb114a5ccd548" - integrity sha512-vd15qHsaqrRL7dtH6QNuy0ndJmRDrS9HAM1CAiSifNUFv4x1a0CCVsj18hJ1mShxIG6T2i1sO78MkP56r0nYRw== + version "2.8.1" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.8.1.tgz#f59fdfca701d5879d0a6b100a40aa1560ce27211" + integrity sha512-SdNCUs284hr40hFTFP6l0IfZ/RSrMXF3qgoRHd3/79unUTvrFO/JoXwkGm+5J/Oe3E/b5GsnG330uUNgRpu1PA== dependencies: has "^1.0.3" @@ -5487,9 +5487,7 @@ node-fetch@2.6.1: integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw== node-forge@^0.10.0: - version "0.10.0" - resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3" - integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA== + version "1.0.0" node-releases@^2.0.1: version "2.0.1" @@ -5806,7 +5804,7 @@ 
path-key@^3.0.0, path-key@^3.1.0: resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== -path-parse@^1.0.6: +path-parse@^1.0.6, path-parse@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== @@ -6774,7 +6772,16 @@ resolve-pathname@^3.0.0: resolved "https://registry.yarnpkg.com/resolve-pathname/-/resolve-pathname-3.0.0.tgz#99d02224d3cf263689becbb393bc560313025dcd" integrity sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng== -resolve@^1.1.6, resolve@^1.14.2, resolve@^1.3.2: +resolve@^1.1.6: + version "1.21.0" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.21.0.tgz#b51adc97f3472e6a5cf4444d34bc9d6b9037591f" + integrity sha512-3wCbTpk5WJlyE4mSOtDLhqQmGFi0/TD9VPwmiolnk8U0wRgMEktqCXd3vy5buTO3tljvalNvKrjHEfrd2WpEKA== + dependencies: + is-core-module "^2.8.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +resolve@^1.14.2, resolve@^1.3.2: version "1.20.0" resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975" integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A== @@ -7052,10 +7059,9 @@ shell-quote@^1.7.2: integrity sha512-Vpfqwm4EnqGdlsBFNmHhxhElJYrdfcxPThu+ryKS5J8L/fhAwLazFZtq+S+TWZ9ANj2piSQLGj6NQg+lKPmxrw== shelljs@^0.8.4: - version "0.8.4" - resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.4.tgz#de7684feeb767f8716b326078a8a00875890e3c2" - integrity sha512-7gk3UZ9kOfPLIAbslLzyWeGiEqx9e3rxwZM0KE6EL8GlGwjym9Mrlx5/p33bWTu9YG6vcS4MBxYZDHYr5lr8BQ== - dependencies: + version "0.8.5" + resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.5.tgz#de055408d8361bed66c669d2f000538ced8ee20c" + integrity sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow== dependencies: glob "^7.0.0" interpret "^1.0.0" rechoir "^0.6.2" @@ -7333,6 +7339,11 @@ supports-color@^8.0.0: dependencies: has-flag "^4.0.0" +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + svg-parser@^2.0.2: version "2.0.4" resolved "https://registry.yarnpkg.com/svg-parser/-/svg-parser-2.0.4.tgz#fdc2e29e13951736140b76cb122c8ee6630eb6b5"