Mirror of https://github.com/microsoft/autogen.git
Add pipeline tuner component and dependencies. (#671)
* Add pipeline tuner component and dependencies (a sketch of the tuning loop follows this list).
* Clean code.
* No need to force rerun.
* Replace the resources.
* Update metrics retrieving.
* Update test/pipeline_tuning_example/requirements.txt
* Update test/pipeline_tuning_example/train/env.yaml
* Update test/pipeline_tuning_example/tuner/env.yaml
* Update test/pipeline_tuning_example/tuner/tuner_func.py
* Update test/pipeline_tuning_example/data_prep/env.yaml
* Fix issues found by flake8 lint.
* Add documentation.
* Add data.
* No AzureML resources needed for a local run.
* AML -> AzureML
* Clean code.
* Update website/docs/Examples/Tune-AzureML pipeline.md
* Rename and add pip install.
* Update figure name.
* Align docs with code.
* Remove extra line.
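The tuner added by this commit (test/pipeline_tuning_example/tuner/tuner_func.py, not shown in the hunks below) uses flaml.tune to search over hyperparameters, where each trial submits a full AzureML training pipeline and reports the resulting metric back. The following is only a minimal, self-contained sketch of that loop under assumptions: the search space, the metric name "eval_binary_error", and the dummy evaluation function are illustrative, not taken from the commit.

# Sketch of a flaml.tune loop over pipeline runs (assumed shape; see
# tuner/tuner_func.py in this commit for the real implementation).
from flaml import tune


def run_with_config(config: dict) -> dict:
    """Stand-in for submitting one AzureML pipeline run with the sampled config.

    The real tuner builds and submits the pipeline, waits for it to finish, and
    reads the logged metric back; here a fake score keeps the sketch runnable.
    """
    fake_score = 1.0 - abs(config["learning_rate"] - 0.1)  # placeholder metric
    return {"eval_binary_error": fake_score}


def tune_pipeline(num_samples: int = 4):
    search_space = {
        "learning_rate": tune.loguniform(lower=0.001, upper=1.0),
        "n_estimators": tune.randint(lower=50, upper=200),
    }
    analysis = tune.run(
        run_with_config,
        config=search_space,
        metric="eval_binary_error",
        mode="max",
        num_samples=num_samples,
    )
    return analysis.best_config


if __name__ == "__main__":
    print(tune_pipeline())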
test/pipeline_tuning_example/data_prep/data_prep.py (new file, 38 lines)
@@ -0,0 +1,38 @@
import os
import argparse
import pandas as pd
from sklearn.model_selection import train_test_split
import logging

logger = logging.getLogger(__name__)


def main():
    """Main function of the script."""

    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, help="path to input data")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
    parser.add_argument("--train_data", type=str, help="path to train data")
    parser.add_argument("--test_data", type=str, help="path to test data")
    args = parser.parse_args()

    logger.info(" ".join(f"{k}={v}" for k, v in vars(args).items()))

    data_path = os.path.join(args.data, 'data.csv')
    df = pd.read_csv(data_path)

    train_df, test_df = train_test_split(
        df,
        test_size=args.test_train_ratio,
    )

    # output paths are mounted as folder, therefore, we are adding a filename to the path
    train_df.to_csv(os.path.join(args.train_data, "data.csv"), index=False)

    test_df.to_csv(os.path.join(args.test_data, "data.csv"), index=False)


if __name__ == "__main__":
    main()
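Because the component's output paths are mounted as folders on AzureML (hence the data.csv filename appended above), the script can also be smoke-tested locally by pointing it at plain directories. The folder names below are placeholders, not part of the commit, and a raw/data.csv file is assumed to exist.

# Hypothetical local smoke test for data_prep.py; paths are placeholders and
# raw/data.csv is assumed to exist.
import os
import subprocess

os.makedirs("out/train", exist_ok=True)
os.makedirs("out/test", exist_ok=True)

subprocess.run(
    [
        "python", "data_prep.py",
        "--data", "raw",               # folder containing data.csv
        "--test_train_ratio", "0.25",
        "--train_data", "out/train",   # script writes out/train/data.csv
        "--test_data", "out/test",     # script writes out/test/data.csv
    ],
    check=True,
)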
test/pipeline_tuning_example/data_prep/data_prep.yaml (new file, 26 lines)
@@ -0,0 +1,26 @@
$schema: https://componentsdk.azureedge.net/jsonschema/CommandComponent.json
name: data_prep
version: 0.0.1
display_name: Data preparation for training
type: CommandComponent
inputs:
  data:
    type: path
  test_train_ratio:
    type: float
outputs:
  train_data:
    type: path
  test_data:
    type: path
environment:
  conda:
    conda_dependencies_file: env.yaml
  os: Linux

command: >-
  python data_prep.py
  --data {inputs.data}
  --test_train_ratio {inputs.test_train_ratio}
  --train_data {outputs.train_data}
  --test_data {outputs.test_data}
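The spec above uses the CommandComponent schema from the azure-ml-component preview SDK, so it is consumed from Python when the pipeline is built rather than run directly. A rough sketch of that wiring follows; the API calls reflect my understanding of that preview SDK and should be checked against the example's own submit script, and the compute target and dataset names are placeholders.

# Hedged sketch of loading data_prep.yaml into a pipeline with the azure-ml-component
# preview SDK; workspace, compute, and dataset names are placeholders.
from azureml.core import Workspace, Dataset
from azure.ml.component import Component, dsl

ws = Workspace.from_config()  # assumes a local config.json for the workspace
data_prep_func = Component.from_yaml(ws, yaml_file="data_prep/data_prep.yaml")


@dsl.pipeline(name="data_prep_only", default_compute_target="cpu-cluster")
def data_prep_pipeline(raw_data, test_train_ratio=0.25):
    step = data_prep_func(data=raw_data, test_train_ratio=test_train_ratio)
    return {"train_data": step.outputs.train_data, "test_data": step.outputs.test_data}


raw = Dataset.get_by_name(ws, name="raw_data")  # placeholder dataset name
data_prep_pipeline(raw_data=raw).submit(experiment_name="data_prep_demo")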
test/pipeline_tuning_example/data_prep/env.yaml (new file, 15 lines)
@@ -0,0 +1,15 @@
name: data-prep-env
channels:
  - conda-forge
dependencies:
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  - pip:
      # - inference-schema[numpy-support]==1.3.0
      # - xlrd==2.0.1
      - mlflow==1.26.1
      - azureml-mlflow==1.42.0
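mlflow and azureml-mlflow are pinned here because the example's components log their metrics through MLflow, and the tuner later reads those metrics back from the finished pipeline runs. A hedged illustration of the logging side (the metric name is an assumption, not taken from this diff):

# Illustrative only: a training step logs its metric via MLflow so the tuner can
# retrieve it afterwards. The metric name is an assumption.
import mlflow

with mlflow.start_run():
    mlflow.log_metric("eval_binary_error", 0.93)

# Retrieval side, e.g. from the tuner, once the run id is known:
# mlflow.tracking.MlflowClient().get_run("<run-id>").data.metrics["eval_binary_error"]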