mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[CI] Reenable torchinductor workflow (#2527)
This commit is contained in:
16
.github/workflows/torch-inductor-tests.yml
vendored
16
.github/workflows/torch-inductor-tests.yml
vendored
@@ -1,7 +1,9 @@
|
||||
name: Torchinductor
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
workflow_run:
|
||||
workflows: ["Wheel"]
|
||||
types: [completed]
|
||||
|
||||
jobs:
|
||||
Runner-Preparation:
|
||||
@@ -23,17 +25,17 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
#- name: Packages
|
||||
# run: |
|
||||
# ./.github/workflows/torchinductor/scripts/install_torchinductor.sh
|
||||
- name: Packages
|
||||
run: |
|
||||
./.github/workflows/torch-inductor/scripts/install_torchinductor.sh torchbench
|
||||
- name: Environment
|
||||
run: |
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
./.github/workflows/torchinductor/scripts/install_triton.sh
|
||||
./.github/workflows/torch-inductor/scripts/install_triton.sh
|
||||
- name: Performance
|
||||
run: |
|
||||
./.github/workflows/torchinductor/scripts/run_torchinductor_perf.sh
|
||||
./.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh torchbench
|
||||
# Runs too long time
|
||||
#- name: Accuracy
|
||||
# run: |
|
||||
# ./.github/workflows/torchinductor/scripts/run_torchinductor_acc.sh
|
||||
# ./.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
dev,name,batch_size,speedup,abs_latency,compilation_latency,compression_ratio
|
||||
cuda,AlbertForMaskedLM,4,1.5511,164.3373,26.8523,1.2647
|
||||
cuda,AlbertForQuestionAnswering,4,1.5501,163.5580,25.7983,1.3145
|
||||
cuda,BartForCausalLM,4,1.5080,71.7230,32.8907,0.9749
|
||||
cuda,BertForMaskedLM,16,1.5350,67.9451,35.3286,1.0494
|
||||
cuda,BertForQuestionAnswering,16,1.6735,53.2963,34.3754,1.1710
|
||||
cuda,BlenderbotSmallForCausalLM,64,1.2106,46.6466,23.8058,0.9120
|
||||
cuda,BlenderbotSmallForConditionalGeneration,64,1.3616,77.3013,55.3546,0.9803
|
||||
cuda,CamemBert,16,1.4779,76.1809,35.3883,1.0469
|
||||
cuda,DebertaForMaskedLM,4,0.8415,62.3395,35.9657,1.0418
|
||||
cuda,DebertaForQuestionAnswering,8,1.0609,67.5151,35.7728,1.1528
|
||||
cuda,DebertaV2ForMaskedLM,1,0.6026,134.6517,66.1783,0.9773
|
||||
cuda,DistilBertForMaskedLM,128,1.2460,66.9382,18.3089,0.9624
|
||||
cuda,DistilBertForQuestionAnswering,256,1.3997,72.4126,18.1956,1.1486
|
||||
cuda,DistillGPT2,16,1.6656,60.5455,17.2280,1.0641
|
||||
cuda,ElectraForCausalLM,32,1.8299,45.4841,37.0944,0.9717
|
||||
cuda,ElectraForQuestionAnswering,64,2.0289,52.6890,35.9632,1.1928
|
||||
cuda,GPT2ForSequenceClassification,4,2.2567,38.2969,30.0527,1.2323
|
||||
cuda,LayoutLMForMaskedLM,16,1.5423,68.8018,36.5562,1.0495
|
||||
cuda,LayoutLMForSequenceClassification,16,1.7058,53.9355,35.2225,1.1659
|
||||
cuda,MBartForCausalLM,4,1.4945,71.4649,32.8653,0.9830
|
||||
cuda,MegatronBertForCausalLM,4,1.4328,58.4404,70.6226,1.0951
|
||||
cuda,MegatronBertForQuestionAnswering,8,1.5886,85.2533,69.1219,1.1152
|
||||
cuda,MobileBertForMaskedLM,64,0.9007,131.7379,107.5275,1.0136
|
||||
cuda,MobileBertForQuestionAnswering,128,0.8435,167.9066,106.7049,0.8579
|
||||
cuda,PLBartForCausalLM,8,1.5261,68.9224,19.5826,0.9887
|
||||
cuda,PLBartForConditionalGeneration,4,1.5298,71.2811,45.6902,1.0495
|
||||
cuda,PegasusForCausalLM,32,1.2212,57.5436,33.3863,0.9736
|
||||
cuda,PegasusForConditionalGeneration,32,1.2822,106.4678,69.8825,1.0689
|
||||
cuda,RobertaForCausalLM,16,1.6128,67.5706,34.7355,1.0496
|
||||
cuda,RobertaForQuestionAnswering,16,1.6800,53.6267,33.8527,1.1704
|
||||
cuda,Speech2Text2ForCausalLM,256,1.8230,32.9145,18.7201,0.8760
|
||||
cuda,T5ForConditionalGeneration,4,1.6592,59.5324,39.4406,1.1814
|
||||
cuda,T5Small,4,1.6581,59.5930,37.0471,1.1814
|
||||
cuda,TrOCRForCausalLM,32,1.2586,106.2633,32.5330,0.9583
|
||||
cuda,XLNetLMHeadModel,8,1.8108,142.8795,84.8197,1.1240
|
||||
cuda,YituTechConvBert,16,1.5207,81.4595,53.1565,1.0362
|
||||
|
@@ -1,54 +0,0 @@
|
||||
dev,name,batch_size,speedup,abs_latency,compilation_latency,compression_ratio
|
||||
cuda,adv_inception_v3,128,1.5923,102.5292,51.6032,1.0472
|
||||
cuda,beit_base_patch16_224,64,1.3390,75.3027,29.7471,1.0156
|
||||
cuda,coat_lite_mini,128,2.0579,53.3689,37.1856,1.0437
|
||||
cuda,convmixer_768_32,32,1.0470,275.5328,23.8037,0.9999
|
||||
cuda,convnext_base,64,1.5084,80.1811,42.5659,1.0373
|
||||
cuda,crossvit_9_240,128,1.5392,37.1806,44.9986,0.9193
|
||||
cuda,cspdarknet53,64,1.4721,75.0403,35.2882,1.0547
|
||||
cuda,deit_base_distilled_patch16_224,64,1.1432,55.9737,23.4038,0.9816
|
||||
cuda,dla102,128,1.5282,123.7284,49.3612,1.0430
|
||||
cuda,dm_nfnet_f0,128,1.4354,79.7518,34.8994,1.1038
|
||||
cuda,dpn107,32,1.2412,83.8921,58.9111,0.9952
|
||||
cuda,eca_botnext26ts_256,128,1.5425,71.2406,28.8920,1.0270
|
||||
cuda,ese_vovnet19b_dw,128,1.4647,42.4837,18.0285,1.0135
|
||||
cuda,fbnetc_100,128,1.5795,53.8033,33.0222,1.0082
|
||||
cuda,gernet_l,128,1.1684,63.4230,26.8687,1.0053
|
||||
cuda,ghostnet_100,128,1.7812,54.4211,47.6168,1.0484
|
||||
cuda,gluon_inception_v3,128,1.5952,102.5018,50.0857,1.0469
|
||||
cuda,gmixer_24_224,128,1.6749,69.2430,42.0841,1.1921
|
||||
cuda,gmlp_s16_224,128,1.5886,79.2132,43.0142,1.2343
|
||||
cuda,hrnet_w18,128,1.3743,221.5304,134.2573,1.0100
|
||||
cuda,inception_v3,128,1.5847,102.8333,49.7648,1.0472
|
||||
cuda,jx_nest_base,32,1.3747,71.4190,61.4053,0.9905
|
||||
cuda,lcnet_050,128,1.8159,18.0047,18.8249,1.0005
|
||||
cuda,mixer_b16_224,128,1.2795,90.9229,21.0438,1.0133
|
||||
cuda,mixnet_l,128,1.2273,149.9722,47.7482,1.0129
|
||||
cuda,mnasnet_100,128,1.6594,40.0512,26.5165,1.0047
|
||||
cuda,mobilenetv2_100,128,1.6085,41.1217,27.4450,1.1731
|
||||
cuda,mobilenetv3_large_100,128,1.6610,37.9995,29.8185,1.0052
|
||||
cuda,mobilevit_s,64,1.5212,55.4152,53.6475,1.0258
|
||||
cuda,nfnet_l0,128,1.4927,65.7078,32.4067,0.9980
|
||||
cuda,pit_b_224,64,1.2286,57.9484,26.5321,0.9606
|
||||
cuda,pnasnet5large,16,1.0000,198.2494,93.4641,1.3184
|
||||
cuda,poolformer_m36,64,1.3486,103.9235,62.3196,1.1942
|
||||
cuda,regnety_002,128,1.3030,32.4968,27.2439,1.0014
|
||||
cuda,repvgg_a2,128,1.2485,59.7729,26.9209,1.0185
|
||||
cuda,res2net101_26w_4s,64,1.0813,94.1773,86.6520,0.9655
|
||||
cuda,res2net50_14w_8s,128,1.3251,109.5258,79.9578,0.9830
|
||||
cuda,res2next50,128,1.2518,125.5008,43.9754,0.9756
|
||||
cuda,resmlp_12_224,128,1.3060,45.2373,19.3709,1.1048
|
||||
cuda,resnest101e,64,1.4346,108.1945,78.1993,1.1037
|
||||
cuda,rexnet_100,128,1.4637,55.0121,41.2075,1.0862
|
||||
cuda,selecsls42b,128,1.4284,44.6645,23.3892,1.0139
|
||||
cuda,spnasnet_100,128,1.5908,45.3189,32.0148,1.0048
|
||||
cuda,swin_base_patch4_window7_224,64,1.6164,89.5854,75.5848,0.9299
|
||||
cuda,swsl_resnext101_32x16d,32,1.0175,110.0041,45.7853,1.0003
|
||||
cuda,tf_efficientnet_b0,128,1.5271,55.7361,34.5551,1.1079
|
||||
cuda,tf_mixnet_l,128,1.2369,155.9027,48.6695,1.0921
|
||||
cuda,tinynet_a,128,1.3792,53.0640,40.6346,1.1108
|
||||
cuda,tnt_s_patch16_224,128,3.1078,104.8486,59.6028,1.0660
|
||||
cuda,twins_pcpvt_base,64,1.5921,67.4600,84.4977,1.0909
|
||||
cuda,visformer_small,128,1.1952,72.8705,23.7303,1.0410
|
||||
cuda,vit_base_patch16_224,64,1.1309,56.4866,22.0208,0.9804
|
||||
cuda,volo_d1_224,64,1.6868,72.0957,65.3011,0.9729
|
||||
|
@@ -1,53 +0,0 @@
|
||||
dev,name,batch_size,speedup,abs_latency,compilation_latency,compression_ratio
|
||||
cuda,BERT_pytorch,16,1.7111,24.2741,35.7065,1.3212
|
||||
cuda,LearningToPaint,96,1.0513,10.7557,11.1879,0.9896
|
||||
cuda,Super_SloMo,6,1.3267,60.4328,28.2097,1.2392
|
||||
cuda,alexnet,128,1.1754,8.3246,5.3319,1.0003
|
||||
cuda,attention_is_all_you_need_pytorch,256,1.3416,36.4401,39.5927,1.1774
|
||||
cuda,dcgan,32,0.9151,2.6249,3.2964,1.0082
|
||||
cuda,densenet121,4,0.9225,51.3747,68.5841,0.9930
|
||||
cuda,doctr_det_predictor,0,0.0000
|
||||
cuda,doctr_reco_predictor,0,0.0000
|
||||
cuda,drq,1,0.9500,3.4884,4.8028,0.9687
|
||||
cuda,fastNLP_Bert,6,1.4328,34.7753,35.4863,1.2368
|
||||
cuda,functorch_dp_cifar10,64,1.2015,8.1625,12.9040,1.0609
|
||||
cuda,functorch_maml_omniglot,1,0.9322,2.5844,3.8640,1.0000
|
||||
cuda,hf_Albert,8,2.1228,30.3377,26.8282,1.2676
|
||||
cuda,hf_Bart,4,1.2899,39.1935,47.2373,1.0080
|
||||
cuda,hf_Bert,4,1.3262,26.1063,35.0281,1.0656
|
||||
cuda,hf_Bert_large,4,1.4163,55.1021,67.2825,1.0915
|
||||
cuda,hf_DistilBert,8,1.4051,21.7191,18.0399,1.0242
|
||||
cuda,hf_GPT2,4,1.6661,26.9039,29.9473,1.1555
|
||||
cuda,hf_Longformer,0,0.0000
|
||||
cuda,hf_Reformer,4,1.1709,64.6979,15.7035,0.9267
|
||||
cuda,hf_T5_large,2,1.7215,107.0798,148.8805,1.1684
|
||||
cuda,lennard_jones,1000,0.8428,1.8488,3.0609,1.0001
|
||||
cuda,maml_omniglot,32,0.9648,2.6869,3.9775,0.9999
|
||||
cuda,mnasnet1_0,32,1.0469,21.6251,25.8232,0.9996
|
||||
cuda,mobilenet_v2,96,1.5604,31.9572,27.0225,1.1734
|
||||
cuda,nvidia_deeprecommender,256,1.0605,9.2080,4.1318,0.9711
|
||||
cuda,phlippe_densenet,128,1.0237,27.5988,28.0400,1.0023
|
||||
cuda,phlippe_resnet,128,1.0493,10.9751,10.2485,1.0092
|
||||
cuda,pytorch_CycleGAN_and_pix2pix,1,1.3724,8.2225,11.9561,1.0219
|
||||
cuda,pytorch_stargan,16,1.1835,11.9178,10.0507,1.0868
|
||||
cuda,pytorch_unet,1,1.3787,29.7543,13.7711,1.0100
|
||||
cuda,resnet152,32,0.9834,63.2446,67.7935,0.9991
|
||||
cuda,resnet18,16,0.9451,9.4977,11.7663,0.9948
|
||||
cuda,resnet50,32,1.0513,24.5141,24.6629,1.0021
|
||||
cuda,resnext50_32x4d,8,0.9216,22.2460,24.3420,0.9984
|
||||
cuda,shufflenet_v2_x1_0,128,1.1943,25.4520,28.8611,1.0951
|
||||
cuda,soft_actor_critic,256,0.8691,1.9637,3.3716,0.9996
|
||||
cuda,speech_transformer,32,1.2718,35.2922,46.9957,1.0897
|
||||
cuda,squeezenet1_1,32,1.1302,8.4540,7.9625,1.0771
|
||||
cuda,timm_efficientdet,1,1.3370,80.0377,120.1814,1.2713
|
||||
cuda,timm_efficientnet,32,1.1874,27.6302,33.9059,1.0971
|
||||
cuda,timm_nfnet,128,1.4525,77.3461,34.3270,1.1056
|
||||
cuda,timm_regnet,32,1.0644,50.6953,35.7562,1.0000
|
||||
cuda,timm_resnest,32,1.6200,14.7763,17.2245,1.0906
|
||||
cuda,timm_vision_transformer,32,1.0800,19.4188,22.0255,0.9966
|
||||
cuda,timm_vision_transformer_large,32,1.0081,393.1742,127.8083,0.9735
|
||||
cuda,timm_vovnet,32,1.1472,22.4727,22.7328,1.0120
|
||||
cuda,torchrec_dlrm,0,0.0000
|
||||
cuda,tts_angular,64,0.8974,6.5057,2.5555,0.9973
|
||||
cuda,vgg16,64,1.2909,50.7405,6.1510,0.9828
|
||||
cuda,yolov3,16,1.2930,54.8069,41.9269,1.0563
|
||||
|
@@ -33,12 +33,22 @@ def compare(baseline: dict, new: dict, threshold: float,
|
||||
print(f"New benchmark {key} not found in baseline")
|
||||
baseline_latency = baseline[key].latency
|
||||
new_latency = new[key].latency
|
||||
if baseline_latency == 0:
|
||||
print(f"Baseline latency for {key} is 0")
|
||||
continue
|
||||
elif new_latency == 0:
|
||||
print(f"New latency for {key} is 0")
|
||||
continue
|
||||
|
||||
if new_latency < baseline_latency * (1 - threshold):
|
||||
print(
|
||||
f"New benchmark {key} is faster than baseline: {new_latency} vs {baseline_latency}")
|
||||
elif new_latency > baseline_latency * (1 + threshold):
|
||||
print(
|
||||
f"New benchmark {key} is slower than baseline: {new_latency} vs {baseline_latency}")
|
||||
else:
|
||||
print(
|
||||
f"New benchmark {key} is within threshold: {new_latency} vs {baseline_latency}")
|
||||
baseline_geomean *= baseline[key].speedup
|
||||
new_geomean *= new[key].speedup
|
||||
|
||||
@@ -46,7 +56,7 @@ def compare(baseline: dict, new: dict, threshold: float,
|
||||
new_geomean = new_geomean ** (1 / len(new))
|
||||
print(f"Baseline geomean: {baseline_geomean}")
|
||||
print(f"New geomean: {new_geomean}")
|
||||
assert new_geomean > baseline_geomean * (1 - geomean_threshold), \
|
||||
assert new_geomean >= baseline_geomean * (1 - geomean_threshold), \
|
||||
f"New geomean is slower than baseline: {new_geomean} vs {baseline_geomean}"
|
||||
|
||||
|
||||
|
||||
@@ -2,19 +2,24 @@
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
MODEL_SPEC=$1
|
||||
|
||||
# torchinductor venv
|
||||
whoami
|
||||
# clean up old venv
|
||||
rm -rf /opt/torchinductor_venv
|
||||
python3 -m venv /opt/torchinductor_venv
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source ./.github/workflows/torchinductor/scripts/common.sh
|
||||
source ./.github/workflows/torch-inductor/scripts/common.sh
|
||||
|
||||
# pytorch nightly
|
||||
pip3 install --force-reinstall --pre torch torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu118
|
||||
pip3 install --force-reinstall --pre torch torchtext torchvision torchaudio torchrec --extra-index-url https://download.pytorch.org/whl/nightly/cu121
|
||||
# pytorch source to get torchbench for dynamo
|
||||
cd /opt || exit
|
||||
# cleanup old pytorch
|
||||
rm -rf pytorch
|
||||
git clone --recursive https://github.com/pytorch/pytorch
|
||||
cd pytorch || exit
|
||||
# if you are updating an existing checkout
|
||||
@@ -23,20 +28,31 @@ git submodule update --init --recursive
|
||||
cd ..
|
||||
|
||||
# required packages
|
||||
pip3 install expecttest psutil
|
||||
# https://github.com/pytorch/benchmark/blob/main/docker/gcp-a100-runner-dind.dockerfile#L17
|
||||
sudo apt-get install --yes libpango-1.0-0 libpangoft2-1.0-0
|
||||
pip3 install --upgrade pip
|
||||
pip3 install expecttest psutil lightning-utilities pyre_extensions
|
||||
|
||||
# torchbench
|
||||
pip3 install pyyaml
|
||||
git clone https://github.com/pytorch/benchmark.git
|
||||
cd benchmark || exit
|
||||
python3 install.py
|
||||
cd ..
|
||||
if [ "$MODEL_SPEC" == "torchbench" ] || [ "$MODEL_SPEC" != "all" ]; then
|
||||
# clean up old torchbench
|
||||
rm -rf benchmark
|
||||
pip3 install pyyaml
|
||||
git clone https://github.com/pytorch/benchmark.git
|
||||
cd benchmark || exit
|
||||
python3 install.py
|
||||
cd ..
|
||||
fi
|
||||
|
||||
# timm
|
||||
git clone https://github.com/huggingface/pytorch-image-models.git
|
||||
cd pytorch-image-models || exit
|
||||
pip3 install -e .
|
||||
cd ..
|
||||
if [ "$MODEL_SPEC" == "timm_models" ] || [ "$MODEL_SPEC" != "all" ]; then
|
||||
# clean up old timm
|
||||
rm -rf pytorch-image-models
|
||||
git clone https://github.com/huggingface/pytorch-image-models.git
|
||||
cd pytorch-image-models || exit
|
||||
pip3 install -e .
|
||||
cd ..
|
||||
fi
|
||||
|
||||
# build our own triton
|
||||
cd "$ROOT" || exit
|
||||
|
||||
@@ -6,7 +6,7 @@ ROOT="$(pwd)"
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source ./.github/workflows/torchinductor/scripts/common.sh
|
||||
source ./.github/workflows/torch-inductor/scripts/common.sh
|
||||
|
||||
# build our own triton
|
||||
cd python || exit
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
INDUCTOR="$ROOT"/.github/workflows/torchinductor
|
||||
INDUCTOR="$ROOT"/.github/workflows/torch-inductor
|
||||
MODEL_SPEC=$1
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
@@ -14,6 +15,9 @@ TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Running accuracy test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --device cuda \
|
||||
--output "$TEST_REPORTS_DIR"/inference_"$model".csv
|
||||
@@ -25,6 +29,9 @@ done
|
||||
|
||||
cd "$ROOT" || exit
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Checking accuracy test for $model"
|
||||
python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/inference_"$model".csv
|
||||
python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/training_"$model".csv
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
|
||||
# remember where we started
|
||||
ROOT="$(pwd)"
|
||||
INDUCTOR="$ROOT"/.github/workflows/torchinductor
|
||||
INDUCTOR="$ROOT"/.github/workflows/torch-inductor
|
||||
MODEL_SPEC=$1
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source /opt/torchinductor_venv/bin/activate
|
||||
@@ -14,19 +15,46 @@ sudo nvidia-smi -i 0 -pm 1
|
||||
sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
|
||||
|
||||
cd "$PYTORCH_DIR" || exit
|
||||
TEST_REPORTS_DIR=$TEST_REPORTS_DIR/perf
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
TRITON_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/perf
|
||||
BASE_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
|
||||
mkdir -p "$TRITON_TEST_REPORTS_DIR"
|
||||
mkdir -p "$BASE_TEST_REPORTS_DIR"
|
||||
|
||||
|
||||
echo "Running with Triton Nightly"
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Running performance test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --ci --training --performance --disable-cudagraphs\
|
||||
--device cuda --inductor --amp --output "$TEST_REPORTS_DIR"/"$model".csv
|
||||
python3 benchmarks/dynamo/"$model".py --float32 -dcuda --training --inductor --performance \
|
||||
--output "$TRITON_TEST_REPORTS_DIR"/"$model".csv
|
||||
done
|
||||
|
||||
# install pytorch-triton
|
||||
pip3 uninstall triton -y
|
||||
pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu121
|
||||
|
||||
echo "Running with pytorch-triton"
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Running performance test for $model"
|
||||
python3 benchmarks/dynamo/"$model".py --float32 -dcuda --training --inductor --performance \
|
||||
--output "$BASE_TEST_REPORTS_DIR"/"$model".csv
|
||||
done
|
||||
|
||||
# uninstall pytorch-triton
|
||||
pip3 uninstall pytorch-triton -y
|
||||
|
||||
cd "$ROOT" || exit
|
||||
for model in "${MODELS[@]}"; do
|
||||
if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
|
||||
continue
|
||||
fi
|
||||
echo "Checking performance test for $model"
|
||||
python3 "$INDUCTOR"/scripts/check_perf.py --new "$TEST_REPORTS_DIR"/"$model".csv --baseline "$INDUCTOR"/data/"$model".csv
|
||||
python3 "$INDUCTOR"/scripts/check_perf.py --new "$TRITON_TEST_REPORTS_DIR"/"$model".csv --baseline "$BASE_TEST_REPORTS_DIR"/"$model".csv
|
||||
EXIT_STATUS=$?
|
||||
if [ "$EXIT_STATUS" -ne 0 ]; then
|
||||
echo "Performance test for $model failed"
|
||||
|
||||
Reference in New Issue
Block a user