Compare commits

...

10 Commits

Author SHA1 Message Date
Daniel Su
8fdba786d4 echo env vars 2025-06-12 09:48:02 -04:00
Daniel Su
36c719a01d 4 hour timeout 2025-06-02 16:24:17 -04:00
Daniel Su
a2a8952400 fix pytorch unit tests indent level 2025-06-02 16:11:34 -04:00
Daniel Su
53cfe3ac08 print out new vars 2025-06-02 16:06:05 -04:00
Daniel Su
063022ce93 update to rocm-examples amd-staging 2025-06-02 15:38:38 -04:00
Daniel Su
fec20d54c0 remove trailing quote 2025-06-02 13:51:36 -04:00
Daniel Su
560c727e80 remove 'rocroller' from url 2025-06-02 13:49:50 -04:00
Daniel Su
1ad1cea5f2 remove rocdecode test filter 2025-06-02 13:38:33 -04:00
Daniel Su
98859d790f torchCloneBranch 2025-06-02 12:19:08 -04:00
Daniel Su
fb3013e579 Ex CI: create springfield pipeline 2025-05-29 10:51:48 -04:00

View File

@@ -0,0 +1,321 @@
# Runtime parameters. Entries without a default must be supplied when the
# pipeline is queued.
parameters:
# File name of the ROCm installer, fetched from "$SPRINGFIELD_URL/rocm/".
- name: rocmFilename
  type: string
# Full installer URL; used instead of rocmFilename unless it equals 'null'.
- name: rocmFilenameOverride
  type: string
  default: null
# Gates the ROCm test steps.
- name: testRocm
  type: boolean
  default: true
# Gates the Python wheel installation and the PyTorch test steps.
- name: enableTorch
  type: boolean
  default: true
# Path segment under "$SPRINGFIELD_URL/wheels/" that holds the wheel files.
- name: wheelVersion
  type: string
# Wheel file names, each resolved relative to the wheelVersion directory.
- name: apexFilename
  type: string
- name: tritonFilename
  type: string
- name: torchFilename
  type: string
- name: visionFilename
  type: string
# Branch of ROCm/pytorch cloned for the PyTorch unit tests.
- name: torchCloneBranch
  type: string
  default: main

# No CI or PR triggers are configured for this pipeline.
trigger: none
pr: none
jobs:
# Single job: installs ROCm (and optionally the PyTorch wheels) under
# $(Agent.BuildDirectory), then runs the ROCm and PyTorch test steps.
- job: springfield
  timeoutInMinutes: 240
  variables:
  - group: common
  - template: /.azuredevops/variables-global.yml
  pool: ${{ variables.GFX942_TEST_POOL }}
  workspace:
    clean: all
  steps:
  - checkout: none
  # Log every parameter so a run can be reproduced from its output.
  - task: Bash@3
    displayName: Print run parameters
    inputs:
      targetType: inline
      script: |
        echo "rocmFilename: ${{ parameters.rocmFilename }}"
        echo "rocmFilenameOverride: ${{ parameters.rocmFilenameOverride }}"
        echo "testRocm: ${{ parameters.testRocm }}"
        echo "enableTorch: ${{ parameters.enableTorch }}"
        echo "wheelVersion: ${{ parameters.wheelVersion }}"
        echo "apexFilename: ${{ parameters.apexFilename }}"
        echo "tritonFilename: ${{ parameters.tritonFilename }}"
        echo "torchFilename: ${{ parameters.torchFilename }}"
        echo "visionFilename: ${{ parameters.visionFilename }}"
        echo "torchCloneBranch: ${{ parameters.torchCloneBranch }}"
  # Install base packages and create the Python 3.11 venv that later steps
  # activate.
  - task: Bash@3
    displayName: Setup
    inputs:
      targetType: inline
      script: |
        sudo DEBIAN_FRONTEND=noninteractive apt update
        sudo DEBIAN_FRONTEND=noninteractive apt install -y tzdata
        sudo DEBIAN_FRONTEND=noninteractive apt install -y git ninja-build python3.11 python3.11-venv python3.11-dev
        which python3.11 && python3.11 --version
        python3.11 -m venv $(Agent.BuildDirectory)/venv
        source $(Agent.BuildDirectory)/venv/bin/activate
        which python3 && python3 --version
        python3 -m pip install 'cmake<4'
  # Exactly one of the two script entries is inserted at template-expansion
  # time, depending on whether rocmFilenameOverride equals 'null'.
  - task: Bash@3
    displayName: Download ROCm installer
    env:
      SPRINGFIELD_URL: $(SPRINGFIELD_URL)
    inputs:
      targetType: inline
      ${{ if eq(parameters.rocmFilenameOverride, 'null') }}:
        script: wget -nv "$SPRINGFIELD_URL/rocm/${{ parameters.rocmFilename }}" -O rocm-installer.run
      ${{ if ne(parameters.rocmFilenameOverride, 'null') }}:
        script: wget -nv "${{ parameters.rocmFilenameOverride }}" -O rocm-installer.run
  - task: Bash@3
    displayName: Install ROCm
    retryCountOnTaskFailure: 3
    inputs:
      targetType: inline
      script: chmod +x ./rocm-installer.run && ./rocm-installer.run target="$(Agent.BuildDirectory)" deps=install rocm postrocm
  # Publish ROCM_PATH, PATH and LD_LIBRARY_PATH to the following steps through
  # logging commands. The exports only exist so the echoed values are the ones
  # actually published.
  - task: Bash@3
    displayName: Set env vars
    inputs:
      targetType: inline
      script: |
        export ROCM_PATH="$(Agent.BuildDirectory)/rocm"
        # Prepend locally so the echoed PATH matches the prependpath command below.
        export PATH="$ROCM_PATH/bin:$PATH"
        # Only add the ':' separator when LD_LIBRARY_PATH already has content;
        # an empty entry would make the loader search the current directory.
        export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$ROCM_PATH/lib:$ROCM_PATH/llvm/lib"
        echo "ROCM_PATH=$ROCM_PATH"
        echo "PATH=$PATH"
        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
        echo "##vso[task.setvariable variable=ROCM_PATH]$ROCM_PATH"
        echo "##vso[task.prependpath]$ROCM_PATH/bin"
        echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH"
  - ${{ if parameters.enableTorch }}:
    # The base URL is read from the mapped environment variable (same as the
    # download step) rather than being spliced into the script text.
    - task: Bash@3
      displayName: Install Python wheels
      continueOnError: true
      env:
        SPRINGFIELD_URL: $(SPRINGFIELD_URL)
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          WHEEL_BASE="$SPRINGFIELD_URL/wheels/${{ parameters.wheelVersion }}"
          python3 -m pip install \
            "$WHEEL_BASE/${{ parameters.apexFilename }}" \
            "$WHEEL_BASE/${{ parameters.tritonFilename }}" \
            "$WHEEL_BASE/${{ parameters.torchFilename }}" \
            "$WHEEL_BASE/${{ parameters.visionFilename }}"
    # No SPRINGFIELD_URL mapping here: the script never references it, so it
    # is not exposed to the package build.
    - task: Bash@3
      displayName: Install Flash Attention
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          python3 -m pip install packaging wheel
          python3 -m pip install --upgrade setuptools
          python3 -m pip install git+https://github.com/rocm/flash-attention.git@ck_tile/fa3_fremont --no-build-isolation
  # NOTE(review): placed at job level (outside the enableTorch block) on the
  # assumption that the listing should run regardless; confirm against the
  # original indentation.
  - task: Bash@3
    displayName: List files
    condition: always()
    inputs:
      targetType: inline
      script: ls -la1R $(Agent.BuildDirectory)
  - ${{ if parameters.testRocm }}:
    # Command-line tool smoke tests; each one is allowed to fail on its own.
    - script: rocminfo
      displayName: 'Test: rocminfo'
      continueOnError: true
    - script: rocm_agent_enumerator
      displayName: 'Test: rocm_agent_enumerator'
      continueOnError: true
    - script: amdclang++ --version
      displayName: 'Test: amdclang++ --version'
      continueOnError: true
    - script: amdclang --version
      displayName: 'Test: amdclang --version'
      continueOnError: true
    - script: rocm-smi
      displayName: 'Test: rocm-smi'
      continueOnError: true
    - script: amd-smi version
      displayName: 'Test: amd-smi version'
      continueOnError: true
    - script: amd-smi list
      displayName: 'Test: amd-smi list'
      continueOnError: true
    - script: amd-smi static
      displayName: 'Test: amd-smi static'
      continueOnError: true
    - script: amd-smi metric
      displayName: 'Test: amd-smi metric'
      continueOnError: true
    # rocDecode: configure the test project shipped with the install, run it
    # with ctest, then publish the JUnit report.
    - task: Bash@3
      displayName: Build rocDecode
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          export CXX=$ROCM_PATH/bin/amdclang++
          export CC=$ROCM_PATH/bin/amdclang
          mkdir -p $(Agent.BuildDirectory)/rocdecode
          cd $(Agent.BuildDirectory)/rocdecode
          cmake $ROCM_PATH/share/rocdecode/test
    - task: Bash@3
      displayName: 'Test: rocDecode'
      continueOnError: true
      inputs:
        targetType: inline
        script: ctest --output-on-failure --output-junit rocdecode_results.xml
        workingDirectory: $(Agent.BuildDirectory)/rocdecode
    - task: PublishTestResults@2
      displayName: Publish rocDecode
      continueOnError: true
      inputs:
        searchFolder: $(Agent.BuildDirectory)/rocdecode
        testResultsFormat: JUnit
        testResultsFiles: rocdecode_results.xml
    # rocm-examples: clone, build for gfx942, run with ctest, publish.
    - task: Bash@3
      displayName: Build rocm-examples
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          export CXX=$ROCM_PATH/bin/amdclang++
          export CC=$ROCM_PATH/bin/amdclang
          cd $(Agent.BuildDirectory)
          sudo DEBIAN_FRONTEND=noninteractive apt install -y libglfw3-dev
          git clone https://github.com/ROCm/rocm-examples.git
          cd rocm-examples
          mkdir build && cd build
          cmake .. -DGPU_TARGETS=gfx942 -DROCM_ROOT=$ROCM_PATH
          cmake --build . -- -j$(nproc)
    - task: Bash@3
      displayName: 'Test: rocm-examples'
      continueOnError: true
      inputs:
        targetType: inline
        script: ctest --output-on-failure --output-junit rocm-examples_results.xml
        workingDirectory: $(Agent.BuildDirectory)/rocm-examples/build
    - task: PublishTestResults@2
      displayName: Publish rocm-examples
      continueOnError: true
      inputs:
        searchFolder: $(Agent.BuildDirectory)/rocm-examples/build
        testResultsFormat: JUnit
        testResultsFiles: rocm-examples_results.xml
    # rccl: clone, build with tests enabled, run the gtest binary, publish.
    - task: Bash@3
      displayName: Build rccl
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          export CXX=$ROCM_PATH/bin/amdclang++
          export CC=$ROCM_PATH/bin/amdclang
          cd $(Agent.BuildDirectory)
          git clone https://github.com/ROCm/rccl.git
          cd rccl
          mkdir build && cd build
          cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DGPU_TARGETS=gfx942 -DROCM_PATH=$ROCM_PATH
          cmake --build . -- -j$(nproc)
    - task: Bash@3
      displayName: 'Test: rccl'
      continueOnError: true
      inputs:
        targetType: inline
        script: ./rccl-UnitTests --gtest_color=yes --gtest_output=xml:./rccl_results.xml
        workingDirectory: $(Agent.BuildDirectory)/rccl/build/test
    - task: PublishTestResults@2
      displayName: Publish rccl
      continueOnError: true
      inputs:
        searchFolder: $(Agent.BuildDirectory)/rccl/build/test
        testResultsFormat: JUnit
        testResultsFiles: rccl_results.xml
    # ROCmValidationSuite: run the bundled QA script, then dump the .txt
    # files found in the testscripts directory to the log.
    - task: Bash@3
      displayName: 'Test: ROCmValidationSuite'
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          cd $(Agent.BuildDirectory)/rocm/share/rocm-validation-suite/testscripts
          sudo chmod +x *.sh
          sudo ./rvsqa.new.sh
    - task: Bash@3
      displayName: Print RVS logs
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          for file in $(Agent.BuildDirectory)/rocm/share/rocm-validation-suite/testscripts/*.txt; do
            echo "Viewing file: $file"
            cat "$file"
          done
  - ${{ if parameters.enableTorch }}:
    # PyTorch checks, all run inside the venv created by the Setup step.
    - task: Bash@3
      displayName: 'Test: import torch'
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          python3 -c 'import torch'
    - task: Bash@3
      displayName: 'Test: torch.cuda.is_available()'
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          python3 -c 'import torch; print(torch.cuda.is_available())'
    - task: Bash@3
      displayName: 'Test: MNIST'
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          cd $(Agent.BuildDirectory)
          git clone https://github.com/pytorch/examples.git --depth=1
          cd examples/mnist
          python3 -m pip install -r requirements.txt
          python3 main.py
    - task: Bash@3
      displayName: 'Test: PyTorch unit tests'
      continueOnError: true
      inputs:
        targetType: inline
        script: |
          source $(Agent.BuildDirectory)/venv/bin/activate
          cd $(Agent.BuildDirectory)
          git clone https://github.com/ROCm/pytorch.git -b ${{ parameters.torchCloneBranch }} --depth=1
          cd pytorch
          python3 -m pip install -r requirements.txt
          python3 -m pip install -r .ci/docker/requirements-ci.txt
          PYTORCH_TEST_WITH_ROCM=1 python3 test/run_test.py --verbose --keep-going \
            --include test_nn test_torch test_cuda test_ops \
            test_unary_ufuncs test_binary_ufuncs test_autograd
  # Final listing of the build directory; runs even when earlier steps failed.
  - task: Bash@3
    displayName: List files
    condition: always()
    inputs:
      targetType: inline
      script: ls -la1R $(Agent.BuildDirectory)