External CI: suppress GPU diag warnings (#3972)

This commit is contained in:
Daniel Su
2024-10-30 16:29:02 -04:00
committed by GitHub
parent 75f7dfbac3
commit 750484ab57

View File

@@ -11,44 +11,44 @@ steps:
continueOnError: true
inputs:
targetType: inline
script: $(Agent.BuildDirectory)/rocm/bin/rocminfo
script: $(Agent.BuildDirectory)/rocm/bin/rocminfo || true
# Diagnostic step: runs the rocm_agent_enumerator binary from the ROCm tree
# under $(Agent.BuildDirectory).
# NOTE(review): this listing appears to be a diff hunk, so the two `script:`
# lines below are the before/after versions of one key, not a duplicate key.
# The newer line appends `|| true`, which makes the shell command exit 0 even
# when the binary fails; per the commit title this is meant to suppress the
# warning that `continueOnError: true` alone would still surface - confirm.
- task: Bash@3
displayName: 'rocm_agent_enumerator'
continueOnError: true
inputs:
targetType: inline
script: $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator
script: $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator || true
# Diagnostic step: long-lists the DRI device nodes.
# NOTE(review): this listing appears to be a diff hunk, so the two `script:`
# lines below are the before/after versions of one key. The newer line makes
# two changes: it lists /dev/dri/ instead of /dev/dri/by-path/, and it appends
# `|| true` so the command exits 0 even when `ls` fails (e.g. the directory is
# absent).
- task: Bash@3
displayName: 'List DRI devices'
continueOnError: true
inputs:
targetType: inline
script: ls -la /dev/dri/by-path/
script: ls -la /dev/dri/ || true
# Diagnostic step: prints installed apt packages whose listing line matches
# amdgpu, rocm, or mesa.
# NOTE(review): this listing appears to be a diff hunk, so the two `script:`
# lines below are the before/after versions of one key. grep exits non-zero
# when nothing matches; the newer line appends `|| true` so the pipeline
# still exits 0 in that case.
- task: Bash@3
displayName: 'List amdgpu/rocm/mesa packages'
continueOnError: true
inputs:
targetType: inline
script: apt list --installed | grep -E 'amdgpu|rocm|mesa'
script: apt list --installed | grep -E 'amdgpu|rocm|mesa' || true
# Diagnostic step: lists the entries under /sys/class/kfd/kfd/proc/ and then
# filters `sudo lsof` output for lines mentioning amdgpu.
# NOTE(review): this listing appears to be a diff hunk, so the literal script
# below shows the old two commands followed by the new two commands; only one
# pair exists in the real file at a time. The newer pair appends `|| true` to
# each command so neither a missing sysfs path nor an empty grep result
# produces a non-zero exit status.
- task: Bash@3
displayName: 'List GPU processes'
continueOnError: true
inputs:
targetType: inline
script: |
ls /sys/class/kfd/kfd/proc/
sudo lsof | grep amdgpu
ls /sys/class/kfd/kfd/proc/ || true
sudo lsof | grep amdgpu || true
# Diagnostic step: captures a single batch-mode iteration of `top` (-b -n1).
# NOTE(review): this listing appears to be a diff hunk, so the two `script:`
# lines below are the before/after versions of one key. The newer line appends
# `|| true` so the command exits 0 even if `top` fails.
- task: Bash@3
displayName: 'System snapshot'
continueOnError: true
inputs:
targetType: inline
script: top -bn1
script: top -bn1 || true
# Diagnostic step: writes a 'rocm-ci: ...' marker line to /dev/kmsg via
# `sudo tee`, then prints the kernel log with `sudo dmesg`.
# $(Build.DefinitionName) and $(System.DefinitionId) use pipeline macro syntax;
# presumably they are substituted before the shell runs, since they sit inside
# single quotes - TODO confirm.
# NOTE(review): this listing appears to be a diff hunk, so the literal script
# below shows the old two commands followed by the new two commands; only one
# pair exists in the real file at a time. The newer pair appends `|| true` to
# each command so a failure of `tee` or `dmesg` does not yield a non-zero exit
# status.
- task: Bash@3
displayName: 'List dmesg'
continueOnError: true
inputs:
targetType: inline
script: |
echo 'rocm-ci: $(Build.DefinitionName) $(System.DefinitionId)' | sudo tee /dev/kmsg
sudo dmesg
echo 'rocm-ci: $(Build.DefinitionName) $(System.DefinitionId)' | sudo tee /dev/kmsg || true
sudo dmesg || true