diff --git a/.azuredevops/templates/steps/gpu-diagnostics.yml b/.azuredevops/templates/steps/gpu-diagnostics.yml index d2e8f210e..60bdac670 100644 --- a/.azuredevops/templates/steps/gpu-diagnostics.yml +++ b/.azuredevops/templates/steps/gpu-diagnostics.yml @@ -11,44 +11,44 @@ steps: continueOnError: true inputs: targetType: inline - script: $(Agent.BuildDirectory)/rocm/bin/rocminfo + script: $(Agent.BuildDirectory)/rocm/bin/rocminfo || true - task: Bash@3 displayName: 'rocm_agent_enumerator' continueOnError: true inputs: targetType: inline - script: $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator + script: $(Agent.BuildDirectory)/rocm/bin/rocm_agent_enumerator || true - task: Bash@3 displayName: 'List DRI devices' continueOnError: true inputs: targetType: inline - script: ls -la /dev/dri/by-path/ + script: ls -la /dev/dri/ || true - task: Bash@3 displayName: 'List amdgpu/rocm/mesa packages' continueOnError: true inputs: targetType: inline - script: apt list --installed | grep -E 'amdgpu|rocm|mesa' + script: apt list --installed | grep -E 'amdgpu|rocm|mesa' || true - task: Bash@3 displayName: 'List GPU processes' continueOnError: true inputs: targetType: inline script: | - ls /sys/class/kfd/kfd/proc/ - sudo lsof | grep amdgpu + ls /sys/class/kfd/kfd/proc/ || true + sudo lsof | grep amdgpu || true - task: Bash@3 displayName: 'System snapshot' continueOnError: true inputs: targetType: inline - script: top -bn1 + script: top -bn1 || true - task: Bash@3 displayName: 'List dmesg' continueOnError: true inputs: targetType: inline script: | - echo 'rocm-ci: $(Build.DefinitionName) $(System.DefinitionId)' | sudo tee /dev/kmsg - sudo dmesg + echo 'rocm-ci: $(Build.DefinitionName) $(System.DefinitionId)' | sudo tee /dev/kmsg || true + sudo dmesg || true