Compare commits

...

20 Commits

Author SHA1 Message Date
Joseph Macaranas
fdad6cfa27 Update pytorch.yml 2024-12-19 13:49:10 -05:00
Joseph Macaranas
b6d6e83a9b Update pytorch.yml 2024-12-19 12:40:52 -05:00
Joseph Macaranas
fe4c5dbe62 Update pytorch.yml 2024-12-19 00:07:28 -05:00
Joseph Macaranas
73f660b683 Checkout 0.20 release vision to try build 2024-12-18 18:01:54 -05:00
Joseph Macaranas
39bec204c0 Update pytorch.yml 2024-12-17 20:50:33 -05:00
Joseph Macaranas
2ab1041ffb Revert "Merge branch 'amd/jmacaran/pytorch_hip_fp16' of https://github.com/ROCm/ROCm into amd/jmacaran/pytorch_hip_fp16"
This reverts commit 4b66b6d7be, reversing
changes made to ebb6f29b58.
2024-12-17 19:38:20 -05:00
Joseph Macaranas
aacf7a96e0 Revert "Update pytorch.yml"
This reverts commit 35c25a762a.
2024-12-17 19:31:27 -05:00
Joseph Macaranas
35c25a762a Update pytorch.yml 2024-12-17 16:45:42 -05:00
Joseph Macaranas
1de2d2306b Try specific clr/HIP build 2024-12-17 16:01:50 -05:00
Joseph Macaranas
cac821b9e4 Update pytorch.yml 2024-12-17 15:10:39 -05:00
Joseph Macaranas
61827b7192 Update pytorch.yml 2024-12-17 15:10:00 -05:00
Joseph Macaranas
24b99fd952 Merge branch 'develop' into amd/jmacaran/pytorch_hip_fp16 2024-12-17 15:07:51 -05:00
Joseph Macaranas
6d965ebdb4 Update pytorch.yml 2024-12-17 15:07:25 -05:00
Joseph Macaranas
4b66b6d7be Merge branch 'amd/jmacaran/pytorch_hip_fp16' of https://github.com/ROCm/ROCm into amd/jmacaran/pytorch_hip_fp16 2024-12-16 22:30:52 -05:00
Joseph Macaranas
ebb6f29b58 Update pytorch.yml 2024-12-16 22:30:23 -05:00
amd-jmacaran
656e7a21f7 External CI: temp patch to test pytorch build failure 2024-12-16 14:05:45 -05:00
Joseph Macaranas
b028a3af96 Patch CK 2024-12-09 20:42:32 -05:00
Joseph Macaranas
77f7795edc Patch CK 2024-12-09 20:19:42 -05:00
Joseph Macaranas
0c2159c67d Adjust patch 2024-12-09 16:52:42 -05:00
amd-jmacaran
8f21bc9d1e External CI: temp patch to test pytorch build failure 2024-12-09 16:44:09 -05:00
2 changed files with 85 additions and 8 deletions

View File

@@ -99,7 +99,7 @@ parameters:
default:
- rocminfo
- MIOpen
- clr
# - clr
- hipBLAS
- hipFFT
- hipRAND
@@ -120,10 +120,11 @@ parameters:
- rocm-core
- rocPRIM
# below are additional dependencies not called out by build script, but throw errors during cmake
- hipCUB
- rocThrust
- hipBLAS-common
- composable_kernel
- hipBLAS-common
- hipCUB
- rocminfo
- rocThrust
- name: rocmTestDependencies
type: object
default:
@@ -166,11 +167,11 @@ jobs:
- template: /.azuredevops/variables-global.yml
# various flags/parameters expected by bash scripts in pytorch repo's .ci directory
- name: ROCM_VERSION
value: 6.3.0
value: 6.4.0
- name: ROCM_PATH
value: /opt/rocm
- name: DESIRED_CUDA
value: 6.3.0
value: 6.4.0
- name: MKLROOT
value: /opt/intel
- name: AOTRITON_INSTALLED_PREFIX
@@ -211,11 +212,36 @@ jobs:
script: |
sudo mkdir -p /opt/python/cp310-cp310/lib/python3.10
sudo ln -s /usr/local/lib/python3.10/dist-packages /opt/python/cp310-cp310/lib/python3.10/site-packages
- task: DownloadPipelineArtifact@2
displayName: Download Specific HIP
inputs:
buildType: 'specific'
project: ROCm-CI
definition: 145
specificBuildWithTriggering: true
itemPattern: '**/*'
buildVersionToDownload: specific
targetPath: '$(Pipeline.Workspace)/d'
pipelineId: 16515
- task: ExtractFiles@1
displayName: Extract clr
inputs:
archiveFilePatterns: '$(Pipeline.Workspace)/d/**/*.tar.gz'
destinationFolder: '$(Agent.BuildDirectory)/rocm'
cleanDestinationFolder: false
overwriteExistingFiles: true
- task: DeleteFiles@1
displayName: Cleanup Compressed clr
inputs:
SourceFolder: '$(Pipeline.Workspace)/d'
Contents: '**/*.tar.gz'
RemoveDotFiles: true
- template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
parameters:
dependencyList: ${{ parameters.rocmDependencies }}
dependencySource: staging
gpuTarget: $(JOB_GPU_TARGET)
setupHIPLibrarySymlinks: true
- task: Bash@3
displayName: ROCm symbolic link
inputs:
@@ -226,8 +252,14 @@ jobs:
displayName: git clone upstream pytorch
inputs:
targetType: inline
script: git clone https://github.com/pytorch/pytorch.git --depth=1 --recurse-submodules
script: git clone https://github.com/pytorch/pytorch.git --recurse-submodules
workingDirectory: $(Build.SourcesDirectory)
- task: Bash@3
displayName: checkout pytorch 2.5
inputs:
targetType: inline
script: git checkout release/2.5
workingDirectory: $(Build.SourcesDirectory)/pytorch
# builder clone still needed due to run_tests.sh at end of build_common.sh call
- task: Bash@3
displayName: git clone pytorch builder
@@ -271,6 +303,18 @@ jobs:
targetType: inline
script: sudo bash ./common/install_aotriton.sh /opt/rocm
workingDirectory: $(Build.SourcesDirectory)/pytorch/.ci/docker
# - task: Bash@3
# displayName: Temporarily Patch HIP
# inputs:
# targetType: inline
# script: git apply $(Build.SourcesDirectory)/.azuredevops/patches/pytorch_hip_fp16.diff
# workingDirectory: $(Agent.BuildDirectory)/rocm
# - task: Bash@3
# displayName: Temporarily Patch CK Submodule
# inputs:
# targetType: inline
# script: git pull origin develop
# workingDirectory: $(Build.SourcesDirectory)/pytorch/third_party/composable_kernel
- task: Bash@3
displayName: Run ROCm Build Script
inputs:
@@ -318,8 +362,14 @@ jobs:
displayName: git clone pytorch vision
inputs:
targetType: inline
script: git clone https://github.com/pytorch/vision.git --depth=1 --recurse-submodules
script: git clone https://github.com/pytorch/vision.git --recurse-submodules
workingDirectory: $(Build.SourcesDirectory)
- task: Bash@3
displayName: checkout release vision
inputs:
targetType: inline
script: git checkout release/0.20
workingDirectory: $(Build.SourcesDirectory)/vision
- task: Bash@3
displayName: Build vision
inputs:

View File

@@ -0,0 +1,27 @@
From 342133a5cb404beae4d7e1994338120ff99a76d2 Mon Sep 17 00:00:00 2001
From: Jatin Chaudhary <JatinJaikishan.Chaudhary@amd.com>
Date: Mon, 09 Dec 2024 11:24:29 +0000
Subject: [PATCH] SWDEV-503299 - Do not use operator to check for nan
Some libs use __HIP_NO_HALF_OPERATORS__ and __HIP_NO_HALF_CONVERSIONS__
which results in operators being hidden and can cause errors.
Change-Id: I83c194d7d727cba30b46d7c296f7d396549f5fca
---
diff --git a/include/hip/amd_detail/amd_hip_fp16.h b/include/hip/amd_detail/amd_hip_fp16.h
index c8117b1..1a08bb8 100644
--- a/include/hip/amd_detail/amd_hip_fp16.h
+++ b/include/hip/amd_detail/amd_hip_fp16.h
@@ -1679,8 +1679,9 @@
__HOST_DEVICE__
bool __hisinf(__half x)
{
- // +Inf/-Inf
- return x == HIPRT_INF_FP16 || x == __ushort_as_half((unsigned short)0xFC00U);
+ __half_raw hr = x;
+ // +/-Inf
+ return hr.x == 0x7C00U || hr.x == 0xFC00U;
}
inline
__HOST_DEVICE__