Merge remote-tracking branch 'external/develop' into sync-develop-from-external

2026-01-07 22:03:58 -05:00 · 2025-11-26 10:09:04 -05:00
parent 1b5a3e54c2 096d91e190
commit 007f24fe7b
21 changed files with 445 additions and 153 deletions
--- a/.azuredevops/components/AMDMIGraphX.yml
+++ b/.azuredevops/components/AMDMIGraphX.yml
@@ -128,6 +128,9 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
      parameters:
        cmakeVersion: '3.28.6'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
@@ -152,6 +155,7 @@ jobs:
          -DCMAKE_BUILD_TYPE=Release
          -DGPU_TARGETS=${{ job.target }}
          -DAMDGPU_TARGETS=${{ job.target }}
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
          -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
@@ -192,6 +196,9 @@ jobs:
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
      parameters:
        cmakeVersion: '3.28.6'
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
@@ -217,6 +224,7 @@ jobs:
          -DCMAKE_BUILD_TYPE=Release
          -DGPU_TARGETS=${{ job.target }}
          -DAMDGPU_TARGETS=${{ job.target }}
          -DCMAKE_CXX_COMPILER=$(Agent.BuildDirectory)/rocm/llvm/bin/amdclang++
          -DCMAKE_MODULE_PATH=$(Agent.BuildDirectory)/rocm/lib/cmake/hip
          -DCMAKE_PREFIX_PATH=$(Agent.BuildDirectory)/rocm/llvm;$(Agent.BuildDirectory)/rocm
          -DHALF_INCLUDE_DIR=$(Agent.BuildDirectory)/rocm/include
--- a/.azuredevops/components/amdsmi.yml
+++ b/.azuredevops/components/amdsmi.yml
@@ -1,10 +1,29 @@
 parameters:
 - name: componentName
  type: string
  default: amdsmi
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
 # monorepo related parameters
 - name: sparseCheckoutDir
  type: string
  default: ''
 - name: triggerDownstreamJobs
  type: boolean
  default: false
 - name: downstreamAggregateNames
  type: string
  default: ''
 - name: buildDependsOn
  type: object
  default: null
 - name: unifiedBuild
  type: boolean
  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -31,7 +50,7 @@ parameters:
 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: amdsmi_build_${{ job.os }}
+  - job: ${{ parameters.componentName }}_build_${{ job.os }}
    pool:
      ${{ if eq(job.os, 'ubuntu2404') }}:
        vmImage: 'ubuntu-24.04'
@@ -55,6 +74,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        os: ${{ job.os }}
@@ -65,50 +85,54 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
        os: ${{ job.os }}
        componentName: ${{ parameters.componentName }}
        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
        os: ${{ job.os }}
        componentName: ${{ parameters.componentName }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    # - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
    #   parameters:
    #     aptPackages: ${{ parameters.aptPackages }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
+- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - job: amdsmi_test_${{ job.os }}_${{ job.target }}
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    dependsOn: amdsmi_build_${{ job.os }}
+    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}
-    condition:
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}
-      and(succeeded(),
+      condition:
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        and(succeeded(),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        eq(${{ parameters.aggregatePipeline }}, False)
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-      )
+          eq(${{ parameters.aggregatePipeline }}, False)
-    variables:
+        )
-    - group: common
+      variables:
-    - template: /.azuredevops/variables-global.yml
+      - group: common
-    pool: ${{ job.target }}_test_pool
+      - template: /.azuredevops/variables-global.yml
-    workspace:
+      pool: ${{ job.target }}_test_pool
-      clean: all
+      workspace:
-    steps:
+        clean: all
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      steps:
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        aptPackages: ${{ parameters.aptPackages }}
+        parameters:
-        packageManager: ${{ job.packageManager }}
+          aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+          packageManager: ${{ job.packageManager }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        os: ${{ job.os }}
+        parameters:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+          os: ${{ job.os }}
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-        runRocminfo: false
+        parameters:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+          runRocminfo: false
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        componentName: amdsmi
+        parameters:
-        testDir: '$(Agent.BuildDirectory)'
+          componentName: ${{ parameters.componentName }}
-        testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
+          testDir: '$(Agent.BuildDirectory)'
-        testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
+          testExecutable: 'sudo ./rocm/share/amd_smi/tests/amdsmitst'
-        os: ${{ job.os }}
+          testParameters: '--gtest_output=xml:./test_output.xml --gtest_color=yes'
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+          os: ${{ job.os }}
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        aptPackages: ${{ parameters.aptPackages }}
+        parameters:
-        environment: test
+          aptPackages: ${{ parameters.aptPackages }}
-        gpuTarget: ${{ job.target }}
+          environment: test
          gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/hipTensor.yml
+++ b/.azuredevops/components/hipTensor.yml
@@ -1,10 +1,29 @@
 parameters:
 - name: componentName
  type: string
  default: hipTensor
 - name: checkoutRepo
  type: string
  default: 'self'
 - name: checkoutRef
  type: string
  default: ''
 # monorepo related parameters
 - name: sparseCheckoutDir
  type: string
  default: ''
 - name: triggerDownstreamJobs
  type: boolean
  default: false
 - name: downstreamAggregateNames
  type: string
  default: ''
 - name: buildDependsOn
  type: object
  default: null
 - name: unifiedBuild
  type: boolean
  default: false
 # set to true if doing full build of ROCm stack
 # and dependencies are pulled from same pipeline
 - name: aggregatePipeline
@@ -51,7 +70,7 @@ parameters:
 jobs:
 - ${{ each job in parameters.jobMatrix.buildJobs }}:
-  - job: hipTensor_build_${{ job.target }}
+  - job: ${{ parameters.componentName }}_build_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
@@ -66,12 +85,15 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/checkout.yml
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
        ${{ if parameters.triggerDownstreamJobs }}:
          downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
@@ -85,9 +107,12 @@ jobs:
          -GNinja
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/manifest.yml
      parameters:
        componentName: ${{ parameters.componentName }}
        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-upload.yml
      parameters:
        componentName: ${{ parameters.componentName }}
        gpuTarget: ${{ job.target }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/artifact-links.yml
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
@@ -95,44 +120,47 @@ jobs:
        aptPackages: ${{ parameters.aptPackages }}
        gpuTarget: ${{ job.target }}
- ${{ each job in parameters.jobMatrix.testJobs }}:
+- ${{ if eq(parameters.unifiedBuild, False) }}:
-  - job: hipTensor_test_${{ job.target }}
+  - ${{ each job in parameters.jobMatrix.testJobs }}:
-    timeoutInMinutes: 90
+    - job: ${{ parameters.componentName }}_test_${{ job.target }}
-    dependsOn: hipTensor_build_${{ job.target }}
+      timeoutInMinutes: 90
-    condition:
+      dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
-      and(succeeded(),
+      condition:
-        eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
+        and(succeeded(),
-        not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), variables['Build.DefinitionName'])),
+          eq(variables['ENABLE_${{ upper(job.target) }}_TESTS'], 'true'),
-        eq(${{ parameters.aggregatePipeline }}, False)
+          not(containsValue(split(variables['DISABLED_${{ upper(job.target) }}_TESTS'], ','), '${{ parameters.componentName }}')),
-      )
+          eq(${{ parameters.aggregatePipeline }}, False)
-    variables:
+        )
-    - group: common
+      variables:
-    - template: /.azuredevops/variables-global.yml
+      - group: common
-    pool: ${{ job.target }}_test_pool
+      - template: /.azuredevops/variables-global.yml
-    workspace:
+      pool: ${{ job.target }}_test_pool
-      clean: all
+      workspace:
-    steps:
+        clean: all
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
+      steps:
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
-        aptPackages: ${{ parameters.aptPackages }}
+        parameters:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
+          aptPackages: ${{ parameters.aptPackages }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/preamble.yml
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/local-artifact-download.yml
-        gpuTarget: ${{ job.target }}
+        parameters:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
+          gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-aqlprofile.yml
-      parameters:
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
-        checkoutRef: ${{ parameters.checkoutRef }}
+        parameters:
-        dependencyList: ${{ parameters.rocmTestDependencies }}
+          checkoutRef: ${{ parameters.checkoutRef }}
-        gpuTarget: ${{ job.target }}
+          dependencyList: ${{ parameters.rocmTestDependencies }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
+          gpuTarget: ${{ job.target }}
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
+          ${{ if parameters.triggerDownstreamJobs }}:
-      parameters:
+            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
-        componentName: hipTensor
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/gpu-diagnostics.yml
-        testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/test.yml
-        testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
+        parameters:
-    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
+          componentName: ${{ parameters.componentName }}
-      parameters:
+          testDir: '$(Agent.BuildDirectory)/rocm/bin/hiptensor'
-        aptPackages: ${{ parameters.aptPackages }}
+          testParameters: '-E ".*-extended" --extra-verbose --output-on-failure --force-new-ctest-process --output-junit test_output.xml'
-        environment: test
+      - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
-        gpuTarget: ${{ job.target }}
+        parameters:
          aptPackages: ${{ parameters.aptPackages }}
          environment: test
          gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocWMMA.yml
+++ b/.azuredevops/components/rocWMMA.yml
@@ -142,7 +142,7 @@ jobs:
 - ${{ if eq(parameters.unifiedBuild, False) }}:
  - ${{ each job in parameters.jobMatrix.testJobs }}:
    - job: ${{ parameters.componentName }}_test_${{ job.target }}
-      timeoutInMinutes: 270
+      timeoutInMinutes: 350
      dependsOn: ${{ parameters.componentName }}_build_${{ job.target }}
      condition:
        and(succeeded(),
--- a/.azuredevops/components/rocm-examples.yml
+++ b/.azuredevops/components/rocm-examples.yml
@@ -62,6 +62,7 @@ parameters:
    - llvm-project
    - MIOpen
    - MIVisionX
    - rocm_smi_lib
    - rccl
    - rocALUTION
    - rocBLAS
@@ -100,6 +101,7 @@ parameters:
    - llvm-project
    - MIOpen
    - MIVisionX
    - rocm_smi_lib
    - rccl
    - rocALUTION
    - rocBLAS
@@ -146,6 +148,7 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-other.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        registerROCmPackages: true
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-cmake-custom.yml
      parameters:
@@ -245,5 +248,6 @@ jobs:
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/docker-container.yml
      parameters:
        aptPackages: ${{ parameters.aptPackages }}
        pipModules: ${{ parameters.pipModules }}
        environment: test
        gpuTarget: ${{ job.target }}
--- a/.azuredevops/components/rocprofiler-compute.yml
+++ b/.azuredevops/components/rocprofiler-compute.yml
@@ -65,6 +65,13 @@ parameters:
    - pytest
    - pytest-cov
    - pytest-xdist
 - name: rocmDependencies
  type: object
  default:
    - clr
    - llvm-project
    - ROCR-Runtime
    - rocprofiler-sdk
 - name: rocmTestDependencies
  type: object
  default:
@@ -101,10 +108,12 @@ jobs:
    ${{ if parameters.buildDependsOn }}:
      dependsOn:
        - ${{ each build in parameters.buildDependsOn }}:
-          - ${{ build }}_${{ job.os }}_${{ job.target }}
+          - ${{ build }}_${{ job.target }}
    variables:
    - group: common
    - template: /.azuredevops/variables-global.yml
    - name: ROCM_PATH
      value: $(Agent.BuildDirectory)/rocm
    pool:
      vmImage: ${{ variables.BASE_BUILD_POOL }}
    workspace:
@@ -119,6 +128,14 @@ jobs:
      parameters:
        checkoutRepo: ${{ parameters.checkoutRepo }}
        sparseCheckoutDir: ${{ parameters.sparseCheckoutDir }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/dependencies-rocm.yml
      parameters:
        checkoutRef: ${{ parameters.checkoutRef }}
        dependencyList: ${{ parameters.rocmDependencies }}
        gpuTarget: ${{ job.target }}
        aggregatePipeline: ${{ parameters.aggregatePipeline }}
        ${{ if parameters.triggerDownstreamJobs }}:
            downstreamAggregateNames: ${{ parameters.downstreamAggregateNames }}
    - template: ${{ variables.CI_TEMPLATE_PATH }}/steps/build-cmake.yml
      parameters:
        extraBuildFlags: >-
--- a/.azuredevops/templates/steps/dependencies-dnf.yml
+++ b/.azuredevops/templates/steps/dependencies-dnf.yml
@@ -63,6 +63,7 @@ parameters:
    libopenblas-dev: openblas-devel
    libopenmpi-dev: openmpi-devel
    libpci-dev: libpciaccess-devel
    libsimde-dev: simde-devel
    libssl-dev: openssl-devel
    # note: libstdc++-devel is in the base packages list
    libsystemd-dev: systemd-devel
--- a/.azuredevops/templates/steps/dependencies-rocm.yml
+++ b/.azuredevops/templates/steps/dependencies-rocm.yml
@@ -35,8 +35,8 @@ parameters:
      developBranch: develop
      hasGpuTarget: true
    amdsmi:
-      pipelineId: 99
+      pipelineId: 376
-      developBranch: amd-staging
+      developBranch: develop
      hasGpuTarget: false
    aomp-extras:
      pipelineId: 111
@@ -115,7 +115,7 @@ parameters:
      developBranch: develop
      hasGpuTarget: true
    hipTensor:
-      pipelineId: 105
+      pipelineId: 374
      developBranch: develop
      hasGpuTarget: true
    llvm-project:
--- a/.wordlist.txt
+++ b/.wordlist.txt
@@ -140,6 +140,7 @@ EoS
 etcd
 fas
 FBGEMM
 FiLM
 FIFOs
 FFT
 FFTs
@@ -160,10 +161,12 @@ Fortran
 Fuyu
 GALB
 GAT
 GATNE
 GCC
 GCD
 GCDs
 GCN
 GCNN
 GDB
 GDDR
 GDR
@@ -182,6 +185,8 @@ Glibc
 GLXT
 Gloo
 GMI
 GNN
 GNNs
 GPG
 GPR
 GPT
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -233,7 +233,7 @@ for a complete overview of this release.
 * Fixed certain output in `amd-smi monitor` when GPUs are partitioned. It fixes the issue with amd-smi monitor such as: `amd-smi monitor -Vqt`, `amd-smi monitor -g 0 -Vqt -w 1`, and `amd-smi monitor -Vqt --file /tmp/test1`. These commands will now be able to display as normal in partitioned GPU scenarios.
 ```{note}
-See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md) for details, examples, and in-depth descriptions.
+See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
 ```
 ### **Composable Kernel** (1.1.0)
@@ -677,7 +677,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
  * Enabled `TCP_TCP_LATENCY` counter and associated counter for all GPUs except MI300.
 * Interactive metric descriptions in TUI analyze mode.
  * You can now left click on any metric cell to view detailed descriptions in the dedicated `METRIC DESCRIPTION` tab.
-* Support for analysis report output as a sqlite database using ``--output-format db`` analysis mode option.
+* Support for analysis report output as a SQLite database using ``--output-format db`` analysis mode option.
 * `Compute Throughput` panel to TUI's `High Level Analysis` category with the following metrics: VALU FLOPs, VALU IOPs, MFMA FLOPs (F8), MFMA FLOPs (BF16), MFMA FLOPs (F16), MFMA FLOPs (F32), MFMA FLOPs (F64), MFMA FLOPs (F6F4) (in gfx950), MFMA IOPs (Int8), SALU Utilization, VALU Utilization, MFMA Utilization, VMEM Utilization, Branch Utilization, IPC
 * `Memory Throughput` panel to TUI's `High Level Analysis` category with the following metrics: vL1D Cache BW, vL1D Cache Utilization, Theoretical LDS Bandwidth, LDS Utilization, L2 Cache BW, L2 Cache Utilization, L2-Fabric Read BW, L2-Fabric Write BW, sL1D Cache BW, L1I BW, Address Processing Unit Busy, Data-Return Busy, L1I-L2 Bandwidth, sL1D-L2 BW
@@ -763,7 +763,7 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 * MI300A/X L2-Fabric 64B read counter may display negative values - The rocprof-compute metric 17.6.1 (Read 64B) can report negative values due to incorrect calculation when TCC_BUBBLE_sum + TCC_EA0_RDREQ_32B_sum exceeds TCC_EA0_RDREQ_sum.
  * A workaround has been implemented using max(0, calculated_value) to prevent negative display values while the root cause is under investigation.
 * The profile mode crashes when `--format-rocprof-output json` is selected.
-  * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.  
+    * As a workaround, this option should either not be provided or should be set to `csv` instead of `json`. This issue does not affect the profiling results since both `csv` and `json` output formats lead to the same profiling data.  
 ### **ROCm Data Center Tool** (1.2.0)
@@ -804,6 +804,14 @@ See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/roc
 - Updated PAPI module to v7.2.0b2.
 - ROCprofiler-SDK is now used for tracing OMPT API calls.
 #### Known issues
 * Profiling PyTorch and other AI workloads might fail because it is unable to find the libraries in the default linker path. As a workaround, you need to explicitly add the library path to ``LD_LIBRARY_PATH``. For example, when using PyTorch with Python 3.10, add the following to the environment:
 ```
 export LD_LIBRARY_PATH=/opt/venv/lib/python3.10/site-packages/torch/lib:$LD_LIBRARY_PATH
 ```
 ### **rocPRIM** (4.1.0)
 #### Added
@@ -881,17 +889,12 @@ As of ROCm 7.0, the internal error state is cleared on each call to `hipGetLastE
 ### **rocSOLVER** (3.31.0)
 #### Added
 * Hybrid computation support for existing routines: STEQR
 #### Optimized
 Improved the performance of:
-* BDSQR and downstream functions such as GESVD.
+* LARF, LARFT, GEQR2, and downstream functions such as GEQRF.
-* STEQR and downstream functions such as SYEV/HEEV.
+* STEDC and divide and conquer Eigensolvers.
 * LARFT and downstream functions such as GEQR2 and GEQRF.
 ### **rocSPARSE** (4.1.0)
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -676,6 +676,10 @@ For a historical overview of ROCm component updates, see the {doc}`ROCm consolid
 - Fixed output for `amd-smi xgmi -l --json`.  
 ```{note}
 See the full [AMD SMI changelog](https://github.com/ROCm/amdsmi/blob/release/rocm-rel-7.1/CHANGELOG.md#amd_smi_lib-for-rocm-710) for details, examples, and in-depth descriptions.
 ```
 ### **Composable Kernel** (1.1.0)
 #### Upcoming changes
@@ -863,6 +867,22 @@ be fixed in a future ROCm release.
 Due to partial data corruption of Electrically Erasable Programmable Read-Only Memory (EEPROM) and limited error handling in the AMD GPU Driver (amdgpu), excessive log output might result when querying the reliability, availability, and serviceability (RAS) bad pages. This issue will be fixed in a future AMD GPU Driver (amdgpu) and ROCm release.
 ### OpenBLAS runtime dependency for hipblaslt-test and hipblaslt-bench
 Running `hipblaslt-test` or `hipblaslt-bench` without installing the OpenBLAS development package results in the following error:
 ```
 libopenblas.so.0: cannot open shared object file: No such file or directory
 ```
 As a workaround, first install `libopenblas-dev` or `openblas-devel`, depending on the package manager used. The issue will be fixed in a future ROCm release. See [GitHub issue #5639](https://github.com/ROCm/ROCm/issues/5639).
 ### Reduced precision in gemm_ex operations for rocBLAS and hipBLAS
 Some `gemm_ex` operations with `half` or `f32_r` data types might yield 16-bit precision results instead of the expected 32-bit precision when matrix dimensions are m=1 or n=1. The issue results from the optimization that enables `_ex` APIs to use lower precision multiples. It limits the high-precision matrix operations performed in PyTorch with rocBLAS and hipBLAS. The issue will be fixed in a future ROCm release. See [GitHub issue #5640](https://github.com/ROCm/ROCm/issues/5640).
 ### RCCL profiler plugin failure with AllToAll operations
 The RCCL profiler plugin `librccl-profiler.so` might fail with a segmentation fault during `AllToAll` collective operations due to improperly assigned point-to-point task function pointers. This leads to invalid memory access and prevents profiling of `AllToAll` performance. Other operations, like `AllReduce`, are unaffected. It's recommended to avoid using the RCCL profiler plugin with `AllToAll` operations until the fix is available. This issue is resolved in the {fab}`github`[RCCL `develop` branch](https://github.com/ROCm/rccl/tree/develop) and will be part of a future ROCm release. See [GitHub issue #5653](https://github.com/ROCm/ROCm/issues/5653).
 ## ROCm resolved issues
 The following are previously known issues resolved in this release. For resolved issues related to
--- a/default.xml
+++ b/default.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
-    <default revision="refs/tags/rocm-7.1.0"
+    <default revision="refs/tags/rocm-7.1.1"
     remote="rocm-org"
     sync-c="true"
     sync-j="4" />
@@ -25,6 +25,7 @@
    <project groups="mathlibs" name="MIVisionX" />
    <project groups="mathlibs" name="ROCmValidationSuite" />
    <project groups="mathlibs" name="composable_kernel" />
    <project groups="mathlibs" name="hipSOLVER" />
    <project groups="mathlibs" name="hipTensor" />
    <project groups="mathlibs" name="hipfort" />
    <project groups="mathlibs" name="rccl" />
@@ -45,6 +46,7 @@
        rocprofiler rocr-runtime roctracer -->
    <project groups="mathlibs" name="rocm-systems" />
    <project groups="mathlibs" name="rocPyDecode" />
    <project groups="mathlibs" name="rocSOLVER" />
    <project groups="mathlibs" name="rocSHMEM" />
    <project groups="mathlibs" name="rocWMMA" />
    <project groups="mathlibs" name="rocm-cmake" />
--- a/docs/compatibility/compatibility-matrix.rst
+++ b/docs/compatibility/compatibility-matrix.rst
@@ -155,8 +155,38 @@ compatibility and system requirements.
 .. rubric:: Footnotes
 .. [#os-compatibility] Some operating systems are supported on limited GPUs. For detailed information, see the latest :ref:`supported_distributions`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-operating-systems>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-operating-systems>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-operating-systems>`_.
 .. [#gpu-compatibility] Some GPUs have limited operating system support. For detailed information, see the latest :ref:`supported_GPUs`. For version specific information, see `ROCm 7.1.1 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.1/reference/system-requirements.html#supported-gpus>`_, `ROCm 7.1.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.1.0/reference/system-requirements.html#supported-gpus>`_, and `ROCm 6.4.0 <https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.0/reference/system-requirements.html#supported-gpus>`_.
 .. [#rhel-10-702] RHEL 10.0 and RHEL 9.6 are supported on all listed :ref:`supported_GPUs` except AMD Radeon PRO V620 GPU.
 .. [#rhel-94-702] RHEL 9.4 is supported on all AMD Instinct GPUs listed under :ref:`supported_GPUs`.
 .. [#rhel-700] RHEL 8.10 is supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
 .. [#sles-710] **For ROCm 7.1.x** - SLES 15 SP7 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, MI210, and MI100 GPUs.
 .. [#sles-db-700] **For ROCm 7.0.x** - SLES 15 SP7 and Debian 12 are supported only on AMD Instinct MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
 .. [#ol-710-mi300x] **For ROCm 7.1.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, MI325X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
 .. [#ol-700-mi300x] **For ROCm 7.0.x** - Oracle Linux 10 and 9 are supported only on AMD Instinct MI355X, MI350X, and MI300X GPUs. Oracle Linux 8 is supported only on AMD Instinct MI300X GPU.
 .. [#ol-mi300x] **Prior ROCm 7.0.0** - Oracle Linux is supported only on AMD Instinct MI300X GPUs.
 .. [#db-710-mi300x] **For ROCm 7.1.x** - Debian 13 is supported only on AMD Instinct MI325X and MI300X GPUs.
 .. [#db12-710] **For ROCm 7.1.x** - Debian 12 is supported only on AMD Instinct MI325X, MI300X, MI300A, MI250X, MI250, and MI210 GPUs.
 .. [#db-mi300x] **For ROCm 7.0.2** - Debian 13 is supported only on AMD Instinct MI300X GPUs.
 .. [#az-mi300x] Starting ROCm 6.4.0, Azure Linux 3.0 is supported only on AMD Instinct MI300X and AMD Radeon PRO V710 GPUs.
 .. [#rl-700] Rocky Linux 9 is supported only on AMD Instinct MI300X and MI300A GPUs.
 .. [#single-node] **Prior to ROCm 7.0.0** - Debian 12 is supported only on AMD Instinct MI300X GPUs for single-node functionality.
 .. [#mi350x-os-710] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs supports all listed :ref:`supported_distributions` except RHEL 8.10, SLES 15 SP7, Debian 12, Rocky 9, Azure Linux 3.0, and Oracle Linux 8.
 .. [#mi350x-os-700] AMD Instinct MI355X (gfx950) and MI350X (gfx950) GPUs only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, Oracle Linux 10, and Oracle Linux 9.
 .. [#RDNA-OS-700] **For ROCm 7.0.x** - AMD Radeon PRO AI PRO R9700 (gfx1201), AMD Radeon RX 9070 XT (gfx1201), AMD Radeon RX 9070 GRE (gfx1201), AMD Radeon RX 9070 (gfx1201), AMD Radeon RX 9060 XT (gfx1200), AMD Radeon RX 9060 (gfx1200), AMD Radeon RX 7800 XT (gfx1101), AMD Radeon RX 7700 XT (gfx1101), AMD Radeon PRO W7700 (gfx1101), and AMD Radeon PRO W6800 (gfx1030) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, and RHEL 9.6.
 .. [#rd-v710] **For ROCm 7.0.x** - AMD Radeon PRO V710 (gfx1101) GPUs only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, and Azure Linux 3.0.
 .. [#rd-v620] **For ROCm 7.0.x** - AMD Radeon PRO V620 (gfx1030) GPUs only supports Ubuntu 24.04.3 and Ubuntu 22.04.5.
 .. [#mi325x-os-710] **For ROCm 7.1.x** - AMD Instinct MI325X GPUs (gfx942) supports all listed :ref:`supported_distributions` except RHEL 8.10, Rocky 9, Azure Linux 3.0, and Oracle Linux 8.
 .. [#mi325x-os] **For ROCm 7.0.x** - AMD Instinct MI325X GPUs (gfx942) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 9.6, and RHEL 9.4.
 .. [#mi300x-os] **Starting ROCm 7.0.x** - AMD Instinct MI300X GPUs (gfx942) supports all listed :ref:`supported_distributions`.
 .. [#mi300A-os] **Starting ROCm 7.0.x** - AMD Instinct MI300A GPUs (gfx942) supports all listed :ref:`supported_distributions` except on Debian 13, Azure Linux 3.0, Oracle Linux 10, Oracle Linux 9, and Oracle Linux 8.
 .. [#mi200x-os] **For ROCm 7.0.x** - AMD Instinct MI200 Series GPUs (gfx90a) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, SLES 15 SP7, and Debian 12.
 .. [#mi100-710-os] **For ROCm 7.1.x** - AMD Instinct MI100 GPUs (gfx908) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, RHEL 8.10, and SLES 15 SP7.
 .. [#mi100-os] **For ROCm 7.0.x** - AMD Instinct MI100 GPUs (gfx908) only supports Ubuntu 24.04.3, Ubuntu 22.04.5, RHEL 10.0, RHEL 9.6, RHEL 9.4, and RHEL 8.10.
 .. [#tf-mi350] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
 .. [#dgl_compat] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
 .. [#llama-cpp_compat] llama.cpp is supported only on ROCm 7.0.0 and ROCm 6.4.x.
 .. [#mi325x_KVM] For AMD Instinct MI325X KVM SR-IOV users, do not use AMD GPU Driver (amdgpu) 30.20.0.
@@ -243,7 +273,7 @@ Expand for full historical view of:
   .. [#tf-mi350-past-60] TensorFlow 2.17.1 is not supported on AMD Instinct MI350 Series GPUs. Use TensorFlow 2.19.1 or 2.18.1 with MI350 Series GPUs instead.
   .. [#verl_compat-past-60] verl is supported only on ROCm 6.2.0.
   .. [#stanford-megatron-lm_compat-past-60] Stanford Megatron-LM is supported only on ROCm 6.3.0.
-   .. [#dgl_compat-past-60] DGL is supported only on ROCm 6.4.0.
+   .. [#dgl_compat-past-60] DGL is supported only on ROCm 7.0.0, ROCm 6.4.3 and ROCm 6.4.0.
   .. [#megablocks_compat-past-60] Megablocks is supported only on ROCm 6.3.0.
   .. [#taichi_compat-past-60] Taichi is supported only on ROCm 6.3.2.
   .. [#ray_compat-past-60] Ray is supported only on ROCm 6.4.1.
--- a/docs/compatibility/ml-compatibility/dgl-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/dgl-compatibility.rst
@@ -39,13 +39,13 @@ Support overview
 Version support
 --------------------------------------------------------------------------------
-DGL is supported on `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+DGL is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__, 
 `ROCm 6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__, and `ROCm 6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
 Supported devices
 --------------------------------------------------------------------------------
- **Officially Supported**: AMD Instinct™ MI300X (through `hipBLASlt <https://rocm.docs.amd.com/projects/hipBLASLt/en/latest/index.html>`__)
+**Officially Supported**: AMD Instinct™ MI300X, MI250X
 - **Partially Supported**: AMD Instinct™ MI250X
 .. _dgl-recommendations:
@@ -60,16 +60,35 @@ GAT, GCN, and GraphSage. Using these models, a variety of use cases are supporte
 - 1D (Temporal) and 2D (Image) Classification
 - Drug Discovery
-Multiple use cases of DGL have been tested and verified.
+For use cases and recommendations, refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`__, 
-However, a recommended example follows a drug discovery pipeline using the ``SE3Transformer``.
+where you can search for DGL examples and best practices to optimize your workloads on AMD GPUs.
 Refer to the `AMD ROCm blog <https://rocm.blogs.amd.com/>`_, 
 where you can search for DGL examples and best practices to optimize your training workflows on AMD GPUs. 
-Coverage includes:
+* Although multiple use cases of DGL have been tested and verified, a few have been  
  outlined in the `DGL in the Real World: Running GNNs on Real Use Cases 
  <https://rocm.blogs.amd.com/artificial-intelligence/dgl_blog2/README.html>`__ blog 
  post, which walks through four real-world graph neural network (GNN) workloads 
  implemented with the Deep Graph Library on ROCm. It covers tasks ranging from 
  heterogeneous e-commerce graphs and multiplex networks (GATNE) to molecular graph 
  regression (GNN-FiLM) and EEG-based neurological diagnosis (EEG-GCNN). For each use 
  case, the authors detail: the dataset and task, how DGL is used, and their experience 
  porting to ROCm. It is shown that DGL codebases often run without modification, with 
  seamless integration of graph operations, message passing, sampling, and convolution. 
- Single-GPU training/inference
+* The `Graph Neural Networks (GNNs) at Scale: DGL with ROCm on AMD Hardware 
- Multi-GPU training
+  <https://rocm.blogs.amd.com/artificial-intelligence/why-graph-neural/README.html>`__ 
  blog post introduces the Deep Graph Library (DGL) and its enablement on the AMD ROCm platform, 
  bringing high-performance graph neural network (GNN) training to AMD GPUs. DGL bridges 
  the gap between dense tensor frameworks and the irregular nature of graph data through a 
  graph-first, message-passing abstraction. Its design ensures scalability, flexibility, and 
  interoperability across frameworks like PyTorch and TensorFlow. AMD’s ROCm integration 
  enables DGL to run efficiently on HIP-based GPUs, supported by prebuilt Docker containers 
  and open-source repositories. This marks a major step in AMD's mission to advance open, 
  scalable AI ecosystems beyond traditional architectures.
 You can pre-process datasets and begin training on AMD GPUs through:
 * Single-GPU training/inference
 * Multi-GPU training
 .. _dgl-docker-compat:
@@ -85,7 +104,7 @@ with ROCm backends on Docker Hub. The following Docker image tags and associated
 inventories represent the latest available DGL version from the official Docker Hub. 
 Click the |docker-icon| to view the image on Docker Hub.
-.. list-table:: DGL Docker image components
+.. list-table::
    :header-rows: 1
    :class: docker-image-compatibility
@@ -98,43 +117,83 @@ Click the |docker-icon| to view the image on Docker Hub.
    * - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.8.0/images/sha256-943698ddf54c22a7bcad2e5b4ff467752e29e4ba6d0c926789ae7b242cbd92dd"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.6.0 <https://github.com/ROCm/pytorch/tree/release/2.6>`__
+      - `2.8.0 <https://github.com/pytorch/pytorch/releases/tag/v2.8.0>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
    * - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-b2ec286a035eb7d0a6aab069561914d21a3cac462281e9c024501ba5ccedfbf7"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
+      - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
    * - .. raw:: html
           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm7.0.0_ubuntu22.04_py3.10_pytorch_2.7.1/images/sha256-d27aee16df922ccf0bcd9107bfcb6d20d34235445d456c637e33ca6f19d11a51"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
      - `2.7.1 <https://github.com/pytorch/pytorch/releases/tag/v2.7.1>`__
      - 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
    * - .. raw:: html
           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4.0.amd0_rocm6.4.3_ubuntu24.04_py3.12_pytorch_2.6.0/images/sha256-f3ba6a3c9ec9f6c1cde28449dc9780e0c4c16c4140f4b23f158565fbfd422d6b"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
      - `6.4.3 <https://repo.radeon.com/rocm/apt/6.4.3/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
      - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
    * - .. raw:: html
           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.6.0/images/sha256-8ce2c3bcfaa137ab94a75f9e2ea711894748980f57417739138402a542dd5564"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
      - `2.6.0 <https://github.com/pytorch/pytorch/releases/tag/v2.6.0>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
    * - .. raw:: html
           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu24.04_py3.12_pytorch_release_2.4.1/images/sha256-cf1683283b8eeda867b690229c8091c5bbf1edb9f52e8fb3da437c49a612ebe4"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
      - `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
      - 24.04
      - `3.12.9 <https://www.python.org/downloads/release/python-3129/>`__
    * - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.4.1/images/sha256-4834f178c3614e2d09e89e32041db8984c456d45dfd20286e377ca8635686554"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.4.1 <https://github.com/ROCm/pytorch/tree/release/2.4>`__
+      - `2.4.1 <https://github.com/pytorch/pytorch/releases/tag/v2.4.1>`__
      - 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
    * - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i></a>
+           <a href="https://hub.docker.com/layers/rocm/dgl/dgl-2.4_rocm6.4_ubuntu22.04_py3.10_pytorch_release_2.3.0/images/sha256-88740a2c8ab4084b42b10c3c6ba984cab33dd3a044f479c6d7618e2b2cb05e69"><i class="fab fa-docker fa-lg"></i> rocm/dgl</a>
-      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__.
+      - `6.4.0 <https://repo.radeon.com/rocm/apt/6.4/>`__
      - `2.4.0 <https://github.com/dmlc/dgl/releases/tag/v2.4.0>`__
-      - `2.3.0 <https://github.com/ROCm/pytorch/tree/release/2.3>`__
+      - `2.3.0 <https://github.com/pytorch/pytorch/releases/tag/v2.3.0>`__
      - 22.04
      - `3.10.16 <https://www.python.org/downloads/release/python-31016/>`__
@@ -150,81 +209,102 @@ If you prefer to build it yourself, ensure the following dependencies are instal
    :header-rows: 1
    * - ROCm library
-      - ROCm 6.4.0 Version
+      - ROCm 7.0.0 Version
      - ROCm 6.4.x Version
      - Purpose
    * - `Composable Kernel <https://github.com/ROCm/composable_kernel>`_
      - 1.1.0
      - 1.1.0
      - Enables faster execution of core operations like matrix multiplication
        (GEMM), convolutions and transformations.
    * - `hipBLAS <https://github.com/ROCm/hipBLAS>`_
      - 3.0.0
      - 2.4.0
      - Provides GPU-accelerated Basic Linear Algebra Subprograms (BLAS) for
        matrix and vector operations.
    * - `hipBLASLt <https://github.com/ROCm/hipBLASLt>`_
      - 1.0.0
      - 0.12.0
      - hipBLASLt is an extension of the hipBLAS library, providing additional
        features like epilogues fused into the matrix multiplication kernel or
        use of integer tensor cores.
    * - `hipCUB <https://github.com/ROCm/hipCUB>`_
      - 4.0.0
      - 3.4.0
      - Provides a C++ template library for parallel algorithms for reduction,
        scan, sort and select.
    * - `hipFFT <https://github.com/ROCm/hipFFT>`_
      - 1.0.20
      - 1.0.18
      - Provides GPU-accelerated Fast Fourier Transform (FFT) operations.
    * - `hipRAND <https://github.com/ROCm/hipRAND>`_
      - 3.0.0
      - 2.12.0
      - Provides fast random number generation for GPUs.
    * - `hipSOLVER <https://github.com/ROCm/hipSOLVER>`_
      - 3.0.0
      - 2.4.0
      - Provides GPU-accelerated solvers for linear systems, eigenvalues, and
        singular value decompositions (SVD).
    * - `hipSPARSE <https://github.com/ROCm/hipSPARSE>`_
      - 4.0.1
      - 3.2.0
      - Accelerates operations on sparse matrices, such as sparse matrix-vector
        or matrix-matrix products.
    * - `hipSPARSELt <https://github.com/ROCm/hipSPARSELt>`_
      - 0.2.4
      - 0.2.3
      - Accelerates operations on sparse matrices, such as sparse matrix-vector
        or matrix-matrix products.
    * - `hipTensor <https://github.com/ROCm/hipTensor>`_
      - 2.0.0
      - 1.5.0
      - Optimizes for high-performance tensor operations, such as contractions.
    * - `MIOpen <https://github.com/ROCm/MIOpen>`_
      - 3.5.0
      - 3.4.0
      - Optimizes deep learning primitives such as convolutions, pooling,
        normalization, and activation functions.
    * - `MIGraphX <https://github.com/ROCm/AMDMIGraphX>`_
      - 2.13.0
      - 2.12.0
      - Adds graph-level optimizations, ONNX models and mixed precision support
        and enables Ahead-of-Time (AOT) Compilation.
    * - `MIVisionX <https://github.com/ROCm/MIVisionX>`_
      - 3.3.0
      - 3.2.0
      - Optimizes acceleration for computer vision and AI workloads like
        preprocessing, augmentation, and inferencing.
    * - `rocAL <https://github.com/ROCm/rocAL>`_
-      - :version-ref:`rocAL rocm_version`
+      - 3.3.0
      - 2.2.0
      - Accelerates the data pipeline by offloading intensive preprocessing and
        augmentation tasks. rocAL is part of MIVisionX.
    * - `RCCL <https://github.com/ROCm/rccl>`_
-      - 2.2.0
+      - 2.26.6
      - 2.22.3
      - Optimizes for multi-GPU communication for operations like AllReduce and
        Broadcast.
    * - `rocDecode <https://github.com/ROCm/rocDecode>`_
      - 1.0.0
      - 0.10.0
      - Provides hardware-accelerated data decoding capabilities, particularly
        for image, video, and other dataset formats.
    * - `rocJPEG <https://github.com/ROCm/rocJPEG>`_
      - 1.1.0
      - 0.8.0
      - Provides hardware-accelerated JPEG image decoding and encoding.
    * - `RPP <https://github.com/ROCm/RPP>`_
      - 2.0.0
      - 1.9.10
      - Speeds up data augmentation, transformation, and other preprocessing steps.
    * - `rocThrust <https://github.com/ROCm/rocThrust>`_
      - 4.0.0
      - 3.3.0
      - Provides a C++ template library for parallel algorithms like sorting,
        reduction, and scanning.
    * - `rocWMMA <https://github.com/ROCm/rocWMMA>`_
      - 2.0.0
      - 1.7.0
      - Accelerates warp-level matrix-multiply and matrix-accumulate to speed up matrix
        multiplication (GEMM) and accumulation operations with mixed precision
@@ -253,26 +333,29 @@ Instead of listing them all, support is grouped into the following categories to
 * DGL NN
 * DGL Optim
 * DGL Sparse
-
+* GraphBolt
 Unsupported features
 ================================================================================
-* GraphBolt
+* TF32 Support (only supported for PyTorch 2.7 and above)
 * Partial TF32 Support (MI250X only)
 * Kineto/ROCTracer integration
 Unsupported functions
 ================================================================================
-* ``more_nnz``
+* ``bfs``
 * ``format``
 * ``multiprocess_sparse_adam_state_dict``
 * ``record_stream_ndarray``
 * ``half_spmm``
 * ``segment_mm`` 
 * ``gather_mm_idx_b``
 * ``pgexplainer``
 * ``sample_labors_prob``
 * ``sample_labors_noprob``
 * ``sparse_admin``
 Previous versions
 ===============================================================================
 See :doc:`rocm-install-on-linux:install/3rd-party/previous-versions/dgl-history` to find documentation for previous releases
 of the ``ROCm/dgl`` Docker image.
--- a/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
+++ b/docs/compatibility/ml-compatibility/llama-cpp-compatibility.rst
@@ -45,7 +45,7 @@ llama.cpp is supported on `ROCm 7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
 Supported devices
 --------------------------------------------------------------------------------
-**Officially Supported**: AMD Instinct™ MI300X, MI325X, MI210
+**Officially Supported**: AMD Instinct™ MI325X, MI300X, MI210
 Use cases and recommendations
 ================================================================================
@@ -109,27 +109,27 @@ Click |docker-icon| to view the image on Docker Hub.
    * - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_full/images/sha256-a2ecd635eaa65bb289a9041330128677f3ae88bee6fee0597424b17e38d4903c"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_full/images/sha256-a94f0c7a598cc6504ff9e8371c016d7a2f93e69bf54a36c870f9522567201f10"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_server/images/sha256-cb46b47df415addb5ceb6e6fdf0be70bf9d7f6863bbe6e10c2441ecb84246d52"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_server/images/sha256-be175932c3c96e882dfbc7e20e0e834f58c89c2925f48b222837ee929dfc47ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu24.04_light/images/sha256-8f8536eec4b05c0ff1c022f9fc6c527ad1c89e6c1ca0906e4d39e4de73edbde9"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu24.04_light/images/sha256-d8ba0c70603da502c879b1f8010b439c8e7fa9f6cbdac8bbbbbba97cb41ebc9e"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
-      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - 24.04
    * - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_full/images/sha256-f36de2a3b03ae53e81c85422cb3780368c9891e1ac7884b04403a921fe2ea45d"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_full/images/sha256-37582168984f25dce636cc7288298e06d94472ea35f65346b3541e6422b678ee"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_server/images/sha256-df15e8ab11a6837cd3736644fec1e047465d49e37d610ab0b79df000371327df"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_server/images/sha256-7e70578e6c3530c6591cc2c26da24a9ee68a20d318e12241de93c83224f83720"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
      - .. raw:: html
-           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6356_rocm7.0.0_ubuntu22.04_light/images/sha256-4ea2d5bb7964f0ee3ea9b30ba7f343edd6ddfab1b1037669ca7eafad2e3c2bd7"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
+           <a href="https://hub.docker.com/layers/rocm/llama.cpp/llama.cpp-b6652.amd0_rocm7.0.0_ubuntu22.04_light/images/sha256-9a5231acf88b4a229677bc2c636ea3fe78a7a80f558bd80910b919855de93ad5"><i class="fab fa-docker fa-lg"></i> rocm/llama.cpp</a>
-      - `b6356 <https://github.com/ROCm/llama.cpp/tree/release/b6356>`__
+      - `b6652 <https://github.com/ROCm/llama.cpp/tree/release/b6652>`__
      - `7.0.0 <https://repo.radeon.com/rocm/apt/7.0/>`__
      - 22.04
--- a/docs/how-to/deep-learning-rocm.rst
+++ b/docs/how-to/deep-learning-rocm.rst
@@ -84,6 +84,8 @@ The table below summarizes information about ROCm-enabled deep learning framewor
          <a href="https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html"><i class="fas fa-link fa-lg"></i></a>
      - 
        - `Docker image <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-prebuilt-docker-image-with-dgl-pre-installed>`__
        - `Wheels package <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/dgl-install.html#use-a-wheels-package>`__
      - .. raw:: html
          <a href="https://github.com/ROCm/dgl"><i class="fab fa-github fa-lg"></i></a> 
--- a/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
+++ b/docs/how-to/rocm-for-ai/inference-optimization/vllm-optimization.rst
@@ -67,7 +67,7 @@ Quick start examples:
   export VLLM_ROCM_USE_AITER=1
   vllm serve MODEL_NAME
-   # Enable only AITER Triton Prefill-Decode (split) attention
+   # Enable AITER Fused MoE and enable Triton Prefill-Decode (split) attention
   export VLLM_ROCM_USE_AITER=1
   export VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1
   export VLLM_ROCM_USE_AITER_MHA=0
@@ -244,14 +244,17 @@ Most users won't need this, but you can override the defaults:
   * - AITER MHA (standard models)
     - ``VLLM_ROCM_USE_AITER=1`` (auto-selects for non-MLA models)
-   * - AITER Triton Prefill-Decode (split)
+   * - vLLM Triton Unified (default)
     - ``VLLM_ROCM_USE_AITER=0`` (or unset)
   * - Triton Prefill-Decode (split) without AITER
     - | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
   * - Triton Prefill-Decode (split) along with AITER Fused-MoE
     - | ``VLLM_ROCM_USE_AITER=1``
       | ``VLLM_ROCM_USE_AITER_MHA=0``
       | ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
   * - vLLM Triton Unified (default)
     - ``VLLM_ROCM_USE_AITER=0`` (or unset)
   * - AITER Unified Attention
     - | ``VLLM_ROCM_USE_AITER=1``
       | ``VLLM_ROCM_USE_AITER_MHA=0``
@@ -269,11 +272,11 @@ Most users won't need this, but you can override the defaults:
       --block-size 1 \
       --tensor-parallel-size 8
-   # Advanced: Use Prefill-Decode split (for short input cases)
+   # Advanced: Use Prefill-Decode split (for short input cases) with AITER Fused-MoE
   VLLM_ROCM_USE_AITER=1 \
   VLLM_ROCM_USE_AITER_MHA=0 \
   VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1 \
-   vllm serve meta-llama/Llama-3.3-70B-Instruct
+   vllm serve meta-llama/Llama-4-Scout-17B-16E
 **Which backend should I choose?**
@@ -352,14 +355,14 @@ vLLM V1 on ROCm provides these attention implementations:
 3. **AITER Triton Prefill–Decode Attention** (hybrid, Instinct MI300X-optimized)
-   * Enable with ``VLLM_ROCM_USE_AITER=1``, ``VLLM_ROCM_USE_AITER_MHA=0``, and ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
+   * Enable with ``VLLM_V1_USE_PREFILL_DECODE_ATTENTION=1``
   * Uses separate kernels for prefill and decode phases:
     * **Prefill**: ``context_attention_fwd`` Triton kernel
     * **Primary decode**: ``torch.ops._rocm_C.paged_attention`` (custom ROCm kernel optimized for head sizes 64/128, block sizes 16/32, GQA 1–16, context ≤131k; sliding window not supported)
     * **Fallback decode**: ``kernel_paged_attention_2d`` Triton kernel when shapes don't meet primary decode requirements
-   * Usually better compared to unified Triton kernels (both vLLM and AITER variants)
+   * Usually better compared to unified Triton kernels
   * Performance vs AITER MHA varies: AITER MHA is typically faster overall, but Prefill-Decode split may win in short input scenarios
   * The custom paged attention decode kernel is controlled by ``VLLM_ROCM_CUSTOM_PAGED_ATTN`` (default **True**)
@@ -695,7 +698,9 @@ There are two strategies:
 vLLM engine arguments
 =====================
-Selected arguments that often help on ROCm. See `engine args docs <https://docs.vllm.ai/en/latest/serving/engine_args.html>`_ for the full list.
+Selected arguments that often help on ROCm. See `Engine Arguments
 <https://docs.vllm.ai/en/stable/configuration/engine_args.html>`__ in the vLLM
 documentation for the full list.
 Configure --max-num-seqs
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst
+++ b/docs/how-to/rocm-for-ai/inference/deploy-your-model.rst
@@ -22,7 +22,7 @@ See the `GitHub repository <https://github.com/vllm-project/vllm>`_ and `officia
 <https://docs.vllm.ai/>`_ for more information.
 For guidance on using vLLM with ROCm, refer to `Installation with ROCm
-<https://docs.vllm.ai/en/latest/getting_started/amd-installation.html>`_.
+<https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html#amd-rocm>`__.
 vLLM installation
 -----------------
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1,4 +1,4 @@
-rocm-docs-core==1.28.0
+rocm-docs-core==1.29.0
 sphinx-reredirects
 sphinx-sitemap
 sphinxcontrib.datatemplates==0.11.0
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -187,7 +187,7 @@ requests==2.32.5
    # via
    #   pygithub
    #   sphinx
-rocm-docs-core==1.28.0
+rocm-docs-core==1.29.0
    # via -r requirements.in
 rpds-py==0.28.0
    # via
--- a/tools/rocm-build/rocm-7.1.1.xml
+++ b/tools/rocm-build/rocm-7.1.1.xml
@@ -0,0 +1,60 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <manifest>
    <remote name="rocm-org" fetch="https://github.com/ROCm/" />
    <default revision="refs/tags/rocm-7.1.1"
     remote="rocm-org"
     sync-c="true"
     sync-j="4" />
 <!--list of projects for ROCm-->
    <project name="ROCK-Kernel-Driver" />
    <project name="amdsmi" />
    <project name="rocm_bandwidth_test" />
    <project name="rocm-examples" />
 <!--HIP Projects-->
    <project name="HIPIFY" />
 <!-- The following projects are all associated with the AMDGPU LLVM compiler -->
    <project name="half" />
    <project name="llvm-project" />
    <project name="spirv-llvm-translator" />
 <!-- gdb projects -->
    <project name="ROCdbgapi" />
    <project name="ROCgdb" />
    <project name="rocr_debug_agent" />
 <!-- ROCm Libraries -->
    <project groups="mathlibs" name="AMDMIGraphX" />
    <project groups="mathlibs" name="MIVisionX" />
    <project groups="mathlibs" name="ROCmValidationSuite" />
    <project groups="mathlibs" name="composable_kernel" />
    <project groups="mathlibs" name="hipSOLVER" />
    <project groups="mathlibs" name="hipTensor" />
    <project groups="mathlibs" name="hipfort" />
    <project groups="mathlibs" name="rccl" />
    <project groups="mathlibs" name="rocAL" />
    <project groups="mathlibs" name="rocALUTION" />
    <project groups="mathlibs" name="rocDecode" />
    <project groups="mathlibs" name="rocJPEG" />
    <!-- The following components have been migrated to rocm-libraries:
        hipBLAS-common hipBLAS hipBLASLt hipCUB
        hipFFT hipRAND hipSPARSE hipSPARSELt
        MIOpen rocBLAS rocFFT rocPRIM rocRAND
        rocSPARSE rocThrust Tensile -->
    <project groups="mathlibs" name="rocm-libraries" />
    <!-- The following components have been migrated to rocm-systems:
        aqlprofile clr hip hip-tests hipother
        rdc rocm-core rocm_smi_lib rocminfo rocprofiler-compute 
        rocprofiler-register rocprofiler-sdk rocprofiler-systems 
        rocprofiler rocr-runtime roctracer -->
    <project groups="mathlibs" name="rocm-systems" />
    <project groups="mathlibs" name="rocPyDecode" />
    <project groups="mathlibs" name="rocSHMEM" />
    <project groups="mathlibs" name="rocSOLVER" />
    <project groups="mathlibs" name="rocWMMA" />
    <project groups="mathlibs" name="rocm-cmake" />
    <project groups="mathlibs" name="rpp" />
    <project groups="mathlibs" name="TransferBench" />
 <!-- Projects for OpenMP-Extras -->
    <project name="aomp" path="openmp-extras/aomp" />
    <project name="aomp-extras" path="openmp-extras/aomp-extras" />
    <project name="flang" path="openmp-extras/flang" />
 </manifest>
 etcd
 fas
 FBGEMM
+FiLM
 FIFOs
 FFT
 FFTs
 Fuyu
 GALB
 GAT
+GATNE
 GCC
 GCD
 GCDs
 GCN
+GCNN
 GDB
 GDDR
 GDR
 GLXT
 Gloo
 GMI
+GNN
+GNNs
 GPG
 GPR
 GPT