Mirror of https://github.com/ROCm/ROCm.git (synced 2026-01-10 23:28:03 -05:00)

Compare commits: 9 commits, cpattigi-p...docs/7.0.0
| Author | SHA1 | Date |
|---|---|---|
|  | a4b1b2cc67 |  |
|  | 4f592f8949 |  |
|  | ac2df2961d |  |
|  | f20e8dec8b |  |
|  | 10e9157f39 |  |
|  | a2ce6021cb |  |
|  | 2196fc9a2f |  |
|  | 925689f89e |  |
|  | 91a541f8b9 |  |
@@ -61,12 +61,12 @@ parameters:
   - { os: ubuntu2204, packageManager: apt, target: gfx942 }
   - { os: ubuntu2204, packageManager: apt, target: gfx90a }
   - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-  # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
+  - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
   - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
   - { os: almalinux8, packageManager: dnf, target: gfx942 }
   - { os: almalinux8, packageManager: dnf, target: gfx90a }
   - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-  # - { os: almalinux8, packageManager: dnf, target: gfx1100 }
+  - { os: almalinux8, packageManager: dnf, target: gfx1100 }
   - { os: almalinux8, packageManager: dnf, target: gfx1030 }
  testJobs:
   - { os: ubuntu2204, packageManager: apt, target: gfx942 }
@@ -60,12 +60,12 @@ parameters:
   - { os: ubuntu2204, packageManager: apt, target: gfx942 }
   - { os: ubuntu2204, packageManager: apt, target: gfx90a }
   - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-  # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
+  - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
   - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
   - { os: almalinux8, packageManager: dnf, target: gfx942 }
   - { os: almalinux8, packageManager: dnf, target: gfx90a }
   - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-  # - { os: almalinux8, packageManager: dnf, target: gfx1100 }
+  - { os: almalinux8, packageManager: dnf, target: gfx1100 }
   - { os: almalinux8, packageManager: dnf, target: gfx1030 }
  testJobs:
   - { os: ubuntu2204, packageManager: apt, target: gfx942, shard: 1, shardCount: 3 }
@@ -170,7 +170,7 @@ jobs:
 - ${{ if eq(parameters.unifiedBuild, False) }}:
   - ${{ each job in parameters.jobMatrix.testJobs }}:
-    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}_${{ job.shard }}
+    - job: ${{ parameters.componentName }}_test_${{ job.os }}_${{ job.target }}_shard_${{ job.shard }}
      dependsOn: ${{ parameters.componentName }}_build_${{ job.os }}_${{ job.target }}
      condition:
        and(succeeded(),
@@ -64,12 +64,12 @@ parameters:
   - { os: ubuntu2204, packageManager: apt, target: gfx942 }
   - { os: ubuntu2204, packageManager: apt, target: gfx90a }
   - { os: ubuntu2204, packageManager: apt, target: gfx1201 }
-  # - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
+  - { os: ubuntu2204, packageManager: apt, target: gfx1100 }
   - { os: ubuntu2204, packageManager: apt, target: gfx1030 }
   - { os: almalinux8, packageManager: dnf, target: gfx942 }
   - { os: almalinux8, packageManager: dnf, target: gfx90a }
   - { os: almalinux8, packageManager: dnf, target: gfx1201 }
-  # - { os: almalinux8, packageManager: dnf, target: gfx1100 }
+  - { os: almalinux8, packageManager: dnf, target: gfx1100 }
   - { os: almalinux8, packageManager: dnf, target: gfx1030 }
  testJobs:
   - { os: ubuntu2204, packageManager: apt, target: gfx942 }
@@ -3,12 +3,21 @@ parameters:
 - name: jobList
   type: object
   default:
-  - gfx942-staging:
-      target: gfx942
-      source: staging
-  - gfx90a-staging:
-      target: gfx90a
-      source: staging
+  - { os: ubuntu2204, target: gfx942, source: staging }
+  - { os: ubuntu2204, target: gfx90a, source: staging }
+  - { os: ubuntu2204, target: gfx1201, source: staging }
+  - { os: ubuntu2204, target: gfx1100, source: staging }
+  - { os: ubuntu2204, target: gfx1030, source: staging }
+  - { os: ubuntu2404, target: gfx942, source: staging }
+  - { os: ubuntu2404, target: gfx90a, source: staging }
+  - { os: ubuntu2404, target: gfx1201, source: staging }
+  - { os: ubuntu2404, target: gfx1100, source: staging }
+  - { os: ubuntu2404, target: gfx1030, source: staging }
+  - { os: almalinux8, target: gfx942, source: staging }
+  - { os: almalinux8, target: gfx90a, source: staging }
+  - { os: almalinux8, target: gfx1201, source: staging }
+  - { os: almalinux8, target: gfx1100, source: staging }
+  - { os: almalinux8, target: gfx1030, source: staging }
 - name: rocmDependencies
   type: object
   default:
@@ -16,9 +25,9 @@ parameters:
   - amdsmi
   - aomp-extras
   - aomp
   - clr
   - composable_kernel
   - half
   - HIP
   - hip-tests
   - hipBLAS
   - hipBLAS-common
@@ -83,7 +92,7 @@ schedules:

 jobs:
 - ${{ each job in parameters.jobList }}:
-  - job: rocm_nightly_${{ job.target }}_${{ job.source }}
+  - job: rocm_nightly_${{ job.os }}_${{ job.target }}_${{ job.source }}
     variables:
     - group: common
     - template: /.azuredevops/variables-global.yml
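These job-name changes matter because Azure DevOps requires job names to be unique within a pipeline; once the matrix spans multiple operating systems (or shards), a name built only from target and source collides. A minimal Python sketch of the collision check, using a hypothetical two-entry job matrix:

    from collections import Counter

    # Hypothetical job matrix mirroring the jobList parameter above.
    job_list = [
        {"os": "ubuntu2204", "target": "gfx942", "source": "staging"},
        {"os": "almalinux8", "target": "gfx942", "source": "staging"},
    ]

    def job_names(jobs, with_os):
        """Build names the way the template interpolates them."""
        if with_os:
            return [f"rocm_nightly_{j['os']}_{j['target']}_{j['source']}" for j in jobs]
        return [f"rocm_nightly_{j['target']}_{j['source']}" for j in jobs]

    # Without the os segment, both entries map to the same job name.
    for with_os in (False, True):
        names = Counter(job_names(job_list, with_os))
        dupes = [n for n, c in names.items() if c > 1]
        print("with os:" if with_os else "without os:", dupes or "no collisions")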
@@ -108,9 +117,9 @@ jobs:
     parameters:
       dependencySource: ${{ job.source }}
       dependencyList: ${{ parameters.rocmDependencies }}
       os: ${{ job.os }}
       gpuTarget: ${{ job.target }}
       skipLibraryLinking: true
       skipLlvmSymlink: true
 - script: df -h
   displayName: System disk space after ROCm
 - script: du -sh $(Agent.BuildDirectory)/rocm
@@ -22,19 +22,16 @@ steps:
 - task: DownloadPipelineArtifact@2
   displayName: Download ${{ parameters.componentName }}
   inputs:
+    itemPattern: '**/*${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
+    targetPath: '$(Pipeline.Workspace)/d'
+    allowPartiallySucceededBuilds: true
     ${{ if parameters.aggregatePipeline }}:
       buildType: 'current'
-      itemPattern: '**/${{ parameters.componentName }}*${{ parameters.fileFilter }}*'
-      allowPartiallySucceededBuilds: true
-      targetPath: '$(Pipeline.Workspace)/d'
     ${{ else }}:
       buildType: 'specific'
       project: ROCm-CI
       specificBuildWithTriggering: true
-      allowPartiallySucceededBuilds: true
       definition: ${{ parameters.pipelineId }}
-      itemPattern: '**/*${{ parameters.fileFilter }}*'
-      targetPath: '$(Pipeline.Workspace)/d'
       branchName: refs/heads/${{ parameters.branchName }}
       ${{ if eq(parameters.componentName, 'aomp') }}:
         buildVersionToDownload: latest # aomp trigger lives in ROCm/ROCm, so cannot use ROCm/aomp branch names
@@ -26,7 +26,7 @@ steps:
     includeRootFolder: false
     archiveType: 'tar'
     tarCompression: 'gz'
-    archiveFile: '$(Build.ArtifactStagingDirectory)/${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.tar.gz'
+    archiveFile: '$(Build.ArtifactStagingDirectory)/${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt).tar.gz'
 - task: DeleteFiles@1
   displayName: 'Cleanup Staging Area'
   inputs:
@@ -38,7 +38,7 @@ steps:
   inputs:
     workingDirectory: $(Pipeline.Workspace)
     targetType: inline
-    script: echo "${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}.tar.gz" >> pipelineArtifacts.txt
+    script: echo "${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt).tar.gz" >> pipelineArtifacts.txt
 # then publish it
 - ${{ if parameters.publish }}:
   - task: PublishPipelineArtifact@1
@@ -46,4 +46,5 @@ steps:
     displayName: '${{ parameters.artifactName }} Publish'
     retryCountOnTaskFailure: 3
     inputs:
+      artifactName: ${{ parameters.componentName }}_$(Build.BuildId)_$(Build.BuildNumber)_${{ parameters.os }}_${{ parameters.gpuTarget }}_${{ parameters.artifactName }}_$(System.JobAttempt)
      targetPath: '$(Build.ArtifactStagingDirectory)'
@@ -109,7 +109,7 @@ parameters:
     hasGpuTarget: false
   hipCUB:
     pipelineId: $(HIPCUB_PIPELINE_ID)
-    stagingBranch: release-staging/rocm-rel-7.0
+    stagingBranch: develop
     mainlineBranch: develop
     hasGpuTarget: true
   hipFFT:
@@ -129,7 +129,7 @@ parameters:
     hasGpuTarget: false
   hipRAND:
     pipelineId: $(HIPRAND_PIPELINE_ID)
-    stagingBranch: release-staging/rocm-rel-7.0
+    stagingBranch: develop
     mainlineBranch: develop
     hasGpuTarget: true
   hipSOLVER:
@@ -264,7 +264,7 @@ parameters:
     hasGpuTarget: false
   rocPRIM:
     pipelineId: $(ROCPRIM_PIPELINE_ID)
-    stagingBranch: release-staging/rocm-rel-7.0
+    stagingBranch: develop
     mainlineBranch: develop
     hasGpuTarget: true
   rocprofiler:
@@ -304,7 +304,7 @@ parameters:
     hasGpuTarget: false
   rocRAND:
     pipelineId: $(ROCRAND_PIPELINE_ID)
-    stagingBranch: release-staging/rocm-rel-7.0
+    stagingBranch: develop
     mainlineBranch: develop
     hasGpuTarget: true
   rocr_debug_agent:
@@ -329,7 +329,7 @@ parameters:
     hasGpuTarget: false
   rocThrust:
     pipelineId: $(ROCTHRUST_PIPELINE_ID)
-    stagingBranch: release-staging/rocm-rel-7.0
+    stagingBranch: develop
     mainlineBranch: develop
     hasGpuTarget: true
   roctracer:
@@ -438,14 +438,14 @@ steps:
     targetType: inline
     script: |
       sudo mkdir -p $(Agent.BuildDirectory)/rocm/lib
-      sudo ln -s $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
+      sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm $(Agent.BuildDirectory)/rocm/lib/llvm
 - task: Bash@3
   displayName: Symlink executables from rocm/llvm/bin to rocm/bin
   inputs:
     targetType: inline
     script: |
       for file in amdclang amdclang++ amdclang-cl amdclang-cpp amdflang amdlld aompcc mygpu mycpu offload-arch; do
-        sudo ln -s $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
+        sudo ln -sr $(Agent.BuildDirectory)/rocm/llvm/bin/$file $(Agent.BuildDirectory)/rocm/bin/$file
       done
 # dlopen calls within a ctest or pytest sequence runs into issues when shared library symlink convention is not followed
 # the convention is as follows:
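The switch from `ln -s` to `ln -sr` stores the link target as a relative path, so the symlink keeps working when the `rocm` tree is archived and re-extracted at a different absolute location. A Python sketch of the same idea, with illustrative paths rather than the pipeline's `$(Agent.BuildDirectory)`:

    import os

    def relative_symlink(target: str, link: str) -> None:
        """Create a symlink whose stored path is relative to the link's
        directory, mimicking `ln -sr target link`."""
        rel = os.path.relpath(target, start=os.path.dirname(link))
        os.symlink(rel, link)

    # Illustrative layout: rocm/lib/llvm -> ../llvm
    os.makedirs("rocm/llvm/bin", exist_ok=True)
    os.makedirs("rocm/lib", exist_ok=True)
    relative_symlink("rocm/llvm", "rocm/lib/llvm")
    print(os.readlink("rocm/lib/llvm"))  # ../llvm, still valid if the tree moves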
@@ -1,3 +1,18 @@
+Datacenter
+GST
+IET
+LTO
+MX
+Microscaling
+NANOO
+ROCprof
+affinitization
+amdclang
+benefitting
+demangled
+inlined
+microscaling
+roofline
 AAC
 ABI
 ACE
docs/conf.py (125 changed lines)
@@ -34,69 +34,86 @@ project = "ROCm Documentation"
|
||||
project_path = os.path.abspath(".").replace("\\", "/")
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = "6.4.1"
|
||||
release = "6.4.1"
|
||||
version = "7.0 Alpha"
|
||||
release = "7.0 Alpha"
|
||||
setting_all_article_info = True
|
||||
all_article_info_os = ["linux", "windows"]
|
||||
all_article_info_author = ""
|
||||
|
||||
# pages with specific settings
|
||||
article_pages = [
|
||||
{"file": "about/release-notes", "os": ["linux"], "date": "2025-05-07"},
|
||||
{"file": "release/changelog", "os": ["linux"],},
|
||||
{"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
|
||||
{"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
|
||||
{"file": "how-to/deep-learning-rocm", "os": ["linux"]},
|
||||
{"file": "preview/index", "os": ["linux"],},
|
||||
{"file": "preview/release", "os": ["linux"],},
|
||||
{"file": "preview/install/index", "os": ["linux"],},
|
||||
{"file": "preview/install/instinct-driver", "os": ["linux"],},
|
||||
{"file": "preview/install/rocm", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/index", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/training", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/pre-training-megatron-lm-llama-3-8b", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/pre-training-torchtitan-llama-3-70b", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/fine-tuning-lora-llama-2-70b", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/inference", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/inference-vllm-llama-3.1-405b-fp4", "os": ["linux"],},
|
||||
{"file": "preview/benchmark-docker/inference-sglang-deepseek-r1-fp4", "os": ["linux"],},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/vllm", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
|
||||
{"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
|
||||
|
||||
{"file": "how-to/system-optimization/index", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi200", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/mi100", "os": ["linux"]},
|
||||
{"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
|
||||
{"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
|
||||
{"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
|
||||
{"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
|
||||
{"file": "how-to/system-debugging", "os": ["linux"]},
|
||||
{"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
|
||||
# {"file": "about/release-notes", "os": ["linux"], "date": "2025-06-26"},
|
||||
# {"file": "release/changelog", "os": ["linux"],},
|
||||
# {"file": "compatibility/compatibility-matrix", "os": ["linux"]},
|
||||
# {"file": "compatibility/ml-compatibility/pytorch-compatibility", "os": ["linux"]},
|
||||
# {"file": "compatibility/ml-compatibility/tensorflow-compatibility", "os": ["linux"]},
|
||||
# {"file": "compatibility/ml-compatibility/jax-compatibility", "os": ["linux"]},
|
||||
# {"file": "how-to/deep-learning-rocm", "os": ["linux"]},
|
||||
#
|
||||
# {"file": "how-to/rocm-for-ai/index", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/install", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/system-health-check", "os": ["linux"]},
|
||||
#
|
||||
# {"file": "how-to/rocm-for-ai/training/index", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/training/train-a-model", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/training/prerequisite-system-validation", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/training/benchmark-docker/megatron-lm", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/training/benchmark-docker/pytorch-training", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/training/scale-model-training", "os": ["linux"]},
|
||||
#
|
||||
# {"file": "how-to/rocm-for-ai/fine-tuning/index", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/fine-tuning/overview", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference", "os": ["linux"]},
|
||||
#
|
||||
# {"file": "how-to/rocm-for-ai/inference/index", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference/hugging-face-models", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference/llm-inference-frameworks", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference/benchmark-docker/vllm", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference/benchmark-docker/previous-versions/vllm-0.8.5-20250513", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference/deploy-your-model", "os": ["linux"]},
|
||||
#
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/index", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/model-quantization", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/profiling-and-debugging", "os": ["linux"]},
|
||||
# {"file": "how-to/rocm-for-ai/inference-optimization/workload", "os": ["linux"]},
|
||||
#
|
||||
# {"file": "how-to/system-optimization/index", "os": ["linux"]},
|
||||
# {"file": "how-to/system-optimization/mi300x", "os": ["linux"]},
|
||||
# {"file": "how-to/system-optimization/mi200", "os": ["linux"]},
|
||||
# {"file": "how-to/system-optimization/mi100", "os": ["linux"]},
|
||||
# {"file": "how-to/system-optimization/w6000-v620", "os": ["linux"]},
|
||||
# {"file": "how-to/tuning-guides/mi300x/index", "os": ["linux"]},
|
||||
# {"file": "how-to/tuning-guides/mi300x/system", "os": ["linux"]},
|
||||
# {"file": "how-to/tuning-guides/mi300x/workload", "os": ["linux"]},
|
||||
# {"file": "how-to/system-debugging", "os": ["linux"]},
|
||||
# {"file": "how-to/gpu-enabled-mpi", "os": ["linux"]},
|
||||
]
|
||||
|
||||
external_toc_path = "./sphinx/_toc.yml"
|
||||
# Options to improve documentation build time for preview release documentation
|
||||
external_toc_exclude_missing = True # don't build files that aren't in the TOC
|
||||
external_projects_remote_repository = "" # don't fetch data to resolve intersphinx xrefs
|
||||
|
||||
# Add the _extensions directory to Python's search path
|
||||
sys.path.append(str(Path(__file__).parent / 'extension'))
|
||||
@@ -122,7 +139,7 @@ html_static_path = ["sphinx/static/css", "extension/how-to/rocm-for-ai/inference
|
||||
html_css_files = ["rocm_custom.css", "rocm_rn.css", "vllm-benchmark.css"]
|
||||
html_js_files = ["vllm-benchmark.js"]
|
||||
|
||||
html_title = "ROCm Documentation"
|
||||
html_title = "ROCm 7.0 Alpha documentation"
|
||||
|
||||
html_theme_options = {"link_main_doc": False}
|
||||
|
||||
|
||||
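Because most of the previous `article_pages` entries are now commented out rather than deleted, stale paths can linger unnoticed. A hypothetical maintenance sketch (run from the `docs/` directory, assuming each page exists as an `.rst` or `.md` file) that flags entries whose source file is missing:

    import ast
    from pathlib import Path

    # Parse article_pages out of conf.py without importing it.
    tree = ast.parse(Path("conf.py").read_text())
    pages = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Assign) and any(
            isinstance(t, ast.Name) and t.id == "article_pages" for t in node.targets
        ):
            pages = ast.literal_eval(node.value)

    for entry in pages:
        if not any(Path(entry["file"] + ext).exists() for ext in (".rst", ".md")):
            print("missing page:", entry["file"])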
@@ -0,0 +1,120 @@
unified_docker:
  latest:
    pull_tag: rocm/pytorch-training:v25.6
    docker_hub_url: https://hub.docker.com/r/rocm/pytorch-training/tags
    rocm_version: 6.4.1
    pytorch_version: 2.8.0a0+git7d205b2
    python_version: 3.10.17
    transformer_engine_version: 1.14.0+2f85f5f2
    flash_attention_version: 3.0.0.post1
    hipblaslt_version: 0.15.0-8c6919d
    triton_version: 3.3.0
model_groups:
  - group: Pre-training
    tag: pre-training
    models:
      - model: Llama 3.1 8B
        mad_tag: pyt_train_llama-3.1-8b
        model_repo: Llama-3.1-8B
        url: https://huggingface.co/meta-llama/Llama-3.1-8B
        precision: BF16
        training_modes: [pretrain]
      - model: Llama 3.1 70B
        mad_tag: pyt_train_llama-3.1-70b
        model_repo: Llama-3.1-70B
        url: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
        precision: BF16
        training_modes: [pretrain]
      - model: FLUX.1-dev
        mad_tag: pyt_train_flux
        model_repo: Flux
        url: https://huggingface.co/black-forest-labs/FLUX.1-dev
        precision: BF16
        training_modes: [pretrain]
  - group: Fine-tuning
    tag: fine-tuning
    models:
      - model: Llama 4 Scout 17B-16E
        mad_tag: pyt_train_llama-4-scout-17b-16e
        model_repo: Llama-4-17B_16E
        url: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 3.3 70B
        mad_tag: pyt_train_llama-3.3-70b
        model_repo: Llama-3.3-70B
        url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
        precision: BF16
        training_modes: [finetune_fw, finetune_lora, finetune_qlora]
      - model: Llama 3.2 1B
        mad_tag: pyt_train_llama-3.2-1b
        model_repo: Llama-3.2-1B
        url: https://huggingface.co/meta-llama/Llama-3.2-1B
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 3.2 3B
        mad_tag: pyt_train_llama-3.2-3b
        model_repo: Llama-3.2-3B
        url: https://huggingface.co/meta-llama/Llama-3.2-3B
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 3.2 Vision 11B
        mad_tag: pyt_train_llama-3.2-vision-11b
        model_repo: Llama-3.2-Vision-11B
        url: https://huggingface.co/meta-llama/Llama-3.2-11B-Vision
        precision: BF16
        training_modes: [finetune_fw]
      - model: Llama 3.2 Vision 90B
        mad_tag: pyt_train_llama-3.2-vision-90b
        model_repo: Llama-3.2-Vision-90B
        url: https://huggingface.co/meta-llama/Llama-3.2-90B-Vision
        precision: BF16
        training_modes: [finetune_fw]
      - model: Llama 3.1 8B
        mad_tag: pyt_train_llama-3.1-8b
        model_repo: Llama-3.1-8B
        url: https://huggingface.co/meta-llama/Llama-3.1-8B
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 3.1 70B
        mad_tag: pyt_train_llama-3.1-70b
        model_repo: Llama-3.1-70B
        url: https://huggingface.co/meta-llama/Llama-3.1-70B
        precision: BF16
        training_modes: [finetune_fw, finetune_lora, finetune_qlora]
      - model: Llama 3.1 405B
        mad_tag: pyt_train_llama-3.1-405b
        model_repo: Llama-3.1-405B
        url: https://huggingface.co/meta-llama/Llama-3.1-405B
        precision: BF16
        training_modes: [finetune_qlora, HF_finetune_lora]
      - model: Llama 3 8B
        mad_tag: pyt_train_llama-3-8b
        model_repo: Llama-3-8B
        url: https://huggingface.co/meta-llama/Meta-Llama-3-8B
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 3 70B
        mad_tag: pyt_train_llama-3-70b
        model_repo: Llama-3-70B
        url: https://huggingface.co/meta-llama/Meta-Llama-3-70B
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 2 7B
        mad_tag: pyt_train_llama-2-7b
        model_repo: Llama-2-7B
        url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
        precision: BF16
        training_modes: [finetune_fw, finetune_lora, finetune_qlora]
      - model: Llama 2 13B
        mad_tag: pyt_train_llama-2-13b
        model_repo: Llama-2-13B
        url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
        precision: BF16
        training_modes: [finetune_fw, finetune_lora]
      - model: Llama 2 70B
        mad_tag: pyt_train_llama-2-70b
        model_repo: Llama-2-70B
        url: https://github.com/meta-llama/llama-models/tree/main/models/llama2
        precision: BF16
        training_modes: [finetune_lora, finetune_qlora, HF_finetune_lora]
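The `datatemplate` directives added later in this changeset consume this file. A quick, hedged way to sanity-check it is to load it with PyYAML and invert the model/mode mapping (the path below assumes the file sits under `docs/data/` as the templates reference it):

    import collections
    import yaml  # PyYAML

    with open("docs/data/how-to/rocm-for-ai/training/pytorch-training-benchmark-models.yaml") as f:
        data = yaml.safe_load(f)

    print("Docker image:", data["unified_docker"]["latest"]["pull_tag"])

    # Invert the mapping: training mode -> models that support it.
    by_mode = collections.defaultdict(list)
    for group in data["model_groups"]:
        for model in group["models"]:
            for mode in model["training_modes"]:
                by_mode[mode].append(model["model"])

    for mode, models in sorted(by_mode.items()):
        print(f"{mode}: {', '.join(models)}")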
@@ -18,11 +18,18 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
   - PyTorch version
   - Resources

+  * - 6.4.0
+    - 0.9.0.1
+    - 2.7.0
+    -
+      * :doc:`Documentation <../vllm>`
+      * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`_
+
   * - 6.3.1
     - 0.8.5 (0.8.6.dev)
     - 2.7.0
     -
-      * :doc:`Documentation <../vllm>`
+      * :doc:`Documentation <vllm-0.8.5-20250521>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.3.1_vllm_0.8.5_20250521/images/sha256-38410c51af7208897cd8b737c9bdfc126e9bc8952d4aa6b88c85482f03092a11>`_

   * - 6.3.1
@@ -57,7 +64,7 @@ previous releases of the ``ROCm/vllm`` Docker image on `Docker Hub <https://hub.
     - 0.6.4
     - 2.5.0
     -
-      * :doc:`Documentation <vllm-0.4.3>`
+      * :doc:`Documentation <vllm-0.6.4>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/vllm/rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4/images/sha256-ccbb74cc9e7adecb8f7bdab9555f7ac6fc73adb580836c2a35ca96ff471890d8>`_

   * - 6.2.0
@@ -1,5 +1,5 @@
 .. meta::
-  :description: How to install ROCm and popular machine learning frameworks.
+  :description: How to install the ROCm 7.0 preview
   :keywords: ROCm, AI, LLM, train, fine-tune, FSDP, DeepSpeed, LLaMA, tutorial

 .. _rocm-for-ai-install:
@@ -18,11 +18,18 @@ previous releases of the ``ROCm/pytorch-training`` Docker image on `Docker Hub <
   - PyTorch version
   - Resources

-  * - v25.5
+  * - v25.6
     - 6.3.4
     - 0.4.35
     - 2.8.0a0+git7d205b2
     -
       * :doc:`Documentation <../pytorch-training>`
       * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.6/images/sha256-a4cea3c493a4a03d199a3e81960ac071d79a4a7a391aa9866add3b30a7842661>`_

+  * - v25.5
+    - 6.3.4
+    - 2.7.0a0+git637433
+    -
+      * :doc:`Documentation <pytorch-training-v25.5>`
+      * `Docker Hub <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
+
   * - v25.4
@@ -0,0 +1,437 @@
.. meta::
   :description: How to train a model using PyTorch for ROCm.
   :keywords: ROCm, AI, LLM, train, PyTorch, torch, Llama, flux, tutorial, docker

**************************************
Training a model with PyTorch for ROCm
**************************************

.. caution::

   This documentation does not reflect the latest version of the ROCm PyTorch
   training performance benchmark documentation. See :doc:`../pytorch-training` for the latest version.

PyTorch is an open-source machine learning framework that is widely used for
model training with GPU-optimized components for transformer-based models.

The `PyTorch for ROCm training Docker <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
(``rocm/pytorch-training:v25.5``) image
provides a prebuilt optimized environment for fine-tuning and pretraining a
model on AMD Instinct MI325X and MI300X accelerators. It includes the following
software components to accelerate training workloads:

+--------------------------+--------------------------------+
| Software component       | Version                        |
+==========================+================================+
| ROCm                     | 6.3.4                          |
+--------------------------+--------------------------------+
| PyTorch                  | 2.7.0a0+git637433              |
+--------------------------+--------------------------------+
| Python                   | 3.10                           |
+--------------------------+--------------------------------+
| Transformer Engine       | 1.12.0.dev0+25a33da            |
+--------------------------+--------------------------------+
| Flash Attention          | 3.0.0                          |
+--------------------------+--------------------------------+
| hipBLASLt                | git53b53bf                     |
+--------------------------+--------------------------------+
| Triton                   | 3.2.0                          |
+--------------------------+--------------------------------+

.. _amd-pytorch-training-model-support:

Supported models
================

The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.

* Llama 3.3 70B

* Llama 3.1 8B

* Llama 3.1 70B

* Llama 2 70B

* FLUX.1-dev

.. note::

   Only these models are supported in the following steps.

   Some models, such as Llama 3, require an external license agreement through
   a third party (for example, Meta).

.. _amd-pytorch-training-performance-measurements:

Performance measurements
========================

To evaluate performance, the
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
page provides reference throughput and latency measurements for training
popular AI models.

.. note::

   The performance data presented in
   `Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
   should not be interpreted as the peak performance achievable by AMD
   Instinct MI325X and MI300X accelerators or ROCm software.

System validation
=================

Before running AI workloads, it's important to validate that your AMD hardware is configured
correctly and performing optimally.

If you have already validated your system settings, including aspects like NUMA auto-balancing, you
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
before starting training.

To test for optimal performance, consult the recommended :ref:`System health benchmarks
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
system's configuration.

This Docker image is optimized for specific model configurations outlined
below. Performance can vary for other training workloads, as AMD
doesn’t validate configurations and run conditions outside those described.

Benchmarking
============

Once the setup is complete, choose between two options to start benchmarking:

.. tab-set::

   .. tab-item:: MAD-integrated benchmarking

      Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
      directory and install the required packages on the host machine.

      .. code-block:: shell

         git clone https://github.com/ROCm/MAD
         cd MAD
         pip install -r requirements.txt

      For example, use this command to run the performance benchmark test on the Llama 3.1 8B model
      using one GPU with the float16 data type on the host machine.

      .. code-block:: shell

         export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
         python3 tools/run_models.py --tags pyt_train_llama-3.1-8b --keep-model-dir --live-output --timeout 28800

      The available models for MAD-integrated benchmarking are:

      * ``pyt_train_llama-3.3-70b``

      * ``pyt_train_llama-3.1-8b``

      * ``pyt_train_llama-3.1-70b``

      * ``pyt_train_flux``

      MAD launches a Docker container with the name
      ``container_ci-pyt_train_llama-3.1-8b``, for example. The latency and throughput reports of the
      model are collected in the following path: ``~/MAD/perf.csv``.
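      The exact column layout of ``perf.csv`` is not documented here, so a small
      standard-library sketch that simply dumps whatever the report contains can
      be handy (a hypothetical inspection helper, not part of MAD):

      .. code-block:: python

         import csv
         from pathlib import Path

         # Hedged sketch: the perf.csv schema is whatever MAD wrote out.
         path = Path.home() / "MAD" / "perf.csv"
         with path.open(newline="") as f:
             rows = list(csv.DictReader(f))

         if rows:
             print("columns:", ", ".join(rows[0].keys()))
             for row in rows:
                 print(row)
         else:
             print("no benchmark rows found in", path)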
   .. tab-item:: Standalone benchmarking

      .. rubric:: Download the Docker image and required packages

      Use the following command to pull the Docker image from Docker Hub.

      .. code-block:: shell

         docker pull rocm/pytorch-training:v25.5

      Run the Docker container.

      .. code-block:: shell

         docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env rocm/pytorch-training:v25.5

      Use these commands if you exit the ``training_env`` container and need to return to it.

      .. code-block:: shell

         docker start training_env
         docker exec -it training_env bash

      In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
      repository and navigate to the benchmark scripts directory
      ``/workspace/MAD/scripts/pytorch_train``.

      .. code-block:: shell

         git clone https://github.com/ROCm/MAD
         cd MAD/scripts/pytorch_train

      .. rubric:: Prepare training datasets and dependencies

      The following benchmarking examples require downloading models and datasets
      from Hugging Face. To ensure successful access to gated repos, set your
      ``HF_TOKEN``.

      .. code-block:: shell

         export HF_TOKEN=$your_personal_hugging_face_access_token

      Run the setup script to install libraries and datasets needed for benchmarking.

      .. code-block:: shell

         ./pytorch_benchmark_setup.sh

      ``pytorch_benchmark_setup.sh`` installs the following libraries:

      .. list-table::
         :header-rows: 1

         * - Library
           - Benchmark model
           - Reference

         * - ``accelerate``
           - Llama 3.1 8B, FLUX
           - `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_

         * - ``datasets``
           - Llama 3.1 8B, 70B, FLUX
           - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0

         * - ``torchdata``
           - Llama 3.1 70B
           - `TorchData <https://pytorch.org/data/beta/index.html>`_

         * - ``tomli``
           - Llama 3.1 70B
           - `Tomli <https://pypi.org/project/tomli/>`_

         * - ``tiktoken``
           - Llama 3.1 70B
           - `tiktoken <https://github.com/openai/tiktoken>`_

         * - ``blobfile``
           - Llama 3.1 70B
           - `blobfile <https://pypi.org/project/blobfile/>`_

         * - ``tabulate``
           - Llama 3.1 70B
           - `tabulate <https://pypi.org/project/tabulate/>`_

         * - ``wandb``
           - Llama 3.1 70B
           - `Weights & Biases <https://github.com/wandb/wandb>`_

         * - ``sentencepiece``
           - Llama 3.1 70B, FLUX
           - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0

         * - ``tensorboard``
           - Llama 3.1 70B, FLUX
           - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0

         * - ``csvkit``
           - FLUX
           - `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1

         * - ``deepspeed``
           - FLUX
           - `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2

         * - ``diffusers``
           - FLUX
           - `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0

         * - ``GitPython``
           - FLUX
           - `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44

         * - ``opencv-python-headless``
           - FLUX
           - `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84

         * - ``peft``
           - FLUX
           - `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0

         * - ``protobuf``
           - FLUX
           - `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2

         * - ``pytest``
           - FLUX
           - `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4

         * - ``python-dotenv``
           - FLUX
           - `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1

         * - ``seaborn``
           - FLUX
           - `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2

         * - ``transformers``
           - FLUX
           - `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0

      ``pytorch_benchmark_setup.sh`` downloads the following models from Hugging Face:

      * `meta-llama/Llama-3.1-70B-Instruct <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_

      * `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_

      Along with the following datasets:

      * `WikiText <https://huggingface.co/datasets/Salesforce/wikitext>`_

      * `UltraChat 200k <https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k>`_

      * `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_

      .. rubric:: Pretraining

      To start the pretraining benchmark, use the following command with the
      appropriate options. See the following list of options and their descriptions.

      .. code-block:: shell

         ./pytorch_benchmark_report.sh -t $training_mode -m $model_repo -p $datatype -s $sequence_length

      .. list-table::
         :header-rows: 1

         * - Name
           - Options
           - Description

         * - ``$training_mode``
           - ``pretrain``
           - Benchmark pretraining

         * -
           - ``finetune_fw``
           - Benchmark full weight fine-tuning (Llama 3.1 70B with BF16)

         * -
           - ``finetune_lora``
           - Benchmark LoRA fine-tuning (Llama 3.1 70B with BF16)

         * -
           - ``HF_finetune_lora``
           - Benchmark LoRA fine-tuning with Hugging Face PEFT (Llama 2 70B with BF16)

         * - ``$datatype``
           - ``FP8`` or ``BF16``
           - Only Llama 3.1 8B supports FP8 precision.

         * - ``$model_repo``
           - ``Llama-3.3-70B``
           - `Llama 3.3 70B <https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct>`_

         * -
           - ``Llama-3.1-8B``
           - `Llama 3.1 8B <https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct>`_

         * -
           - ``Llama-3.1-70B``
           - `Llama 3.1 70B <https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct>`_

         * -
           - ``Llama-2-70B``
           - `Llama 2 70B <https://huggingface.co/meta-llama/Llama-2-70B>`_

         * -
           - ``Flux``
           - `FLUX.1 [dev] <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_

         * - ``$sequence_length``
           - Between 2048 and 8192. 8192 by default.
           - Sequence length for the language model.

      .. note::

         Occasionally, downloading the Flux dataset might fail. In the event of this
         error, manually download it from Hugging Face at
         `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
         and save it to ``/workspace/FluxBenchmark``. This ensures that the test script can access
         the required dataset.
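      If the automatic download does fail, the manual step can also be scripted;
      a sketch assuming the ``huggingface_hub`` Python package is available
      inside the container and ``HF_TOKEN`` is set:

      .. code-block:: python

         from huggingface_hub import snapshot_download

         # Fetch the gated FLUX.1-dev repository into the directory the
         # benchmark script expects (requires a token with access).
         snapshot_download(
             repo_id="black-forest-labs/FLUX.1-dev",
             local_dir="/workspace/FluxBenchmark",
         )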
      .. rubric:: Fine-tuning

      To start the fine-tuning benchmark, use the following command. It will run the benchmarking example of Llama 3.1 70B
      with the WikiText dataset using the AMD fork of `torchtune <https://github.com/AMD-AIG-AIMA/torchtune>`_.

      .. code-block:: shell

         ./pytorch_benchmark_report.sh -t {finetune_fw, finetune_lora} -p BF16 -m Llama-3.1-70B

      Use the following command to run the benchmarking example of Llama 2 70B with the UltraChat 200k dataset using
      `Hugging Face PEFT <https://huggingface.co/docs/peft/en/index>`_.

      .. code-block:: shell

         ./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B

      .. rubric:: Benchmarking examples

      Here are some example commands to get started pretraining and fine-tuning with various model configurations.

      * Example 1: Llama 3.1 70B with BF16 precision using `torchtitan <https://github.com/ROCm/torchtitan>`_.

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Llama-3.1-70B -s 8192

      * Example 2: Llama 3.1 8B with FP8 precision using Transformer Engine (TE) and Hugging Face Accelerate.

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t pretrain -p FP8 -m Llama-3.1-8B -s 8192

      * Example 3: FLUX.1-dev with BF16 precision with FluxBenchmark.

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t pretrain -p BF16 -m Flux

      * Example 4: Torchtune full weight fine-tuning with Llama 3.1 70B

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.1-70B

      * Example 5: Torchtune LoRA fine-tuning with Llama 3.1 70B

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B

      * Example 6: Torchtune full weight fine-tuning with Llama-3.3-70B

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t finetune_fw -p BF16 -m Llama-3.3-70B

      * Example 7: Torchtune LoRA fine-tuning with Llama-3.3-70B

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.3-70B

      * Example 8: Torchtune QLoRA fine-tuning with Llama-3.3-70B

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t finetune_qlora -p BF16 -m Llama-3.3-70B

      * Example 9: Hugging Face PEFT LoRA fine-tuning with Llama 2 70B

        .. code-block:: shell

           ./pytorch_benchmark_report.sh -t HF_finetune_lora -p BF16 -m Llama-2-70B
@@ -9,28 +9,27 @@ Training a model with PyTorch for ROCm
|
||||
PyTorch is an open-source machine learning framework that is widely used for
|
||||
model training with GPU-optimized components for transformer-based models.
|
||||
|
||||
The `PyTorch for ROCm training Docker <https://hub.docker.com/layers/rocm/pytorch-training/v25.5/images/sha256-d47850a9b25b4a7151f796a8d24d55ea17bba545573f0d50d54d3852f96ecde5>`_
|
||||
(``rocm/pytorch-training:v25.5``) image
|
||||
provides a prebuilt optimized environment for fine-tuning and pretraining a
|
||||
model on AMD Instinct MI325X and MI300X accelerators. It includes the following
|
||||
software components to accelerate training workloads:
|
||||
The `PyTorch for ROCm training Docker <https://hub.docker.com/r/rocm/pytorch-training/tags>`_
|
||||
(``rocm/pytorch-training:v25.6``) image provides a prebuilt optimized environment for fine-tuning and pretraining a
|
||||
model on AMD Instinct MI325X and MI300X accelerators. It includes the following software components to accelerate
|
||||
training workloads:
|
||||
|
||||
+--------------------------+--------------------------------+
|
||||
| Software component | Version |
|
||||
+==========================+================================+
|
||||
| ROCm | 6.3.4 |
|
||||
+--------------------------+--------------------------------+
|
||||
| PyTorch | 2.7.0a0+git637433 |
|
||||
| PyTorch | 2.8.0a0+git7d205b2 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Python | 3.10 |
|
||||
| Python | 3.10.17 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Transformer Engine | 1.12.0.dev0+25a33da |
|
||||
| Transformer Engine | 1.14.0+2f85f5f2 |
|
||||
+--------------------------+--------------------------------+
|
||||
| Flash Attention | 3.0.0 |
|
||||
| Flash Attention | 3.0.0.post1 |
|
||||
+--------------------------+--------------------------------+
|
||||
| hipBLASLt | git53b53bf |
|
||||
| hipBLASLt | 0.15.0-8c6919d |
|
||||
+--------------------------+--------------------------------+
|
||||
| Triton | 3.2.0 |
|
||||
| Triton | 3.3.0 |
|
||||
+--------------------------+--------------------------------+
|
||||
|
||||
.. _amd-pytorch-training-model-support:
|
||||
@@ -40,395 +39,393 @@ Supported models
|
||||
|
||||
The following models are pre-optimized for performance on the AMD Instinct MI325X and MI300X accelerators.
|
||||
|
||||
* Llama 3.3 70B
|
||||
.. datatemplate:yaml:: /data/how-to/rocm-for-ai/training/pytorch-training-benchmark-models.yaml
|
||||
|
||||
* Llama 3.1 8B
|
||||
{% set unified_docker = data.unified_docker.latest %}
|
||||
{% set model_groups = data.model_groups %}
|
||||
|
||||
* Llama 3.1 70B
|
||||
.. raw:: html
|
||||
|
||||
* Llama 2 70B
|
||||
<div id="vllm-benchmark-ud-params-picker" class="container-fluid">
|
||||
<div class="row">
|
||||
<div class="col-2 me-2 model-param-head">Workload</div>
|
||||
<div class="row col-10">
|
||||
{% for model_group in model_groups %}
|
||||
<div class="col-6 model-param" data-param-k="model-group" data-param-v="{{ model_group.tag }}" tabindex="0">{{ model_group.group }}</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
* FLUX.1-dev
|
||||
<div class="row mt-1">
|
||||
<div class="col-2 me-2 model-param-head">Model</div>
|
||||
<div class="row col-10">
|
||||
{% for model_group in model_groups %}
|
||||
{% set models = model_group.models %}
|
||||
{% for model in models %}
|
||||
{% if models|length % 3 == 0 %}
|
||||
<div class="col-4 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% else %}
|
||||
<div class="col-6 model-param" data-param-k="model" data-param-v="{{ model.mad_tag }}" data-param-group="{{ model_group.tag }}" tabindex="0">{{ model.model }}</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
.. note::
|
||||
.. note::
|
||||
|
||||
Only these models are supported in the following steps.
|
||||
Some models require an external license agreement through a third party (for example, Meta).
|
||||
|
||||
Some models, such as Llama 3, require an external license agreement through
|
||||
a third party (for example, Meta).
|
||||
.. _amd-pytorch-training-performance-measurements:
|
||||
|
||||
.. _amd-pytorch-training-performance-measurements:
|
||||
Performance measurements
|
||||
========================
|
||||
|
||||
Performance measurements
|
||||
========================
|
||||
|
||||
To evaluate performance, the
|
||||
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
|
||||
page provides reference throughput and latency measurements for training
|
||||
popular AI models.
|
||||
|
||||
.. note::
|
||||
|
||||
The performance data presented in
|
||||
To evaluate performance, the
|
||||
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
|
||||
should not be interpreted as the peak performance achievable by AMD
|
||||
Instinct MI325X and MI300X accelerators or ROCm software.
|
||||
page provides reference throughput and latency measurements for training
|
||||
popular AI models.
|
||||
|
||||
System validation
|
||||
=================
|
||||
.. note::
|
||||
|
||||
Before running AI workloads, it's important to validate that your AMD hardware is configured
|
||||
correctly and performing optimally.
|
||||
The performance data presented in
|
||||
`Performance results with AMD ROCm software <https://www.amd.com/en/developer/resources/rocm-hub/dev-ai/performance-results.html#tabs-a8deaeb413-item-21cea50186-tab>`_
|
||||
should not be interpreted as the peak performance achievable by AMD
|
||||
Instinct MI325X and MI300X accelerators or ROCm software.
|
||||
|
||||
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
|
||||
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
|
||||
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
|
||||
before starting training.
|
||||
System validation
|
||||
=================
|
||||
|
||||
To test for optimal performance, consult the recommended :ref:`System health benchmarks
|
||||
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
|
||||
system's configuration.
|
||||
Before running AI workloads, it's important to validate that your AMD hardware is configured
|
||||
correctly and performing optimally.
|
||||
|
||||
This Docker image is optimized for specific model configurations outlined
|
||||
below. Performance can vary for other training workloads, as AMD
|
||||
doesn’t validate configurations and run conditions outside those described.
|
||||
If you have already validated your system settings, including aspects like NUMA auto-balancing, you
|
||||
can skip this step. Otherwise, complete the procedures in the :ref:`System validation and
|
||||
optimization <rocm-for-ai-system-optimization>` guide to properly configure your system settings
|
||||
before starting training.
|
||||
|
||||
Benchmarking
|
||||
============
|
||||
To test for optimal performance, consult the recommended :ref:`System health benchmarks
|
||||
<rocm-for-ai-system-health-bench>`. This suite of tests will help you verify and fine-tune your
|
||||
system's configuration.
|
||||
|
||||
Once the setup is complete, choose between two options to start benchmarking:
|
||||
This Docker image is optimized for specific model configurations outlined
|
||||
below. Performance can vary for other training workloads, as AMD
|
||||
doesn’t validate configurations and run conditions outside those described.
|
||||
|
||||
.. tab-set::
|
||||
Benchmarking
|
||||
============
|
||||
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
Once the setup is complete, choose between two options to start benchmarking:
|
||||
|
||||
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
.. tab-set::
|
||||
|
||||
.. code-block:: shell
|
||||
.. tab-item:: MAD-integrated benchmarking
|
||||
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
Clone the ROCm Model Automation and Dashboarding (`<https://github.com/ROCm/MAD>`__) repository to a local
|
||||
directory and install the required packages on the host machine.
|
||||
|
||||
For example, use this command to run the performance benchmark test on the Llama 3.1 8B model
|
||||
using one GPU with the float16 data type on the host machine.
|
||||
.. code-block:: shell
|
||||
|
||||
.. code-block:: shell
|
||||
git clone https://github.com/ROCm/MAD
|
||||
cd MAD
|
||||
pip install -r requirements.txt
|
||||
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags pyt_train_llama-3.1-8b --keep-model-dir --live-output --timeout 28800
|
||||
{% for model_group in model_groups %}
|
||||
{% for model in model_group.models %}
|
||||
|
||||
The available models for MAD-integrated benchmarking are:
|
||||
.. container:: model-doc {{ model.mad_tag }}
|
||||
|
||||
* ``pyt_train_llama-3.3-70b``
|
||||
For example, use this command to run the performance benchmark test on the {{ model.model }} model
|
||||
using one GPU with the {{ model.precision }} data type on the host machine.
|
||||
|
||||
* ``pyt_train_llama-3.1-8b``
|
||||
.. code-block:: shell
|
||||
|
||||
* ``pyt_train_llama-3.1-70b``
|
||||
export MAD_SECRETS_HFTOKEN="your personal Hugging Face token to access gated models"
|
||||
python3 tools/run_models.py --tags {{ model.mad_tag }} --keep-model-dir --live-output --timeout 28800
|
||||
|
||||
* ``pyt_train_flux``
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-{{ model.mad_tag }}``, for example. The latency and throughput reports of the
|
||||
model are collected in the following path: ``~/MAD/perf.csv``.
|
||||
|
||||
MAD launches a Docker container with the name
|
||||
``container_ci-pyt_train_llama-3.1-8b``, for example. The latency and throughput reports of the
|
||||
model are collected in the following path: ``~/MAD/perf.csv``.
|
||||
{% endfor %}
|
||||
{% endfor %}

   .. tab-item:: Standalone benchmarking

      .. rubric:: Download the Docker image and required packages

      Use the following command to pull the Docker image from Docker Hub.

      .. code-block:: shell

         docker pull {{ unified_docker.pull_tag }}

      Run the Docker container.

      .. code-block:: shell

         docker run -it --device /dev/dri --device /dev/kfd --network host --ipc host --group-add video --cap-add SYS_PTRACE --security-opt seccomp=unconfined --privileged -v $HOME:$HOME -v $HOME/.ssh:/root/.ssh --shm-size 64G --name training_env {{ unified_docker.pull_tag }}

      Use these commands if you exit the ``training_env`` container and need to return to it.

      .. code-block:: shell

         docker start training_env
         docker exec -it training_env bash

      In the Docker container, clone the `<https://github.com/ROCm/MAD>`__
      repository and navigate to the benchmark scripts directory
      ``/workspace/MAD/scripts/pytorch_train``.

      .. code-block:: shell

         git clone https://github.com/ROCm/MAD
         cd MAD/scripts/pytorch_train

      .. rubric:: Prepare training datasets and dependencies

      The following benchmarking examples require downloading models and datasets
      from Hugging Face. To ensure successful access to gated repos, set your
      ``HF_TOKEN``.

      .. code-block:: shell

         export HF_TOKEN=$your_personal_hugging_face_access_token

      Run the setup script to install libraries and datasets needed for benchmarking.

      .. code-block:: shell

         ./pytorch_benchmark_setup.sh

      .. container:: model-doc pyt_train_llama-3.1-8b

         ``pytorch_benchmark_setup.sh`` installs the following libraries for Llama 3.1 8B:

         .. list-table::
            :header-rows: 1

            * - Library
              - Reference

            * - ``accelerate``
              - `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_

            * - ``datasets``
              - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0

      .. container:: model-doc pyt_train_llama-3.1-70b

         ``pytorch_benchmark_setup.sh`` installs the following libraries for Llama 3.1 70B:

         .. list-table::
            :header-rows: 1

            * - Library
              - Reference

            * - ``datasets``
              - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0

            * - ``torchdata``
              - `TorchData <https://pytorch.org/data/beta/index.html>`_

            * - ``tomli``
              - `Tomli <https://pypi.org/project/tomli/>`_

            * - ``tiktoken``
              - `tiktoken <https://github.com/openai/tiktoken>`_

            * - ``blobfile``
              - `blobfile <https://pypi.org/project/blobfile/>`_

            * - ``tabulate``
              - `tabulate <https://pypi.org/project/tabulate/>`_

            * - ``wandb``
              - `Weights & Biases <https://github.com/wandb/wandb>`_

            * - ``sentencepiece``
              - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0

            * - ``tensorboard``
              - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0

      .. container:: model-doc pyt_train_flux

         ``pytorch_benchmark_setup.sh`` installs the following libraries for FLUX:

         .. list-table::
            :header-rows: 1

            * - Library
              - Reference

            * - ``accelerate``
              - `Hugging Face Accelerate <https://huggingface.co/docs/accelerate/en/index>`_

            * - ``datasets``
              - `Hugging Face Datasets <https://huggingface.co/docs/datasets/v3.2.0/en/index>`_ 3.2.0

            * - ``sentencepiece``
              - `SentencePiece <https://github.com/google/sentencepiece>`_ 0.2.0

            * - ``tensorboard``
              - `TensorBoard <https://www.tensorflow.org/tensorboard>`_ 2.18.0

            * - ``csvkit``
              - `csvkit <https://csvkit.readthedocs.io/en/latest/>`_ 2.0.1

            * - ``deepspeed``
              - `DeepSpeed <https://github.com/deepspeedai/DeepSpeed>`_ 0.16.2

            * - ``diffusers``
              - `Hugging Face Diffusers <https://huggingface.co/docs/diffusers/en/index>`_ 0.31.0

            * - ``GitPython``
              - `GitPython <https://github.com/gitpython-developers/GitPython>`_ 3.1.44

            * - ``opencv-python-headless``
              - `opencv-python-headless <https://pypi.org/project/opencv-python-headless/>`_ 4.10.0.84

            * - ``peft``
              - `PEFT <https://huggingface.co/docs/peft/en/index>`_ 0.14.0

            * - ``protobuf``
              - `Protocol Buffers <https://github.com/protocolbuffers/protobuf>`_ 5.29.2

            * - ``pytest``
              - `PyTest <https://docs.pytest.org/en/stable/>`_ 8.3.4

            * - ``python-dotenv``
              - `python-dotenv <https://pypi.org/project/python-dotenv/>`_ 1.0.1

            * - ``seaborn``
              - `Seaborn <https://seaborn.pydata.org/>`_ 0.13.2

            * - ``transformers``
              - `Transformers <https://huggingface.co/docs/transformers/en/index>`_ 4.47.0

         ``pytorch_benchmark_setup.sh`` downloads the following datasets from Hugging Face:

         * `bghira/pseudo-camera-10k <https://huggingface.co/datasets/bghira/pseudo-camera-10k>`_

      {% for model_group in model_groups %}
      {% for model in model_group.models %}
      {% if model_group.tag == "pre-training" and model.mad_tag in ["pyt_train_llama-3.1-8b", "pyt_train_llama-3.1-70b", "pyt_train_flux"] %}

      .. container:: model-doc {{ model.mad_tag }}

         .. rubric:: Pretraining

         To start the pre-training benchmark, use the following command with the
         appropriate options. See the following list of options and their descriptions.
         A concrete invocation follows the table.

         .. code-block:: shell

            ./pytorch_benchmark_report.sh -t pretrain -m {{ model.model_repo }} -p $datatype -s $sequence_length

         .. list-table::
            :header-rows: 1

            * - Name
              - Options
              - Description

            {% if model.mad_tag == "pyt_train_llama-3.1-8b" %}
            * - ``$datatype``
              - ``BF16`` or ``FP8``
              - Only Llama 3.1 8B supports FP8 precision.
            {% else %}
            * - ``$datatype``
              - ``BF16``
              - Only Llama 3.1 8B supports FP8 precision.
            {% endif %}

            * - ``$sequence_length``
              - Between 2048 and 8192. 8192 by default.
              - Sequence length for the language model.
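
         For example, one possible invocation for Llama 3.1 8B, using values from the
         table above (the model name is a sketch of what the rendered template produces):

         .. code-block:: shell

            ./pytorch_benchmark_report.sh -t pretrain -m Llama-3.1-8B -p FP8 -s 8192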

         {% if model.mad_tag == "pyt_train_flux" %}

         .. note::

            Occasionally, downloading the Flux dataset might fail. In the event of this
            error, manually download it from Hugging Face at
            `black-forest-labs/FLUX.1-dev <https://huggingface.co/black-forest-labs/FLUX.1-dev>`_
            and save it to ``/workspace/FluxBenchmark``. This ensures that the test script can access
            the required dataset.

         {% endif %}

      {% endif %}

      {% if model_group.tag == "fine-tuning" %}

      .. container:: model-doc {{ model.mad_tag }}

         .. rubric:: Fine-tuning

         To start the fine-tuning benchmark, use the following command with the
         appropriate options. See the following list of options and their descriptions.
         A worked example follows the table.

         .. code-block:: shell

            ./pytorch_benchmark_report.sh -t $training_mode -m {{ model.model_repo }} -p BF16 -s $sequence_length

         .. list-table::
            :header-rows: 1

            * - Name
              - Options
              - Description

            * - ``$training_mode``
              - ``finetune_fw``
              - Full weight fine-tuning (BF16 supported)

            * -
              - ``finetune_lora``
              - LoRA fine-tuning (BF16 supported)

            * -
              - ``finetune_qlora``
              - QLoRA fine-tuning (BF16 supported)

            * -
              - ``HF_finetune_lora``
              - LoRA fine-tuning with Hugging Face PEFT

            * - ``$datatype``
              - ``BF16``
              - All models support BF16.

            * - ``$sequence_length``
              - Between 2048 and 16384.
              - Sequence length for the language model.
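
         For example, a LoRA fine-tuning run for a 70B-class model might look like the
         following (the model name is a sketch of what the rendered template produces):

         .. code-block:: shell

            ./pytorch_benchmark_report.sh -t finetune_lora -p BF16 -m Llama-3.1-70B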

         .. note::

            {{ model.model }} currently supports the following fine-tuning methods:

            {% for method in model.training_modes %}
            * ``{{ method }}``
            {% endfor %}

            {% if model.training_modes|length < 4 %}
            The upstream `torchtune <https://github.com/pytorch/torchtune>`_ repository
            does not currently provide YAML configuration files for other combinations of
            model and fine-tuning method.
            However, you can still configure your own YAML files to enable support for
            fine-tuning methods not listed here by following existing patterns in the
            ``/workspace/torchtune/recipes/configs`` directory.
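
            For instance, a custom recipe could start from one of the shipped configs; a
            minimal sketch, assuming the ``tune`` CLI is available in the container and
            using a hypothetical config file name:

            .. code-block:: shell

               # Copy an existing recipe config as a starting point (hypothetical path).
               cp /workspace/torchtune/recipes/configs/llama3_1/70B_lora.yaml my_recipe.yaml
               # After editing my_recipe.yaml, launch it with the torchtune CLI.
               tune run lora_finetune_distributed --config my_recipe.yaml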
            {% endif %}

      {% endif %}
      {% endfor %}
      {% endfor %}

      .. rubric:: Benchmarking examples

      For examples of benchmarking commands, see `<https://github.com/ROCm/MAD/tree/develop/benchmark/pytorch_train#benchmarking-examples>`__.

Previous versions
=================

26
docs/preview/index.md
Normal file
@@ -0,0 +1,26 @@
---
myst:
  html_meta:
    "description": "AMD ROCm 7.0 Alpha documentation"
    "keywords": "Radeon, open, compute, platform, install, how, conceptual, reference, home, docs"
---

# AMD ROCm 7.0 Alpha documentation

AMD ROCm is an open-source software platform optimized to extract HPC and AI
workload performance from AMD Instinct™ accelerators while maintaining
compatibility with industry software frameworks.

This documentation provides early access information about the ROCm software
Alpha release. The preview release gives users early access to new features
under development so they can test them and provide feedback.
It is not recommended for production use.

```{note}
See [ROCm documentation](https://rocm.docs.amd.com/en/latest/) for the latest stable release for use in production.
```

The documentation includes:

- [ROCm 7.0 Alpha release notes](release.rst) with feature details and support matrix
- [Installation instructions](install/index.rst) for the ROCm 7.0 Alpha and the Instinct Driver
28
docs/preview/install/index.rst
Normal file
@@ -0,0 +1,28 @@
.. meta::
   :description: Installation via native package manager
   :keywords: ROCm install, installation instructions, package manager, native package manager, AMD,
              ROCm

****************************************
ROCm 7.0 Alpha installation instructions
****************************************

The ROCm 7.0 Alpha must be installed using your Linux distribution's native
package manager. This release supports specific hardware and software
configurations -- before installing, see the :ref:`supported OSes and hardware
<alpha-system-requirements>` outlined in the Alpha release notes.

.. important::

   Upgrades and downgrades are not supported. You must uninstall any existing
   ROCm installation before installing the Alpha build.

.. grid:: 2

   .. grid-item-card:: Install ROCm

      See :doc:`Install the ROCm 7.0 Alpha via package manager <rocm>`.

   .. grid-item-card:: Install Instinct Driver

      See :doc:`Install the Instinct Driver via package manager <instinct-driver>`.
212
docs/preview/install/instinct-driver.rst
Normal file
@@ -0,0 +1,212 @@
***********************************************
Install the Instinct Driver via package manager
***********************************************

This section describes how to install the Instinct Driver using ``apt`` on
Ubuntu 22.04 or 24.04, or ``dnf`` on Red Hat Enterprise Linux 9.6.

.. important::

   Upgrades and downgrades are not supported. You must uninstall any existing
   ROCm installation before installing the preview build.

Prerequisites
=============

Before installing, complete the following prerequisites.

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      Install kernel headers.

      .. code-block:: shell

         sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      Install kernel headers.

      .. code-block:: shell

         sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      1. Register your Enterprise Linux.

         .. code-block:: shell

            subscription-manager register --username <username> --password <password>
            subscription-manager attach --auto

      2. Update your Enterprise Linux.

         .. code-block:: shell

            sudo dnf update --releasever=9.6 --exclude=\*release\*

      3. Install kernel headers.

         .. code-block:: shell

            sudo dnf install "kernel-headers-$(uname -r)" "kernel-devel-$(uname -r)" "kernel-devel-matched-$(uname -r)"

Register ROCm repositories
==========================

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      1. Add the package signing key.

         .. code-block:: shell

            # Make the directory if it doesn't exist yet.
            # This location is recommended by the distribution maintainers.
            sudo mkdir --parents --mode=0755 /etc/apt/keyrings
            # Download the key, convert the signing-key to a full
            # keyring required by apt and store in the keyring directory.
            wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
                gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

      2. Register the kernel mode driver.

         .. code-block:: shell

            echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/30.10_alpha/ubuntu jammy main" \
                | sudo tee /etc/apt/sources.list.d/amdgpu.list
            sudo apt update

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      1. Add the package signing key.

         .. code-block:: shell

            # Make the directory if it doesn't exist yet.
            # This location is recommended by the distribution maintainers.
            sudo mkdir --parents --mode=0755 /etc/apt/keyrings
            # Download the key, convert the signing-key to a full
            # keyring required by apt and store in the keyring directory.
            wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
                gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

      2. Register the kernel mode driver.

         .. code-block:: shell

            echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/30.10_alpha/ubuntu noble main" \
                | sudo tee /etc/apt/sources.list.d/amdgpu.list
            sudo apt update

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      .. code-block:: shell

         sudo tee /etc/yum.repos.d/amdgpu.repo <<EOF
         [amdgpu]
         name=amdgpu
         baseurl=https://repo.radeon.com/amdgpu/30.10_alpha/rhel/9.6/main/x86_64/
         enabled=1
         priority=50
         gpgcheck=1
         gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
         EOF
         sudo dnf clean all

Install the kernel driver
=========================

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      .. code-block:: shell

         sudo apt install amdgpu-dkms

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      .. code-block:: shell

         sudo apt install amdgpu-dkms

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      .. code-block:: shell

         sudo dnf install amdgpu-dkms
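
After installing the driver package and rebooting, you can confirm that the DKMS
module built and loaded. A minimal check, assuming the standard ``dkms`` and
``lsmod`` tools are available on your system:

.. code-block:: shell

   # Show the build/install state of the amdgpu DKMS module.
   dkms status amdgpu
   # Confirm the kernel module is loaded.
   lsmod | grep amdgpu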

Uninstalling
============

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      1. Uninstall the kernel mode driver.

         .. code-block:: shell

            sudo apt autoremove amdgpu-dkms

      2. Remove AMDGPU repositories.

         .. code-block:: shell

            sudo rm /etc/apt/sources.list.d/amdgpu.list
            # Clear the cache and clean the system
            sudo rm -rf /var/cache/apt/*
            sudo apt clean all
            sudo apt update

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      1. Uninstall the kernel mode driver.

         .. code-block:: shell

            sudo apt autoremove amdgpu-dkms

      2. Remove AMDGPU repositories.

         .. code-block:: shell

            sudo rm /etc/apt/sources.list.d/amdgpu.list
            # Clear the cache and clean the system
            sudo rm -rf /var/cache/apt/*
            sudo apt clean all
            sudo apt update

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      1. Uninstall the kernel mode driver.

         .. code-block:: shell

            sudo dnf remove amdgpu-dkms

      2. Remove AMDGPU repositories.

         .. code-block:: shell

            sudo rm /etc/yum.repos.d/amdgpu.repo
            # Clear the cache and clean the system
            sudo rm -rf /var/cache/dnf
            sudo dnf clean all
288
docs/preview/install/rocm.rst
Normal file
@@ -0,0 +1,288 @@
**********************************************
Install the ROCm 7.0 Alpha via package manager
**********************************************

This page describes how to install the ROCm 7.0 Alpha build using ``apt`` on
Ubuntu 22.04 or 24.04, or ``dnf`` on Red Hat Enterprise Linux 9.6.

.. important::

   Upgrades and downgrades are not supported. You must uninstall any existing
   ROCm installation before installing the preview build.

Prerequisites
=============

Before installing, complete the following prerequisites.

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      1. Install development packages.

         .. code-block:: shell

            sudo apt install python3-setuptools python3-wheel

      2. Configure user permissions for GPU access.

         .. code-block:: shell

            sudo usermod -a -G render,video $LOGNAME

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      1. Install development packages.

         .. code-block:: shell

            sudo apt install python3-setuptools python3-wheel

      2. Configure user permissions for GPU access.

         .. code-block:: shell

            sudo usermod -a -G render,video $LOGNAME

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      1. Register your Enterprise Linux.

         .. code-block:: shell

            subscription-manager register --username <username> --password <password>
            subscription-manager attach --auto

      2. Update your Enterprise Linux.

         .. code-block:: shell

            sudo dnf update --releasever=9.6 --exclude=\*release\*

      3. Install additional package repositories.

         Add the EPEL repository:

         .. code-block:: shell

            wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
            sudo rpm -ivh epel-release-latest-9.noarch.rpm

         Enable the CodeReady Linux Builder (CRB) repository.

         .. code-block:: shell

            sudo dnf install dnf-plugin-config-manager
            sudo crb enable

      4. Install development packages.

         .. code-block:: shell

            sudo dnf install python3-setuptools python3-wheel

      5. Configure user permissions for GPU access.

         .. code-block:: shell

            sudo usermod -a -G render,video $LOGNAME

Register ROCm repositories
==========================

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      1. Add the package signing key.

         .. code-block:: shell

            # Make the directory if it doesn't exist yet.
            # This location is recommended by the distribution maintainers.
            sudo mkdir --parents --mode=0755 /etc/apt/keyrings
            # Download the key, convert the signing-key to a full
            # keyring required by apt and store in the keyring directory.
            wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
                gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

      2. Register ROCm packages.

         .. code-block:: shell

            echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/7.0_alpha jammy main" \
                | sudo tee /etc/apt/sources.list.d/rocm.list

            echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/graphics/7.0_alpha/ubuntu jammy main" \
                | sudo tee /etc/apt/sources.list.d/rocm-graphics.list

            echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
                | sudo tee /etc/apt/preferences.d/rocm-pin-600
            sudo apt update

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      1. Add the package signing key.

         .. code-block:: shell

            # Make the directory if it doesn't exist yet.
            # This location is recommended by the distribution maintainers.
            sudo mkdir --parents --mode=0755 /etc/apt/keyrings
            # Download the key, convert the signing-key to a full
            # keyring required by apt and store in the keyring directory.
            wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
                gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

      2. Register ROCm packages.

         .. code-block:: shell

            echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/7.0_alpha noble main" \
                | sudo tee /etc/apt/sources.list.d/rocm.list

            echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/graphics/7.0_alpha/ubuntu noble main" \
                | sudo tee /etc/apt/sources.list.d/rocm-graphics.list

            echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
                | sudo tee /etc/apt/preferences.d/rocm-pin-600
            sudo apt update

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      .. code-block:: shell

         sudo tee /etc/yum.repos.d/rocm.repo <<EOF
         [ROCm-7.0.0]
         name=ROCm7.0.0
         baseurl=https://repo.radeon.com/rocm/el9/7.0_alpha/main
         enabled=1
         priority=50
         gpgcheck=1
         gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
         EOF

         sudo tee /etc/yum.repos.d/rocm-graphics.repo <<EOF
         [ROCm-7.0.0-Graphics]
         name=ROCm7.0.0-Graphics
         baseurl=https://repo.radeon.com/graphics/7.0_alpha/rhel/9/main/x86_64/
         enabled=1
         priority=50
         gpgcheck=1
         gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
         EOF
         sudo dnf clean all

Install ROCm
============

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      .. code-block:: shell

         sudo apt install rocm

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      .. code-block:: shell

         sudo apt install rocm

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      .. code-block:: shell

         sudo dnf install rocm
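
To confirm the installation succeeded, you can query the runtime for the
detected GPU agents. A minimal check, assuming the default ``/opt/rocm``
install prefix:

.. code-block:: shell

   # List HSA agents; the Instinct accelerator should appear as a GPU agent.
   /opt/rocm/bin/rocminfo
   # Show static GPU information via the AMD SMI tool.
   /opt/rocm/bin/amd-smi static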

.. _uninstall-rocm:

Uninstalling
============

.. tab-set::

   .. tab-item:: Ubuntu 22.04
      :sync: ubuntu-22

      1. Uninstall specific meta packages.

         .. code-block:: shell

            sudo apt autoremove rocm

      2. Uninstall ROCm packages.

         .. code-block:: shell

            sudo apt autoremove rocm-core

      3. Remove ROCm repositories.

         .. code-block:: shell

            sudo rm /etc/apt/sources.list.d/rocm*.list
            # Clear the cache and clean the system
            sudo rm -rf /var/cache/apt/*
            sudo apt clean all
            sudo apt update

   .. tab-item:: Ubuntu 24.04
      :sync: ubuntu-24

      1. Uninstall specific meta packages.

         .. code-block:: shell

            sudo apt autoremove rocm

      2. Uninstall ROCm packages.

         .. code-block:: shell

            sudo apt autoremove rocm-core

      3. Remove ROCm repositories.

         .. code-block:: shell

            sudo rm /etc/apt/sources.list.d/rocm*.list
            # Clear the cache and clean the system
            sudo rm -rf /var/cache/apt/*
            sudo apt clean all
            sudo apt update

   .. tab-item:: RHEL 9.6
      :sync: rhel-96

      1. Uninstall specific meta packages.

         .. code-block:: shell

            sudo dnf remove rocm

      2. Uninstall ROCm packages.

         .. code-block:: shell

            sudo dnf remove rocm-core amdgpu-core

      3. Remove ROCm repositories.

         .. code-block:: shell

            sudo rm /etc/yum.repos.d/rocm*.repo*
            # Clear the cache and clean the system
            sudo rm -rf /var/cache/dnf
            sudo dnf clean all
270
docs/preview/release.rst
Normal file
@@ -0,0 +1,270 @@
****************************
ROCm 7.0 Alpha release notes
****************************

The ROCm 7.0 Alpha is an early look into the upcoming ROCm 7.0 major release,
which introduces functional support for AMD Instinct™ MI355X and MI350X
on bare metal, single-node systems. It also includes new features for current-generation
MI300X, MI200, and MI100 series accelerators. This is an alpha-quality release;
expect issues and limitations that will be addressed in upcoming previews.

.. important::

   This Alpha release is not intended for performance evaluation.
   For the latest stable release for production-level functionality,
   see `ROCm documentation <https://rocm.docs.amd.com/en/latest/>`_.

This page provides a high-level summary of supported systems, key changes to the ROCm software
stack, developments related to AI frameworks, current known limitations, and installation
information.

.. _alpha-system-requirements:

Operating system and hardware support
=====================================

Only the accelerators and operating systems listed here are supported. Multi-node systems,
virtualized environments, and GPU partitioning are not supported in this Alpha.

* AMD accelerator: Instinct MI355X, MI350X, MI325X [#mi325x]_, MI300X, MI300A, MI250X, MI250, MI210, MI100
* Operating system: Ubuntu 22.04, Ubuntu 24.04, or RHEL 9.6
* System type: Bare metal, single node only
* Partitioning: Not supported

.. [#mi325x] MI325X is only supported with Ubuntu 22.04.

.. _alpha-highlights:

Alpha release highlights
========================

This section highlights key features enabled in the ROCm 7.0 Alpha.

AI frameworks
-------------

PyTorch
~~~~~~~

The ROCm 7.0 Alpha enables the following PyTorch features:

* Support for PyTorch 2.7

* Integrated fused RoPE kernels in APEX

* Compilation of Python C++ extensions using amdclang++

* Support for the channels-last NHWC format for convolutions via MIOpen

TensorFlow
~~~~~~~~~~

This Alpha enables support for TensorFlow 2.19.

vLLM
~~~~

* Support for the Open Compute Project (OCP) ``FP8`` data type

* ``FP4`` precision for Llama 3.1 405B

Libraries
---------

.. _alpha-new-data-type-support:

New data type support
~~~~~~~~~~~~~~~~~~~~~

MX-compliant data types bring microscaling support to ROCm. For more information, see the `OCP
Microscaling (MX) Formats Specification
<https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf>`_. The ROCm
7.0 Alpha enables functional support for MX data types ``FP4``, ``FP6``, and ``FP8`` on MI355X
systems in these ROCm libraries:

* Composable Kernel (``FP4`` and ``FP8`` only)

* hipBLASLt

* MIGraphX (``FP4`` only)

The following libraries are updated to support the Open Compute Project (OCP) floating-point ``FP8``
format on MI355X instead of the NANOO ``FP8`` format:

* Composable Kernel

* hipBLASLt

* hipSPARSELt

* MIGraphX

* rocWMMA

MIGraphX now also supports ``BF16``.

RCCL support
~~~~~~~~~~~~

RCCL is supported for single-node functional usage only. Multi-node communication capabilities will
be supported in future preview releases.

MIGraphX
~~~~~~~~

* Support for OCP ``FP8`` and MX ``FP4`` data types on MI355X

* Support for ``BF16`` on all hardware

* Support for PyTorch 2.7 via Torch-MIGraphX

Tools
-----

AMD SMI
~~~~~~~

* The default output of the ``amd-smi`` CLI now displays a simple table view.

* New APIs: CPU affinity, which shows how GPUs are affinitized to each CPU in a system.

ROCgdb
~~~~~~

* MX data type support: ``FP4``, ``FP6``, and ``FP8``

ROCprof Compute Viewer
~~~~~~~~~~~~~~~~~~~~~~

* Initial release: ``rocprof-compute-viewer`` allows the visualization of ``rocprofv3``'s thread
  trace output

ROCprof Trace Decoder
~~~~~~~~~~~~~~~~~~~~~

* Initial release: ``rocprof-trace-decoder``, a plugin API for decoding thread traces

ROCm Compute Profiler
~~~~~~~~~~~~~~~~~~~~~

* MX data type support: ``FP4``, ``FP6``, and ``FP8``

* MI355X and MI350X performance counters: CPC, SPI, SQ, TA/TD/TCP, and TCC

* Enhanced roofline analysis with support for ``INT8``, ``INT32``, ``FP8``, ``FP16``, and ``BF16``
  data types

* Roofline distinction for ``FP32`` and ``FP64`` data types

* Selective kernel profiling

ROCm Systems Profiler
~~~~~~~~~~~~~~~~~~~~~

* Trace support for computer vision APIs: H264, H265, AV1, VP9, and JPEG

* Trace support for computer vision engine activity

* OpenMP for C++ language and kernel activity support

ROCm Validation Suite
~~~~~~~~~~~~~~~~~~~~~

* MI355X and MI350X accelerator support in the IET (Integrated Execution Test), GST (GPU Stress Test), and Babel (memory bandwidth test) modules.

ROCprofiler-SDK
~~~~~~~~~~~~~~~

* Program counter (PC) sampling (host trap-based)

* API for profiling applications using thread traces (beta)

* Support in the ``rocprofv3`` CLI tool for the thread trace service

HIP
---

The HIP runtime includes support for:

* Open Compute Project (OCP) MX floating-point ``FP4``, ``FP6``, and ``FP8`` data types and APIs

* Improved logging by adding more precise pointer information and launch arguments for better
  tracking and debugging in dispatch methods

In addition, the HIP runtime includes the following functional improvements, which improve runtime
performance and user experience:

* Optimized HIP runtime lock contention in some event and kernel handling APIs. Event processing
  and memory object look-ups now use the shared mutex implementation. Kernel object look-up during
  C++ kernel launch can now avoid a global lock. These changes improve performance in certain
  applications with high usage, particularly for multiple GPUs, multiple threads, and HIP streams
  per GPU.

* Programmatic support for the scratch buffer limit on a GPU device. Developers can now change the
  default allocation size with the expected scratch limit.

* Unified managed buffer and kernel argument buffers so the HIP runtime no longer needs to create
  and load a separate kernel argument buffer.

* Refactored memory validation to create a unique function to validate a variety of memory copy
  operations.

* Shader names are now demangled for more readable kernel logs.

See :ref:`HIP compatibility <hip-known-limitation>`.

Compilers
---------

* The compiler driver now uses parallel code generation by default when compiling using full LTO
  (including when using the ``-fgpu-rdc`` option) for HIP. This divides the optimized LLVM IR module
  into roughly equal partitions before instruction selection and lowering, which can help improve
  build times.

  Each kernel in the linked LTO module may be put in a separate partition, and any non-inlined
  function it depends on may be copied alongside it. Thus, while parallel code generation can
  improve build time, it can duplicate non-inlined, non-kernel functions across multiple partitions,
  potentially increasing the binary size of the final object file.

* Compiler option ``-flto-partitions=<num>``.

  Equivalent to the ``--lto-partitions=<num>`` LLD option. Controls the number of partitions used for
  parallel code generation when using full LTO (including when using ``-fgpu-rdc``). The number of
  partitions must be greater than 0, and a value of 1 disables the feature. The default value is 8.

  Developers are encouraged to experiment with different numbers of partitions using the
  ``-flto-partitions`` Clang command line option. Recommended values are 1 to 16 partitions, with
  especially large projects containing many kernels potentially benefitting from up to 64
  partitions. It is not recommended to use a value greater than the number of threads on the
  machine. Smaller projects, or projects that contain only a few kernels, may not benefit at
  all from partitioning and may even see a slight increase in build time due to the small overhead
  of analyzing and partitioning the modules. An example invocation is shown below.
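
  As an illustration, the option can be passed on an ordinary HIP compile line; a minimal
  sketch, where the source file, output name, and target architecture are placeholders:

  .. code-block:: shell

     # Build with full LTO for device code and split code generation into 16 partitions.
     hipcc -fgpu-rdc -flto-partitions=16 --offload-arch=gfx942 -o app app.hip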

* HIPIFY now supports NVIDIA CUDA 12.8.0 APIs. See
  `<https://github.com/ROCm/HIPIFY/blob/amd-develop/docs/reference/supported_apis.md>`_ for more
  information.

Instinct Driver / ROCm packaging separation
-------------------------------------------

The Instinct Driver is now distributed separately from the ROCm software stack -- it is now stored
in its own location in the package repository at `<https://repo.radeon.com>`_ under ``/amdgpu/``.
The first release is designated as Instinct Driver version 30.10. See `ROCm Gets Modular: Meet the
Instinct Datacenter GPU Driver
<https://rocm.blogs.amd.com/ecosystems-and-partners/instinct-gpu-driver/README.html>`_ for more
information.

Forward and backward compatibility between the Instinct Driver and ROCm is not supported in this
Alpha release. See the :doc:`installation instructions <install/index>`.

Known limitations
=================

.. _hip-known-limitation:

HIP compatibility
-----------------

HIP runtime APIs in the ROCm 7.0 Alpha do not include backward-incompatible changes. See `HIP 7.0 Is
Coming: What You Need to Know to Stay Ahead
<https://rocm.blogs.amd.com/ecosystems-and-partners/transition-to-hip-7.0:-guidance-on-upcoming-compatibility-changes/README.html>`_ for more information.
@@ -3,195 +3,206 @@
|
||||
defaults:
|
||||
numbered: False
|
||||
maxdepth: 6
|
||||
root: index
|
||||
root: preview/index
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: what-is-rocm.rst
|
||||
- file: about/release-notes.md
|
||||
title: Release notes
|
||||
- file: compatibility/compatibility-matrix.rst
|
||||
title: Compatibility matrix
|
||||
entries:
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html
|
||||
title: Linux system requirements
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
|
||||
title: Windows system requirements
|
||||
|
||||
- caption: Install
|
||||
entries:
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/
|
||||
title: ROCm on Linux
|
||||
- url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
|
||||
title: HIP SDK on Windows
|
||||
- url: https://rocm.docs.amd.com/projects/radeon/en/latest/index.html
|
||||
title: ROCm on Radeon GPUs
|
||||
- file: how-to/deep-learning-rocm.md
|
||||
title: Deep learning frameworks
|
||||
- file: how-to/build-rocm.rst
|
||||
title: Build ROCm from source
|
||||
|
||||
- caption: How to
|
||||
entries:
|
||||
- file: how-to/rocm-for-ai/index.rst
|
||||
title: Use ROCm for AI
|
||||
- file: preview/release.rst
|
||||
title: Alpha release notes
|
||||
- file: preview/install/index.rst
|
||||
title: Installation
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/install.rst
|
||||
title: Installation
|
||||
- file: how-to/rocm-for-ai/system-health-check.rst
|
||||
title: System health benchmarks
|
||||
- file: how-to/rocm-for-ai/training/index.rst
|
||||
title: Training
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
|
||||
title: Train a model with Megatron-LM
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
|
||||
title: Train a model with PyTorch
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
|
||||
title: Train a model with JAX MaxText
|
||||
- file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
|
||||
title: Train a model with LLM Foundry
|
||||
- file: how-to/rocm-for-ai/training/scale-model-training.rst
|
||||
title: Scale model training
|
||||
|
||||
- file: how-to/rocm-for-ai/fine-tuning/index.rst
|
||||
title: Fine-tuning LLMs
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/fine-tuning/overview.rst
|
||||
title: Conceptual overview
|
||||
- file: how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference.rst
|
||||
title: Fine-tuning
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference.rst
|
||||
title: Use a single accelerator
|
||||
- file: how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
|
||||
title: Use multiple accelerators
|
||||
|
||||
- file: how-to/rocm-for-ai/inference/index.rst
|
||||
title: Inference
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/inference/hugging-face-models.rst
|
||||
title: Run models from Hugging Face
|
||||
- file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
|
||||
title: LLM inference frameworks
|
||||
- file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
|
||||
title: vLLM inference performance testing
|
||||
- file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
|
||||
title: PyTorch inference performance testing
|
||||
- file: how-to/rocm-for-ai/inference/deploy-your-model.rst
|
||||
title: Deploy your model
|
||||
|
||||
- file: how-to/rocm-for-ai/inference-optimization/index.rst
|
||||
title: Inference optimization
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: how-to/rocm-for-ai/inference-optimization/model-quantization.rst
|
||||
- file: how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries.rst
|
||||
- file: how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel.md
|
||||
title: Optimize with Composable Kernel
|
||||
- file: how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel.rst
|
||||
title: Optimize Triton kernels
|
||||
- file: how-to/rocm-for-ai/inference-optimization/profiling-and-debugging.rst
|
||||
title: Profile and debug
|
||||
- file: how-to/rocm-for-ai/inference-optimization/workload.rst
|
||||
title: Workload optimization
|
||||
|
||||
- url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
|
||||
title: AI tutorials
|
||||
|
||||
- file: how-to/rocm-for-hpc/index.rst
|
||||
title: Use ROCm for HPC
|
||||
- file: how-to/system-optimization/index.rst
|
||||
title: System optimization
|
||||
- file: how-to/gpu-performance/mi300x.rst
|
||||
title: AMD Instinct MI300X performance guides
|
||||
- file: how-to/system-debugging.md
|
||||
- file: conceptual/compiler-topics.md
|
||||
title: Use advanced compiler features
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html
|
||||
title: ROCm compiler infrastructure
|
||||
- url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html
|
||||
title: Use AddressSanitizer
|
||||
- url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html
|
||||
title: OpenMP support
|
||||
- file: how-to/setting-cus
|
||||
title: Set the number of CUs
|
||||
- file: how-to/Bar-Memory.rst
|
||||
title: Troubleshoot BAR access limitation
|
||||
- url: https://github.com/amd/rocm-examples
|
||||
title: ROCm examples
|
||||
|
||||
|
||||
- caption: Conceptual
|
||||
entries:
|
||||
- file: conceptual/gpu-arch.md
|
||||
title: GPU architecture overview
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: conceptual/gpu-arch/mi300.md
|
||||
title: MI300 microarchitecture
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
|
||||
title: AMD Instinct MI300/CDNA3 ISA
|
||||
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
|
||||
title: White paper
|
||||
- file: conceptual/gpu-arch/mi300-mi200-performance-counters.rst
|
||||
title: MI300 and MI200 Performance counter
|
||||
- file: conceptual/gpu-arch/mi250.md
|
||||
title: MI250 microarchitecture
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf
|
||||
title: AMD Instinct MI200/CDNA2 ISA
|
||||
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf
|
||||
title: White paper
|
||||
- file: conceptual/gpu-arch/mi100.md
|
||||
title: MI100 microarchitecture
|
||||
subtrees:
|
||||
- entries:
|
||||
- url: https://www.amd.com/system/files/TechDocs/instinct-mi100-cdna1-shader-instruction-set-architecture%C2%A0.pdf
|
||||
title: AMD Instinct MI100/CDNA1 ISA
|
||||
- url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf
|
||||
title: White paper
|
||||
- file: conceptual/file-reorg.md
|
||||
title: File structure (Linux FHS)
|
||||
- file: conceptual/gpu-isolation.md
|
||||
title: GPU isolation techniques
|
||||
- file: conceptual/cmake-packages.rst
|
||||
title: Using CMake
|
||||
- file: conceptual/ai-pytorch-inception.md
|
||||
title: Inception v3 with PyTorch
|
||||
|
||||
- caption: Reference
|
||||
entries:
|
||||
- file: reference/api-libraries.md
|
||||
title: ROCm libraries
|
||||
- file: reference/rocm-tools.md
|
||||
title: ROCm tools, compilers, and runtimes
|
||||
- file: reference/gpu-arch-specs.rst
|
||||
- file: reference/gpu-atomics-operation.rst
|
||||
- file: reference/precision-support.rst
|
||||
title: Precision support
|
||||
- file: reference/graph-safe-support.rst
|
||||
title: Graph safe support
|
||||
|
||||
- caption: Contribute
|
||||
entries:
|
||||
- file: contribute/contributing.md
|
||||
title: Contributing to the ROCm documentation
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: contribute/toolchain.md
|
||||
title: ROCm documentation toolchain
|
||||
- file: contribute/building.md
|
||||
- file: contribute/feedback.md
|
||||
title: Providing feedback about the ROCm documentation
|
||||
- file: about/license.md
|
||||
title: ROCm licenses
|
||||
- file: preview/install/rocm
|
||||
title: Install ROCm
|
||||
- file: preview/install/instinct-driver
|
||||
title: Install Instinct Driver
|
||||
# - entries:
# - file: what-is-rocm.rst
# - file: about/release-notes.md
# title: Release notes
# - file: compatibility/compatibility-matrix.rst
# title: Compatibility matrix
# entries:
# - url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/reference/system-requirements.html
# title: Linux system requirements
# - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html
# title: Windows system requirements
#
# - caption: Install
# entries:
# - url: https://rocm.docs.amd.com/projects/install-on-linux-internal/en/latest/
# title: ROCm on Linux
# - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/
# title: HIP SDK on Windows
# - url: https://rocm.docs.amd.com/projects/radeon/en/latest/index.html
# title: ROCm on Radeon GPUs
# - file: how-to/deep-learning-rocm.md
# title: Deep learning frameworks
# - file: how-to/build-rocm.rst
# title: Build ROCm from source
#
# - caption: How to
# entries:
# - file: how-to/rocm-for-ai/index.rst
# title: Use ROCm for AI
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/install.rst
# title: Installation
# - file: how-to/rocm-for-ai/system-health-check.rst
# title: System health benchmarks
# - file: how-to/rocm-for-ai/training/index.rst
# title: Training
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/training/benchmark-docker/megatron-lm.rst
# title: Train a model with Megatron-LM
# - file: how-to/rocm-for-ai/training/benchmark-docker/pytorch-training.rst
# title: Train a model with PyTorch
# - file: how-to/rocm-for-ai/training/benchmark-docker/jax-maxtext.rst
# title: Train a model with JAX MaxText
# - file: how-to/rocm-for-ai/training/benchmark-docker/mpt-llm-foundry
# title: Train a model with LLM Foundry
# - file: how-to/rocm-for-ai/training/scale-model-training.rst
# title: Scale model training
#
# - file: how-to/rocm-for-ai/fine-tuning/index.rst
# title: Fine-tuning LLMs
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/fine-tuning/overview.rst
# title: Conceptual overview
# - file: how-to/rocm-for-ai/fine-tuning/fine-tuning-and-inference.rst
# title: Fine-tuning
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/fine-tuning/single-gpu-fine-tuning-and-inference.rst
# title: Use a single accelerator
# - file: how-to/rocm-for-ai/fine-tuning/multi-gpu-fine-tuning-and-inference.rst
# title: Use multiple accelerators
#
# - file: how-to/rocm-for-ai/inference/index.rst
# title: Inference
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/inference/hugging-face-models.rst
# title: Run models from Hugging Face
# - file: how-to/rocm-for-ai/inference/llm-inference-frameworks.rst
# title: LLM inference frameworks
# - file: how-to/rocm-for-ai/inference/benchmark-docker/vllm.rst
# title: vLLM inference performance testing
# - file: how-to/rocm-for-ai/inference/benchmark-docker/pytorch-inference.rst
# title: PyTorch inference performance testing
# - file: how-to/rocm-for-ai/inference/deploy-your-model.rst
# title: Deploy your model
#
# - file: how-to/rocm-for-ai/inference-optimization/index.rst
# title: Inference optimization
# subtrees:
# - entries:
# - file: how-to/rocm-for-ai/inference-optimization/model-quantization.rst
# - file: how-to/rocm-for-ai/inference-optimization/model-acceleration-libraries.rst
# - file: how-to/rocm-for-ai/inference-optimization/optimizing-with-composable-kernel.md
# title: Optimize with Composable Kernel
# - file: how-to/rocm-for-ai/inference-optimization/optimizing-triton-kernel.rst
# title: Optimize Triton kernels
# - file: how-to/rocm-for-ai/inference-optimization/profiling-and-debugging.rst
# title: Profile and debug
# - file: how-to/rocm-for-ai/inference-optimization/workload.rst
# title: Workload optimization
#
# - url: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/
# title: AI tutorials
#
# - file: how-to/rocm-for-hpc/index.rst
# title: Use ROCm for HPC
# - file: how-to/system-optimization/index.rst
# title: System optimization
# - file: how-to/gpu-performance/mi300x.rst
# title: AMD Instinct MI300X performance guides
# - file: how-to/system-debugging.md
# - file: conceptual/compiler-topics.md
# title: Use advanced compiler features
# subtrees:
# - entries:
# - url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/index.html
# title: ROCm compiler infrastructure
# - url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html
# title: Use AddressSanitizer
# - url: https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/openmp.html
# title: OpenMP support
# - file: how-to/setting-cus
# title: Set the number of CUs
# - file: how-to/Bar-Memory.rst
# title: Troubleshoot BAR access limitation
# - url: https://github.com/amd/rocm-examples
# title: ROCm examples
#
#
# - caption: Conceptual
# entries:
# - file: conceptual/gpu-arch.md
# title: GPU architecture overview
# subtrees:
# - entries:
# - file: conceptual/gpu-arch/mi300.md
# title: MI300 microarchitecture
# subtrees:
# - entries:
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
# title: AMD Instinct MI300/CDNA3 ISA
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
# title: White paper
# - file: conceptual/gpu-arch/mi300-mi200-performance-counters.rst
# title: MI300 and MI200 Performance counter
# - file: conceptual/gpu-arch/mi250.md
# title: MI250 microarchitecture
# subtrees:
# - entries:
# - url: https://www.amd.com/system/files/TechDocs/instinct-mi200-cdna2-instruction-set-architecture.pdf
# title: AMD Instinct MI200/CDNA2 ISA
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna2-white-paper.pdf
# title: White paper
# - file: conceptual/gpu-arch/mi100.md
# title: MI100 microarchitecture
# subtrees:
# - entries:
# - url: https://www.amd.com/system/files/TechDocs/instinct-mi100-cdna1-shader-instruction-set-architecture%C2%A0.pdf
# title: AMD Instinct MI100/CDNA1 ISA
# - url: https://www.amd.com/content/dam/amd/en/documents/instinct-business-docs/white-papers/amd-cdna-white-paper.pdf
# title: White paper
# - file: conceptual/file-reorg.md
# title: File structure (Linux FHS)
# - file: conceptual/gpu-isolation.md
# title: GPU isolation techniques
# - file: conceptual/cmake-packages.rst
# title: Using CMake
# - file: conceptual/ai-pytorch-inception.md
# title: Inception v3 with PyTorch
#
# - caption: Reference
# entries:
# - file: reference/api-libraries.md
# title: ROCm libraries
# - file: reference/rocm-tools.md
# title: ROCm tools, compilers, and runtimes
# - file: reference/gpu-arch-specs.rst
# - file: reference/gpu-atomics-operation.rst
# - file: reference/precision-support.rst
# title: Precision support
# - file: reference/graph-safe-support.rst
# title: Graph safe support
#
# - caption: Contribute
# entries:
# - file: contribute/contributing.md
# title: Contributing to the ROCm documentation
# subtrees:
# - entries:
# - file: contribute/toolchain.md
# title: ROCm documentation toolchain
# - file: contribute/building.md
# - file: contribute/feedback.md
# title: Providing feedback about the ROCm documentation
# - file: about/license.md
# title: ROCm licenses

@@ -1,4 +1,4 @@
rocm-docs-core==1.20.1
sphinx-reredirects
sphinx-sitemap
sphinxcontrib.datatemplates==0.11.0
git+https://github.com/ROCm/rocm-docs-core.git@alexxu12/header-cap-space#egg=rocm-docs-core

@@ -21,9 +21,11 @@ babel==2.17.0
# sphinx
beautifulsoup4==4.13.4
# via pydata-sphinx-theme
blinker==1.9.0
# via flask
breathe==4.36.0
# via rocm-docs-core
certifi==2025.4.26
certifi==2025.6.15
# via requests
cffi==1.17.1
# via
@@ -33,11 +35,12 @@ charset-normalizer==3.4.2
# via requests
click==8.2.1
# via
# flask
# jupyter-cache
# sphinx-external-toc
comm==0.2.2
# via ipykernel
cryptography==45.0.3
cryptography==45.0.4
# via pyjwt
debugpy==1.8.14
# via ipykernel
@@ -60,6 +63,8 @@ fastjsonschema==2.21.1
# via
# nbformat
# rocm-docs-core
flask==3.1.1
# via sphinx-sitemap
gitdb==4.0.12
# via gitpython
gitpython==3.1.44
@@ -80,10 +85,13 @@ ipython==8.37.0
# via
# ipykernel
# myst-nb
itsdangerous==2.2.0
# via flask
jedi==0.19.2
# via ipython
jinja2==3.1.6
# via
# flask
# myst-parser
# sphinx
jsonschema==4.24.0
@@ -107,7 +115,10 @@ markdown-it-py==3.0.0
# mdit-py-plugins
# myst-parser
markupsafe==3.0.2
# via jinja2
# via
# flask
# jinja2
# werkzeug
matplotlib-inline==0.1.7
# via
# ipykernel
@@ -134,7 +145,6 @@ nest-asyncio==1.6.0
packaging==25.0
# via
# ipykernel
# pydata-sphinx-theme
# sphinx
parso==0.8.4
# via jedi
@@ -152,13 +162,13 @@ pure-eval==0.2.3
# via stack-data
pycparser==2.22
# via cffi
pydata-sphinx-theme==0.15.4
pydata-sphinx-theme==0.16.1
# via
# rocm-docs-core
# sphinx-book-theme
pygithub==2.6.1
# via rocm-docs-core
pygments==2.19.1
pygments==2.19.2
# via
# accessible-pygments
# ipython
@@ -178,7 +188,7 @@ pyyaml==6.0.2
# rocm-docs-core
# sphinx-external-toc
# sphinxcontrib-datatemplates
pyzmq==26.4.0
pyzmq==27.0.0
# via
# ipykernel
# jupyter-client
@@ -190,7 +200,8 @@ requests==2.32.4
# via
# pygithub
# sphinx
rocm-docs-core==1.20.1
# sphinx-sitemap
rocm-docs-core @ git+https://github.com/ROCm/rocm-docs-core.git@alexxu12/header-cap-space
# via -r requirements.in
rpds-py==0.25.1
# via
@@ -215,12 +226,12 @@ sphinx==8.1.3
# sphinx-copybutton
# sphinx-design
# sphinx-external-toc
# sphinx-last-updated-by-git
# sphinx-notfound-page
# sphinx-reredirects
# sphinx-sitemap
# sphinxcontrib-datatemplates
# sphinxcontrib-runcmd
sphinx-book-theme==1.1.4
sphinx-book-theme==1.1.3
# via rocm-docs-core
sphinx-copybutton==0.5.2
# via rocm-docs-core
@@ -228,11 +239,13 @@ sphinx-design==0.6.1
# via rocm-docs-core
sphinx-external-toc==1.0.1
# via rocm-docs-core
sphinx-last-updated-by-git==0.3.8
# via sphinx-sitemap
sphinx-notfound-page==1.1.0
# via rocm-docs-core
sphinx-reredirects==0.1.6
# via -r requirements.in
sphinx-sitemap==2.6.0
sphinx-sitemap==2.7.1
# via -r requirements.in
sphinxcontrib-applehelp==2.0.0
# via sphinx
@@ -288,6 +301,8 @@ urllib3==2.5.0
# requests
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wrapt==1.17.2
# via deprecated
zipp==3.23.0