[BACKEND] Remove HopperHelpers.c and replace with inline ptx and LLVM codegen (#2047)

2026-04-05 03:01:17 -04:00 · 2023-08-10 15:52:37 -07:00
parent d1ce4c4950
commit 4d373aa103
35 changed files with 1392 additions and 1532 deletions
--- a/python/src/triton.cc
+++ b/python/src/triton.cc
@@ -23,6 +23,7 @@
 #include "mlir/Dialect/Index/IR/IndexOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "triton/Analysis/Allocation.h"
+#include "triton/Conversion/NVGPUToLLVM/NVGPUToLLVMPass.h"
 #include "triton/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.h"
 #include "triton/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.h"
 #include "triton/Dialect/NVGPU/IR/Dialect.h"
@@ -1690,6 +1691,10 @@ void init_triton_ir(py::module &&m) {
           [](mlir::PassManager &self) {
             self.addPass(mlir::triton::createConvertTritonGPUToLLVMPass());
           })
+      .def("add_nv_gpu_to_llvm",
+           [](mlir::PassManager &self) {
+             self.addPass(mlir::triton::createConvertNVGPUToLLVMPass());
+           })
      .def("add_scf_to_cfg", [](mlir::PassManager &self) {
        self.addPass(mlir::createConvertSCFToCFPass());
      });
--- a/python/tutorials/06-fused-attention.py
+++ b/python/tutorials/06-fused-attention.py
@@ -172,7 +172,7 @@ def _bwd_kernel(
            lo = 0
        # initialize row/col offsets
        offs_qm = lo + tl.arange(0, BLOCK_M)
-        offs_n = start_n * BLOCK_M + tl.arange(0, BLOCK_M) 
+        offs_n = start_n * BLOCK_M + tl.arange(0, BLOCK_M)
        offs_m = tl.arange(0, BLOCK_N)
        offs_k = tl.arange(0, BLOCK_DMODEL)
        # initialize pointers to value-like data