[BACKEND] Remove HopperHelpers.c and replace with inline ptx and LLVM codegen (#2047)

This commit is contained in:
Zahi Moudallal
2023-08-10 15:52:37 -07:00
committed by GitHub
parent d1ce4c4950
commit 4d373aa103
35 changed files with 1392 additions and 1532 deletions

View File

@@ -23,6 +23,7 @@
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "triton/Analysis/Allocation.h"
#include "triton/Conversion/NVGPUToLLVM/NVGPUToLLVMPass.h"
#include "triton/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.h"
#include "triton/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.h"
#include "triton/Dialect/NVGPU/IR/Dialect.h"
@@ -1690,6 +1691,10 @@ void init_triton_ir(py::module &&m) {
[](mlir::PassManager &self) {
self.addPass(mlir::triton::createConvertTritonGPUToLLVMPass());
})
.def("add_nv_gpu_to_llvm",
[](mlir::PassManager &self) {
self.addPass(mlir::triton::createConvertNVGPUToLLVMPass());
})
.def("add_scf_to_cfg", [](mlir::PassManager &self) {
self.addPass(mlir::createConvertSCFToCFPass());
});

View File

@@ -172,7 +172,7 @@ def _bwd_kernel(
lo = 0
# initialize row/col offsets
offs_qm = lo + tl.arange(0, BLOCK_M)
offs_n = start_n * BLOCK_M + tl.arange(0, BLOCK_M)
offs_n = start_n * BLOCK_M + tl.arange(0, BLOCK_M)
offs_m = tl.arange(0, BLOCK_N)
offs_k = tl.arange(0, BLOCK_DMODEL)
# initialize pointers to value-like data