Initial code merge of Hopper support (#2036)

This is the initial code merge of Nvidia Hopper feature support. Please be
aware that the merge is not finished yet and troubleshooting is still
ongoing. The new hardware features (GMMA, TMA, STMATRIX, etc.) and
automatic warp specialization are experimental for now and are turned
off by default. We recommend trying them out once version 3.0 is
released.

The work is contributed by:
ben-zhang-609, bealwang, donproc, qliu93, jsh20, allatit23, LyricZhao,
ivanyinwz, goostavz & yangjunpro
from Nvidia, in cooperation with:
ptillet, Jokeren, ThomasRaoux & zahimoud
from OpenAI.

Co-authored-by: Goostav Zhu <gzhu@nvidia.com>
This commit is contained in:
goostavz
2023-08-07 09:53:04 +08:00
committed by GitHub
parent 5df904233c
commit f1512bded1
220 changed files with 28448 additions and 2295 deletions

View File

@@ -2,6 +2,7 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "triton/Analysis/Utility.h"
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
#include "triton/Dialect/TritonNvidiaGPU/IR/Dialect.h"
namespace mlir {
@@ -27,17 +28,21 @@ void SharedMemoryAliasAnalysis::visitOperation(
// These ops may allocate a new shared memory buffer.
auto result = op->getResult(0);
// XXX(Keren): the following ops are always aliasing for now
if (isa<triton::gpu::ExtractSliceOp, triton::TransOp>(op)) {
if (isa<triton::gpu::ExtractSliceOp, triton::TransOp,
triton::nvidia_gpu::ExtractMBarrierOp>(op)) {
// extract_slice %src
// trans %src
aliasInfo = AliasInfo(operands[0]->getValue());
pessimistic = false;
} else if (isa<tensor::InsertSliceOp, triton::gpu::InsertSliceAsyncOp>(
op)) {
} else if (isa<tensor::InsertSliceOp, triton::gpu::InsertSliceAsyncOp,
triton::nvidia_gpu::InsertSliceAsyncV2Op>(op)) {
// insert_slice_async %src, %dst, %index
// insert_slice %src into %dst[%offsets]
aliasInfo = AliasInfo(operands[1]->getValue());
pessimistic = false;
} else if (isa<triton::nvidia_gpu::StoreAsyncOp>(op)) {
aliasInfo = AliasInfo(operands[0]->getValue());
pessimistic = false;
} else if (triton::gpu::isSharedEncoding(result)) {
aliasInfo.insert(result);
pessimistic = false;