mirror of
https://github.com/ROCm/ROCm.git
synced 2026-02-21 03:00:39 -05:00
The initial code merge of Nvidia Hopper features support. Please be aware that the code merge is not yet finished and troubleshooting is still ongoing. The new hardware features (GMMA, TMA, STMATRIX, etc.) and automatic warp specialization are experimental for now and turned off by default. Trying them out is recommended once version 3.0 is released. The work is contributed by: ben-zhang-609, bealwang, donproc, qliu93, jsh20, allatit23, LyricZhao, ivanyinwz, goostavz & yangjunpro from Nvidia, in cooperation with: ptillet, Jokeren, ThomasRaoux & zahimoud from OpenAI. Co-authored-by: Goostav Zhu <gzhu@nvidia.com>
72 lines
2.2 KiB
C++
72 lines
2.2 KiB
C++
#include "triton/Analysis/Alias.h"
|
|
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
|
#include "triton/Analysis/Utility.h"
|
|
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
|
|
#include "triton/Dialect/TritonNvidiaGPU/IR/Dialect.h"
|
|
|
|
namespace mlir {
|
|
|
|
/// Lattice join for alias information: the result is the set union of the
/// buffers tracked on both sides. Equal inputs short-circuit so the common
/// fixed-point case does no set construction.
AliasInfo AliasInfo::join(const AliasInfo &lhs, const AliasInfo &rhs) {
  if (lhs == rhs)
    return lhs;
  AliasInfo merged;
  // Fold every alloc from each operand into the merged lattice value.
  for (const auto *allocSet : {&lhs.allocs, &rhs.allocs})
    for (auto alloc : *allocSet)
      merged.insert(alloc);
  return merged;
}
|
|
|
|
/// Dataflow transfer function: given the alias lattices of `op`'s operands,
/// compute and propagate the alias lattice of its results.
///
/// Ops that *view* an existing shared-memory buffer (extract_slice, trans,
/// extract_mbarrier, insert_slice*, store_async) forward the alias set of the
/// aliased operand; ops that produce a fresh shared-encoded result start a new
/// alias set containing only that result. Anything unrecognized is handled
/// pessimistically by resetting all result lattices to the entry state.
void SharedMemoryAliasAnalysis::visitOperation(
    Operation *op, ArrayRef<const dataflow::Lattice<AliasInfo> *> operands,
    ArrayRef<dataflow::Lattice<AliasInfo> *> results) {
  AliasInfo aliasInfo;
  // Stay pessimistic unless we positively recognize the op below.
  bool pessimistic = true;
  if (maybeSharedAllocationOp(op)) {
    // These ops may allocate a new shared memory buffer.
    // NOTE(review): assumes any op passing maybeSharedAllocationOp has at
    // least one result — confirm against maybeSharedAllocationOp's contract.
    auto result = op->getResult(0);
    // XXX(Keren): the following ops are always aliasing for now
    if (isa<triton::gpu::ExtractSliceOp, triton::TransOp,
            triton::nvidia_gpu::ExtractMBarrierOp>(op)) {
      // extract_slice %src
      // trans %src
      // Result is a view of operand 0: forward its alias set unchanged.
      aliasInfo = AliasInfo(operands[0]->getValue());
      pessimistic = false;
    } else if (isa<tensor::InsertSliceOp, triton::gpu::InsertSliceAsyncOp,
                   triton::nvidia_gpu::InsertSliceAsyncV2Op>(op)) {
      // insert_slice_async %src, %dst, %index
      // insert_slice %src into %dst[%offsets]
      // Result aliases the *destination* buffer, i.e. operand 1.
      aliasInfo = AliasInfo(operands[1]->getValue());
      pessimistic = false;
    } else if (isa<triton::nvidia_gpu::StoreAsyncOp>(op)) {
      // store_async writes through its first operand's buffer.
      aliasInfo = AliasInfo(operands[0]->getValue());
      pessimistic = false;
    } else if (triton::gpu::isSharedEncoding(result)) {
      // A genuinely new shared-memory allocation: it aliases only itself.
      aliasInfo.insert(result);
      pessimistic = false;
    }
  }

  if (pessimistic) {
    // Unknown op: drop to the most conservative state for every result.
    return setAllToEntryStates(results);
  }
  // Join all lattice elements
  for (auto *result : results)
    propagateIfChanged(result, result->join(aliasInfo));
}
|
|
|
|
/// Pairwise alias query. Not implemented yet: conservatively report that any
/// two values may alias, which is always sound.
// TODO: implement
AliasResult SharedMemoryAliasAnalysis::alias(Value lhs, Value rhs) {
  return AliasResult::MayAlias;
}
|
|
|
|
/// Mod/ref query for `op` against `location`. Not implemented yet: assume the
/// op may both read and write the location, the sound worst case.
// TODO: implement
ModRefResult SharedMemoryAliasAnalysis::getModRef(Operation *op,
                                                  Value location) {
  return ModRefResult::getModAndRef();
}
|
|
|
|
} // namespace mlir
|