Initial commit to resolve merge conflicts

Rename tl.float8e4 to tl.float8e4nv to align with upstream

ROCM IFU: Fix Python arch issues

ROCM IFU: Fix kernel launcher

ROCM IFU: Fix merge conflicts

Fix debug build

Set correct threadsPerCTA
This commit is contained in:
Jason Furmanek
2023-09-12 20:43:59 +00:00
parent 74fd8e9754
commit e5d7bb4fae
36 changed files with 414 additions and 1005 deletions

View File

@@ -253,7 +253,7 @@ private:
}
#ifdef USE_ROCM
if (auto mfmaLayout = layout.dyn_cast<MfmaEncodingAttr>()) {
auto multiDimBase = emitBaseIndexForLayout(loc, rewriter, layout, type);
auto multiDimBase = emitBaseIndexForLayout(loc, rewriter, layout, type, false);
SmallVector<SmallVector<unsigned>> offsets;
assert(rank == 2);
SmallVector<Value> multiDimOffset(rank);