mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
do not emit nvidia inline asm
This commit is contained in:
committed by
Jason Furmanek
parent
39e8901d7a
commit
8bc417b9b7
@@ -258,6 +258,9 @@ Value storeShared(ConversionPatternRewriter &rewriter, Location loc, Value ptr,
|
||||
|
||||
Value loadShared(ConversionPatternRewriter &rewriter, Location loc, Value ptr,
|
||||
Value pred) {
|
||||
#if USE_ROCM
|
||||
return load(ptr);
|
||||
#else
|
||||
MLIRContext *ctx = rewriter.getContext();
|
||||
auto ptrTy = ptr.getType().cast<LLVMPointerType>();
|
||||
assert(ptrTy.getAddressSpace() == 3 && "Invalid addr space for loadShared");
|
||||
@@ -272,6 +275,7 @@ Value loadShared(ConversionPatternRewriter &rewriter, Location loc, Value ptr,
|
||||
auto &ld = builder.create<>("ld")->shared().b(bitwidth);
|
||||
ld(dOpr, ptrOpr).predicate(pred, "b");
|
||||
return builder.launch(rewriter, loc, elemTy);
|
||||
#endif
|
||||
}
|
||||
|
||||
static Value commonShflSync(Location loc, ConversionPatternRewriter &rewriter,
|
||||
|
||||
@@ -2013,10 +2013,12 @@ module attributes {"triton_gpu.compute-capability" = 80 : i32, "triton_gpu.num-c
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: copyitem
|
||||
// CHECK: st.shared.b8
|
||||
// CHECK: ld.shared.b8
|
||||
// CHECK-NOT: st.shared.b1
|
||||
// CHECK-NOT: ld.shared.b1
|
||||
// GCN: llvm.store
|
||||
// GCN: llvm.load
|
||||
// PTX: st.shared.b8
|
||||
// PTX: ld.shared.b8
|
||||
// PTX-NOT: st.shared.b1
|
||||
// PTX-NOT: ld.shared.b1
|
||||
#blocked = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [4, 8], warpsPerCTA = [1, 4], order = [0, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
|
||||
module attributes {"triton_gpu.compute-capability" = 80 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
|
||||
tt.func public @copyitem() attributes {noinline = false} {
|
||||
|
||||
Reference in New Issue
Block a user