mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
Merge commit 'ac9fa68d18c777e421bd3f6fb1ddcfd60b6fda33' into ifu-rebase-again
Conflicts: .gitignore .gitmodules README.md bin/triton-translate.cpp include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td include/triton/Target/AMDGCN/AMDGCNTranslation.h include/triton/Target/HSACO/HSACOTranslation.h lib/Analysis/Allocation.cpp lib/Analysis/Utility.cpp lib/Conversion/TritonGPUToLLVM/CMakeLists.txt lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp lib/Conversion/TritonGPUToLLVM/ScanOpToLLVM.cpp lib/Conversion/TritonGPUToLLVM/Utility.cpp lib/Conversion/TritonGPUToLLVM/Utility.h lib/Dialect/TritonGPU/IR/Dialect.cpp lib/Dialect/TritonGPU/Transforms/RemoveLayoutConversions.cpp lib/Target/HSACO/CMakeLists.txt lib/Target/HSACO/HSACOTranslation.cpp lib/Target/LLVMIR/LLVMIRTranslation.cpp python/src/triton.cc python/test/unit/language/test_core.py python/test/unit/operators/test_flash_attention.py python/triton/compiler/compiler.py python/triton/compiler/make_launcher.py python/triton/language/semantic.py python/triton/runtime/jit.py python/tutorials/06-fused-attention.py python/tutorials/11-grouped-gemm.py test/Conversion/tritongpu_to_llvm.mlir
This commit is contained in:
@@ -21,6 +21,7 @@ class AllocationAnalysis;
|
||||
SmallVector<unsigned>
|
||||
getScratchConfigForCvtLayout(triton::gpu::ConvertLayoutOp op, unsigned &inVec,
|
||||
unsigned &outVec);
|
||||
SmallVector<unsigned> getRepShapeForCvtLayout(triton::gpu::ConvertLayoutOp op);
|
||||
|
||||
} // namespace triton
|
||||
|
||||
|
||||
@@ -36,7 +36,9 @@ public:
|
||||
|
||||
triton::ReduceOp getOperation() { return op; }
|
||||
|
||||
bool isFastReduction();
|
||||
bool isReductionOnLayoutFastAxis();
|
||||
|
||||
unsigned getThreadOffsetOnReductionAxis();
|
||||
|
||||
bool isWarpSynchronous();
|
||||
|
||||
@@ -50,14 +52,16 @@ public:
|
||||
|
||||
unsigned getThreadsReductionAxis();
|
||||
|
||||
SmallVector<unsigned> getScratchConfigBasic();
|
||||
|
||||
SmallVector<SmallVector<unsigned>> getScratchConfigsFast();
|
||||
SmallVector<unsigned> getScratchConfig();
|
||||
|
||||
unsigned getScratchSizeInBytes();
|
||||
|
||||
bool isSupportedLayout();
|
||||
|
||||
bool isReduceWithinCTA();
|
||||
|
||||
unsigned getAxis() { return axis; }
|
||||
|
||||
private:
|
||||
triton::ReduceOp op;
|
||||
ArrayRef<int64_t> srcShape;
|
||||
@@ -84,8 +88,12 @@ public:
|
||||
unsigned getNonAxisNumThreadsPerCTA();
|
||||
// Return the number of warps per CTA along axis dim.
|
||||
unsigned getAxisNumWarps();
|
||||
// Return the number of warps per CTA along axis dim with unique data.
|
||||
unsigned getAxisNumWarpsWithUniqueData();
|
||||
// Return the number of threads per warp along axis dim.
|
||||
unsigned getAxisNumThreadsPerWarp();
|
||||
// Return the number of threads per warp along axis dim with unique data.
|
||||
unsigned getAxisNumThreadsPerWarpWithUniqueData();
|
||||
// Return the number of blocks along axis dim.
|
||||
unsigned getAxisNumBlocks();
|
||||
// Return the number of blocks along non axis dim.
|
||||
@@ -103,6 +111,7 @@ public:
|
||||
Location getLoc() { return scanOp.getLoc(); }
|
||||
unsigned getAxis() { return scanOp.getAxis(); }
|
||||
triton::gpu::BlockedEncodingAttr getEncoding();
|
||||
llvm::ArrayRef<int64_t> getShape();
|
||||
Region &getCombineOp();
|
||||
|
||||
private:
|
||||
@@ -128,6 +137,10 @@ bool isMmaToDotShortcut(RankedTensorType &srcTy, RankedTensorType &dstTy);
|
||||
|
||||
bool isMmaToMmaShortcut(RankedTensorType &srcTy, RankedTensorType &dstTy);
|
||||
|
||||
// Return true if the src and dst layouts match.
|
||||
bool matchMmaV3AndDotOperandLayout(RankedTensorType srcTy,
|
||||
RankedTensorType dstTy);
|
||||
|
||||
// TODO: Move utility functions that belong to ConvertLayoutOp to class
|
||||
// ConvertLayoutOpHelper in the future
|
||||
bool shouldUseDistSmem(Attribute srcLayout, Attribute dstLayout);
|
||||
|
||||
@@ -27,9 +27,6 @@ def ConvertTritonGPUToLLVM : Pass<"convert-triton-gpu-to-llvm", "mlir::ModuleOp"
|
||||
Option<"computeCapability", "compute-capability",
|
||||
"int32_t", /*default*/"80",
|
||||
"device compute capability">,
|
||||
Option<"tmaMetadata", "tma-metadata",
|
||||
"mlir::triton::gpu::TMAMetadataTy*", /*default*/"nullptr",
|
||||
"tma metadata to the runtime">,
|
||||
Option<"target", "target", "enum Target", "mlir::triton::Target::Default",
|
||||
"compile for target compatible LLVM",
|
||||
"llvm::cl::values("
|
||||
|
||||
@@ -21,7 +21,8 @@ enum Target { NVVM, ROCDL, Default = NVVM };
|
||||
|
||||
std::unique_ptr<OperationPass<ModuleOp>> createConvertTritonGPUToLLVMPass();
|
||||
std::unique_ptr<OperationPass<ModuleOp>>
|
||||
createConvertTritonGPUToLLVMPass(const ConvertTritonGPUToLLVMOptions &options);
|
||||
createConvertTritonGPUToLLVMPass(int32_t computeCapability, Target target,
|
||||
mlir::triton::gpu::TMAMetadataTy *tmaMetadata);
|
||||
|
||||
} // namespace triton
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS NVGPUOps.td)
|
||||
mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=nvgpu)
|
||||
mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=nvgpu)
|
||||
@@ -6,6 +8,8 @@ mlir_tablegen(Ops.h.inc -gen-op-decls)
|
||||
mlir_tablegen(Ops.cpp.inc -gen-op-defs)
|
||||
mlir_tablegen(OpsEnums.h.inc -gen-enum-decls)
|
||||
mlir_tablegen(OpsEnums.cpp.inc -gen-enum-defs)
|
||||
add_mlir_doc(NVGPUDialect NVGPUDialect dialects/ -gen-dialect-doc)
|
||||
add_mlir_doc(NVGPUOps NVGPUOps dialects/ -gen-op-doc)
|
||||
add_public_tablegen_target(NVGPUTableGen)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS NVGPUAttrDefs.td)
|
||||
|
||||
@@ -97,7 +97,15 @@ def WGMMADesc_ModeAttr : I32EnumAttr<"WGMMADescMode",
|
||||
}
|
||||
|
||||
def NVGPU_WGMMADescCreateOp : NVGPU_Op<"wgmma_desc_create", []> {
|
||||
let arguments = (ins LLVM_AnyPointer:$buffer, I32:$height, WGMMADesc_ModeAttr:$mode);
|
||||
let arguments = (ins LLVM_AnyPointer:$buffer, I32:$height, WGMMADesc_ModeAttr:$mode, I64Attr:$swizzling);
|
||||
let builders = [
|
||||
OpBuilder<(ins "Value":$buffer,
|
||||
"Value":$height,
|
||||
"WGMMADescMode":$mode), [{
|
||||
uint32_t mode_ = static_cast<uint32_t>(mode);
|
||||
uint64_t swizzling = (mode_ == 1 ? 128 : mode_ == 2 ? 64 : 32);
|
||||
build($_builder, $_state, $_builder.getIntegerType(64), buffer, height, WGMMADescModeAttr::get($_builder.getContext(), mode), $_builder.getI64IntegerAttr(swizzling));
|
||||
}]>];
|
||||
let results = (outs I64:$res);
|
||||
let assemblyFormat = "$buffer `,` $height attr-dict `:` functional-type(operands, results)";
|
||||
}
|
||||
@@ -140,12 +148,12 @@ def WGMMA_EltTypeAttr : I32EnumAttr<"WGMMAEltType",
|
||||
def WGMMA_OperandType : AnyTypeOf<[LLVM_AnyStruct, I64], "wgmma operand A/B type">;
|
||||
|
||||
def NVGPU_WGMMAOp : NVGPU_Op<"wgmma", []> {
|
||||
let arguments = (ins WGMMA_OperandType:$opA, WGMMA_OperandType:$opB, LLVM_AnyStruct:$opC,
|
||||
let arguments = (ins WGMMA_OperandType:$opA, WGMMA_OperandType:$opB, Optional<LLVM_AnyStruct>:$opC,
|
||||
I32Attr:$m, I32Attr:$n, I32Attr:$k,
|
||||
WGMMA_EltTypeAttr:$eltTypeC, WGMMA_EltTypeAttr:$eltTypeA, WGMMA_EltTypeAttr:$eltTypeB,
|
||||
WGMMA_LayoutAttr:$layoutA, WGMMA_LayoutAttr:$layoutB);
|
||||
let results = (outs LLVM_AnyStruct:$res);
|
||||
let assemblyFormat = "$opA `,` $opB `,` $opC attr-dict `:` functional-type(operands, $res)";
|
||||
let assemblyFormat = "$opA `,` $opB (`,` $opC^)? attr-dict `:` functional-type(operands, $res)";
|
||||
}
|
||||
|
||||
def NVGPU_CGABarrierSyncOp : NVGPU_Op<"cga_barrier_sync", []> {
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonOps.td)
|
||||
mlir_tablegen(Ops.h.inc -gen-op-decls)
|
||||
mlir_tablegen(Ops.cpp.inc -gen-op-defs)
|
||||
mlir_tablegen(OpsEnums.h.inc -gen-enum-decls)
|
||||
mlir_tablegen(OpsEnums.cpp.inc -gen-enum-defs)
|
||||
add_mlir_doc(TritonOps TritonOps dialects/ -gen-op-doc)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonDialect.td)
|
||||
mlir_tablegen(Dialect.h.inc -gen-dialect-decls)
|
||||
mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs)
|
||||
add_mlir_doc(TritonDialect TritonDialect dialects/ -gen-dialect-doc)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonTypes.td)
|
||||
mlir_tablegen(Types.h.inc -gen-typedef-decls)
|
||||
|
||||
@@ -394,7 +394,12 @@ def TT_DotOp : TT_Op<"dot", [Pure,
|
||||
$d = matrix_multiply($a, $b) + $c
|
||||
}];
|
||||
|
||||
let arguments = (ins TT_FpIntTensor:$a, TT_FpIntTensor:$b, TT_FpIntTensor:$c, BoolAttr:$allowTF32);
|
||||
let arguments = (ins
|
||||
TT_FpIntTensor:$a,
|
||||
TT_FpIntTensor:$b,
|
||||
TT_FpIntTensor:$c,
|
||||
BoolAttr:$allowTF32,
|
||||
I32Attr:$maxNumImpreciseAcc);
|
||||
|
||||
let results = (outs TT_FpIntTensor:$d);
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonGPUOps.td)
|
||||
mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=triton_gpu)
|
||||
mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=triton_gpu)
|
||||
@@ -5,6 +7,8 @@ mlir_tablegen(Ops.h.inc -gen-op-decls)
|
||||
mlir_tablegen(Ops.cpp.inc -gen-op-defs)
|
||||
mlir_tablegen(Types.h.inc -gen-typedef-decls -typedefs-dialect=triton_gpu)
|
||||
mlir_tablegen(Types.cpp.inc -gen-typedef-defs -typedefs-dialect=triton_gpu)
|
||||
add_mlir_doc(TritonGPUDialect TritonGPUDialect dialects/ -gen-dialect-doc)
|
||||
add_mlir_doc(TritonGPUOps TritonGPUOps dialects/ -gen-op-doc)
|
||||
add_public_tablegen_target(TritonGPUTableGen)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonGPUAttrDefs.td)
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include "mlir/IR/Dialect.h"
|
||||
|
||||
// TritonGPU depends on Triton
|
||||
#include "triton/Dialect/NVGPU/IR/Dialect.h"
|
||||
#include "triton/Dialect/Triton/IR/Dialect.h"
|
||||
#include "triton/Dialect/TritonGPU/IR/Attributes.h"
|
||||
#include "triton/Dialect/TritonGPU/IR/Dialect.h.inc"
|
||||
|
||||
@@ -113,6 +113,7 @@ compared to 1*64 when the hasLeadingOffset is false.
|
||||
"ArrayRef<unsigned>":$order,
|
||||
"CTALayoutAttr":$CTALayout,
|
||||
"unsigned":$typeWidthInBit), [{
|
||||
<<<<<<< HEAD
|
||||
|
||||
#ifdef USE_ROCM
|
||||
// ---- begin GFX908/GFX90A ----
|
||||
@@ -155,6 +156,18 @@ compared to 1*64 when the hasLeadingOffset is false.
|
||||
}
|
||||
}
|
||||
#endif
|
||||
=======
|
||||
bool needTrans = false; // default value
|
||||
return get(context, dotOpEnc, shape, order, CTALayout, typeWidthInBit, needTrans);
|
||||
}]>,
|
||||
|
||||
AttrBuilder<(ins "DotOperandEncodingAttr":$dotOpEnc,
|
||||
"ArrayRef<int64_t>":$shape,
|
||||
"ArrayRef<unsigned>":$order,
|
||||
"CTALayoutAttr":$CTALayout,
|
||||
"unsigned":$typeWidthInBit,
|
||||
"bool":$needTrans), [{
|
||||
>>>>>>> ac9fa68d18c777e421bd3f6fb1ddcfd60b6fda33
|
||||
auto mmaEnc = dotOpEnc.getParent().dyn_cast<MmaEncodingAttr>();
|
||||
|
||||
if(!mmaEnc)
|
||||
@@ -194,16 +207,23 @@ compared to 1*64 when the hasLeadingOffset is false.
|
||||
|
||||
// --- handle A operand ---
|
||||
if (opIdx == 0) { // compute swizzling for A operand
|
||||
int vec = (order[0] == 1) ? matShape[2] : matShape[0]; // k : m
|
||||
int mmaStride = (order[0] == 1) ? matShape[0] : matShape[2];
|
||||
int m = (needTrans) ? matShape[2] : matShape[0];
|
||||
int k = (needTrans) ? matShape[0] : matShape[2];
|
||||
int vec = (order[0] == 1) ? k : m;
|
||||
int mmaStride = (order[0] == 1) ? m : k;
|
||||
int maxPhase = mmaStride / perPhase;
|
||||
return get(context, vec, perPhase, maxPhase, order, CTALayout);
|
||||
}
|
||||
|
||||
// --- handle B operand ---
|
||||
if (opIdx == 1) {
|
||||
int vec = (order[0] == 1) ? matShape[1] : matShape[2]; // n : k
|
||||
int mmaStride = (order[0] == 1) ? matShape[2] : matShape[1];
|
||||
// We compute vec and maxPhase from the m, n and k sizes of the mma
|
||||
// instruction. When the matmul operand is transposed, we must
|
||||
// take that into account to get m, n and k.
|
||||
int n = needTrans ? matShape[2] : matShape[1];
|
||||
int k = needTrans ? matShape[1] : matShape[2];
|
||||
int vec = (order[0] == 1) ? n : k;
|
||||
int mmaStride = (order[0] == 1) ? k : n;
|
||||
int maxPhase = mmaStride / perPhase;
|
||||
return get(context, vec, perPhase, maxPhase, order, CTALayout);
|
||||
}
|
||||
@@ -231,6 +251,16 @@ compared to 1*64 when the hasLeadingOffset is false.
|
||||
return get(context, dotOpEnc, shape, order, CTALayout, bitwidth);
|
||||
}]>,
|
||||
|
||||
AttrBuilder<(ins "DotOperandEncodingAttr":$dotOpEnc,
|
||||
"ArrayRef<int64_t>":$shape,
|
||||
"ArrayRef<unsigned>":$order,
|
||||
"CTALayoutAttr":$CTALayout,
|
||||
"Type":$eltTy,
|
||||
"bool":$needTrans), [{
|
||||
unsigned bitwidth = eltTy.getIntOrFloatBitWidth();
|
||||
return get(context, dotOpEnc, shape, order, CTALayout, bitwidth, needTrans);
|
||||
}]>,
|
||||
|
||||
AttrBuilder<(ins "ArrayRef<int64_t>":$shape,
|
||||
"ArrayRef<unsigned>":$order,
|
||||
"CTALayoutAttr":$CTALayout,
|
||||
|
||||
@@ -16,7 +16,6 @@ def TritonGPU_Dialect : Dialect {
|
||||
|
||||
let dependentDialects = [
|
||||
"triton::TritonDialect",
|
||||
"mlir::triton::nvgpu::NVGPUDialect",
|
||||
"mlir::gpu::GPUDialect",
|
||||
"tensor::TensorDialect",
|
||||
];
|
||||
|
||||
@@ -28,7 +28,7 @@ def TTG_ConvertLayoutOp : TTG_Op<"convert_layout",
|
||||
|
||||
let results = (outs TT_Tensor:$result);
|
||||
|
||||
let hasCanonicalizeMethod = 1;
|
||||
let hasCanonicalizer = 1;
|
||||
|
||||
let assemblyFormat = "$src attr-dict `:` functional-type(operands, results)";
|
||||
}
|
||||
|
||||
@@ -141,13 +141,6 @@ Value linearize(OpBuilder &b, Location loc, ArrayRef<Value> multiDim,
|
||||
Value linearize(OpBuilder &b, Location loc, ArrayRef<Value> multiDim,
|
||||
ArrayRef<unsigned> shape);
|
||||
|
||||
// Returns null if the op is not inside an agent region (warp specialization
|
||||
// mode). Note that there should be at most one agent id attached to the
|
||||
// operation.
|
||||
std::optional<int> getWSAgentId(Operation *op);
|
||||
std::optional<int> getWSRoleId(Operation *op);
|
||||
void setRoleId(Operation *op, int roleId);
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
#endif // TRITON_DIALECT_TRITONGPU_TRANSFORMS_UTILITY_H_
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonNvidiaGPUOps.td)
|
||||
mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=triton_nvidia_gpu)
|
||||
mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=triton_nvidia_gpu)
|
||||
@@ -5,6 +7,8 @@ mlir_tablegen(Ops.h.inc -gen-op-decls)
|
||||
mlir_tablegen(Ops.cpp.inc -gen-op-defs)
|
||||
mlir_tablegen(Types.h.inc -gen-typedef-decls -typedefs-dialect=triton_nvidia_gpu)
|
||||
mlir_tablegen(Types.cpp.inc -gen-typedef-defs -typedefs-dialect=triton_nvidia_gpu)
|
||||
add_mlir_doc(TritonNvidiaGPUDialect TritonNvidiaGPUDialect dialects/ -gen-dialect-doc)
|
||||
add_mlir_doc(TritonNvidiaGPUOps TritonNvidiaGPUOps dialects/ -gen-op-doc)
|
||||
add_public_tablegen_target(TritonNvidiaGPUTableGen)
|
||||
|
||||
set(LLVM_TARGET_DEFINITIONS TritonNvidiaGPUAttrDefs.td)
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "mlir/IR/Dialect.h"
|
||||
|
||||
// TritonNvidiaGPU depends on Triton
|
||||
#include "triton/Dialect/NVGPU/IR/Dialect.h"
|
||||
#include "triton/Dialect/Triton/IR/Dialect.h"
|
||||
#include "triton/Dialect/TritonGPU/IR/Traits.h"
|
||||
#include "triton/Dialect/TritonNvidiaGPU/IR/Dialect.h.inc"
|
||||
@@ -43,4 +42,15 @@
|
||||
#define GET_OP_CLASSES
|
||||
#include "triton/Dialect/TritonNvidiaGPU/IR/Ops.h.inc"
|
||||
|
||||
namespace mlir {
|
||||
|
||||
// Returns null if the op is not inside an agent region (warp specialization
|
||||
// mode). Note that there should be at most one agent id attached to the
|
||||
// operation.
|
||||
std::optional<int> getWSAgentId(Operation *op);
|
||||
std::optional<int> getWSRoleId(Operation *op);
|
||||
void setRoleId(Operation *op, int roleId);
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
#endif // TRITON_DIALECT_TRITONNVIDIAGPU_IR_DIALECT_H_
|
||||
|
||||
@@ -38,7 +38,6 @@ def TritonNvidiaGPU_Dialect : Dialect {
|
||||
let dependentDialects = [
|
||||
"triton::TritonDialect",
|
||||
"triton::gpu::TritonGPUDialect",
|
||||
"mlir::triton::nvgpu::NVGPUDialect",
|
||||
"mlir::gpu::GPUDialect",
|
||||
"tensor::TensorDialect",
|
||||
];
|
||||
|
||||
@@ -258,7 +258,11 @@ def TTNG_DotAsyncOp : TTNG_Op<"dot_async", [Pure,
|
||||
$d = matrix_multiply($a, $b) + $c
|
||||
}];
|
||||
|
||||
let arguments = (ins TT_FpIntTensor:$a, TT_FpIntTensor:$b, TT_FpIntTensor:$c, BoolAttr:$allowTF32);
|
||||
let arguments = (ins TT_FpIntTensor:$a,
|
||||
TT_FpIntTensor:$b,
|
||||
TT_FpIntTensor:$c,
|
||||
BoolAttr:$allowTF32,
|
||||
I32Attr:$maxNumImpreciseAcc);
|
||||
|
||||
let results = (outs TT_FpIntTensor:$d);
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
namespace triton {
|
||||
|
||||
const std::set<std::string> ENV_VARS = {
|
||||
"ENABLE_MMA_V3", "TRITON_DISABLE_LINE_INFO", "DISABLE_FAST_REDUCTION",
|
||||
"DISABLE_MMA_V3", "TRITON_DISABLE_LINE_INFO", "DISABLE_FAST_REDUCTION",
|
||||
"ENABLE_TMA", "MLIR_ENABLE_DUMP", "LLVM_IR_ENABLE_DUMP",
|
||||
"AMDGCN_ENABLE_DUMP"};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user