Merge pull request #210 from ROCmSoftwarePlatform/tt.load_issue

fix PyTorch 2.0 issues
This commit is contained in:
Michael Melesse
2023-05-16 14:18:24 -04:00
committed by GitHub
12 changed files with 46 additions and 25 deletions

3
.gitignore vendored
View File

@@ -28,3 +28,6 @@ cmake-build-*
# cache dumps
triton_cache*
log_*
#
python/triton/third_party/cuda/bin/ptxas

View File

@@ -122,7 +122,7 @@ LogicalResult tritonTranslateMain(int argc, char **argv,
}
llvm::LLVMContext llvmContext;
#if 1 // USE_ROCM doesnot work here
#ifdef USE_ROCM
auto llvmir = translateTritonGPUToLLVMIR(&llvmContext, *module,
SMArch.getValue(), true /*isRocm*/);
#else

View File

@@ -337,11 +337,8 @@ translateTritonGPUToLLVMIR(llvm::LLVMContext *llvmContext,
llvm::errs() << "Pass execution failed";
return nullptr;
}
#ifdef USE_ROCM
auto llvmIR = translateLLVMToLLVMIR(llvmContext, module, true);
#else
auto llvmIR = translateLLVMToLLVMIR(llvmContext, module, false);
#endif
auto llvmIR = translateLLVMToLLVMIR(llvmContext, module, isROCM);
if (!llvmIR) {
llvm::errs() << "Translate to LLVM IR failed";
return nullptr;

View File

@@ -70,7 +70,7 @@ def optimize_ttgir(mod, num_stages, arch):
pm.enable_debug()
pm.add_tritongpu_coalesce_pass()
pm.add_tritongpu_remove_layout_conversions_pass()
if isinstance(arch, int):
if _is_cuda(arch):
pm.add_tritongpu_accelerate_matmul_pass(arch)
pm.add_tritongpu_remove_layout_conversions_pass()
pm.add_tritongpu_optimize_dot_operands_pass()
@@ -325,6 +325,9 @@ instance_descriptor = namedtuple("instance_descriptor", ["divisible_by_16", "equ
def _is_cuda(arch):
return isinstance(arch, int)
def is_hip():
return torch.version.hip is not None
def get_architecture_descriptor(capability):
if capability is None:
@@ -363,7 +366,11 @@ def add_cuda_stages(arch, extern_libs, stages):
def compile(fn, **kwargs):
arch = get_architecture_descriptor(kwargs.get("cc", None))
if is_hip():
capability = None
else:
capability = kwargs.get("cc", None)
arch = get_architecture_descriptor(capability)
is_cuda = _is_cuda(arch)
context = _triton.ir.context()
asm = dict()

Binary file not shown.

View File

@@ -83,24 +83,31 @@ if __name__ == '__main__':
print(module)
sys.exit(0)
if not args.sm:
raise argparse.ArgumentError(None, "Must specify --sm for PTX compilation")
# set arch depending on platform
if args.gfx:
arch = args.gfx
elif args.sm:
arch = args.sm
else:
raise argparse.ArgumentError(None, "Must specify --sm or --gfx for ttgir compilation")
# triton-ir -> triton-gpu-ir
module = tc.ttir_to_ttgir(module, num_warps=args.num_warps)
module = tc.optimize_ttgir(module, num_stages=3, arch=args.sm)
module = tc.optimize_ttgir(module, num_stages=3, arch=arch)
if args.target == 'triton-gpu-ir':
print(module.str())
sys.exit(0)
# triton-gpu-ir -> llvm-ir
module = tc.ttgir_to_llir(module, extern_libs=None, arch=args.sm)
module = tc.ttgir_to_llir(module, extern_libs=None, arch=arch)
if args.target == 'llvm-ir':
print(module)
sys.exit(0)
# llvm-ir -> ptx
if args.target == 'ptx':
if not args.sm:
raise argparse.ArgumentError(None, "Must specify --sm for PTX compilation")
if not args.ptx_version:
raise argparse.ArgumentError(None, "Must specify --ptx-version for PTX compilation")
module = tc.llir_to_ptx(module, arch=args.sm, ptx_version=args.ptx_version)

View File

@@ -3,14 +3,13 @@ set -x
cd python
pip uninstall -y triton
sh scripts/amd/clean.sh
bash scripts/amd/clean.sh
export MLIR_ENABLE_DUMP=1
export LLVM_IR_ENABLE_DUMP=1
export AMDGCN_ENABLE_DUMP=1
export TRITON_USE_ROCM=ON
# export MI_GPU_ARCH=gfx90a # not needed
pip install -U matplotlib pandas filelock tabulate
# pip install -U matplotlib pandas filelock tabulate
pip install --verbose -e .

View File

@@ -1,6 +1,6 @@
set -o xtrace
alias drun='sudo docker run -it --rm --network=host --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined'
alias drun='sudo docker run -it --rm --network=host --user root --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined'
# DEVICES="--gpus all"
DEVICES="--device=/dev/kfd --device=/dev/dri"
@@ -12,11 +12,9 @@ VOLUMES="-v $HOME/dockerx:/dockerx -v /data:/data"
# WORK_DIR='/root/$(basename $(pwd))'
WORK_DIR="/dockerx/$(basename $(pwd))"
# IMAGE_NAME=nvcr.io/nvidia/pytorch:21.08-py3
# IMAGE_NAME=rocm/pytorch:latest
IMAGE_NAME=rocm/pytorch:rocm5.4_ubuntu20.04_py3.8_pytorch_1.12.1
# IMAGE_NAME=rocm/pytorch:rocm4.3.1_ubuntu18.04_py3.6_pytorch_1.10.0
# IMAGE_NAME=triton_rocm_20-52 # build this docker before running
IMAGE_NAME=rocm/pytorch-nightly:latest
# IMAGE_NAME=rocm/pytorch:latest
# IMAGE_NAME=nvcr.io/nvidia/pytorch
CONTAINER_NAME=triton

8
scripts/amd/pytorch.sh Executable file
View File

@@ -0,0 +1,8 @@
# pip install transformers
# pip install --upgrade diffusers[torch]
# cd ../stuff/stable_diff
# python run.py
cd ../pytorch_rocm/
# TORCHINDUCTOR_COMPILE_THREADS=1 pytest test/inductor/test_torchinductor.py -k "test_views4_cuda"
pytest test/inductor/test_torchinductor.py -k "test_views4_cuda"

View File

@@ -12,7 +12,8 @@ chmod -R 777 $LOG_DIR
bash scripts/amd/clean.sh
bash scripts/amd/build.sh
bash scripts/amd/test.sh 2>&1 |tee $LOG_DIR/test.log
# bash scripts/amd/lit.sh 2>&1 |tee $LOG_DIR/lit.log
# bash scripts/amd/test.sh 2>&1 |tee $LOG_DIR/test.log
# bash scripts/amd/pytorch.sh 2>&1 |tee $LOG_DIR/test.log
bash scripts/amd/lit.sh 2>&1 |tee $LOG_DIR/lit.log
# bash scripts/amd/test.sh backtrace 2>&1 |tee $LOG_DIR/backtrace.log
# bash scripts/amd/cache_print.sh 2>&1 |tee $LOG_DIR/cache.log

View File

@@ -14,9 +14,10 @@ chmod -R 777 $LOG_DIR
sh scripts/amd/clean.sh
UNIT_TEST="python/test/unit/language/test_core_amd.py"
# UNIT_TEST="python/test/unit/language/test_core.py::test_empty_kernel[float32]"
# UNIT_TEST="python/test/unit/runtime/test_cache.py::test_compile_in_subproc"
# UNIT_TEST="python/test/unit/language/test_core_amd.py::test_shift_op[int8-int8-<<]"
# UNIT_TEST="python/test/unit/language/test_core_amd.py::test_shift_op[int32-int32->>]"
# UNIT_TEST="python/test/unit/language/test_core.py::test_empty_kernel[float32]"
# UNIT_TEST="python/test/unit/language/test_core.py::test_bin_op"
# UNIT_TEST="python/test/unit/language/test_core.py::test_bin_op[float32-float32-+]"
# UNIT_TEST="python/test/unit/language/test_core.py::test_bin_op[int8-float16-%]"

View File

@@ -1,4 +1,4 @@
// RUN: %PYTHON -m triton.tools.aot %s --target=llvm-ir --sm=80 | FileCheck %s
// RUN: %PYTHON -m triton.tools.aot %s --target=llvm-ir --gfx=90a | FileCheck %s
// == LLVM IR check begin ==
// CHECK-LABEL: ; ModuleID = 'LLVMDialectModule'