mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
add scripts
This commit is contained in:
20
scripts/amd/backtrace.sh
Normal file
20
scripts/amd/backtrace.sh
Normal file
@@ -0,0 +1,20 @@
|
||||
sudo apt install gdb -y
|
||||
|
||||
# export AMD_OCL_WAIT_COMMAND=1
|
||||
# export AMD_LOG_LEVEL=3
|
||||
# export HIP_LAUNCH_BLOCKING=1
|
||||
|
||||
ROOT_DIR=$(pwd)
|
||||
LOG_DIR=$ROOT_DIR/log_$(git rev-parse --symbolic-full-name --abbrev-ref HEAD)
|
||||
rm -rf $LOG_DIR
|
||||
mkdir -p $LOG_DIR
|
||||
chmod -R 777 $LOG_DIR
|
||||
|
||||
COMMAND="python/tests/test_elementwise.py::test_single_input[log-float64-float64]"
|
||||
gdb -ex "set pagination off" \
|
||||
-ex "file python" \
|
||||
-ex "run -m pytest --capture=tee-sys --verbose $COMMAND" \
|
||||
-ex "backtrace" \
|
||||
-ex "set confirm off" \
|
||||
-ex "q" \
|
||||
2>&1 | tee $LOG_DIR/gdb_backtrace.log
|
||||
16
scripts/amd/build.sh
Executable file
16
scripts/amd/build.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
set -x
|
||||
|
||||
cd python
|
||||
pip uninstall -y triton
|
||||
|
||||
sh scripts/amd/clean.sh
|
||||
|
||||
export MLIR_ENABLE_DUMP=1
|
||||
export LLVM_IR_ENABLE_DUMP=1
|
||||
export AMDGCN_ENABLE_DUMP=1
|
||||
|
||||
export TRITON_USE_ROCM=ON
|
||||
# export MI_GPU_ARCH=gfx90a # not needed
|
||||
|
||||
pip install -U matplotlib pandas filelock tabulate
|
||||
pip install --verbose -e .
|
||||
21
scripts/amd/cache_print.sh
Executable file
21
scripts/amd/cache_print.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
|
||||
CACHED_FILES=$(find /root/.triton/cache/ -type f -name "*.*")
|
||||
|
||||
rm -rf triton_cache
|
||||
mkdir -p triton_cache
|
||||
|
||||
for file in ${CACHED_FILES[@]}; do
|
||||
echo "$file"
|
||||
if [[ $file == *.so ]]; then
|
||||
echo "Skipping printing .so file"
|
||||
elif [[ $file == *.cubin ]]; then
|
||||
echo "Skipping printing .cubin file"
|
||||
else
|
||||
sed -i -e '$a\' $file
|
||||
cat $file
|
||||
cp $file triton_cache
|
||||
fi
|
||||
done
|
||||
|
||||
chmod -R 777 triton_cache
|
||||
3
scripts/amd/check_llvm_src.sh
Normal file
3
scripts/amd/check_llvm_src.sh
Normal file
@@ -0,0 +1,3 @@
|
||||
shopt -s extglob
|
||||
/opt/rocm/llvm/bin/llc -mcpu=gfx908 triton_rocm_kernels/*+([0-9]).ll
|
||||
# /opt/rocm/llvm/bin/llc -mcpu=gfx908 triton_rocm_kernels/*_before_verify.ll
|
||||
7
scripts/amd/clean.sh
Executable file
7
scripts/amd/clean.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
set -x
|
||||
|
||||
rm -rf python/triton.egg-info
|
||||
rm -rf python/.pytest_cache
|
||||
rm -rf python/tests/__pycache__
|
||||
rm -rf python/build
|
||||
rm -rf /root/.triton/cache
|
||||
11
scripts/amd/collect_rocm_kernels.sh
Normal file
11
scripts/amd/collect_rocm_kernels.sh
Normal file
@@ -0,0 +1,11 @@
|
||||
# COPY kernels
|
||||
DIRNAME=triton_rocm_kernels
|
||||
rm -rf $DIRNAME
|
||||
mkdir $DIRNAME
|
||||
mv /tmp/*.ttir $DIRNAME
|
||||
mv /tmp/*.ll $DIRNAME
|
||||
mv /tmp/*.gcn $DIRNAME
|
||||
mv /tmp/*.o $DIRNAME
|
||||
mv /tmp/*.hsaco $DIRNAME
|
||||
mv /tmp/*.s $DIRNAME
|
||||
chmod -R 777 $DIRNAME
|
||||
15
scripts/amd/debug.sh
Normal file
15
scripts/amd/debug.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
sudo apt install gdb -y
|
||||
|
||||
# export AMD_OCL_WAIT_COMMAND=1
|
||||
# export AMD_LOG_LEVEL=3
|
||||
# export HIP_LAUNCH_BLOCKING=1
|
||||
|
||||
gdb -ex "file python" \
|
||||
-ex 'run -m pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_empty_kernel[float32]"' \
|
||||
-ex "set pagination off" \
|
||||
-ex "set confirm off" \
|
||||
-ex "break _exit" \
|
||||
-ex "commands"
|
||||
-ex "run"
|
||||
-ex 'end' \
|
||||
2>&1 | tee /dockerx/pytorch/test_core_gdb.log
|
||||
2
scripts/amd/delete_hip_files.sh
Normal file
2
scripts/amd/delete_hip_files.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
# find . -name '*hip.h' -delete
|
||||
find . -name '*_hip.*' -delete
|
||||
3
scripts/amd/deps.sh
Normal file
3
scripts/amd/deps.sh
Normal file
@@ -0,0 +1,3 @@
|
||||
sudo apt update
|
||||
sudo apt install libtinfo-dev gdb
|
||||
# sudo apt install llvm-11 # install on cuda
|
||||
16
scripts/amd/docker_build.sh
Normal file
16
scripts/amd/docker_build.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
# print every command
|
||||
set -o xtrace
|
||||
|
||||
# set path
|
||||
# DOCKERFILE_PATH=scripts/docker/Dockerfile.triton_rocm
|
||||
# DOCKERFILE_PATH=scripts/docker/Dockerfile.triton_cuda
|
||||
# DOCKERFILE_PATH=triton_rocm_all_archs.Dockerfile
|
||||
DOCKERFILE_PATH=triton_rocm_20-52.Dockerfile
|
||||
|
||||
# get tag
|
||||
DOCKERFILE_NAME=$(basename $DOCKERFILE_PATH)
|
||||
DOCKERIMAGE_NAME=$(echo "$DOCKERFILE_NAME" | cut -f -1 -d '.')
|
||||
echo $DOCKERIMAGE_NAME
|
||||
|
||||
# build docker
|
||||
docker build --build-arg CACHEBUST=$(date +%s) -f $DOCKERFILE_PATH -t $DOCKERIMAGE_NAME .
|
||||
32
scripts/amd/docker_run.sh
Executable file
32
scripts/amd/docker_run.sh
Executable file
@@ -0,0 +1,32 @@
|
||||
set -o xtrace
|
||||
|
||||
alias drun='sudo docker run -it --rm --network=host --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined'
|
||||
|
||||
# DEVICES="--gpus all"
|
||||
DEVICES="--device=/dev/kfd --device=/dev/dri"
|
||||
|
||||
MEMORY="--ipc=host --shm-size 16G"
|
||||
|
||||
VOLUMES="-v $HOME/dockerx:/dockerx -v /data:/data"
|
||||
|
||||
# WORK_DIR='/root/$(basename $(pwd))'
|
||||
WORK_DIR="/dockerx/$(basename $(pwd))"
|
||||
|
||||
# IMAGE_NAME=nvcr.io/nvidia/pytorch:21.08-py3
|
||||
# IMAGE_NAME=rocm/pytorch:latest
|
||||
IMAGE_NAME=rocm/pytorch:rocm5.4_ubuntu20.04_py3.8_pytorch_1.12.1
|
||||
# IMAGE_NAME=rocm/pytorch:rocm4.3.1_ubuntu18.04_py3.6_pytorch_1.10.0
|
||||
# IMAGE_NAME=triton_rocm_20-52 # build this docker before running
|
||||
|
||||
CONTAINER_NAME=triton
|
||||
|
||||
# start new container
|
||||
docker stop $CONTAINER_NAME
|
||||
docker rm $CONTAINER_NAME
|
||||
CONTAINER_ID=$(drun -d -w $WORK_DIR --name $CONTAINER_NAME $MEMORY $VOLUMES $DEVICES $IMAGE_NAME)
|
||||
echo "CONTAINER_ID: $CONTAINER_ID"
|
||||
# docker cp . $CONTAINER_ID:$WORK_DIR
|
||||
# docker exec $CONTAINER_ID bash -c "bash scripts/amd/run.sh"
|
||||
docker attach $CONTAINER_ID
|
||||
docker stop $CONTAINER_ID
|
||||
docker rm $CONTAINER_ID
|
||||
2
scripts/amd/find_lib.sh
Normal file
2
scripts/amd/find_lib.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
LIB_NAME=libtinfow
|
||||
ldconfig -p | grep $LIB_NAME
|
||||
11
scripts/amd/git_checkout_all.sh
Normal file
11
scripts/amd/git_checkout_all.sh
Normal file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
CUR_BRANCH=$(git rev-parse --abbrev-ref HEAD)
|
||||
REF_BRANCH=$1
|
||||
|
||||
DIFF_FILES=$(git diff --name-only $REF_BRANCH $CUR_BRANCH)
|
||||
for file in $DIFF_FILES; do
|
||||
echo "$file"
|
||||
git checkout $REF_BRANCH $file
|
||||
done
|
||||
2
scripts/amd/git_commit_amend.sh
Normal file
2
scripts/amd/git_commit_amend.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
git commit --amend --no-edit
|
||||
git push --force
|
||||
7
scripts/amd/git_config_user.sh
Normal file
7
scripts/amd/git_config_user.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
# use --global flag if you want to set it for whole machine
|
||||
git config user.name "Michael Melesse"
|
||||
git config user.email "micmelesse@gmail.com"
|
||||
|
||||
# unset with
|
||||
# git config --global --unset-all user.name
|
||||
# git config --global --unset-all user.email
|
||||
1
scripts/amd/git_submodule_add.sh
Normal file
1
scripts/amd/git_submodule_add.sh
Normal file
@@ -0,0 +1 @@
|
||||
git submodule add https://github.com/ROCmSoftwarePlatform/hipify-torch third_party/hipify-torch
|
||||
10
scripts/amd/git_submodule_rm.sh
Normal file
10
scripts/amd/git_submodule_rm.sh
Normal file
@@ -0,0 +1,10 @@
|
||||
SUBMODULE=third_party/hipify-torch
|
||||
|
||||
# Remove the submodule entry from .git/config
|
||||
git submodule deinit -f $SUBMODULE
|
||||
|
||||
# Remove the submodule directory from the superproject's .git/modules directory
|
||||
rm -rf .git/modules/$SUBMODULE
|
||||
|
||||
# Remove the entry in .gitmodules and remove the submodule directory located at path/to/submodule
|
||||
git rm -f $SUBMODULE
|
||||
6
scripts/amd/git_submodule_update.sh
Normal file
6
scripts/amd/git_submodule_update.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
# if you are updating an existing checkout
|
||||
git submodule sync
|
||||
git submodule update --init --recursive
|
||||
|
||||
# if you want to push every to tip
|
||||
# git submodule update --init --recursive --remote
|
||||
18
scripts/amd/grep_for_symbol.sh
Normal file
18
scripts/amd/grep_for_symbol.sh
Normal file
@@ -0,0 +1,18 @@
|
||||
# SYMBOL=_ZN4llvm11PassBuilder17OptimizationLevel2O0E
|
||||
# SYMBOL=_ZN4llvm11DDGAnalysis3KeyE
|
||||
# SYMBOL=_ZN4llvm26UnifyFunctionExitNodesPass3runERNS_8FunctionERNS_15AnalysisManagerIS1_JEEE
|
||||
# SYMBOL=_ZN4llvm12LoopFusePass3runERNS_8FunctionERNS_15AnalysisManagerIS1_JEEE
|
||||
# SYMBOL=_ZN4llvm30moveInstructionsToTheBeginningERNS_10BasicBlockES1_RNS_13DominatorTreeERKNS_17PostDominatorTreeERNS_14DependenceInfoE
|
||||
# SYMBOL=_ZN4llvm17LoopExtractorPass3runERNS_6ModuleERNS_15AnalysisManagerIS1_JEEE
|
||||
# SYMBOL=_ZN4llvm17ObjCARCExpandPass3runERNS_8FunctionERNS_15AnalysisManagerIS1_JEEE
|
||||
# SYMBOL=_ZN4llvm13CoroSplitPass3runERNS_13LazyCallGraph3SCCERNS_15AnalysisManagerIS2_JRS1_EEES5_RNS_17CGSCCUpdateResultE
|
||||
SYMBOL=_ZN4llvm20SyntheticCountsUtilsIPKNS_9CallGraphEE9propagateERKS3_NS_12function_refIFNS_8OptionalINS_12ScaledNumberImEEEEPKNS_13CallGraphNodeERKSt4pairINS8_INS_14WeakTrackingVHEEEPSC_EEEENS7_IFvSE_SA_EEE
|
||||
for lib in $(find /tmp/clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-16.04/ -name \*.a); do
|
||||
symbols=$(nm $lib | grep $SYMBOL | grep -v " U ")
|
||||
|
||||
if [ "${#symbols}" -gt "0" ]; then
|
||||
echo $lib
|
||||
echo $symbols
|
||||
fi
|
||||
|
||||
done
|
||||
53
scripts/amd/hip_kernel.cpp
Normal file
53
scripts/amd/hip_kernel.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
__global__ void div_kernel(float *in_1, float *in_2, float *out) {
|
||||
int i = threadIdx.x;
|
||||
out[i] = in_1[i] / in_2[i];
|
||||
}
|
||||
|
||||
int main() {
|
||||
// kernel info
|
||||
#define nBlocks 1
|
||||
#define nThreads 2
|
||||
|
||||
// vector size
|
||||
size_t size = nThreads * sizeof(float);
|
||||
|
||||
// Allocate input vectors h_A and h_B in host memory
|
||||
float h_A[nThreads] = {4, 4};
|
||||
float h_B[nThreads] = {2, 2};
|
||||
float h_C[nThreads] = {};
|
||||
|
||||
// show data
|
||||
printf("Input Data\n");
|
||||
for (int i = 0; i < nThreads; i++) {
|
||||
printf("%f/%f = %f\n", h_A[i], h_B[i], h_C[i]);
|
||||
}
|
||||
|
||||
// Allocate vectors in device memory
|
||||
float *d_A;
|
||||
hipMalloc(&d_A, size);
|
||||
float *d_B;
|
||||
hipMalloc(&d_B, size);
|
||||
float *d_C;
|
||||
hipMalloc(&d_C, size);
|
||||
|
||||
// Copy vectors from host memory to device memory
|
||||
hipMemcpyHtoD(d_A, h_A, size);
|
||||
hipMemcpyHtoD(d_B, h_B, size);
|
||||
|
||||
// launch kernel
|
||||
div_kernel<<<nBlocks, nThreads>>>(d_A, d_B, d_C);
|
||||
hipDeviceSynchronize(); // wait for kernel before printting
|
||||
|
||||
// check kernel output
|
||||
bool pass = true;
|
||||
printf("Output Data\n");
|
||||
for (int i = 0; i < nThreads; i++) {
|
||||
if (d_A[i] / d_B[i] != d_C[i])
|
||||
pass = false;
|
||||
printf("%f/%f = %f\n", d_A[i], d_B[i], d_C[i]);
|
||||
}
|
||||
printf("Test %s\n", pass ? "PASS" : "FAIL");
|
||||
}
|
||||
1
scripts/amd/hipify.sh
Normal file
1
scripts/amd/hipify.sh
Normal file
@@ -0,0 +1 @@
|
||||
PYTHONDONTWRITEBYTECODE=1 python3 third_party/hipify-torch/hipify_cli.py --project-directory .
|
||||
1
scripts/amd/lld.sh
Normal file
1
scripts/amd/lld.sh
Normal file
@@ -0,0 +1 @@
|
||||
/opt/rocm/llvm/bin/ld.lld -flavor gnu -shared _empty.o -o _empty.hsaco
|
||||
2
scripts/amd/post.sh
Normal file
2
scripts/amd/post.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
bash scripts/amd/collect_rocm_kernels.sh
|
||||
bash scripts/amd/check_llvm_src.sh
|
||||
16
scripts/amd/run.sh
Executable file
16
scripts/amd/run.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
clear
|
||||
|
||||
set -x
|
||||
|
||||
|
||||
ROOT_DIR=$(pwd)
|
||||
LOG_DIR=$ROOT_DIR/log_$(git rev-parse --symbolic-full-name --abbrev-ref HEAD)
|
||||
rm -rf $LOG_DIR
|
||||
mkdir -p $LOG_DIR
|
||||
chmod -R 777 $LOG_DIR
|
||||
|
||||
|
||||
bash scripts/amd/clean.sh
|
||||
bash scripts/amd/build.sh
|
||||
bash scripts/amd/test.sh backtrace 2>&1 |tee $LOG_DIR/test.log
|
||||
# bash scripts/amd/cache_print.sh 2>&1 |tee $LOG_DIR/cache.log
|
||||
54
scripts/amd/test.sh
Executable file
54
scripts/amd/test.sh
Executable file
@@ -0,0 +1,54 @@
|
||||
#!/bin/bash
|
||||
|
||||
# clear
|
||||
set -x
|
||||
|
||||
# log dir
|
||||
ROOT_DIR=$(pwd)
|
||||
LOG_DIR=$ROOT_DIR/log_$(git rev-parse --symbolic-full-name --abbrev-ref HEAD)
|
||||
rm -rf $LOG_DIR
|
||||
mkdir -p $LOG_DIR
|
||||
chmod -R 777 $LOG_DIR
|
||||
|
||||
# check for backtrace
|
||||
if [ "$1" == "backtrace" ]; then
|
||||
sudo apt install gdb -y
|
||||
|
||||
# COMMAND="-m pytest --capture=tee-sys --verbose python/tests/test_elementwise.py::test_single_input[log-float64-float64]"
|
||||
COMMAND="python/tutorials/05-layer-norm.py"
|
||||
gdb python \
|
||||
-ex "set pagination off" \
|
||||
-ex "run $COMMAND" \
|
||||
-ex "backtrace" \
|
||||
-ex "set confirm off" \
|
||||
-ex "q" \
|
||||
2>&1 | tee $LOG_DIR/backtrace.log
|
||||
|
||||
else
|
||||
|
||||
sh scripts/amd/clean.sh
|
||||
|
||||
# pytest -rfs --verbose python/tests 2>&1 | tee $LOG_DIR/test_all.log
|
||||
# pytest -rfs --verbose "python/tests/test_compiler.py" 2>&1 | tee $LOG_DIR/test_compiler.log
|
||||
# pytest -rfs --verbose "python/tests/test_core_amd.py" 2>&1 | tee $LOG_DIR/test_core_amd.log
|
||||
# pytest -rfs --verbose "python/tests/test_core.py" 2>&1 | tee $LOG_DIR/test_core.log
|
||||
# pytest -rfs --verbose "python/tests/test_core.py::test_math_op" | tee $LOG_DIR/test_math_op.log
|
||||
# pytest -rfs --verbose "python/tests/test_core.py::test_reduce1d[min-float16-128]" | tee $LOG_DIR/test_reduce1d.log
|
||||
# pytest -rfs --verbose "python/tests/test_core.py::test_reduce1d" | tee $LOG_DIR/test_reduce1d.log
|
||||
# pytest -rfs --verbose "python/tests/test_core.py::test_reduce2d" | tee $LOG_DIR/test_reduce2d.log
|
||||
# pytest -rfs --verbose "python/tests/test_elementwise.py" 2>&1 | tee $LOG_DIR/test_elementwise.log
|
||||
# pytest -rfs --verbose "python/tests/test_elementwise.py::test_single_input[log-float64-float64]" 2>&1 | tee $LOG_DIR/test_single_input.log
|
||||
# pytest -rfs --verbose "python/tests/test_ext_elemwise.py" 2>&1 | tee $LOG_DIR/test_ext_elemwise.log
|
||||
# pytest -rfs --verbose "python/tests/test_gemm.py" 2>&1 | tee $LOG_DIR/test_gemm.log
|
||||
# pytest -rfs --verbose "python/tests/test_reduce.py" 2>&1 | tee $LOG_DIR/test_reduce.log
|
||||
# pytest -rfs --verbose "python/tests/test_transpose.py" 2>&1 | tee $LOG_DIR/test_transpose.log
|
||||
# pytest -rfs --verbose "python/tests/test_vecadd.py" 2>&1 | tee $LOG_DIR/test_vecadd.log
|
||||
|
||||
# tutorials
|
||||
# python python/tutorials/01-vector-add.py 2>&1 | tee $LOG_DIR/01-vector-add.log
|
||||
# python python/tutorials/02-fused-softmax.py 2>&1 | tee $LOG_DIR/02-fused-softmax.log
|
||||
# python python/tutorials/03-matrix-multiplication.py 2>&1 | tee $LOG_DIR/03-matrix-multiplication.log
|
||||
# python python/tutorials/04-low-memory-dropout.py 2>&1 | tee $LOG_DIR/04-low-memory-dropout.log
|
||||
python python/tutorials/05-layer-norm.py 2>&1 | tee $LOG_DIR/05-layer-norm.log
|
||||
# python python/tutorials/06-fused-attention.py 2>&1 | tee $LOG_DIR/06-fused-attention.log
|
||||
fi
|
||||
62
scripts/amd/test_fptrunc.py
Normal file
62
scripts/amd/test_fptrunc.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import torch
|
||||
import triton
|
||||
import triton.language as tl
|
||||
import pytest
|
||||
|
||||
cvt = {
|
||||
'bool': torch.bool,
|
||||
'int8': torch.int8,
|
||||
'int16': torch.int16,
|
||||
'int32': torch.int32,
|
||||
'int64': torch.int64,
|
||||
'bfloat16': torch.bfloat16,
|
||||
'float16': torch.float16,
|
||||
'float32': torch.float32,
|
||||
'float64': torch.float64,
|
||||
}
|
||||
|
||||
int_dtypes = ['int8', 'int16', 'int32', 'int64']
|
||||
float_dtypes = ['float16', 'float32', 'float64']
|
||||
dtypes = int_dtypes + float_dtypes
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype_x, dtype_z, bitcast", [
|
||||
(dtype_x, dtype_z, False)
|
||||
for dtype_x in dtypes
|
||||
for dtype_z in dtypes
|
||||
])
|
||||
def test_fptrunc(dtype_x, dtype_z, bitcast, device='cuda'):
|
||||
SIZE = 256
|
||||
# define the kernel / launch-grid
|
||||
@triton.jit
|
||||
def kernel(Z, X, **meta):
|
||||
off = tl.arange(0, meta['SIZE'])
|
||||
x = tl.load(X + off)
|
||||
tl.store(Z + off, x)
|
||||
# inputs
|
||||
x = triton.testing.random(SIZE, dtype=cvt[dtype_x], device=device)
|
||||
|
||||
# reference result
|
||||
z_ref = x.type(dtype=cvt[dtype_z])
|
||||
|
||||
# triton result
|
||||
z_tri = torch.zeros_like(x, dtype=cvt[dtype_z])
|
||||
|
||||
# triton.testing.assert_almost_equal(z_ref, z_tri)
|
||||
|
||||
print("before kernel")
|
||||
# run load and store kernel
|
||||
kernel[(1, )](z_tri, x, SIZE=SIZE, num_warps=1)
|
||||
print("after kernel")
|
||||
|
||||
# print("x:", x)
|
||||
# print("z_ref:", z_ref)
|
||||
# print("z_tri:", z_tri)
|
||||
# compare
|
||||
print("before compare")
|
||||
triton.testing.assert_almost_equal(z_ref, z_tri)
|
||||
print("after compare")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_fptrunc()
|
||||
6
scripts/amd/test_hip_kernel.sh
Normal file
6
scripts/amd/test_hip_kernel.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
rm -rf ./scripts/amd/hip_kernel.out
|
||||
rm -rf ./scripts/amd/temps
|
||||
mkdir ./scripts/amd/temps
|
||||
# hipcc -save-temps=./scripts/amd/temps scripts/amd/hip_kernel.cpp -o scripts/amd/hip_kernel.out
|
||||
hipcc -ffast-math -save-temps=./scripts/amd/temps scripts/amd/hip_kernel.cpp -o scripts/amd/hip_kernel.out
|
||||
./scripts/amd/hip_kernel.out
|
||||
Reference in New Issue
Block a user