mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
kfd driver wip (#3912)
* kfd driver wip
* cleanups
* kfd almost ready to ring doorbell
* ding dong?
* issues with signals
* something
* works
* ops kfd
* add amd_signal_t
* works...sometimes
* program runs
* _gpu_alloc cleanup
* cleanups
* work
* header + enable profiling (#3959)
* header + enable profiling
* just cleaner
* measure
* only local time domain
* remove old comments
* fix with master
* elf parsing (#3965)
* elf parsing
* fix kernels with private
* not used
* clean up
* clean up 2
* add flags
* kfd sdma (#3970)
* working sdma
* remove driver, shorter
* all commands we might need
* svm
* kfd remove hardcoded values (#4007)
* remove hardcoded values
* match above line
* 7k lines + revert hsa
* update that from origin
* fix sdma reg gen
* not the updated SDMA
* compiler_opts
* don't require kfd_ioctl
* get ioctls from python
* get ioctls from python
* remove build_sdma_command
* merge into 64-bit fields
* shorter
* fix property spelling and off by one
---------
Co-authored-by: nimlgen <138685161+nimlgen@users.noreply.github.com>
This commit is contained in:
@@ -56,6 +56,13 @@ generate_comgr() {
|
||||
python3 -c "import tinygrad.runtime.autogen.comgr"
|
||||
}
|
||||
|
||||
# Regenerate the Python ctypes bindings for the Linux KFD ioctl header.
#
# Steps, in order:
#   1. clang2py converts /usr/include/linux/kfd_ioctl.h into $BASE/kfd.py.
#   2. `fixup` post-processes the generated file.
#   3. sed rewrites the generated `import ctypes` line to `import ctypes, os`.
#   4. python3 imports the generated module once.
#
# Relies on $BASE and the `fixup` helper, both defined outside this function.
# NOTE(review): step 4 presumably serves as a smoke test that the generated
# module loads -- confirm.
generate_kfd() {
  clang2py /usr/include/linux/kfd_ioctl.h -o "$BASE/kfd.py" -k cdefstum
  fixup "$BASE/kfd.py"
  # "/" is used as the s-command delimiter: POSIX forbids backslash as a
  # delimiter, so the previous "s\...\...\g" form was only accepted by GNU sed.
  sed -i "s/import ctypes/import ctypes, os/g" "$BASE/kfd.py"
  python3 -c "import tinygrad.runtime.autogen.kfd"
}
|
||||
|
||||
generate_cuda() {
|
||||
clang2py /usr/include/cuda.h /usr/include/nvrtc.h -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so -l /usr/lib/x86_64-linux-gnu/libnvrtc.so
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py
|
||||
@@ -69,10 +76,19 @@ generate_hsa() {
|
||||
clang2py \
|
||||
/opt/rocm/include/hsa/hsa.h \
|
||||
/opt/rocm/include/hsa/hsa_ext_amd.h \
|
||||
/opt/rocm/include/hsa/amd_hsa_signal.h \
|
||||
/opt/rocm/include/hsa/amd_hsa_queue.h \
|
||||
/opt/rocm/include/hsa/hsa_ext_finalize.h /opt/rocm/include/hsa/hsa_ext_image.h \
|
||||
--clang-args="-I/opt/rocm/include" \
|
||||
-o $BASE/hsa.py -l /opt/rocm/lib/libhsa-runtime64.so
|
||||
|
||||
# clang2py broken when pass -x c++ to prev headers
|
||||
clang2py extra/hip_gpu_driver/sdma_registers.h \
|
||||
--clang-args="-I/opt/rocm/include -x c++" \
|
||||
-o $BASE/amd_sdma.py -l /opt/rocm/lib/libhsa-runtime64.so
|
||||
|
||||
fixup $BASE/hsa.py
|
||||
fixup $BASE/amd_sdma.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/hsa.py
|
||||
sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/hsa.py
|
||||
python3 -c "import tinygrad.runtime.autogen.hsa"
|
||||
@@ -83,6 +99,7 @@ elif [ "$1" == "hip" ]; then generate_hip
|
||||
elif [ "$1" == "comgr" ]; then generate_comgr
|
||||
elif [ "$1" == "cuda" ]; then generate_cuda
|
||||
elif [ "$1" == "hsa" ]; then generate_hsa
|
||||
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_hsa
|
||||
elif [ "$1" == "kfd" ]; then generate_kfd
|
||||
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_hsa; generate_kfd
|
||||
else echo "usage: $0 <type>"
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user