kfd driver wip (#3912)

* kfd driver wip

* cleanups

* kfd almost ready to ring doorbell

* ding dong?

* issues with signals

* something

* works

* ops kfd

* add amd_signal_t

* works...sometimes

* program runs

* _gpu_alloc cleanup

* cleanups

* work

* header + enable profiling (#3959)

* header + enable profiling

* just cleaner

* measure

* only local time domain

* remove old comments

* fix with master

* elf parsing (#3965)

* elf parsing

* fix kernels with private

* not used

* clean up

* clean up 2

* add flags

* kfd sdma (#3970)

* working sdma

* remove driver, shorter

* all commands we might need

* svm

* kfd remove hardcoded values (#4007)

* remove hardcoded values

* match above line

* 7k lines + revert hsa

* update that from origin

* fix sdma reg gen

* not the updated SDMA

* compiler_opts

* don't require kfd_ioctl

* get ioctls from python

* get ioctls from python

* remove build_sdma_command

* merge into 64-bit fields

* shorter

* fix property spelling and off by one

---------

Co-authored-by: nimlgen <138685161+nimlgen@users.noreply.github.com>
This commit is contained in:
George Hotz
2024-03-30 15:08:12 -07:00
committed by GitHub
parent bee8eeae55
commit 2abb474d43
11 changed files with 3527 additions and 3040 deletions

View File

@@ -56,6 +56,13 @@ generate_comgr() {
python3 -c "import tinygrad.runtime.autogen.comgr"
}
# Regenerate the Python bindings for the Linux KFD ioctl header into $BASE/kfd.py.
#
# Steps: run clang2py on the system kfd_ioctl.h, post-process the output with
# the script's shared `fixup` helper, make the generated module import `os`
# alongside `ctypes`, then import the module once as a smoke test.
generate_kfd() {
  # Paths are quoted so a $BASE containing spaces or glob characters is passed
  # to each tool as a single argument.
  clang2py /usr/include/linux/kfd_ioctl.h -o "$BASE/kfd.py" -k cdefstum
  fixup "$BASE/kfd.py"
  # Use '/' as the s-command delimiter: POSIX leaves a backslash delimiter
  # undefined, so the previous "s\...\...\g" form relied on sed being lenient.
  sed -i "s/import ctypes/import ctypes, os/g" "$BASE/kfd.py"
  # Smoke test: the freshly generated module must at least import cleanly.
  # NOTE(review): assumes $BASE resolves to tinygrad/runtime/autogen - confirm.
  python3 -c "import tinygrad.runtime.autogen.kfd"
}
generate_cuda() {
clang2py /usr/include/cuda.h /usr/include/nvrtc.h -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so -l /usr/lib/x86_64-linux-gnu/libnvrtc.so
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py
@@ -69,10 +76,19 @@ generate_hsa() {
clang2py \
/opt/rocm/include/hsa/hsa.h \
/opt/rocm/include/hsa/hsa_ext_amd.h \
/opt/rocm/include/hsa/amd_hsa_signal.h \
/opt/rocm/include/hsa/amd_hsa_queue.h \
/opt/rocm/include/hsa/hsa_ext_finalize.h /opt/rocm/include/hsa/hsa_ext_image.h \
--clang-args="-I/opt/rocm/include" \
-o $BASE/hsa.py -l /opt/rocm/lib/libhsa-runtime64.so
# clang2py breaks when -x c++ is passed for the headers above, so this header is processed separately
clang2py extra/hip_gpu_driver/sdma_registers.h \
--clang-args="-I/opt/rocm/include -x c++" \
-o $BASE/amd_sdma.py -l /opt/rocm/lib/libhsa-runtime64.so
fixup $BASE/hsa.py
fixup $BASE/amd_sdma.py
sed -i "s\import ctypes\import ctypes, os\g" $BASE/hsa.py
sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/hsa.py
python3 -c "import tinygrad.runtime.autogen.hsa"
@@ -83,6 +99,7 @@ elif [ "$1" == "hip" ]; then generate_hip
elif [ "$1" == "comgr" ]; then generate_comgr
elif [ "$1" == "cuda" ]; then generate_cuda
elif [ "$1" == "hsa" ]; then generate_hsa
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_hsa
elif [ "$1" == "kfd" ]; then generate_kfd
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_hsa; generate_kfd
else echo "usage: $0 <type>"
fi