* connect to gpu

* rlc init?

* gfx comp start init

* early init is hardcoded, some progress with fw

* gart

* progress, next mqd

* ring setup, still does not execute anything

* ugh write correct reg

* pci2: vm

* pci2: start psp

* vm seems to work

* pci2: gfx start

* pci2: fix psp ring resp

* pci2: try ring

* pci2: mes and some fixes

* pci2: some progress

* pci2: progress

* pci2: mm

* pci2: discovery

* pci2: correct apertures

* pci2: b

* pci2: i

* pci2: l

* pci2: o

* pci2: cmu

* pci2: mes_kiq works

* pci2: mes

* pci2: kcq does not work(

* pci2: unhalt gfx

* ops_am

* minor

* check if amdgpu is there, or we will crash

* bring back graph, it just works

* less prints

* do not init mes (not used)

* remove unused files

* ops_am: start move into core

* ops_am: works

* clocks, but still slower

* faster + no mes_kiq

* vm frags + remove mes

* cleanup fw

* gmc tiny cleanup

* move to ops_amd

* comment out what we dont really need

* driverless

* close in speed

* am clean most of ips

* gmc to ips

* cleaner

* new vm walker

* comment old one

* remove unused autogens

* last write ups

* remove psp hardcoded values

* more

* add logs

* ih

* p2p and sdma

* vfio hal and interrupts

* smth

* amd dev iface

* minor after rebase

* bind for sdma

* Revert "bind for sdma"

This reverts commit a90766514d.

* tmp

* debug new mm

* ugh, allreduce hangs fixed

* p1

* works

* no pci.py

* cleaner a bit

* smth

* tiny cleanups

* cleaner a bit

* pciiface

* linter

* linter 2

* linter 3

* linter

* pylint

* reverted unrelated changes

* unrelated

* cmp tool

* ugh wrong fw

* clockgating

* unrelated

* alloc smaller chunks

* this

* opt sigs

* collect stat

* ops

* upd

* proclogs

* proclogs2

* vfio

* ruff

* linter pylint

* oops

* mypy p1

* mem fix

* mypy p2

* mypy p3

* mypy p4

* correct

* minor

* more tests

* linter in tests

* pci_regs header

* minor write up

* setup

* do not require libs

---------

Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
This commit is contained in:
nimlgen
2024-12-31 23:06:17 +03:00
committed by GitHub
parent d4a1d5211e
commit c18307e749
49 changed files with 529016 additions and 9 deletions

View File

@@ -247,6 +247,83 @@ generate_qcom() {
python3 -c "import tinygrad.runtime.autogen.qcom_dsp"
}
generate_pciaccess() {
  # Generate $BASE/libpciaccess.py from pciaccess.h and linux/pci_regs.h,
  # linked against the system libpciaccess.so, then post-process the output.
  clang2py -k cdefstum \
    /usr/include/pciaccess.h \
    /usr/include/linux/pci_regs.h \
    -l /usr/lib/x86_64-linux-gnu/libpciaccess.so \
    -o "$BASE/libpciaccess.py"

  # Bring `os` into scope in the generated module; the os.path.exists guard
  # added below needs it. A plain `/` delimiter is used because POSIX does not
  # allow backslash as the delimiter of the sed `s` command.
  sed -i "s/import ctypes/import ctypes, os/g" "$BASE/libpciaccess.py"
  fixup "$BASE/libpciaccess.py"

  # Rewrite every ctypes.CDLL('<path>') so it evaluates to None when <path>
  # does not exist, instead of attempting to load a missing library.
  sed -i "s/ctypes\.CDLL('\([^']*\)')/ctypes.CDLL('\1') if os.path.exists('\1') else None/g" "$BASE/libpciaccess.py"
}
generate_vfio() {
  # Generate $BASE/vfio.py from the kernel's linux/vfio.h header and run the
  # shared fixup pass over the result.
  local out=$BASE/vfio.py

  clang2py -k cdefstum /usr/include/linux/vfio.h -o $out
  fixup $out
}
generate_am() {
  # Generate the modules under $BASE/am from the headers in
  # extra/amdpci/headers, running fixup on each generated file.
  local hdr=extra/amdpci/headers
  local ip

  # Struct, enum and interface headers are combined into a single module.
  clang2py -k cdefstum \
    $hdr/v11_structs.h \
    $hdr/amdgpu_vm.h \
    $hdr/discovery.h \
    $hdr/amdgpu_ucode.h \
    $hdr/soc21_enum.h \
    $hdr/psp_gfx_if.h \
    $hdr/amdgpu_psp.h \
    $hdr/amdgpu_irq.h \
    $hdr/amdgpu_doorbell.h \
    $hdr/soc15_ih_clientid.h \
    -o $BASE/am/am.py
  fixup $BASE/am/am.py

  # Each entry has an <ip>_offset.h / <ip>_sh_mask.h header pair that is
  # turned into its own <ip>.py module; the order matches the original list.
  for ip in mp_13_0_0 mp_11_0 gc_11_0_0 mmhub_3_0_0 nbio_4_3_0 osssys_6_0_0; do
    clang2py -k cdefstum \
      $hdr/${ip}_offset.h \
      $hdr/${ip}_sh_mask.h \
      -o $BASE/am/${ip}.py
    fixup $BASE/am/${ip}.py
  done

  # The SMU module is built from three differently named headers, so it is
  # handled outside the loop.
  clang2py -k cdefstum \
    $hdr/smu_v13_0_0_ppsmc.h \
    $hdr/smu13_driver_if_v13_0_0.h \
    $hdr/amdgpu_smu.h \
    -o $BASE/am/smu_v13_0_0.py
  fixup $BASE/am/smu_v13_0_0.py
}
if [ "$1" == "opencl" ]; then generate_opencl
elif [ "$1" == "hip" ]; then generate_hip
elif [ "$1" == "comgr" ]; then generate_comgr
@@ -256,11 +333,14 @@ elif [ "$1" == "hsa" ]; then generate_hsa
elif [ "$1" == "kfd" ]; then generate_kfd
elif [ "$1" == "nv" ]; then generate_nv
elif [ "$1" == "amd" ]; then generate_amd
elif [ "$1" == "am" ]; then generate_am
elif [ "$1" == "qcom" ]; then generate_qcom
elif [ "$1" == "io_uring" ]; then generate_io_uring
elif [ "$1" == "libc" ]; then generate_libc
elif [ "$1" == "kgsl" ]; then generate_kgsl
elif [ "$1" == "adreno" ]; then generate_adreno
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_libc
elif [ "$1" == "pci" ]; then generate_pciaccess
elif [ "$1" == "vfio" ]; then generate_vfio
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_libc; generate_am
else echo "usage: $0 <type>"
fi