mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 06:34:03 -05:00
Merge branch 'master' into asm_ucode
This commit is contained in:
151
.github/workflows/autogen.yml
vendored
151
.github/workflows/autogen.yml
vendored
@@ -14,10 +14,12 @@ on:
|
||||
paths:
|
||||
- 'tinygrad/runtime/autogen/**/*'
|
||||
- 'tinygrad/runtime/support/autogen.py'
|
||||
- '.github/workflows/autogen.yml'
|
||||
workflow_dispatch:
|
||||
paths:
|
||||
- 'tinygrad/runtime/autogen/**/*'
|
||||
- 'tinygrad/runtime/support/autogen.py'
|
||||
- '.github/workflows/autogen.yml'
|
||||
|
||||
jobs:
|
||||
autogen:
|
||||
@@ -39,125 +41,45 @@ jobs:
|
||||
pydeps: 'pyyaml mako'
|
||||
- name: Install autogen support packages
|
||||
run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev
|
||||
- name: Verify OpenCL autogen
|
||||
continue-on-error: true
|
||||
- name: Regenerate autogen files
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
|
||||
rm tinygrad/runtime/autogen/opencl.py
|
||||
python3 -c "from tinygrad.runtime.autogen import opencl"
|
||||
diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
|
||||
- name: Verify CUDA autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
|
||||
mv tinygrad/runtime/autogen/nvrtc.py /tmp/nvrtc.py.bak
|
||||
mv tinygrad/runtime/autogen/nvjitlink.py /tmp/nvjitlink.py.bak
|
||||
mv tinygrad/runtime/autogen/nv_570.py /tmp/nv_570.py.bak
|
||||
mv tinygrad/runtime/autogen/nv.py /tmp/nv.py.bak
|
||||
rm tinygrad/runtime/autogen/{cuda,nvrtc,nvjitlink,nv_570,nv}.py
|
||||
python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_570, nv"
|
||||
diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
|
||||
diff /tmp/nvrtc.py.bak tinygrad/runtime/autogen/nvrtc.py
|
||||
diff /tmp/nvjitlink.py.bak tinygrad/runtime/autogen/nvjitlink.py
|
||||
diff /tmp/nv_570.py.bak tinygrad/runtime/autogen/nv_570.py
|
||||
diff /tmp/nv.py.bak tinygrad/runtime/autogen/nv.py
|
||||
- name: Verify AMD autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
|
||||
mv tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
|
||||
mv tinygrad/runtime/autogen/hip.py /tmp/hip.py.bak
|
||||
mv tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
|
||||
mv tinygrad/runtime/autogen/sqtt.py /tmp/sqtt.py.bak
|
||||
mv tinygrad/runtime/autogen/rocprof.py /tmp/rocprof.py.bak
|
||||
mv tinygrad/runtime/autogen/am/am.py /tmp/am_am.py.bak
|
||||
mv tinygrad/runtime/autogen/am/pm4_soc15.py /tmp/am_pm4_soc15.py.bak
|
||||
mv tinygrad/runtime/autogen/am/pm4_nv.py /tmp/am_pm4_nv.py.bak
|
||||
mv tinygrad/runtime/autogen/am/sdma_4_0_0.py /tmp/am_sdma_4_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/sdma_5_0_0.py /tmp/am_sdma_5_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/sdma_6_0_0.py /tmp/am_sdma_6_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/smu_v13_0_0.py /tmp/am_smu_v13_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/smu_v14_0_2.py /tmp/am_smu_v14_0_2.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof; from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v14_0_2"
|
||||
diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
|
||||
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
|
||||
diff /tmp/hip.py.bak tinygrad/runtime/autogen/hip.py
|
||||
diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
|
||||
diff /tmp/sqtt.py.bak tinygrad/runtime/autogen/sqtt.py
|
||||
diff /tmp/rocprof.py.bak tinygrad/runtime/autogen/rocprof.py
|
||||
diff /tmp/am_am.py.bak tinygrad/runtime/autogen/am/am.py
|
||||
diff /tmp/am_pm4_soc15.py.bak tinygrad/runtime/autogen/am/pm4_soc15.py
|
||||
diff /tmp/am_pm4_nv.py.bak tinygrad/runtime/autogen/am/pm4_nv.py
|
||||
diff /tmp/am_sdma_4_0_0.py.bak tinygrad/runtime/autogen/am/sdma_4_0_0.py
|
||||
diff /tmp/am_sdma_5_0_0.py.bak tinygrad/runtime/autogen/am/sdma_5_0_0.py
|
||||
diff /tmp/am_sdma_6_0_0.py.bak tinygrad/runtime/autogen/am/sdma_6_0_0.py
|
||||
diff /tmp/am_smu_v13_0_0.py.bak tinygrad/runtime/autogen/am/smu_v13_0_0.py
|
||||
diff /tmp/am_smu_v14_0_2.py.bak tinygrad/runtime/autogen/am/smu_v14_0_2.py
|
||||
- name: Verify Linux autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
|
||||
mv tinygrad/runtime/autogen/kfd.py /tmp/kfd.py.bak
|
||||
mv tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
|
||||
mv tinygrad/runtime/autogen/ib.py /tmp/ib.py.bak
|
||||
mv tinygrad/runtime/autogen/pci.py /tmp/pci.py.bak
|
||||
mv tinygrad/runtime/autogen/vfio.py /tmp/vfio.py.bak
|
||||
rm tinygrad/runtime/autogen/{comgr,hsa,hip,amd_gpu,sqtt,rocprof}.py
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof"
|
||||
rm tinygrad/runtime/autogen/am/{am,pm4_soc15,pm4_nv,sdma_4_0_0,sdma_5_0_0,sdma_6_0_0,smu_v13_0_0,smu_v14_0_2}.py
|
||||
python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v14_0_2"
|
||||
rm tinygrad/runtime/autogen/{libc,kfd,io_uring,ib,pci,vfio}.py
|
||||
python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio"
|
||||
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
|
||||
diff /tmp/kfd.py.bak tinygrad/runtime/autogen/kfd.py
|
||||
diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
|
||||
diff /tmp/ib.py.bak tinygrad/runtime/autogen/ib.py
|
||||
diff /tmp/pci.py.bak tinygrad/runtime/autogen/pci.py
|
||||
diff /tmp/vfio.py.bak tinygrad/runtime/autogen/vfio.py
|
||||
- name: Verify LLVM autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/llvm.py /tmp/llvm.py.bak
|
||||
rm tinygrad/runtime/autogen/llvm.py
|
||||
python3 -c "from tinygrad.runtime.autogen import llvm"
|
||||
diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py
|
||||
- name: Verify WebGPU autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/webgpu.py /tmp/webgpu.py.bak
|
||||
rm tinygrad/runtime/autogen/webgpu.py
|
||||
python3 -c "from tinygrad.runtime.autogen import webgpu"
|
||||
diff /tmp/webgpu.py.bak tinygrad/runtime/autogen/webgpu.py
|
||||
- name: Verify Qualcomm autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/kgsl.py /tmp/kgsl.py.bak
|
||||
mv tinygrad/runtime/autogen/qcom_dsp.py /tmp/qcom_dsp.py.bak
|
||||
rm tinygrad/runtime/autogen/{kgsl,qcom_dsp}.py
|
||||
python3 -c "from tinygrad.runtime.autogen import kgsl, qcom_dsp"
|
||||
diff /tmp/kgsl.py.bak tinygrad/runtime/autogen/kgsl.py
|
||||
diff /tmp/qcom_dsp.py.bak tinygrad/runtime/autogen/qcom_dsp.py
|
||||
- name: Verify libusb autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/libusb.py /tmp/libusb.py.bak
|
||||
rm tinygrad/runtime/autogen/libusb.py
|
||||
python3 -c "from tinygrad.runtime.autogen import libusb"
|
||||
diff /tmp/libusb.py.bak tinygrad/runtime/autogen/libusb.py
|
||||
- name: Verify mesa autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/mesa.py /tmp/mesa.py.bak
|
||||
rm tinygrad/runtime/autogen/mesa.py
|
||||
python3 -c "from tinygrad.runtime.autogen import mesa"
|
||||
diff /tmp/mesa.py.bak tinygrad/runtime/autogen/mesa.py
|
||||
- name: Verify libclang autogen
|
||||
continue-on-error: true
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/libclang.py /tmp/libclang.py.bak
|
||||
rm tinygrad/runtime/autogen/avcodec.py
|
||||
python3 -c "from tinygrad.runtime.autogen import avcodec"
|
||||
REGEN=1 python3 -c "from tinygrad.runtime.autogen import libclang"
|
||||
diff /tmp/libclang.py.bak tinygrad/runtime/autogen/libclang.py
|
||||
- name: Generate patch for differences
|
||||
- name: Check for differences
|
||||
run: |
|
||||
if ! git diff --quiet; then
|
||||
git diff > autogen-ubuntu.patch
|
||||
echo "Autogen files out of date. Apply patch from: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts"
|
||||
exit 1
|
||||
fi
|
||||
- name: Upload patch artifact
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: autogen-ubuntu-patch
|
||||
path: autogen-ubuntu.patch
|
||||
if-no-files-found: ignore
|
||||
- name: Fail if differences found
|
||||
run: git diff --quiet
|
||||
|
||||
autogen-mac:
|
||||
name: In-tree Autogen (macos)
|
||||
runs-on: macos-14
|
||||
@@ -169,25 +91,24 @@ jobs:
|
||||
uses: ./.github/actions/setup-tinygrad
|
||||
with:
|
||||
llvm: 'true'
|
||||
- name: Verify macos autogen
|
||||
continue-on-error: true
|
||||
- name: Regenerate autogen files
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/metal.py /tmp/metal.py.bak
|
||||
rm tinygrad/runtime/autogen/metal.py
|
||||
LIBCLANG_PATH=/opt/homebrew/opt/llvm@20/lib/libclang.dylib python3 -c "from tinygrad.runtime.autogen import metal"
|
||||
diff /tmp/metal.py.bak tinygrad/runtime/autogen/metal.py
|
||||
- name: Generate patch for differences
|
||||
- name: Check for differences
|
||||
run: |
|
||||
if ! git diff --quiet; then
|
||||
git diff > autogen-macos.patch
|
||||
echo "Autogen files out of date. Apply patch from: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts"
|
||||
exit 1
|
||||
fi
|
||||
- name: Upload patch artifact
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: autogen-macos-patch
|
||||
path: autogen-macos.patch
|
||||
if-no-files-found: ignore
|
||||
- name: Fail if differences found
|
||||
run: git diff --quiet
|
||||
|
||||
autogen-comgr-3:
|
||||
name: In-tree Autogen (comgr 3)
|
||||
runs-on: ubuntu-24.04
|
||||
@@ -206,22 +127,20 @@ jobs:
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt -qq update || true
|
||||
sudo apt-get install -y --no-install-recommends libclang-20-dev comgr
|
||||
- name: Verify comgr (3) autogen
|
||||
continue-on-error: true
|
||||
- name: Regenerate autogen files
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/comgr_3.py /tmp/comgr_3.py.bak
|
||||
rm tinygrad/runtime/autogen/comgr_3.py
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr_3"
|
||||
diff /tmp/comgr_3.py.bak tinygrad/runtime/autogen/comgr_3.py
|
||||
- name: Generate patch for differences
|
||||
- name: Check for differences
|
||||
run: |
|
||||
if ! git diff --quiet; then
|
||||
git diff > autogen-comgr3.patch
|
||||
echo "Autogen files out of date. Apply patch from: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts"
|
||||
exit 1
|
||||
fi
|
||||
- name: Upload patch artifact
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: autogen-comgr3-patch
|
||||
path: autogen-comgr3.patch
|
||||
if-no-files-found: ignore
|
||||
- name: Fail if differences found
|
||||
run: git diff --quiet
|
||||
|
||||
@@ -312,28 +312,31 @@ class Group:
|
||||
idxs = tuple(idx * st.cols if i == 3 else idx for i, idx in enumerate(idxs))
|
||||
src_i = ((idxs[0] * src.shape[-3] + idxs[1]) * src.shape[-2] + idxs[2]) * src.shape[-1] + idxs[3]
|
||||
|
||||
for height in self.ker.range(dst.shape[-4], track=False):
|
||||
for width in self.ker.range(dst.shape[-3], track=False):
|
||||
elements_per_thread = st.base_shape.elements_per_thread
|
||||
memcpy_per_row = st.base_shape.cols // elements_per_thread
|
||||
total_calls = st.base_shape.num_elements // (self.group_threads * elements_per_thread)
|
||||
elements_per_thread = st.base_shape.elements_per_thread
|
||||
memcpy_per_row = st.cols // elements_per_thread
|
||||
total_calls = (dst.shape[-4] * dst.shape[-3] * st.base_shape.num_elements) // (self.group_threads * elements_per_thread)
|
||||
|
||||
for outer in self.ker.range(total_calls, track=False):
|
||||
for inner in self.ker.range(elements_per_thread, axis_type=AxisType.UPCAST, track=False):
|
||||
load_idx = outer * self.group_threads + self.laneid
|
||||
row = load_idx // memcpy_per_row
|
||||
col = (load_idx * elements_per_thread) % st.base_shape.cols + inner
|
||||
for outer in self.ker.range(total_calls, track=False):
|
||||
for inner in self.ker.range(elements_per_thread, axis_type=AxisType.UPCAST, track=False):
|
||||
load_idx = outer * self.group_threads + self.laneid
|
||||
row = load_idx // memcpy_per_row
|
||||
col = (load_idx * elements_per_thread) % st.cols + inner
|
||||
height = row // st.base_shape.rows
|
||||
width = col // st.base_shape.cols
|
||||
|
||||
srow, scol = cast(ST, dst).swizzle(row, col)
|
||||
row = row % st.base_shape.rows
|
||||
col = col % st.base_shape.cols
|
||||
|
||||
src_i += height * st.base_shape.rows * row_stride + width * st.base_shape.cols
|
||||
src_i += row * row_stride + col
|
||||
srow, scol = cast(ST, dst).swizzle(row, col)
|
||||
|
||||
src_load = srcf[src_i]
|
||||
if src.dtype.base != dst.dtype.base:
|
||||
src_load = src_load.cast(dst.dtype.base)
|
||||
dst_store = dst[*dst_idxs, height, width, srow, scol].store(src_load)
|
||||
dst_store = dst_store.end(height, width, outer, inner).barrier()
|
||||
src_i += height * st.base_shape.rows * row_stride + width * st.base_shape.cols
|
||||
src_i += row * row_stride + col
|
||||
|
||||
src_load = srcf[src_i]
|
||||
if src.dtype.base != dst.dtype.base:
|
||||
src_load = src_load.cast(dst.dtype.base)
|
||||
dst_store = dst[*dst_idxs, height, width, srow, scol].store(src_load)
|
||||
dst_store = dst_store.end(height, width, outer, inner).barrier()
|
||||
elif dst_dtype.addrspace == AddrSpace.REG and src_dtype.addrspace == AddrSpace.GLOBAL and isinstance(dst, RT):
|
||||
srcf = src.flatten()
|
||||
row_stride = prod(src.shape[axis+1:])
|
||||
|
||||
@@ -178,5 +178,21 @@ class TestCfg(unittest.TestCase):
|
||||
s_endpgm(),
|
||||
])
|
||||
|
||||
def test_colored_blocks(self):
|
||||
N = 10
|
||||
asm = ["entry:", s_branch("init0"),]
|
||||
for i in range(N):
|
||||
asm += [f"init{i}:", s_mov_b32(s[1], i + 1), s_branch(loop:=f"loop{i}")]
|
||||
asm += [
|
||||
f"{loop}:",
|
||||
s_nop(i & 7),
|
||||
s_add_u32(s[1], s[1], -1),
|
||||
s_cmp_eq_i32(s[1], 0),
|
||||
s_cbranch_scc0(loop),
|
||||
s_branch(f"init{i+1}" if i + 1 < N else "end"),
|
||||
]
|
||||
asm += ["end:", s_endpgm()]
|
||||
run_asm("test_colored_blocks", asm)
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# mypy: ignore-errors
|
||||
import ctypes
|
||||
from tinygrad.helpers import unwrap
|
||||
from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR
|
||||
from tinygrad.runtime.support.c import DLL, Struct, CEnum, _IO, _IOW, _IOR, _IOWR
|
||||
enum_HEVCNALUnitType = CEnum(ctypes.c_uint32)
|
||||
HEVC_NAL_TRAIL_N = enum_HEVCNALUnitType.define('HEVC_NAL_TRAIL_N', 0)
|
||||
HEVC_NAL_TRAIL_R = enum_HEVCNALUnitType.define('HEVC_NAL_TRAIL_R', 1)
|
||||
|
||||
@@ -55,8 +55,11 @@ function addTags(root) {
|
||||
root.selectAll("text").data(d => [d]).join("text").text(d => d).attr("dy", "0.35em");
|
||||
}
|
||||
|
||||
const colorScale = d3.scaleSequential(t => t > 0 ? d3.interpolateLab(colorScheme.ACTIVE[1], colorScheme.ACTIVE[2])(t) : colorScheme.ACTIVE[0]).clamp(true);
|
||||
|
||||
const drawGraph = (data) => {
|
||||
const g = dagre.graphlib.json.read(data);
|
||||
if (data.value.colorDomain != null) colorScale.domain(data.value.colorDomain);
|
||||
// draw nodes
|
||||
d3.select("#graph-svg").on("click", () => d3.selectAll(".highlight").classed("highlight", false));
|
||||
const nodes = d3.select("#nodes").selectAll("g").data(g.nodes().map(id => g.node(id)), d => d).join("g").attr("class", d => d.className ?? "node")
|
||||
@@ -88,7 +91,7 @@ const drawGraph = (data) => {
|
||||
}
|
||||
return [ret];
|
||||
}).join("text").selectAll("tspan").data(d => d).join("tspan").attr("x", "0").attr("dy", 14).selectAll("tspan").data(d => d).join("tspan")
|
||||
.attr("fill", d => d.color).text(d => d.st).attr("xml:space", "preserve").style("font-family", g.graph().font);
|
||||
.attr("fill", d => typeof d.color === "string" ? d.color : colorScale(d.color)).text(d => d.st).attr("xml:space", "preserve").style("font-family", g.graph().font);
|
||||
addTags(nodes.selectAll("g.tag").data(d => d.tag != null ? [d] : []).join("g").attr("class", "tag")
|
||||
.attr("transform", d => `translate(${-d.width/2+8}, ${-d.height/2+8})`).datum(e => e.tag));
|
||||
// draw edges
|
||||
@@ -154,7 +157,8 @@ const formatUnit = (d, unit="") => d3.format(".3~s")(d)+unit;
|
||||
|
||||
const colorScheme = {TINY:new Map([["Schedule","#1b5745"],["get_program","#1d2e62"],["compile","#63b0cd"],["DEFAULT","#354f52"]]),
|
||||
DEFAULT:["#2b2e39", "#2c2f3a", "#31343f", "#323544", "#2d303a", "#2e313c", "#343746", "#353847", "#3c4050", "#404459", "#444862", "#4a4e65"],
|
||||
BUFFER:["#342483", "#3E2E94", "#4938A4", "#5442B4", "#5E4CC2", "#674FCA"], SE:new Map([["OCC", "#101725"], ["INST", "#0A2042"]]),}
|
||||
BUFFER:["#342483", "#3E2E94", "#4938A4", "#5442B4", "#5E4CC2", "#674FCA"], SE:new Map([["OCC", "#101725"], ["INST", "#0A2042"]]),
|
||||
ACTIVE:["#565f89", "#c8d3f5", "#7aa2f7"]}
|
||||
const cycleColors = (lst, i) => lst[i%lst.length];
|
||||
|
||||
const rescaleTrack = (source, tid, k) => {
|
||||
@@ -811,6 +815,7 @@ async function main() {
|
||||
}
|
||||
return table;
|
||||
}
|
||||
if (ret.data != null) renderDag(ret, { recenter:true });
|
||||
if (ret.cols != null) renderTable(root, ret);
|
||||
else if (ret.src != null) root.append(() => codeBlock(ret.src, ret.lang));
|
||||
return document.querySelector("#custom").replaceChildren(root.node());
|
||||
|
||||
@@ -13,21 +13,26 @@ onmessage = (e) => {
|
||||
self.close();
|
||||
}
|
||||
|
||||
const layoutCfg = (g, { blocks, paths, pc_table, colors }) => {
|
||||
const layoutCfg = (g, { blocks, paths, pc_table, counters, colors }) => {
|
||||
g.setGraph({ rankdir:"TD", font:"monospace" });
|
||||
ctx.font = `350 ${LINE_HEIGHT}px ${g.graph().font}`;
|
||||
// basic blocks render the assembly in nodes
|
||||
let maxColor = 0;
|
||||
for (const [lead, members] of Object.entries(blocks)) {
|
||||
let [width, height, label] = [0, 0, []];
|
||||
for (const m of members) {
|
||||
const text = pc_table[m][0];
|
||||
if (counters != null) {
|
||||
const num = counters[m]?.hit_count || 0;
|
||||
if (num > maxColor) maxColor = num;
|
||||
label.push([{st:text, color:num}]);
|
||||
} else { const [inst, ...operands] = text.split(" "); label.push([{st:inst+" ", color:"#7aa2f7"}, {st:operands.join(" "), color:"#9aa5ce"}]); }
|
||||
width = Math.max(width, ctx.measureText(text).width);
|
||||
height += LINE_HEIGHT;
|
||||
const [inst, ...operands] = text.split(" ");
|
||||
label.push([{st:inst+" ", color:"#7aa2f7"}, {st:operands.join(" "), color:"#9aa5ce"}]);
|
||||
}
|
||||
g.setNode(lead, { ...rectDims(width, height), label, id:lead, color:"#1a1b26" });
|
||||
}
|
||||
g.graph().colorDomain = [0, maxColor];
|
||||
// paths become edges between basic blocks
|
||||
for (const [lead, value] of Object.entries(paths)) {
|
||||
for (const [id, color] of Object.entries(value)) g.setEdge(lead, id, {label:{type:"port", text:""}, color:colors[color]});
|
||||
|
||||
@@ -285,7 +285,7 @@ def unpack_sqtt(key:tuple[str, int], data:list, p:ProfileProgramEvent) -> tuple[
|
||||
n = next(inst_units[u])
|
||||
if (events:=cu_events.get(w.cu_loc)) is None: cu_events[w.cu_loc] = events = []
|
||||
events.append(ProfileRangeEvent(w.simd_loc, loc:=f"INST WAVE:{w.wave_id} N:{n}", Decimal(w.begin_time), Decimal(w.end_time)))
|
||||
wave_insts.setdefault(w.cu_loc, {})[f"{u} N:{n}"] = {"wave":w, "disasm":disasm, "run_number":n, "loc":loc}
|
||||
wave_insts.setdefault(w.cu_loc, {})[f"{u} N:{n}"] = {"wave":w, "disasm":disasm, "prg":p, "run_number":n, "loc":loc}
|
||||
# * OCC waves
|
||||
units:dict[str, itertools.count] = {}
|
||||
wave_start:dict[str, int] = {}
|
||||
@@ -490,7 +490,9 @@ def get_render(query:str) -> dict:
|
||||
prev_instr = max(prev_instr, e.time + e.dur)
|
||||
summary = [{"label":"Total Cycles", "value":w.end_time-w.begin_time}, {"label":"SE", "value":w.se}, {"label":"CU", "value":w.cu},
|
||||
{"label":"SIMD", "value":w.simd}, {"label":"Wave ID", "value":w.wave_id}, {"label":"Run number", "value":data["run_number"]}]
|
||||
return {"rows":[tuple(v.values()) for v in rows.values()], "cols":columns, "metadata":[summary]}
|
||||
cfg = amdgpu_cfg((p:=data["prg"]).lib, device_props[p.device]["gfx_target_version"])["data"]
|
||||
cfg["counters"] = {pc-p.base:v for pc,v in rows.items()}
|
||||
return {"rows":[tuple(v.values()) for v in rows.values()], "cols":columns, "metadata":[summary], "data":cfg}
|
||||
return data
|
||||
|
||||
# ** HTTP server
|
||||
|
||||
Reference in New Issue
Block a user