mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
add CDNA3+RDNA4 support (#13882)
* fix CI * remove junk * rename lib to dsl * correct * cleanups
This commit is contained in:
19
.github/workflows/test.yml
vendored
19
.github/workflows/test.yml
vendored
@@ -679,22 +679,11 @@ jobs:
|
||||
run: python -m pytest -n=auto extra/assembly/amd/ --durations 20
|
||||
- name: Install pdfplumber
|
||||
run: pip install pdfplumber
|
||||
- name: Verify RDNA3 autogen is up to date
|
||||
- name: Verify AMD autogen is up to date
|
||||
run: |
|
||||
python -m extra.assembly.amd.lib --arch rdna3
|
||||
git diff --exit-code extra/assembly/amd/autogen/rdna3/__init__.py
|
||||
- name: Verify CDNA4 autogen is up to date
|
||||
run: |
|
||||
python -m extra.assembly.amd.lib --arch cdna4
|
||||
git diff --exit-code extra/assembly/amd/autogen/cdna4/__init__.py
|
||||
- name: Verify RDNA3 pcode autogen is up to date
|
||||
run: |
|
||||
python -m extra.assembly.amd.pcode --arch rdna3
|
||||
git diff --exit-code extra/assembly/amd/autogen/rdna3/gen_pcode.py
|
||||
- name: Verify CDNA4 pcode autogen is up to date
|
||||
run: |
|
||||
python -m extra.assembly.amd.pcode --arch cdna4
|
||||
git diff --exit-code extra/assembly/amd/autogen/cdna4/gen_pcode.py
|
||||
python -m extra.assembly.amd.dsl --arch all
|
||||
python -m extra.assembly.amd.pcode --arch all
|
||||
git diff --exit-code extra/assembly/amd/autogen/
|
||||
|
||||
testnvidia:
|
||||
strategy:
|
||||
|
||||
@@ -76,10 +76,10 @@ VIZ=1 python -c "from tinygrad import Tensor; Tensor.ones(10).sum().realize()"
|
||||
## Auto-generated Files (DO NOT EDIT)
|
||||
|
||||
The following files are auto-generated and should never be edited manually:
|
||||
- `extra/assembly/amd/autogen/rdna3/__init__.py` - Generated by `python -m extra.assembly.amd.lib --arch rdna3`
|
||||
- `extra/assembly/amd/autogen/rdna3/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch rdna3`
|
||||
- `extra/assembly/amd/autogen/cdna4/__init__.py` - Generated by `python -m extra.assembly.amd.lib --arch cdna4`
|
||||
- `extra/assembly/amd/autogen/cdna4/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch cdna4`
|
||||
- `extra/assembly/amd/autogen/{arch}/__init__.py` - Generated by `python -m extra.assembly.amd.dsl --arch {arch}`
|
||||
- `extra/assembly/amd/autogen/{arch}/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch {arch}`
|
||||
|
||||
Where `{arch}` is one of: `rdna3`, `rdna4`, `cdna`
|
||||
|
||||
To add missing instruction implementations, add them to `extra/assembly/amd/emu.py` instead.
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# RDNA3 assembler and disassembler
|
||||
from __future__ import annotations
|
||||
import re
|
||||
from extra.assembly.amd.lib import Inst, RawImm, Reg, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap
|
||||
from extra.assembly.amd.dsl import Inst, RawImm, Reg, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap
|
||||
|
||||
# Decoding helpers
|
||||
SPECIAL_GPRS = {106: "vcc_lo", 107: "vcc_hi", 124: "null", 125: "m0", 126: "exec_lo", 127: "exec_hi", 253: "scc"}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# autogenerated from AMD CDNA4 ISA PDF by lib.py - do not edit
|
||||
# autogenerated from AMD CDNA3+CDNA4 ISA PDF by dsl.py - do not edit
|
||||
from enum import IntEnum
|
||||
from typing import Annotated
|
||||
from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
import functools
|
||||
|
||||
class SrcEnum(IntEnum):
|
||||
@@ -158,6 +158,12 @@ class DSOp(IntEnum):
|
||||
DS_READ2ST64_B64 = 120
|
||||
DS_ADD_RTN_F64 = 124
|
||||
DS_CONDXCHG32_RTN_B64 = 126
|
||||
DS_GWS_SEMA_RELEASE_ALL = 152
|
||||
DS_GWS_INIT = 153
|
||||
DS_GWS_SEMA_V = 154
|
||||
DS_GWS_SEMA_BR = 155
|
||||
DS_GWS_SEMA_P = 156
|
||||
DS_GWS_BARRIER = 157
|
||||
DS_READ_ADDTID_B32 = 182
|
||||
DS_PK_ADD_RTN_F16 = 183
|
||||
DS_PK_ADD_RTN_BF16 = 184
|
||||
@@ -1385,6 +1391,8 @@ class VOP3POp(IntEnum):
|
||||
V_SMFMAC_F32_16X16X128_BF8_BF8 = 59
|
||||
V_SMFMAC_F32_16X16X128_BF8_FP8 = 60
|
||||
V_SMFMAC_F32_16X16X128_FP8_BF8 = 61
|
||||
V_MFMA_F32_16X16X8_XF32 = 62
|
||||
V_MFMA_F32_32X32X4_XF32 = 63
|
||||
V_MFMA_F32_32X32X1_2B_F32 = 64
|
||||
V_MFMA_F32_16X16X1_4B_F32 = 65
|
||||
V_MFMA_F32_4X4X1_16B_F32 = 66
|
||||
@@ -1648,6 +1656,11 @@ class VOPCOp(IntEnum):
|
||||
# instruction formats
|
||||
class DPP(Inst64):
|
||||
encoding = bits[31:26] == 0b110110
|
||||
src1_sel = bits[58:56]
|
||||
src1_sext = bits[59]
|
||||
src1_neg = bits[60]
|
||||
src1_abs = bits[61]
|
||||
s1 = bits[63]
|
||||
offset0 = bits[7:0]
|
||||
offset1 = bits[15:8]
|
||||
op = bits[24:17]
|
||||
@@ -1667,6 +1680,7 @@ class DS(Inst64):
|
||||
data1:VGPRField = bits[55:48]
|
||||
offset0 = bits[7:0]
|
||||
offset1 = bits[15:8]
|
||||
gds = bits[16]
|
||||
acc = bits[25]
|
||||
|
||||
class FLAT(Inst64):
|
||||
@@ -1694,10 +1708,10 @@ class MTBUF(Inst64):
|
||||
offset:Imm = bits[11:0]
|
||||
offen = bits[12]
|
||||
idxen = bits[13]
|
||||
sc0 = bits[14]
|
||||
sc1 = bits[53]
|
||||
nt = bits[54]
|
||||
acc = bits[55]
|
||||
sc0 = bits[14]
|
||||
|
||||
class MUBUF(Inst64):
|
||||
encoding = bits[31:26] == 0b111000
|
||||
@@ -1735,6 +1749,23 @@ class SDWA(Inst64):
|
||||
sd = bits[47]
|
||||
row_mask = bits[63:60]
|
||||
|
||||
class SDWAB(Inst64):
|
||||
src0:Src = bits[39:32]
|
||||
dst_sel = bits[42:40]
|
||||
dst_u = bits[44:43]
|
||||
clmp = bits[45]
|
||||
omod = bits[47:46]
|
||||
src0_sel = bits[50:48]
|
||||
src0_sext = bits[51]
|
||||
src0_neg = bits[52]
|
||||
src0_abs = bits[53]
|
||||
s0 = bits[55]
|
||||
src1_sel = bits[58:56]
|
||||
src1_sext = bits[59]
|
||||
src1_neg = bits[60]
|
||||
src1_abs = bits[61]
|
||||
s1 = bits[63]
|
||||
|
||||
class SMEM(Inst64):
|
||||
encoding = bits[31:26] == 0b110000
|
||||
op:Annotated[BitField, SMEMOp] = bits[25:18]
|
||||
@@ -1950,6 +1981,12 @@ ds_read2_b64 = functools.partial(DS, DSOp.DS_READ2_B64)
|
||||
ds_read2st64_b64 = functools.partial(DS, DSOp.DS_READ2ST64_B64)
|
||||
ds_add_rtn_f64 = functools.partial(DS, DSOp.DS_ADD_RTN_F64)
|
||||
ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64)
|
||||
ds_gws_sema_release_all = functools.partial(DS, DSOp.DS_GWS_SEMA_RELEASE_ALL)
|
||||
ds_gws_init = functools.partial(DS, DSOp.DS_GWS_INIT)
|
||||
ds_gws_sema_v = functools.partial(DS, DSOp.DS_GWS_SEMA_V)
|
||||
ds_gws_sema_br = functools.partial(DS, DSOp.DS_GWS_SEMA_BR)
|
||||
ds_gws_sema_p = functools.partial(DS, DSOp.DS_GWS_SEMA_P)
|
||||
ds_gws_barrier = functools.partial(DS, DSOp.DS_GWS_BARRIER)
|
||||
ds_read_addtid_b32 = functools.partial(DS, DSOp.DS_READ_ADDTID_B32)
|
||||
ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16)
|
||||
ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16)
|
||||
@@ -3145,6 +3182,8 @@ v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X
|
||||
v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8)
|
||||
v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8)
|
||||
v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8)
|
||||
v_mfma_f32_16x16x8_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X8_XF32)
|
||||
v_mfma_f32_32x32x4_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_XF32)
|
||||
v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X1_2B_F32)
|
||||
v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X1_4B_F32)
|
||||
v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X1_16B_F32)
|
||||
13131
extra/assembly/amd/autogen/cdna/gen_pcode.py
Normal file
13131
extra/assembly/amd/autogen/cdna/gen_pcode.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
||||
# autogenerated from AMD RDNA3.5 ISA PDF by lib.py - do not edit
|
||||
# autogenerated from AMD RDNA3.5 ISA PDF by dsl.py - do not edit
|
||||
from enum import IntEnum
|
||||
from typing import Annotated
|
||||
from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
|
||||
import functools
|
||||
|
||||
class SrcEnum(IntEnum):
|
||||
|
||||
3051
extra/assembly/amd/autogen/rdna4/__init__.py
Normal file
3051
extra/assembly/amd/autogen/rdna4/__init__.py
Normal file
File diff suppressed because it is too large
Load Diff
13053
extra/assembly/amd/autogen/rdna4/gen_pcode.py
Normal file
13053
extra/assembly/amd/autogen/rdna4/gen_pcode.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -289,8 +289,10 @@ class Inst64(Inst): pass
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
PDF_URLS = {
|
||||
"rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content",
|
||||
"cdna4": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf",
|
||||
"rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content", # RDNA3.5
|
||||
"rdna4": "https://docs.amd.com/api/khub/documents/uQpkEvk3pv~kfAb2x~j4uw/content",
|
||||
"cdna": ["https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf",
|
||||
"https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf"],
|
||||
}
|
||||
FIELD_TYPES = {'SSRC0': 'SSrc', 'SSRC1': 'SSrc', 'SOFFSET': 'SSrc', 'SADDR': 'SSrc', 'SRC0': 'Src', 'SRC1': 'Src', 'SRC2': 'Src',
|
||||
'SDST': 'SGPRField', 'SBASE': 'SGPRField', 'SDATA': 'SGPRField', 'SRSRC': 'SGPRField', 'VDST': 'VGPRField', 'VSRC1': 'VGPRField', 'VDATA': 'VGPRField',
|
||||
@@ -338,28 +340,34 @@ def _parse_fields_table(table: list, fmt: str, enums: set[str]) -> list[tuple]:
|
||||
fields.append((name, hi, lo, enc_val, ftype))
|
||||
return fields
|
||||
|
||||
def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
"""Generate instruction definitions from AMD ISA PDF. Returns dict with formats for testing."""
|
||||
def _parse_single_pdf(url: str) -> dict:
|
||||
"""Parse a single PDF and return raw data (formats, enums, src_enum, doc_name, is_cdna)."""
|
||||
import re, pdfplumber
|
||||
from tinygrad.helpers import fetch
|
||||
|
||||
pdf = pdfplumber.open(fetch(PDF_URLS[arch]))
|
||||
pdf = pdfplumber.open(fetch(url))
|
||||
|
||||
# Auto-detect document type from first page
|
||||
first_page_text = pdf.pages[0].extract_text() or ''
|
||||
is_cdna4 = 'CDNA4' in first_page_text or 'CDNA 4' in first_page_text
|
||||
doc_name = "CDNA4" if is_cdna4 else "RDNA3.5"
|
||||
is_cdna3 = 'CDNA3' in first_page_text or 'CDNA 3' in first_page_text or 'MI300' in first_page_text
|
||||
is_cdna = is_cdna3 or is_cdna4
|
||||
is_rdna4 = 'RDNA4' in first_page_text or 'RDNA 4' in first_page_text
|
||||
is_rdna35 = 'RDNA3.5' in first_page_text or 'RDNA 3.5' in first_page_text # Check 3.5 before 3
|
||||
is_rdna3 = not is_rdna35 and ('RDNA3' in first_page_text or 'RDNA 3' in first_page_text)
|
||||
doc_name = "CDNA4" if is_cdna4 else "CDNA3" if is_cdna3 else "RDNA4" if is_rdna4 else "RDNA3.5" if is_rdna35 else "RDNA3" if is_rdna3 else "Unknown"
|
||||
|
||||
# Find the "Microcode Formats" section by searching the PDF
|
||||
# Look for "Chapter X. Microcode Formats" (RDNA3) or first format subsection header (CDNA4)
|
||||
# Find the "Microcode Formats" section - search for SOP2 format definition
|
||||
microcode_start = None
|
||||
for i, page in enumerate(pdf.pages):
|
||||
text = page.extract_text() or ''
|
||||
if re.search(r'Chapter \d+\.\s+Microcode Formats', text) or \
|
||||
(i > 100 and re.search(r'^\d+\.\d+\.\d+\.\s+SOP2\s*\n', text, re.M)):
|
||||
total_pages = len(pdf.pages)
|
||||
# Search from likely locations (formats are typically 20-95% through the document - RDNA3 has them at ~25%)
|
||||
for i in range(int(total_pages * 0.2), total_pages):
|
||||
text = pdf.pages[i].extract_text() or ''
|
||||
# Look for "X.Y.Z. SOP2" section header or "Chapter X. Microcode Formats"
|
||||
if re.search(r'\d+\.\d+\.\d+\.\s+SOP2\b', text) or re.search(r'Chapter \d+\.\s+Microcode Formats', text):
|
||||
microcode_start = i
|
||||
break
|
||||
if microcode_start is None: microcode_start = 150 # fallback for RDNA3.5
|
||||
if microcode_start is None: microcode_start = int(total_pages * 0.9)
|
||||
|
||||
pages = pdf.pages[microcode_start:microcode_start + 50]
|
||||
page_texts = [p.extract_text() or '' for p in pages]
|
||||
@@ -392,16 +400,13 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
return (pos := text.find('Field Name')) != -1 and bool(re.search(r'\d+\.\d+\.\d+\.\s+\w+\s*\n', text[:pos]))
|
||||
|
||||
# find format headers with their page indices
|
||||
format_headers = [] # (fmt_name, page_idx, header_pos)
|
||||
format_headers = []
|
||||
for i, text in enumerate(page_texts):
|
||||
# Match "X.Y.Z. FORMAT_NAME" followed by Description (RDNA3) or newline (CDNA4)
|
||||
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n?Description', text): format_headers.append((m.group(1), i, m.start()))
|
||||
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n', text):
|
||||
fmt_name = m.group(1)
|
||||
# For CDNA4: accept uppercase format names (SOP2, VOP1, etc) directly
|
||||
if is_cdna4 and fmt_name.isupper() and len(fmt_name) >= 2:
|
||||
if is_cdna and fmt_name.isupper() and len(fmt_name) >= 2:
|
||||
format_headers.append((fmt_name, i, m.start()))
|
||||
# For RDNA3: check for Description on next page
|
||||
elif m.start() > len(text) - 200 and 'Description' not in text[m.end():] and i + 1 < len(page_texts):
|
||||
next_text = page_texts[i + 1].lstrip()
|
||||
if next_text.startswith('Description') or (next_text.startswith('"RDNA') and 'Description' in next_text[:200]):
|
||||
@@ -414,7 +419,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
text, tables = page_texts[page_idx], page_tables[page_idx]
|
||||
field_pos = text.find('Field Name', header_pos)
|
||||
|
||||
# find fields table with ENCODING (same page or up to 2 pages ahead)
|
||||
fields = None
|
||||
for offset in range(3):
|
||||
if page_idx + offset >= len(pages): break
|
||||
@@ -425,7 +429,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
break
|
||||
if fields: break
|
||||
|
||||
# for modifier formats (no ENCODING), accept first fields table on same page
|
||||
if not fields and field_pos > header_pos:
|
||||
for t in tables:
|
||||
if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)):
|
||||
@@ -435,7 +438,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
if not fields: continue
|
||||
field_names = {f[0] for f in fields}
|
||||
|
||||
# check next pages for continuation fields (tables without ENCODING)
|
||||
for pg_offset in range(1, 3):
|
||||
if page_idx + pg_offset >= len(pages) or has_header_before_fields(page_texts[page_idx + pg_offset]): break
|
||||
for t in page_tables[page_idx + pg_offset]:
|
||||
@@ -447,19 +449,70 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
break
|
||||
formats[fmt_name] = fields
|
||||
|
||||
# fix known PDF errors (verified against LLVM test vectors)
|
||||
# SMEM: PDF says DLC=bit14, GLC=bit16 but actual encoding is DLC=bit13, GLC=bit14
|
||||
# fix known PDF errors
|
||||
if 'SMEM' in formats:
|
||||
formats['SMEM'] = [(n, 13 if n == 'DLC' else 14 if n == 'GLC' else h, 13 if n == 'DLC' else 14 if n == 'GLC' else l, e, t)
|
||||
for n, h, l, e, t in formats['SMEM']]
|
||||
|
||||
return {"formats": formats, "enums": enums, "src_enum": src_enum, "doc_name": doc_name, "is_cdna": is_cdna}
|
||||
|
||||
def _merge_results(results: list[dict]) -> dict:
|
||||
"""Merge multiple PDF parse results into a superset. Asserts if any conflicts."""
|
||||
merged = {"formats": {}, "enums": {}, "src_enum": dict(SRC_EXTRAS), "doc_names": [], "is_cdna": False}
|
||||
for r in results:
|
||||
merged["doc_names"].append(r["doc_name"])
|
||||
merged["is_cdna"] = merged["is_cdna"] or r["is_cdna"]
|
||||
# Merge src_enum (union, assert no conflicts)
|
||||
for val, name in r["src_enum"].items():
|
||||
if val in merged["src_enum"]:
|
||||
assert merged["src_enum"][val] == name, f"SrcEnum conflict: {val} = {merged['src_enum'][val]} vs {name}"
|
||||
else:
|
||||
merged["src_enum"][val] = name
|
||||
# Merge enums (union of ops per enum, assert no conflicts)
|
||||
for enum_name, ops in r["enums"].items():
|
||||
if enum_name not in merged["enums"]: merged["enums"][enum_name] = {}
|
||||
for val, name in ops.items():
|
||||
if val in merged["enums"][enum_name]:
|
||||
assert merged["enums"][enum_name][val] == name, f"{enum_name} conflict: {val} = {merged['enums'][enum_name][val]} vs {name}"
|
||||
else:
|
||||
merged["enums"][enum_name][val] = name
|
||||
# Merge formats (union of fields, assert no bit position conflicts for same field name)
|
||||
for fmt_name, fields in r["formats"].items():
|
||||
if fmt_name not in merged["formats"]:
|
||||
merged["formats"][fmt_name] = list(fields)
|
||||
else:
|
||||
existing = {f[0]: (f[1], f[2]) for f in merged["formats"][fmt_name]} # name -> (hi, lo)
|
||||
for f in fields:
|
||||
name, hi, lo = f[0], f[1], f[2]
|
||||
if name in existing:
|
||||
assert existing[name] == (hi, lo), f"Format {fmt_name} field {name} conflict: bits {existing[name]} vs ({hi}, {lo})"
|
||||
else:
|
||||
merged["formats"][fmt_name].append(f)
|
||||
return merged
|
||||
|
||||
def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
"""Generate instruction definitions from AMD ISA PDF(s). Returns dict with formats for testing."""
|
||||
urls = PDF_URLS[arch]
|
||||
if isinstance(urls, str): urls = [urls]
|
||||
|
||||
# Parse all PDFs and merge
|
||||
results = [_parse_single_pdf(url) for url in urls]
|
||||
if len(results) == 1:
|
||||
merged = results[0]
|
||||
doc_name = merged["doc_name"]
|
||||
else:
|
||||
merged = _merge_results(results)
|
||||
doc_name = "+".join(merged["doc_names"])
|
||||
|
||||
formats, enums, src_enum = merged["formats"], merged["enums"], merged["src_enum"]
|
||||
|
||||
# generate output
|
||||
def enum_lines(name, items):
|
||||
return [f"class {name}(IntEnum):"] + [f" {n} = {v}" for v, n in sorted(items.items())] + [""]
|
||||
def field_key(f): return order.index(f[0].lower()) if f[0].lower() in order else 1000
|
||||
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by lib.py - do not edit", "from enum import IntEnum",
|
||||
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by dsl.py - do not edit", "from enum import IntEnum",
|
||||
"from typing import Annotated",
|
||||
"from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField",
|
||||
"from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField",
|
||||
"import functools", ""]
|
||||
lines += enum_lines("SrcEnum", src_enum) + sum([enum_lines(n, ops) for n, ops in sorted(enums.items())], [])
|
||||
# Format-specific field defaults (verified against LLVM test vectors)
|
||||
@@ -475,7 +528,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
if defaults := format_defaults.get(fmt_name):
|
||||
lines.append(f" _defaults = {defaults}")
|
||||
for name, hi, lo, _, ftype in sorted([f for f in fields if f[0] != 'ENCODING'], key=field_key):
|
||||
# Wrap IntEnum types (ending in Op) with Annotated[BitField, ...] for correct typing
|
||||
if ftype and ftype.endswith('Op'):
|
||||
ann = f":Annotated[BitField, {ftype}]"
|
||||
else:
|
||||
@@ -489,23 +541,18 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=2"}.get(fmt, "")
|
||||
tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}")
|
||||
if fmt in formats or fmt in ("GLOBAL", "SCRATCH"):
|
||||
# VOP1/VOP2/VOPC get _e32 suffix, VOP3 promoted ops (< 512) get _e64 suffix
|
||||
if fmt in ("VOP1", "VOP2", "VOPC"):
|
||||
suffix = "_e32"
|
||||
elif fmt == "VOP3" and op_val < 512:
|
||||
suffix = "_e64"
|
||||
else:
|
||||
suffix = ""
|
||||
# FMAMK/FMAAK have a literal constant K that must be passed via literal= kwarg
|
||||
# FMAMK: D = S0.f * K + S1.f (K is 3rd operand in assembly syntax)
|
||||
# FMAAK: D = S0.f * S1.f + K (K is 4th operand in assembly syntax)
|
||||
if name in ('V_FMAMK_F32', 'V_FMAMK_F16'):
|
||||
lines.append(f"def {name.lower()}{suffix}(vdst, src0, K, vsrc1): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
|
||||
elif name in ('V_FMAAK_F32', 'V_FMAAK_F16'):
|
||||
lines.append(f"def {name.lower()}{suffix}(vdst, src0, vsrc1, K): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
|
||||
else:
|
||||
lines.append(f"{name.lower()}{suffix} = functools.partial({tgt}.{name}{seg})")
|
||||
# export SrcEnum values, but skip DPP8/DPP16 which conflict with class names
|
||||
skip_exports = {'DPP8', 'DPP16'}
|
||||
src_names = {name for _, name in src_enum.items()}
|
||||
lines += [""] + [f"{name} = SrcEnum.{name}" for _, name in sorted(src_enum.items()) if name not in skip_exports]
|
||||
@@ -519,7 +566,12 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Generate instruction definitions from AMD ISA PDF")
|
||||
parser.add_argument("--arch", choices=list(PDF_URLS.keys()), default="rdna3", help="Target architecture (default: rdna3)")
|
||||
parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3", help="Target architecture (default: rdna3)")
|
||||
args = parser.parse_args()
|
||||
if args.arch == "all":
|
||||
for arch in PDF_URLS.keys():
|
||||
result = generate(f"extra/assembly/amd/autogen/{arch}/__init__.py", arch=arch)
|
||||
print(f"{arch}: generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes")
|
||||
else:
|
||||
result = generate(f"extra/assembly/amd/autogen/{args.arch}/__init__.py", arch=args.arch)
|
||||
print(f"generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes")
|
||||
@@ -2,7 +2,7 @@
|
||||
# mypy: ignore-errors
|
||||
from __future__ import annotations
|
||||
import ctypes, os
|
||||
from extra.assembly.amd.lib import Inst, RawImm
|
||||
from extra.assembly.amd.dsl import Inst, RawImm
|
||||
from extra.assembly.amd.pcode import _f32, _i32, _sext, _f16, _i16, _f64, _i64
|
||||
from extra.assembly.amd.autogen.rdna3.gen_pcode import get_compiled_functions
|
||||
from extra.assembly.amd.autogen.rdna3 import (
|
||||
|
||||
@@ -702,7 +702,7 @@ class ExecContext:
|
||||
# PDF EXTRACTION AND CODE GENERATION
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
from extra.assembly.amd.lib import PDF_URLS
|
||||
from extra.assembly.amd.dsl import PDF_URLS
|
||||
INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M)
|
||||
|
||||
# Patterns that can't be handled by the DSL (require special handling in emu.py)
|
||||
@@ -736,38 +736,52 @@ def extract_pseudocode(text: str) -> str | None:
|
||||
if is_code: result.append(s)
|
||||
return '\n'.join(result) if result else None
|
||||
|
||||
def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict:
|
||||
"""Parse pseudocode from PDF for all ops. Returns {enum_cls: {op: pseudocode}}."""
|
||||
def _get_op_enums(arch: str) -> list:
|
||||
"""Dynamically load op enums from the arch-specific autogen module."""
|
||||
import importlib
|
||||
autogen = importlib.import_module(f"extra.assembly.amd.autogen.{arch}")
|
||||
# Deterministic order: common enums first, then arch-specific
|
||||
enums = []
|
||||
for name in ['SOP1Op', 'SOP2Op', 'SOPCOp', 'SOPKOp', 'SOPPOp', 'VOP1Op', 'VOP2Op', 'VOP3Op', 'VOP3SDOp', 'VOP3POp', 'VOPCOp', 'VOP3AOp', 'VOP3BOp']:
|
||||
if hasattr(autogen, name): enums.append(getattr(autogen, name))
|
||||
return enums
|
||||
|
||||
def _parse_pseudocode_from_single_pdf(url: str, defined_ops: dict, OP_ENUMS: list) -> dict:
|
||||
"""Parse pseudocode from a single PDF."""
|
||||
import pdfplumber
|
||||
from tinygrad.helpers import fetch
|
||||
from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
|
||||
|
||||
OP_ENUMS = [SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp]
|
||||
defined_ops = {}
|
||||
for enum_cls in OP_ENUMS:
|
||||
for op in enum_cls:
|
||||
if op.name.startswith(('S_', 'V_')): defined_ops[(op.name, op.value)] = (enum_cls, op)
|
||||
pdf = pdfplumber.open(fetch(url))
|
||||
total_pages = len(pdf.pages)
|
||||
|
||||
pdf = pdfplumber.open(fetch(PDF_URLS[arch]))
|
||||
page_cache = {}
|
||||
def get_page_text(i):
|
||||
if i not in page_cache: page_cache[i] = pdf.pages[i].extract_text() or ''
|
||||
return page_cache[i]
|
||||
|
||||
# Find the "Instructions" chapter by looking for "Chapter X. Instructions"
|
||||
# Find the "Instructions" chapter - typically 15-40% through the document
|
||||
instr_start = None
|
||||
for i, page in enumerate(pdf.pages):
|
||||
text = page.extract_text() or ''
|
||||
if re.search(r'Chapter \d+\.\s+Instructions', text):
|
||||
search_starts = [int(total_pages * 0.2), int(total_pages * 0.1), 0]
|
||||
for start in search_starts:
|
||||
for i in range(start, min(start + 100, total_pages)):
|
||||
if re.search(r'Chapter \d+\.\s+Instructions', get_page_text(i)):
|
||||
instr_start = i
|
||||
break
|
||||
if instr_start is None: instr_start = len(pdf.pages) // 3 # fallback
|
||||
if instr_start: break
|
||||
if instr_start is None: instr_start = total_pages // 3 # fallback
|
||||
|
||||
# Find end - stop at "Microcode Formats" chapter
|
||||
instr_end = len(pdf.pages)
|
||||
for i, page in enumerate(pdf.pages[instr_start:], instr_start):
|
||||
text = page.extract_text() or ''
|
||||
if re.search(r'Chapter \d+\.\s+Microcode Formats', text):
|
||||
# Find end - stop at "Microcode Formats" chapter (typically 60-70% through)
|
||||
instr_end = total_pages
|
||||
search_starts = [int(total_pages * 0.6), int(total_pages * 0.5), instr_start]
|
||||
for start in search_starts:
|
||||
for i in range(start, min(start + 100, total_pages)):
|
||||
if re.search(r'Chapter \d+\.\s+Microcode Formats', get_page_text(i)):
|
||||
instr_end = i
|
||||
break
|
||||
if instr_end < total_pages: break
|
||||
|
||||
all_text = '\n'.join(pdf.pages[i].extract_text() or '' for i in range(instr_start, instr_end))
|
||||
# Extract remaining pages (some already cached from chapter search)
|
||||
all_text = '\n'.join(get_page_text(i) for i in range(instr_start, instr_end))
|
||||
matches = list(INST_PATTERN.finditer(all_text))
|
||||
instructions: dict = {cls: {} for cls in OP_ENUMS}
|
||||
|
||||
@@ -783,12 +797,39 @@ def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict:
|
||||
|
||||
return instructions
|
||||
|
||||
def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict:
  """Collect pseudocode for every op of `arch` from its PDF(s). Returns {enum_cls: {op: pseudocode}}.

  PDF_URLS[arch] may be a single URL or a list of URLs. The list is walked in reverse and the first
  pseudocode stored for an op is kept; a differing entry from a PDF visited later is only printed.
  """
  op_enums = _get_op_enums(arch)
  # (name, value) -> (enum class, member) for every op whose name starts with S_ or V_
  defined_ops = {(op.name, op.value): (enum_cls, op)
                 for enum_cls in op_enums for op in enum_cls if op.name.startswith(('S_', 'V_'))}

  sources = PDF_URLS[arch]
  if isinstance(sources, str): sources = [sources]

  # Union of pseudocode over all PDFs
  # Reversed so newer PDFs (RDNA3.5, CDNA4) are visited first and therefore win
  merged: dict = {enum_cls: {} for enum_cls in op_enums}
  for url in reversed(sources):
    parsed = _parse_pseudocode_from_single_pdf(url, defined_ops, op_enums)
    for enum_cls, ops in parsed.items():
      for op, pseudocode in ops.items():
        bucket = merged[enum_cls]
        if op not in bucket:
          bucket[op] = pseudocode
        elif bucket[op] != pseudocode:
          print(f" Ignoring {op.name} from older PDF:")
          print(f" new: {bucket[op]!r}")
          print(f" old: {pseudocode!r}")

  return merged
|
||||
|
||||
def generate_gen_pcode(output_path: str = "extra/assembly/amd/autogen/rdna3/gen_pcode.py", arch: str = "rdna3"):
|
||||
"""Generate gen_pcode.py - compiled pseudocode functions for the emulator."""
|
||||
from pathlib import Path
|
||||
from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
|
||||
|
||||
OP_ENUMS = [SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp]
|
||||
OP_ENUMS = _get_op_enums(arch)
|
||||
|
||||
print("Parsing pseudocode from PDF...")
|
||||
by_cls = parse_pseudocode_from_pdf(arch)
|
||||
@@ -803,11 +844,13 @@ def generate_gen_pcode(output_path: str = "extra/assembly/amd/autogen/rdna3/gen_
|
||||
print(f"Total: {total_found}/{total_ops} ({100*total_found//total_ops}%)")
|
||||
|
||||
print("\nCompiling to pseudocode functions...")
|
||||
# Build dynamic import line based on available enums
|
||||
enum_names = [e.__name__ for e in OP_ENUMS]
|
||||
lines = [f'''# autogenerated by pcode.py - do not edit
|
||||
# to regenerate: python -m extra.assembly.amd.pcode --arch {arch}
|
||||
# ruff: noqa: E501,F405,F403
|
||||
# mypy: ignore-errors
|
||||
from extra.assembly.amd.autogen.{arch} import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
|
||||
from extra.assembly.amd.autogen.{arch} import {", ".join(enum_names)}
|
||||
from extra.assembly.amd.pcode import *
|
||||
''']
|
||||
|
||||
@@ -965,6 +1008,8 @@ from extra.assembly.amd.pcode import *
|
||||
lines.append('')
|
||||
|
||||
# Add manually implemented V_WRITELANE_B32 (not in PDF pseudocode, requires special vgpr_write handling)
|
||||
# Only add for architectures that have VOP3Op (RDNA) not VOP3AOp/VOP3BOp (CDNA)
|
||||
if 'VOP3Op' in enum_names:
|
||||
lines.append('''
|
||||
# V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode)
|
||||
def _VOP3Op_V_WRITELANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
|
||||
@@ -987,6 +1032,10 @@ VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Generate pseudocode functions from AMD ISA PDF")
|
||||
parser.add_argument("--arch", choices=list(PDF_URLS.keys()), default="rdna3", help="Target architecture (default: rdna3)")
|
||||
parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3", help="Target architecture (default: rdna3)")
|
||||
args = parser.parse_args()
|
||||
if args.arch == "all":
|
||||
for arch in PDF_URLS.keys():
|
||||
generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{arch}/gen_pcode.py", arch=arch)
|
||||
else:
|
||||
generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{args.arch}/gen_pcode.py", arch=args.arch)
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
import unittest
|
||||
from extra.assembly.amd.autogen.rdna3 import *
|
||||
from extra.assembly.amd.lib import Inst, RawImm, SGPR, VGPR
|
||||
from extra.assembly.amd.dsl import Inst, RawImm, SGPR, VGPR
|
||||
|
||||
class TestRegisterSliceSyntax(unittest.TestCase):
|
||||
"""
|
||||
|
||||
@@ -7,7 +7,7 @@ Set USE_HW=1 to run on both emulator and real hardware, comparing results.
|
||||
|
||||
import ctypes, unittest, os, struct
|
||||
from extra.assembly.amd.autogen.rdna3 import *
|
||||
from extra.assembly.amd.lib import RawImm
|
||||
from extra.assembly.amd.dsl import RawImm
|
||||
from extra.assembly.amd.emu import WaveState, run_asm, set_valid_mem_ranges
|
||||
from extra.assembly.amd.pcode import _i32, _f32
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"""Test MUBUF, MTBUF, MIMG, EXP, DS formats against LLVM."""
|
||||
import unittest
|
||||
from extra.assembly.amd.autogen.rdna3 import *
|
||||
from extra.assembly.amd.lib import encode_src
|
||||
from extra.assembly.amd.dsl import encode_src
|
||||
|
||||
class TestMUBUF(unittest.TestCase):
|
||||
"""Test MUBUF (buffer) instructions."""
|
||||
@@ -308,7 +308,7 @@ class TestVOP3Literal(unittest.TestCase):
|
||||
def test_vop3_with_literal(self):
|
||||
# v_add3_u32 v5, vcc_hi, 0xaf123456, v255
|
||||
# GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
|
||||
from extra.assembly.amd.lib import RawImm
|
||||
from extra.assembly.amd.dsl import RawImm
|
||||
inst = VOP3(VOP3Op.V_ADD3_U32, vdst=v[5], src0=RawImm(107), src1=0xaf123456, src2=v[255])
|
||||
expected = bytes([0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf])
|
||||
self.assertEqual(inst.to_bytes(), expected)
|
||||
@@ -316,14 +316,14 @@ class TestVOP3Literal(unittest.TestCase):
|
||||
def test_vop3_literal_null_operand(self):
|
||||
# v_add3_u32 v5, null, exec_lo, 0xaf123456
|
||||
# GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
|
||||
from extra.assembly.amd.lib import RawImm
|
||||
from extra.assembly.amd.dsl import RawImm
|
||||
inst = VOP3(VOP3Op.V_ADD3_U32, vdst=v[5], src0=NULL, src1=RawImm(126), src2=0xaf123456)
|
||||
expected = bytes([0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf])
|
||||
self.assertEqual(inst.to_bytes(), expected)
|
||||
|
||||
def test_vop3p_with_literal(self):
|
||||
# Test VOP3P literal encoding (also uses Inst64)
|
||||
from extra.assembly.amd.lib import RawImm
|
||||
from extra.assembly.amd.dsl import RawImm
|
||||
inst = VOP3P(VOP3POp.V_PK_ADD_F16, vdst=v[5], src0=RawImm(240), src1=0x12345678, src2=v[0])
|
||||
self.assertEqual(len(inst.to_bytes()), 12) # 8 bytes + 4 byte literal
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
import unittest, struct
|
||||
from extra.assembly.amd.autogen.rdna3 import *
|
||||
from extra.assembly.amd.lib import Inst
|
||||
from extra.assembly.amd.dsl import Inst
|
||||
from extra.assembly.amd.asm import asm
|
||||
from extra.assembly.amd.test.test_roundtrip import compile_asm
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ class TestPDFParserGenerate(unittest.TestCase):
|
||||
|
||||
def test_pdf_parser(self):
|
||||
"""Single test that validates all PDF parser outputs."""
|
||||
from extra.assembly.amd.lib import generate
|
||||
from extra.assembly.amd.dsl import generate
|
||||
result = generate()
|
||||
|
||||
# test_all_formats_present
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"""Roundtrip tests: generate tinygrad kernels, decode instructions, re-encode, verify match."""
|
||||
import unittest, io, sys, re, subprocess, os
|
||||
from extra.assembly.amd.autogen.rdna3 import *
|
||||
from extra.assembly.amd.lib import Inst
|
||||
from extra.assembly.amd.dsl import Inst
|
||||
from extra.assembly.amd.asm import asm
|
||||
from extra.assembly.amd.test.helpers import get_llvm_mc, get_llvm_objdump
|
||||
|
||||
|
||||
Reference in New Issue
Block a user