add CDNA3+RDNA4 support (#13882)

* fix CI

* remove junk

* rename lib to dsl

* correct

* cleanups
This commit is contained in:
George Hotz
2025-12-29 15:51:29 -05:00
committed by GitHub
parent 72236bbd3d
commit 9d8397be11
18 changed files with 29464 additions and 1730 deletions

View File

@@ -679,22 +679,11 @@ jobs:
run: python -m pytest -n=auto extra/assembly/amd/ --durations 20
- name: Install pdfplumber
run: pip install pdfplumber
- name: Verify RDNA3 autogen is up to date
- name: Verify AMD autogen is up to date
run: |
python -m extra.assembly.amd.lib --arch rdna3
git diff --exit-code extra/assembly/amd/autogen/rdna3/__init__.py
- name: Verify CDNA4 autogen is up to date
run: |
python -m extra.assembly.amd.lib --arch cdna4
git diff --exit-code extra/assembly/amd/autogen/cdna4/__init__.py
- name: Verify RDNA3 pcode autogen is up to date
run: |
python -m extra.assembly.amd.pcode --arch rdna3
git diff --exit-code extra/assembly/amd/autogen/rdna3/gen_pcode.py
- name: Verify CDNA4 pcode autogen is up to date
run: |
python -m extra.assembly.amd.pcode --arch cdna4
git diff --exit-code extra/assembly/amd/autogen/cdna4/gen_pcode.py
python -m extra.assembly.amd.dsl --arch all
python -m extra.assembly.amd.pcode --arch all
git diff --exit-code extra/assembly/amd/autogen/
testnvidia:
strategy:

View File

@@ -76,10 +76,10 @@ VIZ=1 python -c "from tinygrad import Tensor; Tensor.ones(10).sum().realize()"
## Auto-generated Files (DO NOT EDIT)
The following files are auto-generated and should never be edited manually:
- `extra/assembly/amd/autogen/rdna3/__init__.py` - Generated by `python -m extra.assembly.amd.lib --arch rdna3`
- `extra/assembly/amd/autogen/rdna3/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch rdna3`
- `extra/assembly/amd/autogen/cdna4/__init__.py` - Generated by `python -m extra.assembly.amd.lib --arch cdna4`
- `extra/assembly/amd/autogen/cdna4/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch cdna4`
- `extra/assembly/amd/autogen/{arch}/__init__.py` - Generated by `python -m extra.assembly.amd.dsl --arch {arch}`
- `extra/assembly/amd/autogen/{arch}/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch {arch}`
Where `{arch}` is one of: `rdna3`, `rdna4`, `cdna`
To add missing instruction implementations, add them to `extra/assembly/amd/emu.py` instead.

View File

@@ -1,7 +1,7 @@
# RDNA3 assembler and disassembler
from __future__ import annotations
import re
from extra.assembly.amd.lib import Inst, RawImm, Reg, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap
from extra.assembly.amd.dsl import Inst, RawImm, Reg, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap
# Decoding helpers
SPECIAL_GPRS = {106: "vcc_lo", 107: "vcc_hi", 124: "null", 125: "m0", 126: "exec_lo", 127: "exec_hi", 253: "scc"}

View File

@@ -1,7 +1,7 @@
# autogenerated from AMD CDNA4 ISA PDF by lib.py - do not edit
# autogenerated from AMD CDNA3+CDNA4 ISA PDF by dsl.py - do not edit
from enum import IntEnum
from typing import Annotated
from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
import functools
class SrcEnum(IntEnum):
@@ -158,6 +158,12 @@ class DSOp(IntEnum):
DS_READ2ST64_B64 = 120
DS_ADD_RTN_F64 = 124
DS_CONDXCHG32_RTN_B64 = 126
DS_GWS_SEMA_RELEASE_ALL = 152
DS_GWS_INIT = 153
DS_GWS_SEMA_V = 154
DS_GWS_SEMA_BR = 155
DS_GWS_SEMA_P = 156
DS_GWS_BARRIER = 157
DS_READ_ADDTID_B32 = 182
DS_PK_ADD_RTN_F16 = 183
DS_PK_ADD_RTN_BF16 = 184
@@ -1385,6 +1391,8 @@ class VOP3POp(IntEnum):
V_SMFMAC_F32_16X16X128_BF8_BF8 = 59
V_SMFMAC_F32_16X16X128_BF8_FP8 = 60
V_SMFMAC_F32_16X16X128_FP8_BF8 = 61
V_MFMA_F32_16X16X8_XF32 = 62
V_MFMA_F32_32X32X4_XF32 = 63
V_MFMA_F32_32X32X1_2B_F32 = 64
V_MFMA_F32_16X16X1_4B_F32 = 65
V_MFMA_F32_4X4X1_16B_F32 = 66
@@ -1648,6 +1656,11 @@ class VOPCOp(IntEnum):
# instruction formats
class DPP(Inst64):
encoding = bits[31:26] == 0b110110
src1_sel = bits[58:56]
src1_sext = bits[59]
src1_neg = bits[60]
src1_abs = bits[61]
s1 = bits[63]
offset0 = bits[7:0]
offset1 = bits[15:8]
op = bits[24:17]
@@ -1667,6 +1680,7 @@ class DS(Inst64):
data1:VGPRField = bits[55:48]
offset0 = bits[7:0]
offset1 = bits[15:8]
gds = bits[16]
acc = bits[25]
class FLAT(Inst64):
@@ -1694,10 +1708,10 @@ class MTBUF(Inst64):
offset:Imm = bits[11:0]
offen = bits[12]
idxen = bits[13]
sc0 = bits[14]
sc1 = bits[53]
nt = bits[54]
acc = bits[55]
sc0 = bits[14]
class MUBUF(Inst64):
encoding = bits[31:26] == 0b111000
@@ -1735,6 +1749,23 @@ class SDWA(Inst64):
sd = bits[47]
row_mask = bits[63:60]
class SDWAB(Inst64):
src0:Src = bits[39:32]
dst_sel = bits[42:40]
dst_u = bits[44:43]
clmp = bits[45]
omod = bits[47:46]
src0_sel = bits[50:48]
src0_sext = bits[51]
src0_neg = bits[52]
src0_abs = bits[53]
s0 = bits[55]
src1_sel = bits[58:56]
src1_sext = bits[59]
src1_neg = bits[60]
src1_abs = bits[61]
s1 = bits[63]
class SMEM(Inst64):
encoding = bits[31:26] == 0b110000
op:Annotated[BitField, SMEMOp] = bits[25:18]
@@ -1950,6 +1981,12 @@ ds_read2_b64 = functools.partial(DS, DSOp.DS_READ2_B64)
ds_read2st64_b64 = functools.partial(DS, DSOp.DS_READ2ST64_B64)
ds_add_rtn_f64 = functools.partial(DS, DSOp.DS_ADD_RTN_F64)
ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64)
ds_gws_sema_release_all = functools.partial(DS, DSOp.DS_GWS_SEMA_RELEASE_ALL)
ds_gws_init = functools.partial(DS, DSOp.DS_GWS_INIT)
ds_gws_sema_v = functools.partial(DS, DSOp.DS_GWS_SEMA_V)
ds_gws_sema_br = functools.partial(DS, DSOp.DS_GWS_SEMA_BR)
ds_gws_sema_p = functools.partial(DS, DSOp.DS_GWS_SEMA_P)
ds_gws_barrier = functools.partial(DS, DSOp.DS_GWS_BARRIER)
ds_read_addtid_b32 = functools.partial(DS, DSOp.DS_READ_ADDTID_B32)
ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16)
ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16)
@@ -3145,6 +3182,8 @@ v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X
v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8)
v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8)
v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8)
v_mfma_f32_16x16x8_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X8_XF32)
v_mfma_f32_32x32x4_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_XF32)
v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X1_2B_F32)
v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X1_4B_F32)
v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X1_16B_F32)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
# autogenerated from AMD RDNA3.5 ISA PDF by lib.py - do not edit
# autogenerated from AMD RDNA3.5 ISA PDF by dsl.py - do not edit
from enum import IntEnum
from typing import Annotated
from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField
import functools
class SrcEnum(IntEnum):

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -289,8 +289,10 @@ class Inst64(Inst): pass
# ═══════════════════════════════════════════════════════════════════════════════
PDF_URLS = {
"rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content",
"cdna4": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf",
"rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content", # RDNA3.5
"rdna4": "https://docs.amd.com/api/khub/documents/uQpkEvk3pv~kfAb2x~j4uw/content",
"cdna": ["https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf",
"https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf"],
}
FIELD_TYPES = {'SSRC0': 'SSrc', 'SSRC1': 'SSrc', 'SOFFSET': 'SSrc', 'SADDR': 'SSrc', 'SRC0': 'Src', 'SRC1': 'Src', 'SRC2': 'Src',
'SDST': 'SGPRField', 'SBASE': 'SGPRField', 'SDATA': 'SGPRField', 'SRSRC': 'SGPRField', 'VDST': 'VGPRField', 'VSRC1': 'VGPRField', 'VDATA': 'VGPRField',
@@ -338,28 +340,34 @@ def _parse_fields_table(table: list, fmt: str, enums: set[str]) -> list[tuple]:
fields.append((name, hi, lo, enc_val, ftype))
return fields
def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
"""Generate instruction definitions from AMD ISA PDF. Returns dict with formats for testing."""
def _parse_single_pdf(url: str) -> dict:
"""Parse a single PDF and return raw data (formats, enums, src_enum, doc_name, is_cdna)."""
import re, pdfplumber
from tinygrad.helpers import fetch
pdf = pdfplumber.open(fetch(PDF_URLS[arch]))
pdf = pdfplumber.open(fetch(url))
# Auto-detect document type from first page
first_page_text = pdf.pages[0].extract_text() or ''
is_cdna4 = 'CDNA4' in first_page_text or 'CDNA 4' in first_page_text
doc_name = "CDNA4" if is_cdna4 else "RDNA3.5"
is_cdna3 = 'CDNA3' in first_page_text or 'CDNA 3' in first_page_text or 'MI300' in first_page_text
is_cdna = is_cdna3 or is_cdna4
is_rdna4 = 'RDNA4' in first_page_text or 'RDNA 4' in first_page_text
is_rdna35 = 'RDNA3.5' in first_page_text or 'RDNA 3.5' in first_page_text # Check 3.5 before 3
is_rdna3 = not is_rdna35 and ('RDNA3' in first_page_text or 'RDNA 3' in first_page_text)
doc_name = "CDNA4" if is_cdna4 else "CDNA3" if is_cdna3 else "RDNA4" if is_rdna4 else "RDNA3.5" if is_rdna35 else "RDNA3" if is_rdna3 else "Unknown"
# Find the "Microcode Formats" section by searching the PDF
# Look for "Chapter X. Microcode Formats" (RDNA3) or first format subsection header (CDNA4)
# Find the "Microcode Formats" section - search for SOP2 format definition
microcode_start = None
for i, page in enumerate(pdf.pages):
text = page.extract_text() or ''
if re.search(r'Chapter \d+\.\s+Microcode Formats', text) or \
(i > 100 and re.search(r'^\d+\.\d+\.\d+\.\s+SOP2\s*\n', text, re.M)):
total_pages = len(pdf.pages)
# Search from likely locations (formats are typically 20-95% through the document - RDNA3 has them at ~25%)
for i in range(int(total_pages * 0.2), total_pages):
text = pdf.pages[i].extract_text() or ''
# Look for "X.Y.Z. SOP2" section header or "Chapter X. Microcode Formats"
if re.search(r'\d+\.\d+\.\d+\.\s+SOP2\b', text) or re.search(r'Chapter \d+\.\s+Microcode Formats', text):
microcode_start = i
break
if microcode_start is None: microcode_start = 150 # fallback for RDNA3.5
if microcode_start is None: microcode_start = int(total_pages * 0.9)
pages = pdf.pages[microcode_start:microcode_start + 50]
page_texts = [p.extract_text() or '' for p in pages]
@@ -392,16 +400,13 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
return (pos := text.find('Field Name')) != -1 and bool(re.search(r'\d+\.\d+\.\d+\.\s+\w+\s*\n', text[:pos]))
# find format headers with their page indices
format_headers = [] # (fmt_name, page_idx, header_pos)
format_headers = []
for i, text in enumerate(page_texts):
# Match "X.Y.Z. FORMAT_NAME" followed by Description (RDNA3) or newline (CDNA4)
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n?Description', text): format_headers.append((m.group(1), i, m.start()))
for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n', text):
fmt_name = m.group(1)
# For CDNA4: accept uppercase format names (SOP2, VOP1, etc) directly
if is_cdna4 and fmt_name.isupper() and len(fmt_name) >= 2:
if is_cdna and fmt_name.isupper() and len(fmt_name) >= 2:
format_headers.append((fmt_name, i, m.start()))
# For RDNA3: check for Description on next page
elif m.start() > len(text) - 200 and 'Description' not in text[m.end():] and i + 1 < len(page_texts):
next_text = page_texts[i + 1].lstrip()
if next_text.startswith('Description') or (next_text.startswith('"RDNA') and 'Description' in next_text[:200]):
@@ -414,7 +419,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
text, tables = page_texts[page_idx], page_tables[page_idx]
field_pos = text.find('Field Name', header_pos)
# find fields table with ENCODING (same page or up to 2 pages ahead)
fields = None
for offset in range(3):
if page_idx + offset >= len(pages): break
@@ -425,7 +429,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
break
if fields: break
# for modifier formats (no ENCODING), accept first fields table on same page
if not fields and field_pos > header_pos:
for t in tables:
if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)):
@@ -435,7 +438,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
if not fields: continue
field_names = {f[0] for f in fields}
# check next pages for continuation fields (tables without ENCODING)
for pg_offset in range(1, 3):
if page_idx + pg_offset >= len(pages) or has_header_before_fields(page_texts[page_idx + pg_offset]): break
for t in page_tables[page_idx + pg_offset]:
@@ -447,19 +449,70 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
break
formats[fmt_name] = fields
# fix known PDF errors (verified against LLVM test vectors)
# SMEM: PDF says DLC=bit14, GLC=bit16 but actual encoding is DLC=bit13, GLC=bit14
# fix known PDF errors
if 'SMEM' in formats:
formats['SMEM'] = [(n, 13 if n == 'DLC' else 14 if n == 'GLC' else h, 13 if n == 'DLC' else 14 if n == 'GLC' else l, e, t)
for n, h, l, e, t in formats['SMEM']]
return {"formats": formats, "enums": enums, "src_enum": src_enum, "doc_name": doc_name, "is_cdna": is_cdna}
def _merge_results(results: list[dict]) -> dict:
  """Merge the parse results of several PDFs into one superset result.

  Each entry of `results` is read through its "doc_name", "is_cdna", "src_enum", "enums" and "formats" keys.
  The returned dict has "formats", "enums", "src_enum" (seeded with a copy of SRC_EXTRAS), "doc_names"
  (one name per input, in input order) and "is_cdna" (truthy if any input set it).

  Raises AssertionError when two inputs disagree: the same SrcEnum or opcode value mapped to different
  names, or the same field of a format placed at different (hi, lo) bits. The checks are explicit raises
  rather than `assert` statements so they are not stripped when Python runs with -O.
  """
  def merge_names(dst: dict, src: dict, label: str) -> None:
    # union of value -> name; an already-known value must keep the same name
    for val, name in src.items():
      known = dst.setdefault(val, name)
      if known != name: raise AssertionError(f"{label} conflict: {val} = {known} vs {name}")

  merged = {"formats": {}, "enums": {}, "src_enum": dict(SRC_EXTRAS), "doc_names": [], "is_cdna": False}
  for r in results:
    merged["doc_names"].append(r["doc_name"])
    merged["is_cdna"] = merged["is_cdna"] or r["is_cdna"]
    # Merge src_enum (union, no conflicts allowed)
    merge_names(merged["src_enum"], r["src_enum"], "SrcEnum")
    # Merge enums (union of ops per enum, no conflicts allowed)
    for enum_name, ops in r["enums"].items():
      merge_names(merged["enums"].setdefault(enum_name, {}), ops, enum_name)
    # Merge formats (union of fields, same field name must sit at the same bit positions)
    for fmt_name, fields in r["formats"].items():
      if fmt_name not in merged["formats"]:
        merged["formats"][fmt_name] = list(fields)  # copy so the input's list is not shared
        continue
      existing = {f[0]: (f[1], f[2]) for f in merged["formats"][fmt_name]}  # name -> (hi, lo)
      for f in fields:
        name, hi, lo = f[0], f[1], f[2]
        if name not in existing:
          merged["formats"][fmt_name].append(f)
        elif existing[name] != (hi, lo):
          raise AssertionError(f"Format {fmt_name} field {name} conflict: bits {existing[name]} vs ({hi}, {lo})")
  return merged
def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
"""Generate instruction definitions from AMD ISA PDF(s). Returns dict with formats for testing."""
urls = PDF_URLS[arch]
if isinstance(urls, str): urls = [urls]
# Parse all PDFs and merge
results = [_parse_single_pdf(url) for url in urls]
if len(results) == 1:
merged = results[0]
doc_name = merged["doc_name"]
else:
merged = _merge_results(results)
doc_name = "+".join(merged["doc_names"])
formats, enums, src_enum = merged["formats"], merged["enums"], merged["src_enum"]
# generate output
def enum_lines(name, items):
return [f"class {name}(IntEnum):"] + [f" {n} = {v}" for v, n in sorted(items.items())] + [""]
def field_key(f): return order.index(f[0].lower()) if f[0].lower() in order else 1000
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by lib.py - do not edit", "from enum import IntEnum",
lines = [f"# autogenerated from AMD {doc_name} ISA PDF by dsl.py - do not edit", "from enum import IntEnum",
"from typing import Annotated",
"from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField",
"from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField",
"import functools", ""]
lines += enum_lines("SrcEnum", src_enum) + sum([enum_lines(n, ops) for n, ops in sorted(enums.items())], [])
# Format-specific field defaults (verified against LLVM test vectors)
@@ -475,7 +528,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
if defaults := format_defaults.get(fmt_name):
lines.append(f" _defaults = {defaults}")
for name, hi, lo, _, ftype in sorted([f for f in fields if f[0] != 'ENCODING'], key=field_key):
# Wrap IntEnum types (ending in Op) with Annotated[BitField, ...] for correct typing
if ftype and ftype.endswith('Op'):
ann = f":Annotated[BitField, {ftype}]"
else:
@@ -489,23 +541,18 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=2"}.get(fmt, "")
tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}")
if fmt in formats or fmt in ("GLOBAL", "SCRATCH"):
# VOP1/VOP2/VOPC get _e32 suffix, VOP3 promoted ops (< 512) get _e64 suffix
if fmt in ("VOP1", "VOP2", "VOPC"):
suffix = "_e32"
elif fmt == "VOP3" and op_val < 512:
suffix = "_e64"
else:
suffix = ""
# FMAMK/FMAAK have a literal constant K that must be passed via literal= kwarg
# FMAMK: D = S0.f * K + S1.f (K is 3rd operand in assembly syntax)
# FMAAK: D = S0.f * S1.f + K (K is 4th operand in assembly syntax)
if name in ('V_FMAMK_F32', 'V_FMAMK_F16'):
lines.append(f"def {name.lower()}{suffix}(vdst, src0, K, vsrc1): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
elif name in ('V_FMAAK_F32', 'V_FMAAK_F16'):
lines.append(f"def {name.lower()}{suffix}(vdst, src0, vsrc1, K): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)")
else:
lines.append(f"{name.lower()}{suffix} = functools.partial({tgt}.{name}{seg})")
# export SrcEnum values, but skip DPP8/DPP16 which conflict with class names
skip_exports = {'DPP8', 'DPP16'}
src_names = {name for _, name in src_enum.items()}
lines += [""] + [f"{name} = SrcEnum.{name}" for _, name in sorted(src_enum.items()) if name not in skip_exports]
@@ -519,7 +566,12 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Generate instruction definitions from AMD ISA PDF")
parser.add_argument("--arch", choices=list(PDF_URLS.keys()), default="rdna3", help="Target architecture (default: rdna3)")
parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3", help="Target architecture (default: rdna3)")
args = parser.parse_args()
if args.arch == "all":
for arch in PDF_URLS.keys():
result = generate(f"extra/assembly/amd/autogen/{arch}/__init__.py", arch=arch)
print(f"{arch}: generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes")
else:
result = generate(f"extra/assembly/amd/autogen/{args.arch}/__init__.py", arch=args.arch)
print(f"generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes")

View File

@@ -2,7 +2,7 @@
# mypy: ignore-errors
from __future__ import annotations
import ctypes, os
from extra.assembly.amd.lib import Inst, RawImm
from extra.assembly.amd.dsl import Inst, RawImm
from extra.assembly.amd.pcode import _f32, _i32, _sext, _f16, _i16, _f64, _i64
from extra.assembly.amd.autogen.rdna3.gen_pcode import get_compiled_functions
from extra.assembly.amd.autogen.rdna3 import (

View File

@@ -702,7 +702,7 @@ class ExecContext:
# PDF EXTRACTION AND CODE GENERATION
# ═══════════════════════════════════════════════════════════════════════════════
from extra.assembly.amd.lib import PDF_URLS
from extra.assembly.amd.dsl import PDF_URLS
INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M)
# Patterns that can't be handled by the DSL (require special handling in emu.py)
@@ -736,38 +736,52 @@ def extract_pseudocode(text: str) -> str | None:
if is_code: result.append(s)
return '\n'.join(result) if result else None
def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict:
"""Parse pseudocode from PDF for all ops. Returns {enum_cls: {op: pseudocode}}."""
def _get_op_enums(arch: str) -> list:
  """Return the op enums exported by the autogen module of `arch`.

  Imports extra.assembly.amd.autogen.{arch} and collects, in a fixed order, every attribute from the
  known enum-name list that the module actually defines. Names the module lacks are skipped.
  """
  from importlib import import_module
  module = import_module(f"extra.assembly.amd.autogen.{arch}")
  # Fixed ordering keeps the result deterministic: common enums first, then arch-specific ones
  known_names = ('SOP1Op', 'SOP2Op', 'SOPCOp', 'SOPKOp', 'SOPPOp', 'VOP1Op', 'VOP2Op', 'VOP3Op', 'VOP3SDOp', 'VOP3POp', 'VOPCOp',
                 'VOP3AOp', 'VOP3BOp')
  return [getattr(module, enum_name) for enum_name in known_names if hasattr(module, enum_name)]
def _parse_pseudocode_from_single_pdf(url: str, defined_ops: dict, OP_ENUMS: list) -> dict:
"""Parse pseudocode from a single PDF."""
import pdfplumber
from tinygrad.helpers import fetch
from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
OP_ENUMS = [SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp]
defined_ops = {}
for enum_cls in OP_ENUMS:
for op in enum_cls:
if op.name.startswith(('S_', 'V_')): defined_ops[(op.name, op.value)] = (enum_cls, op)
pdf = pdfplumber.open(fetch(url))
total_pages = len(pdf.pages)
pdf = pdfplumber.open(fetch(PDF_URLS[arch]))
page_cache = {}
def get_page_text(i):
if i not in page_cache: page_cache[i] = pdf.pages[i].extract_text() or ''
return page_cache[i]
# Find the "Instructions" chapter by looking for "Chapter X. Instructions"
# Find the "Instructions" chapter - typically 15-40% through the document
instr_start = None
for i, page in enumerate(pdf.pages):
text = page.extract_text() or ''
if re.search(r'Chapter \d+\.\s+Instructions', text):
search_starts = [int(total_pages * 0.2), int(total_pages * 0.1), 0]
for start in search_starts:
for i in range(start, min(start + 100, total_pages)):
if re.search(r'Chapter \d+\.\s+Instructions', get_page_text(i)):
instr_start = i
break
if instr_start is None: instr_start = len(pdf.pages) // 3 # fallback
if instr_start: break
if instr_start is None: instr_start = total_pages // 3 # fallback
# Find end - stop at "Microcode Formats" chapter
instr_end = len(pdf.pages)
for i, page in enumerate(pdf.pages[instr_start:], instr_start):
text = page.extract_text() or ''
if re.search(r'Chapter \d+\.\s+Microcode Formats', text):
# Find end - stop at "Microcode Formats" chapter (typically 60-70% through)
instr_end = total_pages
search_starts = [int(total_pages * 0.6), int(total_pages * 0.5), instr_start]
for start in search_starts:
for i in range(start, min(start + 100, total_pages)):
if re.search(r'Chapter \d+\.\s+Microcode Formats', get_page_text(i)):
instr_end = i
break
if instr_end < total_pages: break
all_text = '\n'.join(pdf.pages[i].extract_text() or '' for i in range(instr_start, instr_end))
# Extract remaining pages (some already cached from chapter search)
all_text = '\n'.join(get_page_text(i) for i in range(instr_start, instr_end))
matches = list(INST_PATTERN.finditer(all_text))
instructions: dict = {cls: {} for cls in OP_ENUMS}
@@ -783,12 +797,39 @@ def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict:
return instructions
def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict:
  """Collect pseudocode for every op of `arch` from its PDF(s). Returns {enum_cls: {op: pseudocode}}.

  PDF_URLS[arch] may be a single URL or a list of URLs. The list is walked in reverse, and the first
  pseudocode recorded for an op is kept; a differing version found in a later-visited PDF is only reported.
  """
  OP_ENUMS = _get_op_enums(arch)
  # (name, value) -> (enum class, op), restricted to S_* / V_* ops
  defined_ops = {(op.name, op.value): (enum_cls, op)
                 for enum_cls in OP_ENUMS for op in enum_cls if op.name.startswith(('S_', 'V_'))}

  sources = PDF_URLS[arch]
  if isinstance(sources, str): sources = [sources]

  # Walk the PDFs back to front so the last-listed one (RDNA3.5, CDNA4) takes priority
  instructions: dict = {cls: {} for cls in OP_ENUMS}
  for url in reversed(sources):
    parsed = _parse_pseudocode_from_single_pdf(url, defined_ops, OP_ENUMS)
    for cls, ops in parsed.items():
      kept = instructions[cls]
      for op, pseudocode in ops.items():
        if op not in kept:
          kept[op] = pseudocode
          continue
        if kept[op] == pseudocode: continue
        # same op, different text: keep what we already have and report the discarded version
        print(f" Ignoring {op.name} from older PDF:")
        print(f" new: {kept[op]!r}")
        print(f" old: {pseudocode!r}")
  return instructions
def generate_gen_pcode(output_path: str = "extra/assembly/amd/autogen/rdna3/gen_pcode.py", arch: str = "rdna3"):
"""Generate gen_pcode.py - compiled pseudocode functions for the emulator."""
from pathlib import Path
from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
OP_ENUMS = [SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp]
OP_ENUMS = _get_op_enums(arch)
print("Parsing pseudocode from PDF...")
by_cls = parse_pseudocode_from_pdf(arch)
@@ -803,11 +844,13 @@ def generate_gen_pcode(output_path: str = "extra/assembly/amd/autogen/rdna3/gen_
print(f"Total: {total_found}/{total_ops} ({100*total_found//total_ops}%)")
print("\nCompiling to pseudocode functions...")
# Build dynamic import line based on available enums
enum_names = [e.__name__ for e in OP_ENUMS]
lines = [f'''# autogenerated by pcode.py - do not edit
# to regenerate: python -m extra.assembly.amd.pcode --arch {arch}
# ruff: noqa: E501,F405,F403
# mypy: ignore-errors
from extra.assembly.amd.autogen.{arch} import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
from extra.assembly.amd.autogen.{arch} import {", ".join(enum_names)}
from extra.assembly.amd.pcode import *
''']
@@ -965,6 +1008,8 @@ from extra.assembly.amd.pcode import *
lines.append('')
# Add manually implemented V_WRITELANE_B32 (not in PDF pseudocode, requires special vgpr_write handling)
# Only add for architectures that have VOP3Op (RDNA) not VOP3AOp/VOP3BOp (CDNA)
if 'VOP3Op' in enum_names:
lines.append('''
# V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode)
def _VOP3Op_V_WRITELANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
@@ -987,6 +1032,10 @@ VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Generate pseudocode functions from AMD ISA PDF")
parser.add_argument("--arch", choices=list(PDF_URLS.keys()), default="rdna3", help="Target architecture (default: rdna3)")
parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3", help="Target architecture (default: rdna3)")
args = parser.parse_args()
if args.arch == "all":
for arch in PDF_URLS.keys():
generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{arch}/gen_pcode.py", arch=arch)
else:
generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{args.arch}/gen_pcode.py", arch=args.arch)

View File

@@ -4,7 +4,7 @@
import unittest
from extra.assembly.amd.autogen.rdna3 import *
from extra.assembly.amd.lib import Inst, RawImm, SGPR, VGPR
from extra.assembly.amd.dsl import Inst, RawImm, SGPR, VGPR
class TestRegisterSliceSyntax(unittest.TestCase):
"""

View File

@@ -7,7 +7,7 @@ Set USE_HW=1 to run on both emulator and real hardware, comparing results.
import ctypes, unittest, os, struct
from extra.assembly.amd.autogen.rdna3 import *
from extra.assembly.amd.lib import RawImm
from extra.assembly.amd.dsl import RawImm
from extra.assembly.amd.emu import WaveState, run_asm, set_valid_mem_ranges
from extra.assembly.amd.pcode import _i32, _f32

View File

@@ -2,7 +2,7 @@
"""Test MUBUF, MTBUF, MIMG, EXP, DS formats against LLVM."""
import unittest
from extra.assembly.amd.autogen.rdna3 import *
from extra.assembly.amd.lib import encode_src
from extra.assembly.amd.dsl import encode_src
class TestMUBUF(unittest.TestCase):
"""Test MUBUF (buffer) instructions."""
@@ -308,7 +308,7 @@ class TestVOP3Literal(unittest.TestCase):
def test_vop3_with_literal(self):
# v_add3_u32 v5, vcc_hi, 0xaf123456, v255
# GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
from extra.assembly.amd.lib import RawImm
from extra.assembly.amd.dsl import RawImm
inst = VOP3(VOP3Op.V_ADD3_U32, vdst=v[5], src0=RawImm(107), src1=0xaf123456, src2=v[255])
expected = bytes([0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf])
self.assertEqual(inst.to_bytes(), expected)
@@ -316,14 +316,14 @@ class TestVOP3Literal(unittest.TestCase):
def test_vop3_literal_null_operand(self):
# v_add3_u32 v5, null, exec_lo, 0xaf123456
# GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
from extra.assembly.amd.lib import RawImm
from extra.assembly.amd.dsl import RawImm
inst = VOP3(VOP3Op.V_ADD3_U32, vdst=v[5], src0=NULL, src1=RawImm(126), src2=0xaf123456)
expected = bytes([0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf])
self.assertEqual(inst.to_bytes(), expected)
def test_vop3p_with_literal(self):
# Test VOP3P literal encoding (also uses Inst64)
from extra.assembly.amd.lib import RawImm
from extra.assembly.amd.dsl import RawImm
inst = VOP3P(VOP3POp.V_PK_ADD_F16, vdst=v[5], src0=RawImm(240), src1=0x12345678, src2=v[0])
self.assertEqual(len(inst.to_bytes()), 12) # 8 bytes + 4 byte literal

View File

@@ -3,7 +3,7 @@
import unittest, struct
from extra.assembly.amd.autogen.rdna3 import *
from extra.assembly.amd.lib import Inst
from extra.assembly.amd.dsl import Inst
from extra.assembly.amd.asm import asm
from extra.assembly.amd.test.test_roundtrip import compile_asm

View File

@@ -41,7 +41,7 @@ class TestPDFParserGenerate(unittest.TestCase):
def test_pdf_parser(self):
"""Single test that validates all PDF parser outputs."""
from extra.assembly.amd.lib import generate
from extra.assembly.amd.dsl import generate
result = generate()
# test_all_formats_present

View File

@@ -2,7 +2,7 @@
"""Roundtrip tests: generate tinygrad kernels, decode instructions, re-encode, verify match."""
import unittest, io, sys, re, subprocess, os
from extra.assembly.amd.autogen.rdna3 import *
from extra.assembly.amd.lib import Inst
from extra.assembly.amd.dsl import Inst
from extra.assembly.amd.asm import asm
from extra.assembly.amd.test.helpers import get_llvm_mc, get_llvm_objdump