Tuple -> tuple, List -> list [pr] (#8936)

commit a092b6395d (parent d5183e1584)
Author: chenyu, committed by GitHub
Date: 2025-02-06 14:21:19 -05:00

9 changed files with 43 additions and 47 deletions
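This PR swaps `typing.List`/`typing.Tuple` annotations for the built-in `list`/`tuple` generics, which are subscriptable in annotations since Python 3.9 (PEP 585). A minimal sketch of the pattern (names are illustrative, not from this diff):

```python
# before: generic aliases imported from typing
from typing import List, Tuple

def square_pairs_old(xs: List[int]) -> List[Tuple[int, int]]:
    return [(x, x * x) for x in xs]

# after (PEP 585): the built-in types are generic themselves, no import needed
def square_pairs_new(xs: list[int]) -> list[tuple[int, int]]:
    return [(x, x * x) for x in xs]

assert square_pairs_new([1, 2]) == [(1, 1), (2, 4)]
```

Only the container aliases are replaced; `Union`, `Optional`, `Any`, and `Callable` are kept as `typing` imports in the hunks below.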

View File

@@ -1,4 +1,3 @@
-from typing import List, Tuple
from extra.models.resnet import ResNet50
from extra.mcts_search import mcts_search
from examples.mlperf.helpers import get_mlperf_bert_model
@@ -79,7 +78,7 @@ if __name__ == "__main__":
rawbufs = bufs_from_lin(Kernel(si.ast))
# "linearize" the op into uops in different ways
-lins: List[Tuple[Kernel, str]] = []
+lins: list[tuple[Kernel, str]] = []
# always try hand coded opt
lin = Kernel(si.ast, opts=device.renderer)

View File

@@ -1,5 +1,5 @@
import math
-from typing import Union, Tuple
+from typing import Union
from tinygrad import Tensor, nn, dtypes
from tinygrad.helpers import prod, argfix
@@ -56,7 +56,7 @@ class EmbeddingBert(nn.Embedding):
return (arange == idx).mul(vals).sum(2, acc_dtype=vals.dtype)
class LayerNormBert:
-def __init__(self, normalized_shape:Union[int, Tuple[int, ...]], eps:float=1e-12, elementwise_affine:bool=True):
+def __init__(self, normalized_shape:Union[int, tuple[int, ...]], eps:float=1e-12, elementwise_affine:bool=True):
self.normalized_shape = (normalized_shape,) if isinstance(normalized_shape, int) else tuple(normalized_shape)
self.axis, self.eps, self.elementwise_affine = tuple(-1-i for i in range(len(self.normalized_shape))), eps, elementwise_affine
self.weight, self.bias = (Tensor.ones(*self.normalized_shape, dtype=dtypes.float32), Tensor.zeros(*self.normalized_shape, dtype=dtypes.float32)) if elementwise_affine else (None, None)
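For reference, the `axis` expression above maps a normalized shape of length n to the last n axes, counted from the end; a standalone check of that expression (values are illustrative):

```python
# mirror of the axis expression in LayerNormBert.__init__ above
normalized_shape = (3, 4)
axis = tuple(-1 - i for i in range(len(normalized_shape)))
assert axis == (-1, -2)  # normalize over the last two dimensions
```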

View File

@@ -1,4 +1,4 @@
-from typing import Tuple, Union, Optional, Dict, Any
+from typing import Union, Optional, Any
from tinygrad import Tensor, Variable, TinyJit, dtypes, nn, Device
from tinygrad.helpers import getenv
@@ -15,7 +15,7 @@ def complex_mult(A, c, d):
co = a*d + b*c
return ro.cat(co, dim=-1)
-def apply_rotary_emb(xq:Tensor, xk:Tensor, freqs_cis:Tensor) -> Tuple[Tensor, Tensor]:
+def apply_rotary_emb(xq:Tensor, xk:Tensor, freqs_cis:Tensor) -> tuple[Tensor, Tensor]:
assert freqs_cis.shape[1] == xq.shape[1] == xk.shape[1], f"freqs_cis shape mismatch {freqs_cis.shape} xq:{xq.shape} xk:{xk.shape}"
xq = xq.reshape(*xq.shape[0:-1], -1, 2)
xk = xk.reshape(*xk.shape[0:-1], -1, 2)
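`complex_mult` above computes (a+bi)(c+di) = (ac-bd) + (ad+bc)i on tensors whose last axis holds real/imaginary pairs, which is the rotation at the heart of rotary embeddings. A scalar numpy sketch of the same identity (illustrative only):

```python
import numpy as np

# rotate the pair (a, b) by angle theta, i.e. multiply by cos(theta) + i*sin(theta)
a, b, theta = 1.0, 2.0, 0.3
c, d = np.cos(theta), np.sin(theta)
ro, co = a * c - b * d, a * d + b * c  # matches ro/co in complex_mult above
z = complex(a, b) * complex(c, d)      # same rotation via Python complex numbers
assert np.allclose([ro, co], [z.real, z.imag])
```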
@@ -181,7 +181,7 @@ class Transformer:
# *** helpers ***
-def convert_from_huggingface(weights:Dict[str, Tensor], model: Transformer, n_heads: int, n_kv_heads: int, permute_layers: bool = True):
+def convert_from_huggingface(weights:dict[str, Tensor], model: Transformer, n_heads: int, n_kv_heads: int, permute_layers: bool = True):
def permute(v: Tensor, n_heads: int):
return v.reshape(n_heads, 2, v.shape[0] // n_heads // 2, v.shape[1]).transpose(1, 2).reshape(*v.shape[:2])
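The `permute` helper converts Hugging Face's half-split rotary weight layout to the interleaved layout `apply_rotary_emb` expects; a numpy mirror of the reshape/transpose with toy sizes (illustrative, with `swapaxes(1, 2)` standing in for `transpose(1, 2)`):

```python
import numpy as np

n_heads, head_dim, in_dim = 1, 4, 1
v = np.arange(n_heads * head_dim * in_dim).reshape(n_heads * head_dim, in_dim)
out = v.reshape(n_heads, 2, head_dim // 2, in_dim).swapaxes(1, 2).reshape(*v.shape[:2])
# rows of each head reorder 0,1,2,3 -> 0,2,1,3: the two halves become interleaved pairs
assert out[:, 0].tolist() == [0, 2, 1, 3]
```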
@@ -207,7 +207,7 @@ def convert_from_huggingface(weights:Dict[str, Tensor], model: Transformer, n_he
sd[keymap[k]] = v
return sd
-def convert_from_gguf(weights:Dict[str, Tensor], model: Transformer):
+def convert_from_gguf(weights:dict[str, Tensor], model: Transformer):
keymap = {
"token_embd.weight": "tok_embeddings.weight",
**{f"blk.{l}.attn_norm.weight": f"layers.{l}.attention_norm.weight" for l in range(len(model.layers))},
@@ -222,7 +222,7 @@ def convert_from_gguf(weights:Dict[str, Tensor], model: Transformer):
sd["output.weight"] = weights["token_embd.weight"]
return sd
-def fix_bf16(weights:Dict[Any, Tensor]):
+def fix_bf16(weights:dict[Any, Tensor]):
if getenv("SUPPORT_BF16", 1):
# TODO: without casting to float16, 70B llama OOM on tinybox.
return {k:v.cast(dtypes.float16) if v.dtype == dtypes.bfloat16 else v for k,v in weights.items()}

View File

@@ -1,5 +1,4 @@
# stuff needed to unpack a kernel
-from typing import Tuple
from tinygrad import Variable
from tinygrad.codegen.kernel import Opt, OptOps
from tinygrad.ops import UOp, Ops, KernelInfo

View File

@@ -1,5 +1,5 @@
import time
-from typing import Callable, Optional, Tuple
+from typing import Callable, Optional
import numpy as np
from tinygrad import Tensor, dtypes
from tinygrad.ops import UOp, Ops, sint
@@ -40,14 +40,14 @@ def rand_for_dtype(dt:DType, size:int):
return np.random.choice([True, False], size=size)
return np.random.uniform(-10, 10, size=size).astype(_to_np_dtype(dt))
-def ast_const(dtype:DType, val:ConstType, shape:Tuple[sint, ...]=(), st:Optional[ShapeTracker]=None, st_src:Optional[Tuple[UOp]]=None) -> UOp:
+def ast_const(dtype:DType, val:ConstType, shape:tuple[sint, ...]=(), st:Optional[ShapeTracker]=None, st_src:Optional[tuple[UOp]]=None) -> UOp:
if st_src is None:
st_src = (st.to_uop() if st is not None else ShapeTracker.from_shape(()).reshape((1,)*len(shape)).expand(shape).to_uop(),)
st = unwrap(st_src[0].st)
if all(v.mask is None for v in st.views): return UOp.const(dtype, val).replace(src=(st.to_uop(),))
return UOp.const(dtype, val).valid(st)
-def timeit(fxn:Callable[..., T], *args, **kwargs) -> Tuple[T, float]:
+def timeit(fxn:Callable[..., T], *args, **kwargs) -> tuple[T, float]:
st = time.perf_counter_ns()
ret = fxn(*args, **kwargs)
return ret, (time.perf_counter_ns()-st)*1e-6
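`timeit` above returns the wrapped function's result alongside its wall-clock time, converted from nanoseconds to milliseconds by the `*1e-6` factor. A usage sketch:

```python
# hypothetical call against the timeit helper above
out, ms = timeit(sum, range(1_000_000))
print(f"sum={out} in {ms:.3f} ms")
```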

View File

@@ -1,4 +1,4 @@
-from typing import List, Tuple, Union
+from typing import Union
import numpy as np
import unittest
from dataclasses import replace
@@ -10,13 +10,12 @@ from tinygrad.ops import UOp, Ops, GroupOp
from tinygrad.device import Device, Buffer, is_dtype_supported
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.view import View
# from tinygrad.ops import Variable
from tinygrad.tensor import Tensor, _to_np_dtype
from tinygrad.engine.realize import run_schedule, lower_schedule, CompiledRunner
from tinygrad.helpers import prod, Context, getenv, CI, flatten, dedup, AMX
from tinygrad.dtype import DType, dtypes
-def helper_realized_ast(r:Union[Tensor, List[Tensor]]) -> Tuple[UOp, List[Buffer]]:
+def helper_realized_ast(r:Union[Tensor, list[Tensor]]) -> tuple[UOp, list[Buffer]]:
if isinstance(r, Tensor): r = [r]
s = Tensor.schedule(*r)
run_schedule(s[:-1]) # run all kernels except the last one
@@ -1752,30 +1751,30 @@ class TestHandCodedOpts(unittest.TestCase):
assert k.local_dims == 1
assert k.upcasted == 1
-def helper_linearizer_ast(ast:UOp, inputs:List[Tensor], *args, **kwargs):
+def helper_linearizer_ast(ast:UOp, inputs:list[Tensor], *args, **kwargs):
assert isinstance(ast, UOp), "ast must be UOp"
inbufs = [x.lazydata.base.buffer for x in inputs]
outbufs = [Buffer(inbufs[-1].device if inbufs else Device.DEFAULT, out.st_arg.size, out.src[2].dtype).allocate() \
for out in ast.src]
return _helper_linearizer_opt_ast(ast, outbufs+inbufs, *args, **kwargs)
-def helper_linearizer_opt(r:Union[Tensor, List[Tensor]], *args, **kwargs):
+def helper_linearizer_opt(r:Union[Tensor, list[Tensor]], *args, **kwargs):
realized_ast, real_bufs = helper_realized_ast(r)
return _helper_linearizer_opt_ast(realized_ast, real_bufs, *args, **kwargs)
-def copyout_outputs(lin:Kernel, outbufs:List[Buffer]) -> List[np.ndarray]:
+def copyout_outputs(lin:Kernel, outbufs:list[Buffer]) -> list[np.ndarray]:
ret = []
for i,x in enumerate(outbufs):
-shape: Tuple[int, ...] = lin.ast.src[i].st_arg.shape
+shape: tuple[int, ...] = lin.ast.src[i].st_arg.shape
ret.append(np.frombuffer(x.as_buffer(), _to_np_dtype(x.dtype)).reshape(shape))
return ret
-def reset_bufs(bufs:List[Buffer]):
+def reset_bufs(bufs:list[Buffer]):
for buf in bufs: buf.copyin(np.zeros((buf.size, ), dtype=_to_np_dtype(buf.dtype)).data) # Zero to check that all values are filled
-def _helper_linearizer_opt_ast(realized_ast:UOp, real_bufs:List[Buffer], opts=[],
-apply_tc=False, atol=1e-4, rtol=1e-4, color_sizes=[], wanna_output=[]) -> List[Kernel]:
-lins: List[Kernel] = []
+def _helper_linearizer_opt_ast(realized_ast:UOp, real_bufs:list[Buffer], opts=[],
+apply_tc=False, atol=1e-4, rtol=1e-4, color_sizes=[], wanna_output=[]) -> list[Kernel]:
+lins: list[Kernel] = []
outbufs = [real_bufs[x.src[0].arg] for x in realized_ast.src]
def get_prg(k:Kernel): return CompiledRunner(replace(k.to_program(), device=Device.DEFAULT))

View File

@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Any, List
+from typing import Optional, Any
import unittest, math
import numpy as np
from tinygrad.shape.shapetracker import ShapeTracker
@@ -17,7 +17,7 @@ from tinygrad.codegen.rewriter import full_graph_rewrite, sym
from tinygrad.device import is_dtype_supported
from tinygrad.codegen.kernel import Kernel, Opt, OptOps
-def to_uops_list(u:List[UOp], opts=None, skip_check=False) -> List[UOp]: return linearize_uop(full_graph_rewrite(UOp.sink(*u), opts), skip_check)
+def to_uops_list(u:list[UOp], opts=None, skip_check=False) -> list[UOp]: return linearize_uop(full_graph_rewrite(UOp.sink(*u), opts), skip_check)
def _uops_to_prg(uops_list):
uops = linearize_uop(full_graph_rewrite(UOp.sink(*uops_list), opts=Device[Device.DEFAULT].renderer))
@@ -26,7 +26,7 @@ def _uops_to_prg(uops_list):
return CompiledRunner(ProgramSpec("test", src, Device.DEFAULT, uops=uops,
global_size=[1,1,1] if has_local else None, local_size=[1,1,1] if has_local else None))
-def uop(uops:List[UOp], uop:Ops, dtype:Optional[DType], src:Tuple[UOp, ...], arg:Any=None) -> UOp:
+def uop(uops:list[UOp], uop:Ops, dtype:Optional[DType], src:tuple[UOp, ...], arg:Any=None) -> UOp:
uops.append(UOp(uop, dtype, tuple(src), arg))
return uops[-1]

View File

@@ -1,5 +1,4 @@
from __future__ import annotations
-from typing import Tuple, Any, List
import ctypes, os, mmap, tempfile, pathlib, array, functools, threading, contextlib, sys, subprocess, time, struct
assert sys.platform != 'win32'
from tinygrad.device import BufferSpec, Compiled, Allocator, Compiler, MallocAllocator
@@ -20,7 +19,7 @@ class DSPRenderer(ClangRenderer):
Ops.LOG2: lambda x,dtype: f"__builtin_log2l({x})" if dtype == dtypes.float64 else f"__builtin_log2f({x})",
Ops.EXP2: lambda x,dtype: f"__builtin_exp2l({x})" if dtype == dtypes.float64 else f"__builtin_exp2f({x})"}
-def render_kernel(self, function_name:str, kernel:List[str], bufs:List[Tuple[str,Tuple[DType,bool]]], uops:List[UOp], prefix=None) -> str:
+def render_kernel(self, function_name:str, kernel:list[str], bufs:list[tuple[str,tuple[DType,bool]]], uops:list[UOp], prefix=None) -> str:
ret = super().render_kernel(function_name, kernel, bufs, uops, prefix)
msrc = ['''struct dcvs_v2_req { int type; int _pad; _Bool dcvs_enable; char dcvs_option; _Bool set_latency; int latency; _Bool set_dcvs_params;
short _pad2; char target_corner; char min_corner; char max_corner; int _pad3[3]; };''', 'int HAP_power_set(void*, void*);',
@@ -55,7 +54,7 @@ class DSPProgram:
def __init__(self, dev:DSPDevice, name:str, lib:bytes):
self.dev, self.lib = dev, lib
-def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
+def __call__(self, *bufs, vals:tuple[int, ...]=(), wait=False):
if len(bufs) >= 16: raise RuntimeError(f"Too many buffers to execute: {len(bufs)}")
pra, fds, attrs, _ = rpc_prep_args(ins=[var_vals_mv:=memoryview(bytearray((len(bufs)+len(vals))*4)), off_mv:=memoryview(bytearray(len(bufs)*4))],
@@ -66,7 +65,7 @@ class DSPProgram:
return timer[0] / 1e6
class DSPBuffer:
-def __init__(self, va_addr:int, size:int, share_info:Any, offset:int=0):
+def __init__(self, va_addr:int, size:int, share_info, offset:int=0):
self.va_addr, self.size, self.share_info, self.offset = va_addr, size, share_info, offset
class DSPAllocator(Allocator):
@@ -229,7 +228,7 @@ class RPCListener(threading.Thread):
# ***** mock DSP *****
class MockDSPRenderer(DSPRenderer):
-def render_kernel(self, function_name:str, kernel:List[str], bufs:List[Tuple[str,Tuple[DType,bool]]], uops:List[UOp], prefix=None) -> str:
+def render_kernel(self, function_name:str, kernel:list[str], bufs:list[tuple[str,tuple[DType,bool]]], uops:list[UOp], prefix=None) -> str:
ret = ClangRenderer.render_kernel(self, function_name, kernel, bufs, uops, prefix)
# https://gpages.juszkiewicz.com.pl/syscalls-table/syscalls.html
msrc = ['''static long syscall(long r0, long r1, long r2, long r3, long r4, long r5, long r6) {
@@ -254,7 +253,7 @@ class MockDSPRenderer(DSPRenderer):
class MockDSPProgram:
def __init__(self, name:str, lib:bytes): self.lib = lib
-def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
+def __call__(self, *bufs, vals:tuple[int, ...]=(), wait=False):
with tempfile.NamedTemporaryFile(suffix=".out") as dsp_lib:
dsp_lib.write(self.lib)
dsp_lib.flush()

View File

@@ -2,7 +2,7 @@
from __future__ import annotations
import time, math, itertools, functools, struct, sys, inspect, pathlib, string, hashlib, weakref
from contextlib import ContextDecorator
-from typing import List, Tuple, Callable, Optional, ClassVar, Union, Sequence, cast, get_args, Literal, TYPE_CHECKING, SupportsIndex
+from typing import Callable, Optional, ClassVar, Union, Sequence, cast, get_args, Literal, TYPE_CHECKING, SupportsIndex
from tinygrad.dtype import DType, DTypeLike, dtypes, ImageDType, ConstType, least_upper_float, least_upper_dtype, sum_acc_dtype, to_dtype, truncate
from tinygrad.helpers import argfix, make_tuple, flatten, prod, all_int, round_up, merge_dicts, argsort, getenv, all_same, fully_flatten, dedup
from tinygrad.helpers import IMAGE, WINO, _METADATA, Metadata, TRACEMETA, ceildiv, fetch, polyN, unwrap
@@ -68,7 +68,7 @@ def get_shape(x) -> tuple[int, ...]:
if not all_same(subs:=[get_shape(xi) for xi in x]): raise ValueError(f"inhomogeneous shape from {x}")
return (len(subs),) + (subs[0] if subs else ())
-def _frompy(x:Union[List, Tuple, bytes], dtype:DType) -> UOp:
+def _frompy(x:Union[list, tuple, bytes], dtype:DType) -> UOp:
if isinstance(x, bytes): ret, data = UOp.metaop(Ops.EMPTY, (len(x)//dtype.itemsize,), dtype, "PYTHON"), x
else:
ret = UOp.metaop(Ops.EMPTY, get_shape(x), dtype, "PYTHON")
@@ -131,7 +131,7 @@ class Tensor(SimpleMathTrait):
training: ClassVar[bool] = False
no_grad: ClassVar[bool] = False
-def __init__(self, data:Union[None, ConstType, bytes, List, Tuple, UOp, 'np.ndarray', pathlib.Path], # type: ignore [name-defined] # noqa: F821
+def __init__(self, data:Union[None, ConstType, bytes, list, tuple, UOp, 'np.ndarray', pathlib.Path], # type: ignore [name-defined] # noqa: F821
device:Optional[Union[str, tuple, list]]=None, dtype:Optional[DTypeLike]=None, requires_grad:Optional[bool]=None):
if dtype is not None: dtype = to_dtype(dtype)
if device is None and isinstance(data, pathlib.Path): device = f"DISK:{data.resolve()}" # keep it on the disk if device is None
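Per the branch above, a `Tensor` built from a `pathlib.Path` with no explicit device keeps its data on disk; a usage sketch (the path is hypothetical):

```python
import pathlib
from tinygrad import Tensor

t = Tensor(pathlib.Path("/tmp/weights.bin"))  # hypothetical file
# device resolves to f"DISK:{data.resolve()}" per the branch above
```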
@@ -329,7 +329,7 @@ class Tensor(SimpleMathTrait):
assert self.numel() == 1, "must have one element for item"
return self.data()[(0,) * len(self.shape)]
-# TODO: should be Tensor.tolist() -> Union[list[ConstType], ConstType]. The List is Sequence because mypy expects memoryview.tolist() -> list[int]
+# TODO: should be Tensor.tolist() -> Union[list[ConstType], ConstType]. The list is Sequence because mypy expects memoryview.tolist() -> list[int]
# src: https://github.com/python/mypy/blob/release-1.6/mypy/typeshed/stdlib/builtins.pyi#L803
def tolist(self) -> Union[Sequence[ConstType], ConstType]:
"""
@@ -1185,7 +1185,7 @@ class Tensor(SimpleMathTrait):
"""
Retrieve a sub-tensor using indexing.
-Supported Index Types: `int | slice | Tensor | None | List | Tuple | Ellipsis`
+Supported Index Types: `int | slice | Tensor | None | list | tuple | Ellipsis`
Examples:
```python exec="true" source="above" session="tensor" result="python"
@@ -2036,7 +2036,7 @@ class Tensor(SimpleMathTrait):
raise ValueError(f"Padding must be an int or a sequence of length {dims} or {2*dims}, but got {padding=} for {self.shape=} with {dims=}.")
return [padding]*2*dims if isinstance(padding, int) else (padding if len(padding) == 2*dims else [p for p in padding for _ in range(2)][::-1])
-def _apply_ceil_mode(self, pads:Sequence[int], k_:Tuple[sint, ...], s_:Union[Tuple[int, ...], int], d_:Union[Tuple[int, ...], int]) -> List[int]:
+def _apply_ceil_mode(self, pads:Sequence[int], k_:tuple[sint, ...], s_:Union[tuple[int, ...], int], d_:Union[tuple[int, ...], int]) -> list[int]:
(d_,s_), i_ = (make_tuple(x, len(k_)) for x in (d_,s_)), self.shape[-len(k_):]
pads, grouped_pads = list(pads), _flat_to_grouped(pads)
# https://arxiv.org/pdf/1603.07285 section 5.1, relationship 15.
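The padding resolver above expands an `int` or a length-`dims` sequence into the flat per-side form; a standalone mirror of that expression with worked values (kept outside the class for illustration):

```python
def resolve_pad(padding, dims):
    # mirror of the return expression in the padding resolver above
    if isinstance(padding, int): return [padding] * 2 * dims
    return list(padding) if len(padding) == 2 * dims else [p for p in padding for _ in range(2)][::-1]

assert resolve_pad(1, 2) == [1, 1, 1, 1]             # one value, every side
assert resolve_pad((1, 2), 2) == [2, 2, 1, 1]        # per dimension, duplicated then reversed
assert resolve_pad((1, 2, 3, 4), 2) == [1, 2, 3, 4]  # already per side, unchanged
```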
@@ -2059,10 +2059,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
@@ -2106,10 +2106,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
@@ -2144,10 +2144,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
@@ -2217,10 +2217,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
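A usage sketch of the three documented padding forms, shown on `max_pool2d` (one caller of this convention; the exact signature here is assumed):

```python
from tinygrad import Tensor

x = Tensor.ones(1, 1, 8, 8)
a = x.max_pool2d(kernel_size=(2, 2), padding=1)             # 1. int: same on all sides
b = x.max_pool2d(kernel_size=(2, 2), padding=(1, 2))        # 2. one value per spatial dim
c = x.max_pool2d(kernel_size=(2, 2), padding=(1, 1, 2, 2))  # 3. explicit per-side values
```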