Tuple -> tuple, List -> list [pr] (#8936)

commit a092b6395d (parent d5183e1584)
Author: chenyu, committed by GitHub
Date: 2025-02-06 14:21:19 -05:00

9 changed files with 43 additions and 47 deletions
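This PR swaps `typing.List`/`typing.Tuple` annotations for the built-in `list`/`tuple` generics, which are subscriptable in annotations since Python 3.9 (PEP 585). A minimal sketch of the pattern (names are illustrative, not from this diff):

```python
# before: generic aliases imported from typing
from typing import List, Tuple

def square_pairs_old(xs: List[int]) -> List[Tuple[int, int]]:
    return [(x, x * x) for x in xs]

# after (PEP 585): the built-in types are generic themselves, no import needed
def square_pairs_new(xs: list[int]) -> list[tuple[int, int]]:
    return [(x, x * x) for x in xs]

assert square_pairs_new([1, 2]) == [(1, 1), (2, 4)]
```

Only the container aliases are replaced; `Union`, `Optional`, `Any`, and `Callable` are kept as `typing` imports in the hunks below.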

View File

@@ -1,4 +1,3 @@
-from typing import List, Tuple
from extra.models.resnet import ResNet50
from extra.mcts_search import mcts_search
from examples.mlperf.helpers import get_mlperf_bert_model
@@ -79,7 +78,7 @@ if __name__ == "__main__":
rawbufs = bufs_from_lin(Kernel(si.ast))
# "linearize" the op into uops in different ways
-lins: List[Tuple[Kernel, str]] = []
+lins: list[tuple[Kernel, str]] = []
# always try hand coded opt
lin = Kernel(si.ast, opts=device.renderer)

View File

@@ -1,5 +1,5 @@
import math
-from typing import Union, Tuple
+from typing import Union
from tinygrad import Tensor, nn, dtypes
from tinygrad.helpers import prod, argfix
@@ -56,7 +56,7 @@ class EmbeddingBert(nn.Embedding):
return (arange == idx).mul(vals).sum(2, acc_dtype=vals.dtype)
class LayerNormBert:
-def __init__(self, normalized_shape:Union[int, Tuple[int, ...]], eps:float=1e-12, elementwise_affine:bool=True):
+def __init__(self, normalized_shape:Union[int, tuple[int, ...]], eps:float=1e-12, elementwise_affine:bool=True):
self.normalized_shape = (normalized_shape,) if isinstance(normalized_shape, int) else tuple(normalized_shape)
self.axis, self.eps, self.elementwise_affine = tuple(-1-i for i in range(len(self.normalized_shape))), eps, elementwise_affine
self.weight, self.bias = (Tensor.ones(*self.normalized_shape, dtype=dtypes.float32), Tensor.zeros(*self.normalized_shape, dtype=dtypes.float32)) if elementwise_affine else (None, None)
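For reference, the `axis` expression above maps a normalized shape of length n to the last n axes, counted from the end; a standalone check of that expression (values are illustrative):

```python
# mirror of the axis expression in LayerNormBert.__init__ above
normalized_shape = (3, 4)
axis = tuple(-1 - i for i in range(len(normalized_shape)))
assert axis == (-1, -2)  # normalize over the last two dimensions
```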

View File

@@ -1,4 +1,4 @@
-from typing import Tuple, Union, Optional, Dict, Any
+from typing import Union, Optional, Any
from tinygrad import Tensor, Variable, TinyJit, dtypes, nn, Device
from tinygrad.helpers import getenv
@@ -15,7 +15,7 @@ def complex_mult(A, c, d):
co = a*d + b*c
return ro.cat(co, dim=-1)
-def apply_rotary_emb(xq:Tensor, xk:Tensor, freqs_cis:Tensor) -> Tuple[Tensor, Tensor]:
+def apply_rotary_emb(xq:Tensor, xk:Tensor, freqs_cis:Tensor) -> tuple[Tensor, Tensor]:
assert freqs_cis.shape[1] == xq.shape[1] == xk.shape[1], f"freqs_cis shape mismatch {freqs_cis.shape} xq:{xq.shape} xk:{xk.shape}"
xq = xq.reshape(*xq.shape[0:-1], -1, 2)
xk = xk.reshape(*xk.shape[0:-1], -1, 2)
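`complex_mult` above computes (a+bi)(c+di) = (ac-bd) + (ad+bc)i on tensors whose last axis holds real/imaginary pairs, which is the rotation at the heart of rotary embeddings. A scalar numpy sketch of the same identity (illustrative only):

```python
import numpy as np

# rotate the pair (a, b) by angle theta, i.e. multiply by cos(theta) + i*sin(theta)
a, b, theta = 1.0, 2.0, 0.3
c, d = np.cos(theta), np.sin(theta)
ro, co = a * c - b * d, a * d + b * c  # matches ro/co in complex_mult above
z = complex(a, b) * complex(c, d)      # same rotation via Python complex numbers
assert np.allclose([ro, co], [z.real, z.imag])
```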
@@ -181,7 +181,7 @@ class Transformer:
# *** helpers ***
-def convert_from_huggingface(weights:Dict[str, Tensor], model: Transformer, n_heads: int, n_kv_heads: int, permute_layers: bool = True):
+def convert_from_huggingface(weights:dict[str, Tensor], model: Transformer, n_heads: int, n_kv_heads: int, permute_layers: bool = True):
def permute(v: Tensor, n_heads: int):
return v.reshape(n_heads, 2, v.shape[0] // n_heads // 2, v.shape[1]).transpose(1, 2).reshape(*v.shape[:2])
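The `permute` helper converts Hugging Face's half-split rotary weight layout to the interleaved layout `apply_rotary_emb` expects; a numpy mirror of the reshape/transpose with toy sizes (illustrative, with `swapaxes(1, 2)` standing in for `transpose(1, 2)`):

```python
import numpy as np

n_heads, head_dim, in_dim = 1, 4, 1
v = np.arange(n_heads * head_dim * in_dim).reshape(n_heads * head_dim, in_dim)
out = v.reshape(n_heads, 2, head_dim // 2, in_dim).swapaxes(1, 2).reshape(*v.shape[:2])
# rows of each head reorder 0,1,2,3 -> 0,2,1,3: the two halves become interleaved pairs
assert out[:, 0].tolist() == [0, 2, 1, 3]
```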
@@ -207,7 +207,7 @@ def convert_from_huggingface(weights:Dict[str, Tensor], model: Transformer, n_he
sd[keymap[k]] = v
return sd
-def convert_from_gguf(weights:Dict[str, Tensor], model: Transformer):
+def convert_from_gguf(weights:dict[str, Tensor], model: Transformer):
keymap = {
"token_embd.weight": "tok_embeddings.weight",
**{f"blk.{l}.attn_norm.weight": f"layers.{l}.attention_norm.weight" for l in range(len(model.layers))},
@@ -222,7 +222,7 @@ def convert_from_gguf(weights:Dict[str, Tensor], model: Transformer):
sd["output.weight"] = weights["token_embd.weight"]
return sd
-def fix_bf16(weights:Dict[Any, Tensor]):
+def fix_bf16(weights:dict[Any, Tensor]):
if getenv("SUPPORT_BF16", 1):
# TODO: without casting to float16, 70B llama OOM on tinybox.
return {k:v.cast(dtypes.float16) if v.dtype == dtypes.bfloat16 else v for k,v in weights.items()}

View File

@@ -1,5 +1,4 @@
# stuff needed to unpack a kernel
-from typing import Tuple
from tinygrad import Variable
from tinygrad.codegen.kernel import Opt, OptOps
from tinygrad.ops import UOp, Ops, KernelInfo

View File

@@ -1,5 +1,5 @@
import time
-from typing import Callable, Optional, Tuple
+from typing import Callable, Optional
import numpy as np
from tinygrad import Tensor, dtypes
from tinygrad.ops import UOp, Ops, sint
@@ -40,14 +40,14 @@ def rand_for_dtype(dt:DType, size:int):
return np.random.choice([True, False], size=size)
return np.random.uniform(-10, 10, size=size).astype(_to_np_dtype(dt))
-def ast_const(dtype:DType, val:ConstType, shape:Tuple[sint, ...]=(), st:Optional[ShapeTracker]=None, st_src:Optional[Tuple[UOp]]=None) -> UOp:
+def ast_const(dtype:DType, val:ConstType, shape:tuple[sint, ...]=(), st:Optional[ShapeTracker]=None, st_src:Optional[tuple[UOp]]=None) -> UOp:
if st_src is None:
st_src = (st.to_uop() if st is not None else ShapeTracker.from_shape(()).reshape((1,)*len(shape)).expand(shape).to_uop(),)
st = unwrap(st_src[0].st)
if all(v.mask is None for v in st.views): return UOp.const(dtype, val).replace(src=(st.to_uop(),))
return UOp.const(dtype, val).valid(st)
-def timeit(fxn:Callable[..., T], *args, **kwargs) -> Tuple[T, float]:
+def timeit(fxn:Callable[..., T], *args, **kwargs) -> tuple[T, float]:
st = time.perf_counter_ns()
ret = fxn(*args, **kwargs)
return ret, (time.perf_counter_ns()-st)*1e-6
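`timeit` above returns the wrapped function's result alongside its wall-clock time, converted from nanoseconds to milliseconds by the `*1e-6` factor. A usage sketch:

```python
# hypothetical call against the timeit helper above
out, ms = timeit(sum, range(1_000_000))
print(f"sum={out} in {ms:.3f} ms")
```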

View File

@@ -1,4 +1,4 @@
-from typing import List, Tuple, Union
+from typing import Union
import numpy as np
import unittest
from dataclasses import replace
@@ -10,13 +10,12 @@ from tinygrad.ops import UOp, Ops, GroupOp
from tinygrad.device import Device, Buffer, is_dtype_supported
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.view import View
# from tinygrad.ops import Variable
from tinygrad.tensor import Tensor, _to_np_dtype
from tinygrad.engine.realize import run_schedule, lower_schedule, CompiledRunner
from tinygrad.helpers import prod, Context, getenv, CI, flatten, dedup, AMX
from tinygrad.dtype import DType, dtypes
-def helper_realized_ast(r:Union[Tensor, List[Tensor]]) -> Tuple[UOp, List[Buffer]]:
+def helper_realized_ast(r:Union[Tensor, list[Tensor]]) -> tuple[UOp, list[Buffer]]:
if isinstance(r, Tensor): r = [r]
s = Tensor.schedule(*r)
run_schedule(s[:-1]) # run all kernels except the last one
@@ -1752,30 +1751,30 @@ class TestHandCodedOpts(unittest.TestCase):
assert k.local_dims == 1
assert k.upcasted == 1
-def helper_linearizer_ast(ast:UOp, inputs:List[Tensor], *args, **kwargs):
+def helper_linearizer_ast(ast:UOp, inputs:list[Tensor], *args, **kwargs):
assert isinstance(ast, UOp), "ast must be UOp"
inbufs = [x.lazydata.base.buffer for x in inputs]
outbufs = [Buffer(inbufs[-1].device if inbufs else Device.DEFAULT, out.st_arg.size, out.src[2].dtype).allocate() \
for out in ast.src]
return _helper_linearizer_opt_ast(ast, outbufs+inbufs, *args, **kwargs)
-def helper_linearizer_opt(r:Union[Tensor, List[Tensor]], *args, **kwargs):
+def helper_linearizer_opt(r:Union[Tensor, list[Tensor]], *args, **kwargs):
realized_ast, real_bufs = helper_realized_ast(r)
return _helper_linearizer_opt_ast(realized_ast, real_bufs, *args, **kwargs)
-def copyout_outputs(lin:Kernel, outbufs:List[Buffer]) -> List[np.ndarray]:
+def copyout_outputs(lin:Kernel, outbufs:list[Buffer]) -> list[np.ndarray]:
ret = []
for i,x in enumerate(outbufs):
-shape: Tuple[int, ...] = lin.ast.src[i].st_arg.shape
+shape: tuple[int, ...] = lin.ast.src[i].st_arg.shape
ret.append(np.frombuffer(x.as_buffer(), _to_np_dtype(x.dtype)).reshape(shape))
return ret
-def reset_bufs(bufs:List[Buffer]):
+def reset_bufs(bufs:list[Buffer]):
for buf in bufs: buf.copyin(np.zeros((buf.size, ), dtype=_to_np_dtype(buf.dtype)).data) # Zero to check that all values are filled
-def _helper_linearizer_opt_ast(realized_ast:UOp, real_bufs:List[Buffer], opts=[],
-apply_tc=False, atol=1e-4, rtol=1e-4, color_sizes=[], wanna_output=[]) -> List[Kernel]:
-lins: List[Kernel] = []
+def _helper_linearizer_opt_ast(realized_ast:UOp, real_bufs:list[Buffer], opts=[],
+apply_tc=False, atol=1e-4, rtol=1e-4, color_sizes=[], wanna_output=[]) -> list[Kernel]:
+lins: list[Kernel] = []
outbufs = [real_bufs[x.src[0].arg] for x in realized_ast.src]
def get_prg(k:Kernel): return CompiledRunner(replace(k.to_program(), device=Device.DEFAULT))

View File

@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Any, List
+from typing import Optional, Any
import unittest, math
import numpy as np
from tinygrad.shape.shapetracker import ShapeTracker
@@ -17,7 +17,7 @@ from tinygrad.codegen.rewriter import full_graph_rewrite, sym
from tinygrad.device import is_dtype_supported
from tinygrad.codegen.kernel import Kernel, Opt, OptOps
-def to_uops_list(u:List[UOp], opts=None, skip_check=False) -> List[UOp]: return linearize_uop(full_graph_rewrite(UOp.sink(*u), opts), skip_check)
+def to_uops_list(u:list[UOp], opts=None, skip_check=False) -> list[UOp]: return linearize_uop(full_graph_rewrite(UOp.sink(*u), opts), skip_check)
def _uops_to_prg(uops_list):
uops = linearize_uop(full_graph_rewrite(UOp.sink(*uops_list), opts=Device[Device.DEFAULT].renderer))
@@ -26,7 +26,7 @@ def _uops_to_prg(uops_list):
return CompiledRunner(ProgramSpec("test", src, Device.DEFAULT, uops=uops,
global_size=[1,1,1] if has_local else None, local_size=[1,1,1] if has_local else None))
-def uop(uops:List[UOp], uop:Ops, dtype:Optional[DType], src:Tuple[UOp, ...], arg:Any=None) -> UOp:
+def uop(uops:list[UOp], uop:Ops, dtype:Optional[DType], src:tuple[UOp, ...], arg:Any=None) -> UOp:
uops.append(UOp(uop, dtype, tuple(src), arg))
return uops[-1]

View File

@@ -1,5 +1,4 @@
from __future__ import annotations
-from typing import Tuple, Any, List
import ctypes, os, mmap, tempfile, pathlib, array, functools, threading, contextlib, sys, subprocess, time, struct
assert sys.platform != 'win32'
from tinygrad.device import BufferSpec, Compiled, Allocator, Compiler, MallocAllocator
@@ -20,7 +19,7 @@ class DSPRenderer(ClangRenderer):
Ops.LOG2: lambda x,dtype: f"__builtin_log2l({x})" if dtype == dtypes.float64 else f"__builtin_log2f({x})",
Ops.EXP2: lambda x,dtype: f"__builtin_exp2l({x})" if dtype == dtypes.float64 else f"__builtin_exp2f({x})"}
-def render_kernel(self, function_name:str, kernel:List[str], bufs:List[Tuple[str,Tuple[DType,bool]]], uops:List[UOp], prefix=None) -> str:
+def render_kernel(self, function_name:str, kernel:list[str], bufs:list[tuple[str,tuple[DType,bool]]], uops:list[UOp], prefix=None) -> str:
ret = super().render_kernel(function_name, kernel, bufs, uops, prefix)
msrc = ['''struct dcvs_v2_req { int type; int _pad; _Bool dcvs_enable; char dcvs_option; _Bool set_latency; int latency; _Bool set_dcvs_params;
short _pad2; char target_corner; char min_corner; char max_corner; int _pad3[3]; };''', 'int HAP_power_set(void*, void*);',
@@ -55,7 +54,7 @@ class DSPProgram:
def __init__(self, dev:DSPDevice, name:str, lib:bytes):
self.dev, self.lib = dev, lib
-def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
+def __call__(self, *bufs, vals:tuple[int, ...]=(), wait=False):
if len(bufs) >= 16: raise RuntimeError(f"Too many buffers to execute: {len(bufs)}")
pra, fds, attrs, _ = rpc_prep_args(ins=[var_vals_mv:=memoryview(bytearray((len(bufs)+len(vals))*4)), off_mv:=memoryview(bytearray(len(bufs)*4))],
@@ -66,7 +65,7 @@ class DSPProgram:
return timer[0] / 1e6
class DSPBuffer:
-def __init__(self, va_addr:int, size:int, share_info:Any, offset:int=0):
+def __init__(self, va_addr:int, size:int, share_info, offset:int=0):
self.va_addr, self.size, self.share_info, self.offset = va_addr, size, share_info, offset
class DSPAllocator(Allocator):
@@ -229,7 +228,7 @@ class RPCListener(threading.Thread):
# ***** mock DSP *****
class MockDSPRenderer(DSPRenderer):
-def render_kernel(self, function_name:str, kernel:List[str], bufs:List[Tuple[str,Tuple[DType,bool]]], uops:List[UOp], prefix=None) -> str:
+def render_kernel(self, function_name:str, kernel:list[str], bufs:list[tuple[str,tuple[DType,bool]]], uops:list[UOp], prefix=None) -> str:
ret = ClangRenderer.render_kernel(self, function_name, kernel, bufs, uops, prefix)
# https://gpages.juszkiewicz.com.pl/syscalls-table/syscalls.html
msrc = ['''static long syscall(long r0, long r1, long r2, long r3, long r4, long r5, long r6) {
@@ -254,7 +253,7 @@ class MockDSPRenderer(DSPRenderer):
class MockDSPProgram:
def __init__(self, name:str, lib:bytes): self.lib = lib
-def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
+def __call__(self, *bufs, vals:tuple[int, ...]=(), wait=False):
with tempfile.NamedTemporaryFile(suffix=".out") as dsp_lib:
dsp_lib.write(self.lib)
dsp_lib.flush()

View File

@@ -2,7 +2,7 @@
from __future__ import annotations
import time, math, itertools, functools, struct, sys, inspect, pathlib, string, hashlib, weakref
from contextlib import ContextDecorator
-from typing import List, Tuple, Callable, Optional, ClassVar, Union, Sequence, cast, get_args, Literal, TYPE_CHECKING, SupportsIndex
+from typing import Callable, Optional, ClassVar, Union, Sequence, cast, get_args, Literal, TYPE_CHECKING, SupportsIndex
from tinygrad.dtype import DType, DTypeLike, dtypes, ImageDType, ConstType, least_upper_float, least_upper_dtype, sum_acc_dtype, to_dtype, truncate
from tinygrad.helpers import argfix, make_tuple, flatten, prod, all_int, round_up, merge_dicts, argsort, getenv, all_same, fully_flatten, dedup
from tinygrad.helpers import IMAGE, WINO, _METADATA, Metadata, TRACEMETA, ceildiv, fetch, polyN, unwrap
@@ -68,7 +68,7 @@ def get_shape(x) -> tuple[int, ...]:
if not all_same(subs:=[get_shape(xi) for xi in x]): raise ValueError(f"inhomogeneous shape from {x}")
return (len(subs),) + (subs[0] if subs else ())
-def _frompy(x:Union[List, Tuple, bytes], dtype:DType) -> UOp:
+def _frompy(x:Union[list, tuple, bytes], dtype:DType) -> UOp:
if isinstance(x, bytes): ret, data = UOp.metaop(Ops.EMPTY, (len(x)//dtype.itemsize,), dtype, "PYTHON"), x
else:
ret = UOp.metaop(Ops.EMPTY, get_shape(x), dtype, "PYTHON")
@@ -131,7 +131,7 @@ class Tensor(SimpleMathTrait):
training: ClassVar[bool] = False
no_grad: ClassVar[bool] = False
-def __init__(self, data:Union[None, ConstType, bytes, List, Tuple, UOp, 'np.ndarray', pathlib.Path], # type: ignore [name-defined] # noqa: F821
+def __init__(self, data:Union[None, ConstType, bytes, list, tuple, UOp, 'np.ndarray', pathlib.Path], # type: ignore [name-defined] # noqa: F821
device:Optional[Union[str, tuple, list]]=None, dtype:Optional[DTypeLike]=None, requires_grad:Optional[bool]=None):
if dtype is not None: dtype = to_dtype(dtype)
if device is None and isinstance(data, pathlib.Path): device = f"DISK:{data.resolve()}" # keep it on the disk if device is None
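Per the branch above, a `Tensor` built from a `pathlib.Path` with no explicit device keeps its data on disk; a usage sketch (the path is hypothetical):

```python
import pathlib
from tinygrad import Tensor

t = Tensor(pathlib.Path("/tmp/weights.bin"))  # hypothetical file
# device resolves to f"DISK:{data.resolve()}" per the branch above
```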
@@ -329,7 +329,7 @@ class Tensor(SimpleMathTrait):
assert self.numel() == 1, "must have one element for item"
return self.data()[(0,) * len(self.shape)]
-# TODO: should be Tensor.tolist() -> Union[list[ConstType], ConstType]. The List is Sequence because mypy expects memoryview.tolist() -> list[int]
+# TODO: should be Tensor.tolist() -> Union[list[ConstType], ConstType]. The list is Sequence because mypy expects memoryview.tolist() -> list[int]
# src: https://github.com/python/mypy/blob/release-1.6/mypy/typeshed/stdlib/builtins.pyi#L803
def tolist(self) -> Union[Sequence[ConstType], ConstType]:
"""
@@ -1185,7 +1185,7 @@ class Tensor(SimpleMathTrait):
"""
Retrieve a sub-tensor using indexing.
-Supported Index Types: `int | slice | Tensor | None | List | Tuple | Ellipsis`
+Supported Index Types: `int | slice | Tensor | None | list | tuple | Ellipsis`
Examples:
```python exec="true" source="above" session="tensor" result="python"
@@ -2036,7 +2036,7 @@ class Tensor(SimpleMathTrait):
raise ValueError(f"Padding must be an int or a sequence of length {dims} or {2*dims}, but got {padding=} for {self.shape=} with {dims=}.")
return [padding]*2*dims if isinstance(padding, int) else (padding if len(padding) == 2*dims else [p for p in padding for _ in range(2)][::-1])
-def _apply_ceil_mode(self, pads:Sequence[int], k_:Tuple[sint, ...], s_:Union[Tuple[int, ...], int], d_:Union[Tuple[int, ...], int]) -> List[int]:
+def _apply_ceil_mode(self, pads:Sequence[int], k_:tuple[sint, ...], s_:Union[tuple[int, ...], int], d_:Union[tuple[int, ...], int]) -> list[int]:
(d_,s_), i_ = (make_tuple(x, len(k_)) for x in (d_,s_)), self.shape[-len(k_):]
pads, grouped_pads = list(pads), _flat_to_grouped(pads)
# https://arxiv.org/pdf/1603.07285 section 5.1, relationship 15.
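The padding resolver above expands an `int` or a length-`dims` sequence into the flat per-side form; a standalone mirror of that expression with worked values (kept outside the class for illustration):

```python
def resolve_pad(padding, dims):
    # mirror of the return expression in the padding resolver above
    if isinstance(padding, int): return [padding] * 2 * dims
    return list(padding) if len(padding) == 2 * dims else [p for p in padding for _ in range(2)][::-1]

assert resolve_pad(1, 2) == [1, 1, 1, 1]             # one value, every side
assert resolve_pad((1, 2), 2) == [2, 2, 1, 1]        # per dimension, duplicated then reversed
assert resolve_pad((1, 2, 3, 4), 2) == [1, 2, 3, 4]  # already per side, unchanged
```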
@@ -2059,10 +2059,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
@@ -2106,10 +2106,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
@@ -2144,10 +2144,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
@@ -2217,10 +2217,10 @@ class Tensor(SimpleMathTrait):
1. `int` (single value):
Applies the same padding value uniformly to all spatial dimensions.
-2. `Tuple[int, ...]` (length = number of spatial dimensions):
+2. `tuple[int, ...]` (length = number of spatial dimensions):
Specifies a distinct padding value for each spatial dimension in the form `(padding_height, padding_width, ...)`.
-3. `Tuple[int, ...]` (length = 2 * number of spatial dimensions):
+3. `tuple[int, ...]` (length = 2 * number of spatial dimensions):
Specifies explicit padding for each side of each spatial dimension in the form
`(padding_left, padding_right, padding_top, padding_bottom, ...)`.
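A usage sketch of the three documented padding forms, shown on `max_pool2d` (one caller of this convention; the exact signature here is assumed):

```python
from tinygrad import Tensor

x = Tensor.ones(1, 1, 8, 8)
a = x.max_pool2d(kernel_size=(2, 2), padding=1)             # 1. int: same on all sides
b = x.max_pool2d(kernel_size=(2, 2), padding=(1, 2))        # 2. one value per spatial dim
c = x.max_pool2d(kernel_size=(2, 2), padding=(1, 1, 2, 2))  # 3. explicit per-side values
```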