From 3f46425f1e022586446d3071c00b36e5e76b8405 Mon Sep 17 00:00:00 2001 From: chenyu Date: Tue, 24 Dec 2024 22:32:25 -0500 Subject: [PATCH] typos found by gemini [pr] (#8400) not very effective... maybe due to tokenizer --- tinygrad/codegen/kernel.py | 2 +- tinygrad/codegen/linearize.py | 2 +- tinygrad/device.py | 6 +++--- tinygrad/dtype.py | 2 +- tinygrad/renderer/llvmir.py | 2 +- tinygrad/runtime/ops_amd.py | 4 ++-- tinygrad/runtime/ops_nv.py | 4 ++-- tinygrad/runtime/ops_qcom.py | 2 +- tinygrad/runtime/support/hcq.py | 4 ++-- tinygrad/tensor.py | 4 ++-- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tinygrad/codegen/kernel.py b/tinygrad/codegen/kernel.py index d97a1e01bf..5479e5b71e 100644 --- a/tinygrad/codegen/kernel.py +++ b/tinygrad/codegen/kernel.py @@ -45,7 +45,7 @@ class TensorCoreOptions: axes: tuple[int, ...] # the location of the original N and M axes if still in the shape axes_exist: tuple[bool, ...] # true if the original N and M axes are still in the shape axis_pads: tuple[tuple[int, int], ...] - def fix_axes(self, removed_axis:int): # adjust the TC axes if necesssary when a dimension is removed + def fix_axes(self, removed_axis:int): # adjust the TC axes if necessary when a dimension is removed axes, axes_exist = list(self.axes), list(self.axes_exist) for tc_dim in [i for i in range(2) if axes_exist[i]]: if removed_axis < axes[tc_dim]: axes[tc_dim] -= 1 diff --git a/tinygrad/codegen/linearize.py b/tinygrad/codegen/linearize.py index 6b6e9fab9d..2eb7b2195c 100644 --- a/tinygrad/codegen/linearize.py +++ b/tinygrad/codegen/linearize.py @@ -36,7 +36,7 @@ def append_to_block(ctx:tuple[dict[UOp, tuple[UOp, ...]], dict[UOp, list[UOp]]], for u in x.src: if u.op is Ops.BLOCK: # merge sibling blocks. NOTE: blocks must only have one output source - assert u.arg.ctx not in old_blocks, "sibiling should never have been created" + assert u.arg.ctx not in old_blocks, "sibling should never have been created" old_blocks[u.arg.ctx] = u elif u.op not in DONT_PLACE_IN_BLOCK and set(children[u]).issubset(in_this_block): # if it can go in blocks and all its children are in the block, we add it to the block diff --git a/tinygrad/device.py b/tinygrad/device.py index 023095f1d6..c87be01b2d 100644 --- a/tinygrad/device.py +++ b/tinygrad/device.py @@ -170,9 +170,9 @@ class Buffer: # TODO: size, dest, src are the same type. can we enforce this? 
class Allocator: - # overriden in LRUAllocator + # overridden in LRUAllocator def alloc(self, size:int, options:Optional[BufferSpec]=None): - assert size > 0, f"alloc size must be positve, getting {size}" + assert size > 0, f"alloc size must be positive, getting {size}" return self._alloc(size, options if options is not None else BufferSpec()) def free(self, opaque, size:int, options:Optional[BufferSpec]=None): self._free(opaque, options if options is not None else BufferSpec()) @@ -271,7 +271,7 @@ def is_dtype_supported(dtype:DType, device:Optional[str]=None) -> bool: if PROFILE: @atexit.register - def finlize_profile(): + def finalize_profile(): devs = [Device[d] for d in Device._opened_devices] for dev in devs: dev.synchronize() for dev in devs: dev._at_profile_finalize() diff --git a/tinygrad/dtype.py b/tinygrad/dtype.py index b7ff9d9b7c..0d0c1568fe 100644 --- a/tinygrad/dtype.py +++ b/tinygrad/dtype.py @@ -73,7 +73,7 @@ class dtypes: @staticmethod @functools.lru_cache(None) def is_float(x: DType) -> bool: return x.scalar() in dtypes.floats or isinstance(x, ImageDType) - @staticmethod # static methds on top, or bool in the type info will refer to dtypes.bool + @staticmethod # static methods on top, or bool in the type info will refer to dtypes.bool @functools.lru_cache(None) def is_int(x: DType) -> bool: return x.scalar() in dtypes.ints @staticmethod diff --git a/tinygrad/renderer/llvmir.py b/tinygrad/renderer/llvmir.py index 725d20a30e..b6706ec802 100644 --- a/tinygrad/renderer/llvmir.py +++ b/tinygrad/renderer/llvmir.py @@ -129,7 +129,7 @@ class LLVMRenderer(Renderer): # generate the phi nodes for the assigns if u.op is Ops.RANGE: for x in acc_to_assign: - if u in x.src: # if this range is relevent for this acc + if u in x.src: # if this range is relevant for this acc vc += 1 kernel.append(f" %acc{vc} = phi {ldt(x.dtype)}" f"[{r[x]}, %loop_entry_{u.arg}], [{r[acc_to_assign[x]]}, %loop_latch_{u.arg}]") r[x] = f"%acc{vc}" diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 7a20d1c0db..0a9db6fe1b 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -238,8 +238,8 @@ class AMDProgram(HCQProgram): self.kernargs_segment_size = image[entry_point+8:entry_point+12].cast("I")[0] lds_size = ((self.group_segment_size + 511) // 512) & 0x1FF - if lds_size > (self.dev.properties['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requsted: group_segment_size") - if self.private_segment_size > self.dev.max_private_segment_size: raise RuntimeError("Too many resources requsted: private_segment_size") + if lds_size > (self.dev.properties['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requested: group_segment_size") + if self.private_segment_size > self.dev.max_private_segment_size: raise RuntimeError("Too many resources requested: private_segment_size") code = hsa.amd_kernel_code_t.from_address(self.lib_gpu.va_addr + entry_point) # NOTE: this is wrong, it's not this object assert code.kernel_code_properties & 0x400 == 0x400 # ENABLE_WAVEFRONT_SIZE32 diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index 9fa1cc9513..ffe370b9a4 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -245,7 +245,7 @@ class NVProgram(HCQProgram): self.qmd.__setattr__(f'constant_buffer_size_shifted4_{i}', sz) self.qmd.__setattr__(f'constant_buffer_valid_{i}', 1) - # Registers allocation granularity per warp is 256, warp allocaiton granularity is 4. Register file size is 65536. 
+ # Registers allocation granularity per warp is 256, warp allocation granularity is 4. Register file size is 65536. self.max_threads = ((65536 // round_up(max(1, self.regs_usage) * 32, 256)) // 4) * 4 * 32 # NV's kernargs is constbuffer (size 0x160), then arguments to the kernel follows. Kernargs also appends QMD at the end of the kernel. @@ -363,7 +363,7 @@ class NVDevice(HCQCompiled[NVSignal]): if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size) attrs = (nv_gpu.struct_c__SA_UvmGpuMappingAttributes*256)(nv_gpu.struct_c__SA_UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1)) - # NOTE: va_addr is set to make rawbufs compatable with HCQBuffer protocol. + # NOTE: va_addr is set to make rawbufs compatible with HCQBuffer protocol. self._debug_mappings[(va_base, size)] = tag return HCQBuffer(va_base, size, meta=uvm.map_external_allocation(self.fd_uvm, base=va_base, length=size, rmCtrlFd=self.fd_ctl, hClient=self.root, hMemory=mem_handle, gpuAttributesCount=1, perGpuAttributes=attrs, mapped_gpu_ids=[self.gpu_uuid], has_cpu_mapping=has_cpu_mapping)) diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index 0b5041f914..5597e535d6 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -43,7 +43,7 @@ class QCOMSignal(HCQSignal): if isinstance(self.base_addr, int): QCOMDevice.signals_pool.append(self.base_addr) def _sleep(self, time_spent_waiting_ms:int): - # Sleep only for only timeline signals. Do it immidiately to free cpu. + # Sleep only for only timeline signals. Do it immediately to free cpu. if self.timeline_for_device is not None: kgsl.IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID(self.timeline_for_device.fd, context_id=self.timeline_for_device.ctx, timestamp=self.timeline_for_device.last_cmd, timeout=0xffffffff) diff --git a/tinygrad/runtime/support/hcq.py b/tinygrad/runtime/support/hcq.py index 00cde75df9..16e5ddb588 100644 --- a/tinygrad/runtime/support/hcq.py +++ b/tinygrad/runtime/support/hcq.py @@ -264,7 +264,7 @@ class HCQProgram(Generic[DeviceType]): Returns: Arguments state with the given buffers and values set for the program. """ - return self.args_state_t(kernargs_ptr or self.dev.kernargs_alloctor.alloc(self.kernargs_alloc_size), self, bufs, vals=vals) + return self.args_state_t(kernargs_ptr or self.dev.kernargs_allocator.alloc(self.kernargs_alloc_size), self, bufs, vals=vals) def __call__(self, *bufs:HCQBuffer, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int, ...]=(), wait:bool=False) -> Optional[float]: @@ -315,7 +315,7 @@ class HCQCompiled(Compiled, Generic[SignalType]): super().__init__(device, allocator, renderer, compiler, runtime, HCQGraph) self.kernargs_page:HCQBuffer = self.allocator.alloc(16 << 20, BufferSpec(cpu_access=True)) - self.kernargs_alloctor:BumpAllocator = BumpAllocator(self.kernargs_page.size, start=cast(int, self.kernargs_page.va_addr), wrap=True) + self.kernargs_allocator:BumpAllocator = BumpAllocator(self.kernargs_page.size, start=cast(int, self.kernargs_page.va_addr), wrap=True) self.devices.append(self) def synchronize(self): diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 5f0aa5fbe1..b1c24ac326 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -329,7 +329,7 @@ class Tensor(SimpleMathTrait): def clone(self) -> Tensor: """ - Creates a clone of this tensor allocating a seperate buffer for the data. + Creates a clone of this tensor allocating a separate buffer for the data. 
+ Creates a clone of this tensor allocating a separate buffer for the data.
""" ret = Tensor(self.lazydata.clone(), self.device, requires_grad=self.requires_grad) if self.grad is not None: ret.grad = self.grad.clone() @@ -1139,7 +1139,7 @@ class Tensor(SimpleMathTrait): if any(abs(st) != 1 for st in strides): strides = tuple(abs(s) for s in strides) # pad shape to multiple of stride - if not all_int(x.shape): raise RuntimeError("symbolic shape not supprted") + if not all_int(x.shape): raise RuntimeError("symbolic shape not supported") x = x.pad(tuple((0, round_up(s, st) - s) for s, st in zip(x.shape, strides))) x = x.reshape(tuple(flatten((s // st, st) for s, st in zip(x.shape, strides)))) x = x.shrink(tuple(flatten(((0, s), (0, 1)) for s in x.shape[::2]))).reshape(x.shape[::2])