typos found by gemini [pr] (#8400)

not very effective... maybe due to tokenizer
2026-02-16 17:45:38 -05:00 · 2024-12-24 22:32:25 -05:00
parent a35eef8d58
commit 3f46425f1e
10 changed files with 16 additions and 16 deletions
--- a/tinygrad/runtime/ops_amd.py
+++ b/tinygrad/runtime/ops_amd.py
@@ -238,8 +238,8 @@ class AMDProgram(HCQProgram):
    self.kernargs_segment_size = image[entry_point+8:entry_point+12].cast("I")[0]

    lds_size = ((self.group_segment_size + 511) // 512) & 0x1FF
-    if lds_size > (self.dev.properties['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requsted: group_segment_size")
-    if self.private_segment_size > self.dev.max_private_segment_size: raise RuntimeError("Too many resources requsted: private_segment_size")
+    if lds_size > (self.dev.properties['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requested: group_segment_size")
+    if self.private_segment_size > self.dev.max_private_segment_size: raise RuntimeError("Too many resources requested: private_segment_size")

    code = hsa.amd_kernel_code_t.from_address(self.lib_gpu.va_addr + entry_point) # NOTE: this is wrong, it's not this object
    assert code.kernel_code_properties & 0x400 == 0x400 # ENABLE_WAVEFRONT_SIZE32
--- a/tinygrad/runtime/ops_nv.py
+++ b/tinygrad/runtime/ops_nv.py
@@ -245,7 +245,7 @@ class NVProgram(HCQProgram):
      self.qmd.__setattr__(f'constant_buffer_size_shifted4_{i}', sz)
      self.qmd.__setattr__(f'constant_buffer_valid_{i}', 1)

-    # Registers allocation granularity per warp is 256, warp allocaiton granularity is 4. Register file size is 65536.
+    # Registers allocation granularity per warp is 256, warp allocation granularity is 4. Register file size is 65536.
    self.max_threads = ((65536 // round_up(max(1, self.regs_usage) * 32, 256)) // 4) * 4 * 32

    # NV's kernargs is constbuffer (size 0x160), then arguments to the kernel follows. Kernargs also appends QMD at the end of the kernel.
@@ -363,7 +363,7 @@ class NVDevice(HCQCompiled[NVSignal]):
    if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
    attrs = (nv_gpu.struct_c__SA_UvmGpuMappingAttributes*256)(nv_gpu.struct_c__SA_UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1))

-    # NOTE: va_addr is set to make rawbufs compatable with HCQBuffer protocol.
+    # NOTE: va_addr is set to make rawbufs compatible with HCQBuffer protocol.
    self._debug_mappings[(va_base, size)] = tag
    return HCQBuffer(va_base, size, meta=uvm.map_external_allocation(self.fd_uvm, base=va_base, length=size, rmCtrlFd=self.fd_ctl, hClient=self.root,
      hMemory=mem_handle, gpuAttributesCount=1, perGpuAttributes=attrs, mapped_gpu_ids=[self.gpu_uuid], has_cpu_mapping=has_cpu_mapping))
--- a/tinygrad/runtime/ops_qcom.py
+++ b/tinygrad/runtime/ops_qcom.py
@@ -43,7 +43,7 @@ class QCOMSignal(HCQSignal):
    if isinstance(self.base_addr, int): QCOMDevice.signals_pool.append(self.base_addr)

  def _sleep(self, time_spent_waiting_ms:int):
-    # Sleep only for only timeline signals. Do it immidiately to free cpu.
+    # Sleep only for only timeline signals. Do it immediately to free cpu.
    if self.timeline_for_device is not None:
      kgsl.IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID(self.timeline_for_device.fd, context_id=self.timeline_for_device.ctx,
                                                  timestamp=self.timeline_for_device.last_cmd, timeout=0xffffffff)
--- a/tinygrad/runtime/support/hcq.py
+++ b/tinygrad/runtime/support/hcq.py
@@ -264,7 +264,7 @@ class HCQProgram(Generic[DeviceType]):
    Returns:
      Arguments state with the given buffers and values set for the program.
    """
-    return self.args_state_t(kernargs_ptr or self.dev.kernargs_alloctor.alloc(self.kernargs_alloc_size), self, bufs, vals=vals)
+    return self.args_state_t(kernargs_ptr or self.dev.kernargs_allocator.alloc(self.kernargs_alloc_size), self, bufs, vals=vals)

  def __call__(self, *bufs:HCQBuffer, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1),
               vals:tuple[int, ...]=(), wait:bool=False) -> Optional[float]:
@@ -315,7 +315,7 @@ class HCQCompiled(Compiled, Generic[SignalType]):
    super().__init__(device, allocator, renderer, compiler, runtime, HCQGraph)

    self.kernargs_page:HCQBuffer = self.allocator.alloc(16 << 20, BufferSpec(cpu_access=True))
-    self.kernargs_alloctor:BumpAllocator = BumpAllocator(self.kernargs_page.size, start=cast(int, self.kernargs_page.va_addr), wrap=True)
+    self.kernargs_allocator:BumpAllocator = BumpAllocator(self.kernargs_page.size, start=cast(int, self.kernargs_page.va_addr), wrap=True)
    self.devices.append(self)

  def synchronize(self):