typos found by gemini [pr] (#8400)

not very effective... maybe due to tokenizer
This commit is contained in:
chenyu
2024-12-24 22:32:25 -05:00
committed by GitHub
parent a35eef8d58
commit 3f46425f1e
10 changed files with 16 additions and 16 deletions

View File

@@ -238,8 +238,8 @@ class AMDProgram(HCQProgram):
self.kernargs_segment_size = image[entry_point+8:entry_point+12].cast("I")[0]
lds_size = ((self.group_segment_size + 511) // 512) & 0x1FF
if lds_size > (self.dev.properties['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requsted: group_segment_size")
if self.private_segment_size > self.dev.max_private_segment_size: raise RuntimeError("Too many resources requsted: private_segment_size")
if lds_size > (self.dev.properties['lds_size_in_kb'] * 1024) // 512: raise RuntimeError("Too many resources requested: group_segment_size")
if self.private_segment_size > self.dev.max_private_segment_size: raise RuntimeError("Too many resources requested: private_segment_size")
code = hsa.amd_kernel_code_t.from_address(self.lib_gpu.va_addr + entry_point) # NOTE: this is wrong, it's not this object
assert code.kernel_code_properties & 0x400 == 0x400 # ENABLE_WAVEFRONT_SIZE32

View File

@@ -245,7 +245,7 @@ class NVProgram(HCQProgram):
self.qmd.__setattr__(f'constant_buffer_size_shifted4_{i}', sz)
self.qmd.__setattr__(f'constant_buffer_valid_{i}', 1)
# Registers allocation granularity per warp is 256, warp allocaiton granularity is 4. Register file size is 65536.
# Registers allocation granularity per warp is 256, warp allocation granularity is 4. Register file size is 65536.
self.max_threads = ((65536 // round_up(max(1, self.regs_usage) * 32, 256)) // 4) * 4 * 32
# NV's kernargs is a constbuffer (size 0x160), followed by the arguments to the kernel. Kernargs also appends QMD at the end of the kernel.
@@ -363,7 +363,7 @@ class NVDevice(HCQCompiled[NVSignal]):
if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
attrs = (nv_gpu.struct_c__SA_UvmGpuMappingAttributes*256)(nv_gpu.struct_c__SA_UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1))
# NOTE: va_addr is set to make rawbufs compatable with HCQBuffer protocol.
# NOTE: va_addr is set to make rawbufs compatible with HCQBuffer protocol.
self._debug_mappings[(va_base, size)] = tag
return HCQBuffer(va_base, size, meta=uvm.map_external_allocation(self.fd_uvm, base=va_base, length=size, rmCtrlFd=self.fd_ctl, hClient=self.root,
hMemory=mem_handle, gpuAttributesCount=1, perGpuAttributes=attrs, mapped_gpu_ids=[self.gpu_uuid], has_cpu_mapping=has_cpu_mapping))

View File

@@ -43,7 +43,7 @@ class QCOMSignal(HCQSignal):
if isinstance(self.base_addr, int): QCOMDevice.signals_pool.append(self.base_addr)
def _sleep(self, time_spent_waiting_ms:int):
# Sleep only for only timeline signals. Do it immidiately to free cpu.
# Sleep only for timeline signals. Do it immediately to free the CPU.
if self.timeline_for_device is not None:
kgsl.IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID(self.timeline_for_device.fd, context_id=self.timeline_for_device.ctx,
timestamp=self.timeline_for_device.last_cmd, timeout=0xffffffff)

View File

@@ -264,7 +264,7 @@ class HCQProgram(Generic[DeviceType]):
Returns:
Arguments state with the given buffers and values set for the program.
"""
return self.args_state_t(kernargs_ptr or self.dev.kernargs_alloctor.alloc(self.kernargs_alloc_size), self, bufs, vals=vals)
return self.args_state_t(kernargs_ptr or self.dev.kernargs_allocator.alloc(self.kernargs_alloc_size), self, bufs, vals=vals)
def __call__(self, *bufs:HCQBuffer, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1),
vals:tuple[int, ...]=(), wait:bool=False) -> Optional[float]:
@@ -315,7 +315,7 @@ class HCQCompiled(Compiled, Generic[SignalType]):
super().__init__(device, allocator, renderer, compiler, runtime, HCQGraph)
self.kernargs_page:HCQBuffer = self.allocator.alloc(16 << 20, BufferSpec(cpu_access=True))
self.kernargs_alloctor:BumpAllocator = BumpAllocator(self.kernargs_page.size, start=cast(int, self.kernargs_page.va_addr), wrap=True)
self.kernargs_allocator:BumpAllocator = BumpAllocator(self.kernargs_page.size, start=cast(int, self.kernargs_page.va_addr), wrap=True)
self.devices.append(self)
def synchronize(self):