fix misspellings (#13976)
@@ -70,7 +70,7 @@ AMD backend supports several interfaces for communicating with devices:
 
 * `KFD`: uses the amdgpu driver
 * `PCI`: uses the [AM driver](developer/am.md)
-* `USB`: USB3 interafce for asm24xx chips.
+* `USB`: USB3 interface for asm24xx chips.
 
 You can force an interface by setting `AMD_IFACE` to one of these values. In the case of `AMD_IFACE=PCI`, this may unbind your GPU from the amdgpu driver.
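The `AMD_IFACE` override described in that doc hunk can be exercised from Python. A minimal sketch (not part of this commit), assuming an AMD GPU is present and that `KFD` is the interface you want to force:

```python
# Minimal sketch: force the AMD backend interface before the device is opened.
# Assumes an AMD GPU is available; "KFD" could be swapped for "PCI" or "USB".
import os
os.environ["AMD_IFACE"] = "KFD"

from tinygrad import Tensor, Device
Device.DEFAULT = "AMD"
print((Tensor([1, 2, 3]) + 1).tolist())  # runs a tiny kernel through the chosen interface
```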
@@ -8,7 +8,7 @@ SQTT is implemented on top of normal tinygrad profiling, `VIZ=1 SQTT=1` to get p
 
 `SQTT_ITRACE_SE_MASK=X` to select for which shader engines instruction tracing will be enabled, -1 is all, 0 is none (instruction tracing disabled), >0 is
 bitfield/mask for SEs to enable instruction tracing on. Masking shader engines will give smaller file sizes at a cost of less hits and kernels that
-don't have any wavefront on first simd of shdaer engine with instruction tracing enabled will not have instruction timings.
+don't have any wavefront on first simd of shader engine with instruction tracing enabled will not have instruction timings.
 The default is 2 (second shader engine only), only one for file size reasons, second instead of first because dispatch starts from it so there is
 greater chance that kernels with small global size will have instruction tracing data.
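The mask semantics documented in that hunk (-1 = all, 0 = none, otherwise a bitfield over shader engines) can be illustrated with a small helper. This is only an illustration; the shader-engine count of 4 is an assumed example value, not something the docs specify:

```python
# Illustration of the SQTT_ITRACE_SE_MASK bitfield described above.
# num_se=4 is an assumed example; real chips vary.
def enabled_shader_engines(mask: int, num_se: int = 4) -> list[int]:
  if mask == -1: return list(range(num_se))                  # -1: trace every SE
  return [se for se in range(num_se) if mask & (1 << se)]    # bit i enables SE i

print(enabled_shader_engines(-1))  # [0, 1, 2, 3]
print(enabled_shader_engines(0))   # []  (instruction tracing disabled)
print(enabled_shader_engines(2))   # [1] -> the default: second shader engine only
```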
@@ -12,10 +12,10 @@ libc.mmap.restype = ctypes.c_void_p
 drivers = [AMDDriver(), NVDriver()]
 tracked_fds = {}
 
-orignal_memoryview = builtins.memoryview
+original_memoryview = builtins.memoryview
 class TrackedMemoryView:
   def __init__(self, data, rcb, wcb):
-    self.mv = orignal_memoryview(data)
+    self.mv = original_memoryview(data)
     self.rcb, self.wcb = rcb, wcb
 
   def __getitem__(self, index):
@@ -41,7 +41,7 @@ def _memoryview(cls, mem):
   for d in drivers:
     for st,en,rcb,wcb in d.tracked_addresses:
       if st <= addr <= en: return TrackedMemoryView(mem, rcb, wcb)
-  return orignal_memoryview(mem)
+  return original_memoryview(mem)
 builtins.memoryview = type("memoryview", (), {'__new__': _memoryview}) # type: ignore
 
 def _open(path, flags):
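The two hunks above touch the mock-GPU layer that swaps `builtins.memoryview` for a wrapper so accesses to tracked driver address ranges trigger callbacks. A stripped-down, driver-free sketch of the same pattern (the callback signatures here are assumptions made for this example, not the exact ones the real drivers use):

```python
# Driver-free sketch of the memoryview tracking pattern in the hunks above.
# The rcb/wcb callback signatures are assumptions made for this example.
import builtins

original_memoryview = builtins.memoryview

class TrackedMemoryView:
  def __init__(self, data, rcb, wcb):
    self.mv, self.rcb, self.wcb = original_memoryview(data), rcb, wcb
  def __getitem__(self, index):
    self.rcb(index)                 # fire read callback, then serve the read
    return self.mv[index]
  def __setitem__(self, index, value):
    self.mv[index] = value
    self.wcb(index)                 # fire write callback after the store

buf = bytearray(4)
tmv = TrackedMemoryView(buf, lambda i: print("read", i), lambda i: print("write", i))
tmv[0] = 7      # prints "write 0"
print(tmv[0])   # prints "read 0", then 7
```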
@@ -194,7 +194,7 @@ class TestImageDType(unittest.TestCase):
     lst = s.bufs[0].as_buffer().cast("f").tolist()
     print(lst)
     assert not np.any(np.isnan(lst))
-    # NOTE: the w1 grad must realize to a seperate kernel
+    # NOTE: the w1 grad must realize to a separate kernel
     assert w1.grad.uop.is_realized, f"never realized {w1.grad}"
     self.assertEqual(w1.grad.uop.base.buffer.dtype, dtypes.float32)
     self.assertEqual(len(sched), 9)
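For context on what that NOTE is asserting, here is a toy, device-agnostic sketch of checking that a gradient has been realized into its own buffer after backward; the shapes and random data are made up for illustration:

```python
# Toy sketch (assumed shapes/data): a weight's grad ends up in its own realized buffer.
from tinygrad import Tensor

w = Tensor.randn(4, 4, requires_grad=True)
x = Tensor.randn(2, 4)
loss = (x @ w).sum()
loss.backward()
w.grad.realize()
print(w.grad.uop.is_realized)        # True once the grad kernel has run
print(w.grad.uop.base.buffer.dtype)  # dtype of the grad's backing buffer
```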
@@ -1087,7 +1087,7 @@ class TestOps(unittest.TestCase):
     helper_test_op([(2,3,0)], lambda x: torch.cummax(x, dim=2).values, lambda x: Tensor.cummax(x, axis=2))
 
   def test_argmax(self):
-    # check if it returns the first index for multiple occurences
+    # check if it returns the first index for multiple occurrences
     helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[2, 2]])
     helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[1, 2, 2]])
     if not COMPILE_ONLY:
@@ -1107,7 +1107,7 @@ class TestOps(unittest.TestCase):
     helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[True, False]])
 
   def test_argmin(self):
-    # check if it returns the first index for multiple occurences
+    # check if it returns the first index for multiple occurrences
     helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[2, 2]])
     helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[3, 2, 2]])
     if not COMPILE_ONLY:
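The two TestOps hunks above both check the same tie-breaking rule; a quick standalone illustration of the behavior, with values chosen to mirror those tests:

```python
# Tie-breaking illustration: argmax/argmin return the first index of the extreme value.
from tinygrad import Tensor

print(Tensor([2, 2]).argmax().item())      # 0
print(Tensor([1, 2, 2]).argmax().item())   # 1 (first of the tied maxima)
print(Tensor([3, 2, 2]).argmin().item())   # 1 (first of the tied minima)
```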
@@ -28,7 +28,7 @@ class TestDevice(unittest.TestCase):
     self.assertEqual(Device.canonicalize(None), device)
     Device.DEFAULT = device
 
-  @unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subproccess causes memory violation?
+  @unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subprocess causes memory violation?
   def test_env_overwrite_default_compiler(self):
     if Device.DEFAULT == "CPU":
       from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
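The surrounding test exercises `Device.canonicalize` and the writable `Device.DEFAULT`; a small sketch of what those calls return on any machine (the printed device name depends on the local setup):

```python
# Sketch: Device.canonicalize(None) resolves to the current default device string.
from tinygrad import Device

print(Device.DEFAULT)              # e.g. "CPU"; depends on the machine
print(Device.canonicalize(None))   # same string as Device.DEFAULT
```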
@@ -1141,7 +1141,7 @@ class Tensor(OpMixin):
       boundary, stride = [start, stop], step
       if all(isinstance(s, int) for s in (start,stop,step)):
         # handle int slicing
-        # if we're slicing a symbolic dimension into a int dimension, we can slice untill the bind size
+        # if we're slicing a symbolic dimension into a int dimension, we can slice until the bind size
         # TODO: right now this is using vmax instead of the bind size because jit doesnt update the bound value of the returned tensor
         if isinstance(size, UOp): size = int(size.vmax)
         *boundary, stride = index.indices(cast(SupportsIndex, size))
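The int-slicing branch in that hunk ultimately leans on Python's own `slice.indices`, which clamps start/stop/step to a concrete dimension size; a short standalone example of what it returns:

```python
# What `index.indices(size)` does for the plain-int path above:
# clamp start/stop against a dimension of the given size and return the stride.
print(slice(1, None, 2).indices(7))   # (1, 7, 2)
print(slice(-2, None).indices(5))     # (3, 5, 1)
```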