mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-06 21:53:53 -05:00
fix misspellings (#13976)
This commit is contained in:
@@ -70,7 +70,7 @@ AMD backend supports several interfaces for communicating with devices:
|
|||||||
|
|
||||||
* `KFD`: uses the amdgpu driver
|
* `KFD`: uses the amdgpu driver
|
||||||
* `PCI`: uses the [AM driver](developer/am.md)
|
* `PCI`: uses the [AM driver](developer/am.md)
|
||||||
* `USB`: USB3 interafce for asm24xx chips.
|
* `USB`: USB3 interface for asm24xx chips.
|
||||||
|
|
||||||
You can force an interface by setting `AMD_IFACE` to one of these values. In the case of `AMD_IFACE=PCI`, this may unbind your GPU from the amdgpu driver.
|
You can force an interface by setting `AMD_IFACE` to one of these values. In the case of `AMD_IFACE=PCI`, this may unbind your GPU from the amdgpu driver.
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ SQTT is implemented on top of normal tinygrad profiling, `VIZ=1 SQTT=1` to get p
|
|||||||
|
|
||||||
`SQTT_ITRACE_SE_MASK=X` to select for which shader engines instruction tracing will be enabled, -1 is all, 0 is none (instruction tracing disabled), >0 is
|
`SQTT_ITRACE_SE_MASK=X` to select for which shader engines instruction tracing will be enabled, -1 is all, 0 is none (instruction tracing disabled), >0 is
|
||||||
bitfield/mask for SEs to enable instruction tracing on. Masking shader engines will give smaller file sizes at the cost of fewer hits, and kernels that
|
bitfield/mask for SEs to enable instruction tracing on. Masking shader engines will give smaller file sizes at the cost of fewer hits, and kernels that
|
||||||
don't have any wavefront on first simd of shdaer engine with instruction tracing enabled will not have instruction timings.
|
don't have any wavefront on first simd of shader engine with instruction tracing enabled will not have instruction timings.
|
||||||
The default is 2 (second shader engine only), only one for file size reasons, second instead of first because dispatch starts from it so there is
|
The default is 2 (second shader engine only), only one for file size reasons, second instead of first because dispatch starts from it so there is
|
||||||
greater chance that kernels with small global size will have instruction tracing data.
|
greater chance that kernels with small global size will have instruction tracing data.
|
||||||
|
|
||||||
|
|||||||
@@ -12,10 +12,10 @@ libc.mmap.restype = ctypes.c_void_p
|
|||||||
drivers = [AMDDriver(), NVDriver()]
|
drivers = [AMDDriver(), NVDriver()]
|
||||||
tracked_fds = {}
|
tracked_fds = {}
|
||||||
|
|
||||||
orignal_memoryview = builtins.memoryview
|
original_memoryview = builtins.memoryview
|
||||||
class TrackedMemoryView:
|
class TrackedMemoryView:
|
||||||
def __init__(self, data, rcb, wcb):
|
def __init__(self, data, rcb, wcb):
|
||||||
self.mv = orignal_memoryview(data)
|
self.mv = original_memoryview(data)
|
||||||
self.rcb, self.wcb = rcb, wcb
|
self.rcb, self.wcb = rcb, wcb
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
@@ -41,7 +41,7 @@ def _memoryview(cls, mem):
|
|||||||
for d in drivers:
|
for d in drivers:
|
||||||
for st,en,rcb,wcb in d.tracked_addresses:
|
for st,en,rcb,wcb in d.tracked_addresses:
|
||||||
if st <= addr <= en: return TrackedMemoryView(mem, rcb, wcb)
|
if st <= addr <= en: return TrackedMemoryView(mem, rcb, wcb)
|
||||||
return orignal_memoryview(mem)
|
return original_memoryview(mem)
|
||||||
builtins.memoryview = type("memoryview", (), {'__new__': _memoryview}) # type: ignore
|
builtins.memoryview = type("memoryview", (), {'__new__': _memoryview}) # type: ignore
|
||||||
|
|
||||||
def _open(path, flags):
|
def _open(path, flags):
|
||||||
|
|||||||
@@ -194,7 +194,7 @@ class TestImageDType(unittest.TestCase):
|
|||||||
lst = s.bufs[0].as_buffer().cast("f").tolist()
|
lst = s.bufs[0].as_buffer().cast("f").tolist()
|
||||||
print(lst)
|
print(lst)
|
||||||
assert not np.any(np.isnan(lst))
|
assert not np.any(np.isnan(lst))
|
||||||
# NOTE: the w1 grad must realize to a seperate kernel
|
# NOTE: the w1 grad must realize to a separate kernel
|
||||||
assert w1.grad.uop.is_realized, f"never realized {w1.grad}"
|
assert w1.grad.uop.is_realized, f"never realized {w1.grad}"
|
||||||
self.assertEqual(w1.grad.uop.base.buffer.dtype, dtypes.float32)
|
self.assertEqual(w1.grad.uop.base.buffer.dtype, dtypes.float32)
|
||||||
self.assertEqual(len(sched), 9)
|
self.assertEqual(len(sched), 9)
|
||||||
|
|||||||
@@ -1087,7 +1087,7 @@ class TestOps(unittest.TestCase):
|
|||||||
helper_test_op([(2,3,0)], lambda x: torch.cummax(x, dim=2).values, lambda x: Tensor.cummax(x, axis=2))
|
helper_test_op([(2,3,0)], lambda x: torch.cummax(x, dim=2).values, lambda x: Tensor.cummax(x, axis=2))
|
||||||
|
|
||||||
def test_argmax(self):
|
def test_argmax(self):
|
||||||
# check if it returns the first index for multiple occurences
|
# check if it returns the first index for multiple occurrences
|
||||||
helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[2, 2]])
|
helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[2, 2]])
|
||||||
helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[1, 2, 2]])
|
helper_test_op(None, lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[1, 2, 2]])
|
||||||
if not COMPILE_ONLY:
|
if not COMPILE_ONLY:
|
||||||
@@ -1107,7 +1107,7 @@ class TestOps(unittest.TestCase):
|
|||||||
helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[True, False]])
|
helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[True, False]])
|
||||||
|
|
||||||
def test_argmin(self):
|
def test_argmin(self):
|
||||||
# check if it returns the first index for multiple occurences
|
# check if it returns the first index for multiple occurrences
|
||||||
helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[2, 2]])
|
helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[2, 2]])
|
||||||
helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[3, 2, 2]])
|
helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[3, 2, 2]])
|
||||||
if not COMPILE_ONLY:
|
if not COMPILE_ONLY:
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ class TestDevice(unittest.TestCase):
|
|||||||
self.assertEqual(Device.canonicalize(None), device)
|
self.assertEqual(Device.canonicalize(None), device)
|
||||||
Device.DEFAULT = device
|
Device.DEFAULT = device
|
||||||
|
|
||||||
@unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subproccess causes memory violation?
|
@unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subprocess causes memory violation?
|
||||||
def test_env_overwrite_default_compiler(self):
|
def test_env_overwrite_default_compiler(self):
|
||||||
if Device.DEFAULT == "CPU":
|
if Device.DEFAULT == "CPU":
|
||||||
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
|
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
|
||||||
|
|||||||
@@ -1141,7 +1141,7 @@ class Tensor(OpMixin):
|
|||||||
boundary, stride = [start, stop], step
|
boundary, stride = [start, stop], step
|
||||||
if all(isinstance(s, int) for s in (start,stop,step)):
|
if all(isinstance(s, int) for s in (start,stop,step)):
|
||||||
# handle int slicing
|
# handle int slicing
|
||||||
# if we're slicing a symbolic dimension into a int dimension, we can slice untill the bind size
|
# if we're slicing a symbolic dimension into a int dimension, we can slice until the bind size
|
||||||
# TODO: right now this is using vmax instead of the bind size because jit doesn't update the bound value of the returned tensor
|
# TODO: right now this is using vmax instead of the bind size because jit doesn't update the bound value of the returned tensor
|
||||||
if isinstance(size, UOp): size = int(size.vmax)
|
if isinstance(size, UOp): size = int(size.vmax)
|
||||||
*boundary, stride = index.indices(cast(SupportsIndex, size))
|
*boundary, stride = index.indices(cast(SupportsIndex, size))
|
||||||
|
|||||||
Reference in New Issue
Block a user