Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-08 14:43:57 -05:00)
Remove webgpu, back to 5k lines (#3040)
* remove webgpu
* max 5000 lines
84  .github/workflows/test.yml  (vendored)
@@ -63,8 +63,8 @@ jobs:
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
-    - name: Repo line count <6000 lines
-      run: MAX_LINE_COUNT=6000 python sz.py
+    - name: Repo line count <5000 lines
+      run: MAX_LINE_COUNT=5000 python sz.py

  testcpuimagenet:
    name: CPU and ImageNet to C Tests
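The gate behind this step is a small script that honors the MAX_LINE_COUNT environment variable. A minimal sketch of such a check, assuming it walks tinygrad/ and counts non-blank lines in .py files; the real sz.py may count differently:

# Hypothetical stand-in for a MAX_LINE_COUNT gate like sz.py.
# The directory walked and the definition of a "line" are assumptions.
import os, sys, pathlib

def count_lines(root: str = "tinygrad") -> int:
    total = 0
    for path in pathlib.Path(root).rglob("*.py"):
        total += sum(1 for line in path.read_text().splitlines() if line.strip())
    return total

if __name__ == "__main__":
    total = count_lines()
    print(f"total line count: {total}")
    max_lines = int(os.getenv("MAX_LINE_COUNT", "-1"))
    if max_lines != -1 and total > max_lines:
        sys.exit(f"line count {total} is over MAX_LINE_COUNT={max_lines}")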
@@ -214,48 +214,48 @@ jobs:
      name: Test Beam Search
      run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py

-  testwebgpu:
-    name: WebGPU Tests
-    runs-on: macos-13
-    timeout-minutes: 20
-    steps:
-    - name: Checkout Code
-      uses: actions/checkout@v3
-    - name: Set up Python 3.11
-      uses: actions/setup-python@v4
-      with:
-        python-version: 3.11
-    - name: Cache python packages
-      uses: actions/cache@v3
-      with:
-        path: /Users/runner/Library/Python/3.11/lib/python/site-packages
-        key: webgpu-testing-user3-packages-${{ hashFiles('**/setup.py') }}
-    - name: Install Dependencies
-      run: pip install --user -e '.[webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu
-    - name: Cache downloads
-      uses: actions/cache@v3
-      with:
-        path: ~/Library/Caches/tinygrad/downloads/
-        key: downloads-cache-webgpu-${{ env.DOWNLOAD_CACHE_VERSION }}
-    - name: Check Device.DEFAULT (WEBGPU) and print some source
-      run: |
-        WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
-        WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
+  #testwebgpu:
+  # name: WebGPU Tests
+  # runs-on: macos-13
+  # timeout-minutes: 20
+  # steps:
+  # - name: Checkout Code
+  # uses: actions/checkout@v3
+  # - name: Set up Python 3.11
+  # uses: actions/setup-python@v4
+  # with:
+  # python-version: 3.11
+  # - name: Cache python packages
+  # uses: actions/cache@v3
+  # with:
+  # path: /Users/runner/Library/Python/3.11/lib/python/site-packages
+  # key: webgpu-testing-user3-packages-${{ hashFiles('**/setup.py') }}
+  # - name: Install Dependencies
+  # run: pip install --user -e '.[webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu
+  # - name: Cache downloads
+  # uses: actions/cache@v3
+  # with:
+  # path: ~/Library/Caches/tinygrad/downloads/
+  # key: downloads-cache-webgpu-${{ env.DOWNLOAD_CACHE_VERSION }}
+  # - name: Check Device.DEFAULT (WEBGPU) and print some source
+  # run: |
+  # WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
+  # WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    #- name: Run webgpu pytest
    #  run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto
-    - name: Run selected webgpu tests
-      run: |
-        WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto test/test_ops.py test/test_dtype.py \
-          test/test_jit.py test/test_symbolic_ops.py test/test_symbolic_jit.py test/test_linearizer.py \
-          test/test_linearizer_failures.py test/test_nn.py
-    - name: Build WEBGPU Efficientnet
-      run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.compile_efficientnet
-    - name: Install Puppeteer
-      run: npm install puppeteer
-    - name: Run WEBGPU Efficientnet
-      run: node test/web/test_webgpu.js
-    - name: Test LLaMA compile speed
-      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
+    # - name: Run selected webgpu tests
+    # run: |
+    # WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto test/test_ops.py test/test_dtype.py \
+    # test/test_jit.py test/test_symbolic_ops.py test/test_symbolic_jit.py test/test_linearizer.py \
+    # test/test_linearizer_failures.py test/test_nn.py
+    # - name: Build WEBGPU Efficientnet
+    # run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.compile_efficientnet
+    # - name: Install Puppeteer
+    # run: npm install puppeteer
+    # - name: Run WEBGPU Efficientnet
+    # run: node test/web/test_webgpu.js
+    # - name: Test LLaMA compile speed
+    # run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py

  testmetal:
    name: Metal Tests
@@ -82,10 +82,8 @@ tinygrad already supports numerous accelerators, including:
- [x] [LLVM](tinygrad/runtime/ops_llvm.py)
- [x] [METAL](tinygrad/runtime/ops_metal.py)
- [x] [CUDA](tinygrad/runtime/ops_cuda.py)
- [x] [Triton](extra/triton/triton.py)
- [x] [PyTorch](tinygrad/runtime/ops_torch.py)
- [x] [HIP](tinygrad/runtime/ops_hip.py)
- [x] [WebGPU](tinygrad/runtime/ops_webgpu.py)

And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
More information can be found in the [documentation for adding new accelerators](/docs/adding_new_accelerators.md).
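The "~25 low level ops" a backend must cover are the op enums that appear throughout this diff (BinaryOps, TernaryOps, and so on). As a hedged sketch of what covering a few of them looks like for a C-style backend, here is the same pattern the removed WGSLLanguage further down uses; the target syntax is invented and the import paths are assumptions for this version of tinygrad:

# Sketch only: a hypothetical C-style backend renderer in the style of
# WGSLLanguage/HIPLanguage. Import paths assumed for tinygrad at this commit.
import functools
from tinygrad.ops import BinaryOps, TernaryOps
from tinygrad.renderer.cstyle import CStyleLanguage, uops_to_cstyle

class MyLangLanguage(CStyleLanguage):
  # each entry renders one low level op as a snippet of target-language code
  code_for_op = { **CStyleLanguage().code_for_op,
    BinaryOps.CMPLT: lambda x,y,dtype: f"({x}<{y})",
    TernaryOps.WHERE: lambda a,b,c,dtype: f"({a}?{b}:{c})" }

# a renderer is the generic uops-to-C-style pass specialized with the language
MyLangRenderer = functools.partial(uops_to_cstyle, MyLangLanguage())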
1  setup.py
@@ -28,7 +28,6 @@ setup(name='tinygrad',
        'llvm': ["llvmlite"],
        'arm': ["unicorn"],
        'triton': ["triton-nightly>=2.1.0.dev20231014192330"],
-       'webgpu': ["wgpu>=v0.12.0"],
        'linting': [
            "pylint",
            "mypy",
@@ -99,8 +99,7 @@ class LazyBuffer:
      return LazyBuffer.loadop(LoadOps.CONST, tuple(), self.dtype, device, arg=self.base.arg)._view(self.st)

    # if it's a shrink, do the shrink before the copy with CONTIGUOUS
-    # TODO: why is this required on WEBGPU?
-    if prod(self.st.shape) < prod(self.base.st.shape) or device == "WEBGPU":
+    if prod(self.st.shape) < prod(self.base.st.shape):
      return create_lazybuffer(device, ShapeTracker.from_shape(self.shape), self.dtype, LoadOps.COPY, srcs=(self.contiguous(),))

    # copy the base and apply the shapetracker on the new device
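For context on the branch that loses its WEBGPU special case: when a view is a shrink of its base, making the shrunk view contiguous before the cross-device copy means only the shrunk data moves. A toy illustration of the size check, with made-up shapes:

# Illustrative numbers only; the real check is the prod() comparison above.
from math import prod

base_shape = (1024, 1024)   # what exists on the source device
view_shape = (16, 16)       # the shrunk view being copied to another device

copy_whole_base = prod(base_shape)   # 1_048_576 elements without the branch
copy_shrunk_view = prod(view_shape)  # 256 elements with contiguous-then-copy

assert prod(view_shape) < prod(base_shape)  # the condition that picks the cheap path
print(copy_whole_base, copy_shrunk_view)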
@@ -118,7 +117,7 @@ class LazyBuffer:
    if op == TernaryOps.WHERE: assert srcs[0].dtype == dtypes.bool, "TernaryOps.WHERE must have the first arg be bool"
    out_dtype = srcs[-1].dtype if op not in (BinaryOps.CMPLT, BinaryOps.CMPEQ) else dtypes.bool
    ret = create_lazybuffer(self.device, ShapeTracker.from_shape(self.shape), out_dtype, op, arg, tuple(srcs))
-    return ret.cast(dtypes.float32) if (out_dtype == dtypes.bool and self.device == "WEBGPU") else ret
+    return ret

  # *** reduce ops ***

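With the WEBGPU-only cast gone, comparisons keep dtypes.bool as their output dtype on every remaining backend. A small hedged example from the Tensor side; the import of dtypes from the package root is assumed for this version:

# Sketch: CMPLT/CMPEQ results stay bool now that the cast back to float32
# (which only WEBGPU needed) is removed.
from tinygrad import Tensor, dtypes  # assuming dtypes is re-exported here

a, b = Tensor([1.0, 2.0, 3.0]), Tensor([2.0, 2.0, 2.0])
mask = a < b                      # lowers to BinaryOps.CMPLT
assert mask.dtype == dtypes.bool  # no device-specific float32 detour
print(mask.numpy())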
@@ -289,38 +289,3 @@ __device__ half16 make_half16(half x, half y, half z, half w, half a, half b, ha
"""
type_map = {dtypes.bfloat16: "hip_bfloat16"}
HIPRenderer = functools.partial(uops_to_cstyle, HIPLanguage())
-
-# TODO: how much of this can be merged with above?
-class WGSLLanguage(CStyleLanguage):
-  code_for_workitem = {"g": lambda x: f"i32(gindex.{'xyz'[x]})", "l": lambda x: f"i32(lindex.{'xyz'[x]})"}
-  size_prefix = "let"
-  barrier="workgroupBarrier();"
-  generic_var_prefix = "var "
-  external_local_bufs = True
-  code_for_op = { **CStyleLanguage().code_for_op,
-    BinaryOps.CMPLT: lambda x,y,dtype: f"f32({x}<{y})", BinaryOps.CMPEQ: lambda x,y,dtype: f"f32({x}=={y})",
-    TernaryOps.MULACC: lambda x,y,z,dtype: f"fma({x},{y},{z})", TernaryOps.WHERE: lambda a,b,c,dtype: f"select({c},{b},bool({a}))" }
-  # HACK: write bool as f32
-  type_map = {dtypes.float: "f32", dtypes.half: "f16", dtypes.int32: "i32", dtypes.uint32: "u32", dtypes.bool: "f32"}
-
-  def render_local(self, name: str, dtype:DType, size: int): return f"var<workgroup> {name}: array<{self.type_map[dtype]},{size}>;"
-
-  def render_const(self, x:Union[float,int], var_dtype) -> str:
-    if math.isnan(x): return "nan()"
-    elif math.isinf(x): return ("-" if x < 0 else "") + "inf(1.0)"
-    return f"({super().render_const(x, var_dtype)})"
-
-  def render_if(self, cond: str): return f"if (bool({cond})) {{"
-
-  def render_kernel(self, function_name:str, kernel:List[str], bufs:List[Tuple[str,DType]], local_size:List[int], prekernel:List[str]) -> str:
-    local_size = local_size[::-1] if local_size else [1]
-    bind_it = iter(range(len(bufs)))
-    prg = "fn nan() -> f32 { let bits = 0xffffffffu; return bitcast<f32>(bits); }\nfn inf(a: f32) -> f32 { return a/0.0; }\n"
-    prg += "\n".join(prekernel+[f"@group(0) @binding({next(bind_it)}) {'var<storage,read_write>' if isinstance(dtype, PtrDType) else 'var<uniform>'} {name}: {f'array<{self.type_map[dtype]}>' if isinstance(dtype, PtrDType) else 'i32'};" for name,dtype in bufs]) # noqa: E501
-    prg += f"\n@compute @workgroup_size({','.join([str(x) for x in local_size])}) fn {function_name}(@builtin(workgroup_id) gindex: vec3<u32>, @builtin(local_invocation_id) lindex: vec3<u32>) {{\n" + "\n".join(kernel) + "\n}" # noqa: E501
-    return prg
-
-  def render_cast(self, x:List[str], var_dtype:DType, bitcast=False) -> str:
-    if self.type_map[var_dtype]: return f"bitcast<{self.type_map[var_dtype]}>({x[0]})" if bitcast else f"{self.type_map[var_dtype]}({x[0]})"
-    raise NotImplementedError(f"no cast for {var_dtype}")
-WGSLRenderer = functools.partial(uops_to_cstyle, WGSLLanguage())
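One detail of the removed render_kernel worth seeing concretely: buffer arguments (PtrDType) are declared as read_write storage arrays, while plain integer arguments become i32 uniform bindings. A hedged re-creation of just that f-string, with made-up argument names:

# Standalone re-creation of the binding declaration logic in render_kernel
# above; binding_line and the example arguments are hypothetical.
def binding_line(idx: int, name: str, wgsl_type: str, is_buffer: bool) -> str:
  kind = "var<storage,read_write>" if is_buffer else "var<uniform>"
  typ = f"array<{wgsl_type}>" if is_buffer else "i32"
  return f"@group(0) @binding({idx}) {kind} {name}: {typ};"

print(binding_line(0, "data0", "f32", True))       # @group(0) @binding(0) var<storage,read_write> data0: array<f32>;
print(binding_line(1, "start_pos", "i32", False))  # @group(0) @binding(1) var<uniform> start_pos: i32;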
@@ -863,7 +863,6 @@ class Tensor:
  def __imatmul__(self, x) -> Tensor: return self.assign(self.matmul(x))
  def __ixor__(self, x) -> Tensor: return self.assign(self.xor(x))

-  # in webgpu bool cannot be used as a storage buffer type
  def __lt__(self, x) -> Tensor: return mlops.Less.apply(*self._broadcasted(x, False))
  def __gt__(self, x) -> Tensor: return mlops.Less.apply(*self._broadcasted(x, True))
  def __ge__(self, x) -> Tensor: return (self<x).logical_not()
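Only the stale WebGPU comment disappears here; the operators are untouched. For reference, the identities these three definitions encode, as a short runnable check:

# __gt__ is Less with the broadcast order flipped, and __ge__ is literally
# the negation of __lt__, exactly as the definitions above read.
import numpy as np
from tinygrad import Tensor

a, b = Tensor([1.0, 2.0, 3.0]), Tensor([2.0, 2.0, 2.0])
np.testing.assert_equal((a > b).numpy(), (b < a).numpy())
np.testing.assert_equal((a >= b).numpy(), (a < b).logical_not().numpy())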