diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ed579d340c..ddb1087a2f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -183,9 +183,10 @@ jobs:
       run: DEBUG=2 METAL=1 python -m pytest test/test_ops.py
     - name: Run JIT test
       run: DEBUG=2 METAL=1 python -m pytest test/test_jit.py
-      # TODO: why not testing the whole test/?
+    - name: Check Device.DEFAULT
+      run: WEBGPU=1 python -c "from tinygrad.lazy import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
     - name: Run webgpu pytest
-      run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto -m 'webgpu'
+      run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto --ignore test/models/ --ignore test/unit/test_example.py --ignore test/extra/test_lr_scheduler.py --ignore test/test_linearizer.py test/
     - name: Build WEBGPU Efficientnet
       run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.compile_efficientnet
diff --git a/test/test_dtype.py b/test/test_dtype.py
index 1d2d69a702..1df71aeba2 100644
--- a/test/test_dtype.py
+++ b/test/test_dtype.py
@@ -133,11 +133,14 @@ class TestBitCast(unittest.TestCase):
 class TestInt32Dtype(unittest.TestCase):
   def test_int32_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.int32), np.int32, [1,2,3,4])
 
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu does not support int64")
   def test_casts_to_int32(self): _test_casts_to([1,2,3,4], source_dtypes=[dtypes.float32, dtypes.int64], target_dtype=dtypes.int32)
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu does not support int64")
   def test_casts_from_int32(self): _test_casts_from([1,2,3,4], source_dtype=dtypes.int32, target_dtypes=[dtypes.float32, dtypes.int64])
 
   def test_int32_ops(self): _test_ops(a_dtype=dtypes.int32, b_dtype=dtypes.int32, target_dtype=dtypes.int32)
   def test_int32_upcast_float32(self): _test_ops(a_dtype=dtypes.int32, b_dtype=dtypes.float32, target_dtype=dtypes.float32)
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu does not support int64")
   def test_int32_upcast_int64(self): _test_ops(a_dtype=dtypes.int32, b_dtype=dtypes.int64, target_dtype=dtypes.int64)
 
 if __name__ == '__main__':
diff --git a/test/test_ops.py b/test/test_ops.py
index 8b8596938a..4d39bc32e6 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -506,6 +506,7 @@ class TestOps(unittest.TestCase):
     helper_test_op([], lambda: (torch.eye(10)@torch.eye(10).flip(0)), lambda: (Tensor.eye(10)@Tensor.eye(10).flip(0)), forward_only=True)
 
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "this test uses more than 8 bufs passing the WEBGPU limit") #TODO: remove after #1461
   def test_broadcast_full(self):
     for torch_op, tinygrad_op in [(torch.add, Tensor.add), (torch.sub, Tensor.sub), (torch.mul, Tensor.mul),
                                   (torch.div, Tensor.div), (torch.pow, Tensor.pow)]:
@@ -517,6 +518,7 @@
     helper_test_op([(45,65), (45,1)], lambda x,y: x/y, lambda x,y: x/y)
     helper_test_op([(45,65), ()], lambda x,y: x/y, lambda x,y: x/y)
 
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "this test uses more than 8 bufs passing the WEBGPU limit") #TODO: remove after #1461
   def test_broadcast_partial(self):
     for torch_op, tinygrad_op in [(torch.add, Tensor.add), (torch.sub, Tensor.sub), (torch.mul, Tensor.mul),
                                   (torch.div, Tensor.div), (torch.pow, Tensor.pow)]:
diff --git a/test/test_optim.py b/test/test_optim.py
index c973873fdc..1ee60805e0 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -1,6 +1,4 @@
 import numpy as np
-from tinygrad.helpers import dtypes
-from tinygrad.nn import Linear
 import torch
 import unittest
 from tinygrad.tensor import Tensor
@@ -69,9 +67,9 @@ class TestOptim(unittest.TestCase):
   def test_multistep_sgd_high_lr_nesterov_momentum_wd(self): self._test_sgd(10, {'lr': 9, 'momentum': 0.9, 'nesterov': True, 'weight_decay': 0.1}, 1e-5, 3e-4)
 
   def test_adam(self): self._test_adam(1, {'lr': 0.001}, 1e-5, 0)
-  def test_adam_high_lr(self): self._test_adam(1, {'lr': 10}, 1e-5, 1e-5)
+  def test_adam_high_lr(self): self._test_adam(1, {'lr': 10}, 1e-4, 1e-4)
   def test_adamw(self): self._test_adamw(1, {'lr': 0.001}, 1e-5, 0)
-  def test_adamw_high_lr(self): self._test_adamw(1, {'lr': 10}, 1e-5, 1e-5)
+  def test_adamw_high_lr(self): self._test_adamw(1, {'lr': 10}, 1e-4, 1e-4)
   def test_multistep_adam(self): self._test_adam(10, {'lr': 0.001}, 1e-5, 0)
   def test_multistep_adam_high_lr(self): self._test_adam(10, {'lr': 10}, 2e-4, 5e-4)
diff --git a/test/test_speed_v_torch.py b/test/test_speed_v_torch.py
index e9fc5efa67..e4edc3b39b 100644
--- a/test/test_speed_v_torch.py
+++ b/test/test_speed_v_torch.py
@@ -136,6 +136,7 @@ class TestSpeed(unittest.TestCase):
     def f(a, b): return a-b
     helper_test_generic_square('sub', 4096, f, f)
 
+  @unittest.skipIf(getenv("CI","")!="" and Device.DEFAULT == "WEBGPU", "breaking on webgpu CI")
   def test_pow(self):
     def f(a, b): return a.pow(b)
     helper_test_generic_square('pow', 2048, f, f)
diff --git a/test/test_tensor.py b/test/test_tensor.py
index a309cb652d..f5c6c8b959 100644
--- a/test/test_tensor.py
+++ b/test/test_tensor.py
@@ -1,8 +1,7 @@
-import dataclasses
 import numpy as np
 import torch
 import unittest
-from tinygrad.tensor import Tensor
+from tinygrad.tensor import Tensor, Device
 from tinygrad.helpers import dtypes
 from extra.gradcheck import numerical_jacobian, jacobian, gradcheck
 
@@ -53,6 +52,7 @@ class TestTinygrad(unittest.TestCase):
     for x,y in zip(test_tinygrad(), test_pytorch()):
       np.testing.assert_allclose(x, y, atol=1e-5)
 
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "this test uses more than 8 bufs which breaks webgpu") #TODO: remove after #1461
   def test_backward_pass_diamond_model(self):
     def test_tinygrad():
       u = Tensor(U_init, requires_grad=True)
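
Note on the gating pattern these hunks rely on: unittest.skipIf decorators are evaluated once at import time, so a condition on Device.DEFAULT (or on the CI environment variable via getenv) decides the skip before any kernel runs. Below is a minimal standalone sketch of the two gates used in this diff, assuming a tinygrad checkout; the test class and method names are hypothetical and not part of the patch.

import unittest
from tinygrad.tensor import Tensor, Device  # same import this diff adds to test_tensor.py
from tinygrad.helpers import getenv

class TestSkipPattern(unittest.TestCase):
  # Unconditional backend gate: skipped whenever the default device resolves
  # to WEBGPU (e.g. because WGSL has no 64-bit integer type).
  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu does not support int64")
  def test_needs_int64(self):
    Tensor([1, 2, 3]).realize()  # placeholder body

  # Narrower gate: skip only when both running under CI (CI env var set) and
  # on the WEBGPU backend, as test_speed_v_torch.py does for test_pow.
  @unittest.skipIf(getenv("CI", "") != "" and Device.DEFAULT == "WEBGPU", "breaking on webgpu CI")
  def test_flaky_on_webgpu_ci(self):
    Tensor([1, 2, 3]).realize()  # placeholder body

if __name__ == '__main__':
  unittest.main()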