diff --git a/test/test_dtype.py b/test/test_dtype.py
index b932ee7db9..ab2627e85e 100644
--- a/test/test_dtype.py
+++ b/test/test_dtype.py
@@ -52,18 +52,6 @@ class TestHalfDtype(unittest.TestCase):
   def test_half_matmul_upcast_float(self): _test_matmul_upcast(Tensor([[1,2],[3,4]], dtype=dtypes.float16), Tensor.eye(2, dtype=dtypes.float32), dtypes.float32, [[1,2],[3,4]])
   def test_int8_matmul_upcast_half(self): _test_matmul_upcast(Tensor([[1,2],[3,4]], dtype=dtypes.int8), Tensor.eye(2, dtype=dtypes.float16), dtypes.float16, [[1,2],[3,4]])
 
-@unittest.skipIf(OSX and Device.DEFAULT in ["GPU", "METAL"], "GPU on Mac doesn't support float64")
-class TestFloat64Dtype(unittest.TestCase):
-  def test_float64_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.float64), np.float64, [1,2,3,4])
-  def test_float64_add(self): _test_add(Tensor([1,2,3,4], dtype=dtypes.float64), Tensor([1,2,3,4], dtype=dtypes.float64), dtypes.float64, [2,4,6,8])
-  def test_float64_mul(self): _test_mul(Tensor([1,2,3,4], dtype=dtypes.float64), Tensor([1,2,3,4], dtype=dtypes.float64), dtypes.float64, [1,4,9,16])
-  def test_float64_matmul(self): _test_matmul(Tensor([[1,2],[3,4]], dtype=dtypes.float64), Tensor.eye(2, dtype=dtypes.float64), dtypes.float64, [[1,2],[3,4]])
-  def test_float64_cast_float(self): _test_cast(Tensor([1,2]), dtypes.float64, [1,2])
-
-  def test_float_add_upcast_float64(self): _test_add_upcast(Tensor([1,2,3,4], dtype=dtypes.float32), Tensor([1,2,3,4], dtype=dtypes.float64), dtypes.float64, [2,4,6,8])
-  def test_float_mul_upcast_float64(self): _test_mul_upcast(Tensor([1,2,3,4], dtype=dtypes.float32), Tensor([1,2,3,4], dtype=dtypes.float64), dtypes.float64, [1,4,9,16])
-  def test_float_matmul_upcast_float64(self): _test_matmul_upcast(Tensor([[1,2],[3,4]], dtype=dtypes.float32), Tensor.eye(2, dtype=dtypes.float64), dtypes.float64, [[1,2],[3,4]])
-
 class TestInt8Dtype(unittest.TestCase):
   def test_int8_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.int8), np.int8, [1,2,3,4])
   def test_uint8_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.uint8), np.uint8, [1,2,3,4])
diff --git a/test/test_tensor.py b/test/test_tensor.py
index 5ab3186b65..ae0e79fe08 100644
--- a/test/test_tensor.py
+++ b/test/test_tensor.py
@@ -105,8 +105,7 @@ class TestTinygrad(unittest.TestCase):
     expected = n * (1 - rate)
     np.testing.assert_allclose(non_zeros, expected, rtol=2e-3)
 
-  #@unittest.skipUnless(Device.DEFAULT == Device.CPU, "float64 not supported on GPU")
-  @unittest.skip("float64 support broken")
+  @unittest.skip("TODO: fix")
   def test_jacobian(self):
     W = np.random.RandomState(1337).random((10, 5))
     x = np.random.RandomState(7331).random((1, 10)) - 0.5
@@ -125,8 +124,7 @@ class TestTinygrad(unittest.TestCase):
     np.testing.assert_allclose(PJ, J, atol = 1e-5)
     np.testing.assert_allclose(PJ, NJ, atol = 1e-5)
 
-  #@unittest.skipUnless(Device.DEFAULT == Device.CPU, "float64 not supported on GPU")
-  @unittest.skip("float64 support broken")
+  @unittest.skip("TODO: fix")
   def test_gradcheck(self):
     W = np.random.RandomState(1337).random((10, 5))
     x = np.random.RandomState(7331).random((1, 10)) - 0.5
diff --git a/tinygrad/codegen/assembly_rdna.py b/tinygrad/codegen/assembly_rdna.py
index 03385f99d8..ce5f8159e5 100644
--- a/tinygrad/codegen/assembly_rdna.py
+++ b/tinygrad/codegen/assembly_rdna.py
@@ -62,7 +62,7 @@ class RDNACodegen(AssemblyCodegen):
       return rtor[x]
     for uop, out, vin, arg in asm:
       if uop == UOps.DEFINE_REGISTER:
-        if arg[0][0] in [dtypes.uint32, dtypes.uint64, dtypes.int64, dtypes.int32, dtypes.float32, dtypes.float64, dtypes._float4]:
+        if arg[0][0] in [dtypes.uint32, dtypes.uint64, dtypes.int64, dtypes.int32, dtypes.float32, dtypes._float4]:
           for i in range(arg[2]):
             # TODO: Re-use gaps created by this to avoid wasting registers
             align = int(arg[0][0].itemsize / 4)
diff --git a/tinygrad/codegen/cstyle.py b/tinygrad/codegen/cstyle.py
index d2b5dc5b79..d900b78e9b 100644
--- a/tinygrad/codegen/cstyle.py
+++ b/tinygrad/codegen/cstyle.py
@@ -176,7 +176,6 @@ def uops_to_cstyle(uops:List[UOp], bufs:List[Union[LocalBuffer,LazyBuffer]], lan
     [") {\n"] + list(prekernel) + ['\n'.join(kernel), "\n}"])
 
   if lang.half_prekernel and any(x.dtype == dtypes.float16 for x in bufs): prg = ''.join([f"{lang.half_prekernel}", "\n", prg])
-  if lang.double_prekernel and any(x.dtype == dtypes.float64 for x in bufs): prg = ''.join([f"{lang.double_prekernel}", "\n", prg])
   return prg, global_size, local_size
 
 class CStyleCodegen(Linearizer):
diff --git a/tinygrad/codegen/llvmir.py b/tinygrad/codegen/llvmir.py
index 5688192caf..f32fd520ed 100644
--- a/tinygrad/codegen/llvmir.py
+++ b/tinygrad/codegen/llvmir.py
@@ -37,7 +37,7 @@ def uops_to_llvm_ir(uops:List[UOp], bufs:List[LazyBuffer]) -> str:
   module = ir.Module(name=__file__)
 
   # create llvm function
-  func_dtypes = [{dtypes.float16:ir.HalfType(), dtypes.float32:ir.FloatType(), dtypes.float64:ir.DoubleType(), dtypes.int8:ir.IntType(8), dtypes.uint8:ir.IntType(8), dtypes.bool: ir.IntType(1), dtypes.int64: ir.IntType(64), dtypes.int32: ir.IntType(32)}[buf.dtype] for buf in bufs]
+  func_dtypes = [{dtypes.float16:ir.HalfType(), dtypes.float32:ir.FloatType(), dtypes.int8:ir.IntType(8), dtypes.uint8:ir.IntType(8), dtypes.bool: ir.IntType(1), dtypes.int64: ir.IntType(64), dtypes.int32: ir.IntType(32)}[buf.dtype] for buf in bufs]
   func = ir.Function(module, ir.FunctionType(ir.VoidType(), [x.as_pointer() for x in func_dtypes]), name='exec')
 
   # force llvmlite to allow us to add function attribute then add the attribute
@@ -90,8 +90,6 @@ def uops_to_llvm_ir(uops:List[UOp], bufs:List[LazyBuffer]) -> str:
       if func_dtypes[args.i] != ir.FloatType():
         if dtypes.is_int(bufs[args.i].dtype):
           val = bb[-1].uitofp(val, ir.FloatType()) if dtypes.is_unsigned(bufs[args.i].dtype) else bb[-1].sitofp(val, ir.FloatType())
-        elif bufs[args.i].dtype == dtypes.float64:
-          val = bb[-1].fptrunc(val, ir.FloatType())
         else:
           val = bb[-1].fpext(val, ir.FloatType())
       lvars[newvar] = val
@@ -102,8 +100,6 @@ def uops_to_llvm_ir(uops:List[UOp], bufs:List[LazyBuffer]) -> str:
      if func_dtypes[0] != ir.FloatType():
        if dtypes.is_int(bufs[args.i].dtype):
          element = bb[-1].fptoui(element, func_dtypes[0]) if dtypes.is_unsigned(bufs[args.i].dtype) else bb[-1].fptosi(element, func_dtypes[0])
-       elif bufs[args.i].dtype == dtypes.float64:
-         element = bb[-1].fpext(element, func_dtypes[0])
       else:
         element = bb[-1].fptrunc(element, func_dtypes[0])
      bb[-1].store(element, bb[-1].gep(func.args[args.i], [idx], inbounds=True))
diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index 56399f88ca..dd01bc60ad 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -82,7 +82,7 @@ class dtypes:
   @staticmethod # static methds on top, or bool in the type info will refer to dtypes.bool
   def is_int(x: DType)-> bool: return x in (dtypes.int8, dtypes.uint8, dtypes.int32, dtypes.int64)
   @staticmethod
-  def is_float(x: DType) -> bool: return x in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes._half4, dtypes._float4)
+  def is_float(x: DType) -> bool: return x in (dtypes.float16, dtypes.float32, dtypes._half4, dtypes._float4)
   @staticmethod
   def is_unsigned(x: DType) -> bool: return x in (dtypes.uint8, dtypes.uint32, dtypes.uint64)
   @staticmethod
@@ -94,7 +94,6 @@ class dtypes:
   half = float16
   float32: Final[DType] = DType(4, 4, "float", np.float32)
   float = float32
-  float64: Final[DType] = DType(5, 8, "double", np.float64)
   int8: Final[DType] = DType(0, 1, "char", np.int8)
   int32: Final[DType] = DType(1, 4, "int", np.int32)
   int64: Final[DType] = DType(2, 8, "long", np.int64)
diff --git a/tinygrad/runtime/lib.py b/tinygrad/runtime/lib.py
index 543938badb..192b427127 100644
--- a/tinygrad/runtime/lib.py
+++ b/tinygrad/runtime/lib.py
@@ -39,7 +39,7 @@ class RawBufferMapped(RawBufferCopyIn):
 
 # this one is simple enough that i moved it out of the runtimes
 class RawMallocBuffer(RawBufferMapped):
-  def __init__(self, size, dtype: DType): super().__init__(size, dtype, ({dtypes.float32: ctypes.c_float, dtypes.float16: ctypes.c_int16, dtypes.float64: ctypes.c_double, dtypes.int8: ctypes.c_int8, dtypes.uint8: ctypes.c_uint8, dtypes.bool: ctypes.c_uint8, dtypes.int32: ctypes.c_int32, dtypes.int64: ctypes.c_int64}[dtype] * size)())
+  def __init__(self, size, dtype: DType): super().__init__(size, dtype, ({dtypes.float32: ctypes.c_float, dtypes.float16: ctypes.c_int16, dtypes.int8: ctypes.c_int8, dtypes.uint8: ctypes.c_uint8, dtypes.bool: ctypes.c_uint8, dtypes.int32: ctypes.c_int32, dtypes.int64: ctypes.c_int64}[dtype] * size)())
   def _buffer(self): return memoryview(self._buf)
 
 class RawBufferCopyInOut(RawBufferCopyIn):
diff --git a/tinygrad/runtime/ops_gpu.py b/tinygrad/runtime/ops_gpu.py
index 2954907f6d..3f488d40ac 100644
--- a/tinygrad/runtime/ops_gpu.py
+++ b/tinygrad/runtime/ops_gpu.py
@@ -3,7 +3,7 @@ import pathlib
 import numpy as np
 import pyopencl as cl # type: ignore
 from typing import Optional, List
-from tinygrad.helpers import DEBUG, getenv, prod, ImageDType, OSX, dtypes, fromimport
+from tinygrad.helpers import DEBUG, getenv, prod, ImageDType, OSX, fromimport
 from tinygrad.ops import Compiled
 from tinygrad.runtime.lib import RawBufferCopyInOut
 from tinygrad.codegen.cstyle import CStyleCodegen, CStyleLanguage
@@ -30,7 +30,6 @@ CL = _CL()
 
 class CLBuffer(RawBufferCopyInOut):
   def __init__(self, size, dtype, device='0'):
-    assert not OSX or dtype != dtypes.float64, "OpenCL on Mac doesn't support float64"
     if isinstance(dtype, ImageDType):
       fmt = cl.ImageFormat(cl.channel_order.RGBA, {2: cl.channel_type.HALF_FLOAT, 4: cl.channel_type.FLOAT}[dtype.itemsize])
       buf = cl.Image(CL.cl_ctx, cl.mem_flags.READ_WRITE, fmt, shape=(dtype.shape[1], dtype.shape[0]))
diff --git a/tinygrad/runtime/ops_metal.py b/tinygrad/runtime/ops_metal.py
index 7aa34f4f20..3d3fe06deb 100644
--- a/tinygrad/runtime/ops_metal.py
+++ b/tinygrad/runtime/ops_metal.py
@@ -3,7 +3,7 @@ import os, subprocess, pathlib
 import Metal, Cocoa, libdispatch # type: ignore
 from typing import List, Any
 from tinygrad.codegen.cstyle import CStyleCodegen, CStyleLanguage
-from tinygrad.helpers import prod, getenv, DEBUG, DType, dtypes
+from tinygrad.helpers import prod, getenv, DEBUG, DType
 from tinygrad.ops import Compiled
 from tinygrad.runtime.lib import RawBufferMapped
 
@@ -22,7 +22,6 @@ METAL = _METAL()
 
 class RawMetalBuffer(RawBufferMapped):
   def __init__(self, size:int, dtype:DType):
-    assert dtype != dtypes.float64, "metal doesn't support float64"
     super().__init__(size, dtype, METAL.device.newBufferWithLength_options_(size*dtype.itemsize, Metal.MTLResourceStorageModeShared))
   def __del__(self):
     self._buf.release()
diff --git a/tinygrad/runtime/ops_torch.py b/tinygrad/runtime/ops_torch.py
index 317443016d..3320fc089f 100644
--- a/tinygrad/runtime/ops_torch.py
+++ b/tinygrad/runtime/ops_torch.py
@@ -6,7 +6,7 @@ from tinygrad.runtime.ops_cpu import base_fxn_for_op, einsum_mulacc
 from tinygrad.runtime.lib import RawBuffer
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else ("mps" if getenv("MPS", 0) else "cpu"))
-type_map = {torch.float16: dtypes.float16, torch.float32: dtypes.float32, torch.float64: dtypes.float64, torch.double: dtypes.float64, torch.int8: dtypes.int8, torch.int32: dtypes.int32, torch.int64: dtypes.int64, torch.uint8: dtypes.uint8, torch.bool: dtypes.bool}
+type_map = {torch.float16: dtypes.float16, torch.float32: dtypes.float32, torch.int8: dtypes.int8, torch.int32: dtypes.int32, torch.int64: dtypes.int64, torch.uint8: dtypes.uint8, torch.bool: dtypes.bool}
 inverse_type_map = {v:k for k,v in type_map.items()}
 
 torch_fxn_for_op: Dict[Op, Callable] = {**base_fxn_for_op, **{