Internal cast 2 with more tests (#2257)

* Change linearizer to parse CAST

* Oneliner renders for cstyle and triton

* LLVM cast and ALU implementation

* pylint fixes

* cast in gep

* remove printbufs

* use cast for post-load ops

* get rid of parse_cast

* partially supported vectorized dtypes for initial dev

* render phi as the dtype

* Revert "partially supported vectorized dtypes for initial dev"

This reverts commit 1bf1a818a3.

* Revert "render phi as the dtype"

This reverts commit d08cb270b4.

* reenable triton tests

* no vstore_half if dtype is already half

* upcast max
This commit is contained in:
qazal
2023-11-10 13:42:39 -05:00
committed by GitHub
parent c0f447d6f7
commit b6aaf12df7
5 changed files with 38 additions and 35 deletions

View File

@@ -19,8 +19,6 @@ def is_dtype_supported(dtype: DType):
if dtype == dtypes.bool:
# host-shareability is a requirement for storage buffers, but the 'bool' type is not host-shareable
if Device.DEFAULT == "WEBGPU": return False
# TODO remove triton from here once internal casting is fixed. CAST of fp32s between 0-1 is broken in triton
if getenv("TRITON") == 1: return False
return True
def get_available_cast_dtypes(dtype: DType) -> List[DType]:
  """Collect the dtypes from DTYPES_DICT that are valid cast targets for `dtype`.

  A candidate is kept when it differs from `dtype`, is accepted by
  is_dtype_supported, and its DTYPES_DICT key does not begin with "_"
  (underscore-prefixed entries are internal dtypes and are never cast to).
  Results follow the iteration order of DTYPES_DICT.
  """
  targets = []
  for name, candidate in DTYPES_DICT.items():
    # Same short-circuit order as the checks are listed above: the support
    # check only runs for candidates that differ from the source dtype.
    if candidate != dtype and is_dtype_supported(candidate) and not name.startswith("_"):
      targets.append(candidate)
  return targets