linearizer! (#714)

* linearizer outputs something

* working-ish

* cstyle codegen

* clang mostly works

* fix load valid

* fix numberless loop

* fancy gen

* working

* fix enet compiler

* cleanups

* float4 upcasting

* less lines

* supports_float4

* constant folding

* mulacc

* internet tests flaky in CI

* 90% image support

* fix image generic

* bugs exposed with shapetracker and single view

* new llvm

* use vload, remove OLD

* that's really poorly done

* ending up being more lines
This commit is contained in:
George Hotz
2023-03-19 23:43:49 -07:00
committed by GitHub
parent b629fd4cd8
commit 5495c7d64e
20 changed files with 792 additions and 776 deletions

View File

@@ -6,7 +6,7 @@ from typing import Optional, List
from tinygrad.helpers import DEBUG, getenv, prod, ImageDType
from tinygrad.ops import Compiled
from tinygrad.runtime.lib import RawBufferCopyInOut
from tinygrad.codegen.gpu import GPUCodegen, GPULanguage
from tinygrad.codegen.cstyle import CStyleCodegen, CStyleLanguage
OSX = platform.system() == "Darwin"
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
@@ -68,11 +68,11 @@ class CLProgram:
return ((e.profile.end - e.profile.start) * OSX_TIMING_RATIO) * 1e-9
return None
class CLCodegen(GPUCodegen):
lang = GPULanguage(
class CLCodegen(CStyleCodegen):
lang = CStyleLanguage(
kernel_prefix = "__kernel", buffer_prefix = "__global ", smem_prefix = "__local ",
half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable",
barrier = "barrier(CLK_LOCAL_MEM_FENCE);", float4 = "(float4)",
gid = [f'get_global_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)])
gid = [f'get_global_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)], uses_vload=True)
GPUBuffer = Compiled(CLBuffer, CLCodegen, CLProgram)