mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-10 14:45:35 -05:00
linearizer! (#714)
* linearizer outputs something * working ish * cstyle codegen * clang mostly works * fix load valid * fix numberless loop * fancy gen * working * fix enet compiler * cleanups * float4 upcasting * less lines * supports_float4 * constant folding * mulacc * internet tests flaky in CI * 90% image support * fix image generic * bugs exposed with shapetracker and single view * new llvm * use vload, remove OLD * that's really poorly done * ending up being more lines
This commit is contained in:
@@ -6,7 +6,7 @@ from typing import Optional, List
|
||||
from tinygrad.helpers import DEBUG, getenv, prod, ImageDType
|
||||
from tinygrad.ops import Compiled
|
||||
from tinygrad.runtime.lib import RawBufferCopyInOut
|
||||
from tinygrad.codegen.gpu import GPUCodegen, GPULanguage
|
||||
from tinygrad.codegen.cstyle import CStyleCodegen, CStyleLanguage
|
||||
|
||||
OSX = platform.system() == "Darwin"
|
||||
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
|
||||
@@ -68,11 +68,11 @@ class CLProgram:
|
||||
return ((e.profile.end - e.profile.start) * OSX_TIMING_RATIO) * 1e-9
|
||||
return None
|
||||
|
||||
class CLCodegen(GPUCodegen):
|
||||
lang = GPULanguage(
|
||||
class CLCodegen(CStyleCodegen):
|
||||
lang = CStyleLanguage(
|
||||
kernel_prefix = "__kernel", buffer_prefix = "__global ", smem_prefix = "__local ",
|
||||
half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable",
|
||||
barrier = "barrier(CLK_LOCAL_MEM_FENCE);", float4 = "(float4)",
|
||||
gid = [f'get_global_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)])
|
||||
gid = [f'get_global_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)], uses_vload=True)
|
||||
|
||||
GPUBuffer = Compiled(CLBuffer, CLCodegen, CLProgram)
|
||||
|
||||
Reference in New Issue
Block a user