mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 23:18:04 -05:00
Refactor GPU and Metal langauges in their own separate renderers (#2033)
* Refactor GPU and Metal langauges in their own separate renderers * remove CStyleLanguage imports * move renderers too
This commit is contained in:
16
tinygrad/renderer/metal.py
Normal file
16
tinygrad/renderer/metal.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import functools
|
||||
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
|
||||
|
||||
class MetalLanguage(CStyleLanguage):
|
||||
kernel_prefix = "#include <metal_stdlib>\nusing namespace metal;\nkernel "
|
||||
buffer_prefix = "device "
|
||||
smem_prefix = "threadgroup "
|
||||
arg_int_prefix = "constant int&"
|
||||
barrier = "threadgroup_barrier(mem_flags::mem_threadgroup);"
|
||||
float4 = "float4"
|
||||
uses_ptr_arithmetic=True
|
||||
gid = [f"gid.{chr(120+i)}" for i in range(3)]
|
||||
lid = [f"lid.{chr(120+i)}" for i in range(3)]
|
||||
extra_args = ['uint3 gid [[threadgroup_position_in_grid]]', 'uint3 lid [[thread_position_in_threadgroup]]']
|
||||
|
||||
MetalRenderer = functools.partial(uops_to_cstyle, MetalLanguage())
|
||||
19
tinygrad/renderer/opencl.py
Normal file
19
tinygrad/renderer/opencl.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import functools
|
||||
from tinygrad.helpers import dtypes
|
||||
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
|
||||
|
||||
type_map = { dtypes.uint8: "uchar", dtypes.uint32: "uint", dtypes.uint64: "ulong" }
|
||||
class OpenCLLanguage(CStyleLanguage):
|
||||
kernel_prefix = "__kernel "
|
||||
buffer_prefix = "__global "
|
||||
smem_align = "__attribute__ ((aligned (16))) "
|
||||
smem_prefix = "__local "
|
||||
arg_int_prefix = "const int"
|
||||
half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
|
||||
barrier = "barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
float4 = "(float4)"
|
||||
gid = [f'get_group_id({i})' for i in range(3)]
|
||||
lid = [f'get_local_id({i})' for i in range(3)]
|
||||
uses_vload=True
|
||||
|
||||
OpenCLRenderer = functools.partial(uops_to_cstyle, OpenCLLanguage())
|
||||
@@ -1,13 +1,13 @@
|
||||
from __future__ import annotations
|
||||
import pathlib, functools
|
||||
import pathlib
|
||||
import numpy as np
|
||||
import pyopencl as cl # type: ignore
|
||||
from typing import Optional, List
|
||||
from tinygrad.helpers import DEBUG, getenv, prod, ImageDType, OSX, fromimport
|
||||
from tinygrad.ops import Compiled
|
||||
from tinygrad.renderer.opencl import OpenCLRenderer
|
||||
from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTransfer
|
||||
from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
|
||||
|
||||
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
|
||||
|
||||
@@ -103,9 +103,4 @@ class CLProgram:
|
||||
return None
|
||||
return None
|
||||
|
||||
renderer = functools.partial(uops_to_cstyle, CStyleLanguage(
|
||||
kernel_prefix = "__kernel ", buffer_prefix = "__global ", smem_align = "__attribute__ ((aligned (16))) ", smem_prefix = "__local ", arg_int_prefix = "const int",
|
||||
half_prekernel = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable",
|
||||
barrier = "barrier(CLK_LOCAL_MEM_FENCE);", float4 = "(float4)",
|
||||
gid = [f'get_group_id({i})' for i in range(3)], lid = [f'get_local_id({i})' for i in range(3)], uses_vload=True))
|
||||
GPUBuffer = Compiled(CLBuffer, LinearizerOptions(), renderer, CLProgram, CL.synchronize)
|
||||
GPUBuffer = Compiled(CLBuffer, LinearizerOptions(), OpenCLRenderer, CLProgram, CL.synchronize)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# pip3 install pyobjc-framework-Metal pyobjc-framework-Cocoa pyobjc-framework-libdispatch
|
||||
import os, subprocess, pathlib, functools, ctypes
|
||||
import os, subprocess, pathlib, ctypes
|
||||
import Metal, Cocoa, libdispatch # type: ignore
|
||||
from typing import List, Any, Tuple
|
||||
from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
|
||||
from tinygrad.helpers import prod, getenv, DEBUG, DType, dtypes
|
||||
from tinygrad.ops import Compiled, ASTRunner, BasicBatchExecutor
|
||||
from tinygrad.renderer.metal import MetalRenderer
|
||||
from tinygrad.runtime.lib import RawBufferMapped, LRUAllocator
|
||||
|
||||
METAL_XCODE = getenv("METAL_XCODE")
|
||||
@@ -100,9 +100,4 @@ class MetalProgram:
|
||||
return command_buffer.GPUEndTime() - command_buffer.GPUStartTime()
|
||||
METAL.mtl_buffers_in_flight.append(command_buffer)
|
||||
|
||||
renderer = functools.partial(uops_to_cstyle, CStyleLanguage(
|
||||
kernel_prefix = "#include <metal_stdlib>\nusing namespace metal;\nkernel ", buffer_prefix = "device ", smem_prefix = "threadgroup ", arg_int_prefix = "constant int&",
|
||||
barrier = "threadgroup_barrier(mem_flags::mem_threadgroup);", float4 = "float4", uses_ptr_arithmetic=True,
|
||||
gid = [f"gid.{chr(120+i)}" for i in range(3)], lid = [f"lid.{chr(120+i)}" for i in range(3)],
|
||||
extra_args = ['uint3 gid [[threadgroup_position_in_grid]]', 'uint3 lid [[thread_position_in_threadgroup]]']))
|
||||
MetalBuffer = Compiled(RawMetalBuffer, LinearizerOptions(device="METAL"), renderer, MetalProgram, METAL.synchronize, MetalBatchExecutor)
|
||||
MetalBuffer = Compiled(RawMetalBuffer, LinearizerOptions(device="METAL"), MetalRenderer, MetalProgram, METAL.synchronize, MetalBatchExecutor)
|
||||
|
||||
Reference in New Issue
Block a user