PTX assembly support (#977)

* ptx assembly

* all ops tests pass

* fix tests
This commit is contained in:
George Hotz
2023-06-13 12:31:42 -07:00
committed by GitHub
parent 727416201f
commit ba4eadb04c
9 changed files with 280 additions and 26 deletions

View File

@@ -2,7 +2,7 @@ from typing import Final, Dict, Callable, ClassVar, List, Optional, NamedTuple,
import math, collections
from tinygrad.codegen.linearizer import Linearizer, UOps, UOp, LocalBuffer, LocalTypes
from tinygrad.ops import ASTRunner, Op, UnaryOps, BinaryOps, FusedOps
from tinygrad.helpers import partition, ImageDType, DEBUG, dtypes, colored, prod
from tinygrad.helpers import partition, ImageDType, DEBUG, dtypes, colored
from tinygrad.runtime.lib import RawConst
from tinygrad.shape.symbolic import DivNode, AndNode, render_python, NumNode, Variable, Node, SumNode, MulNode
from tinygrad.lazy import LazyBuffer
@@ -188,15 +188,7 @@ class CStyleCodegen(Linearizer):
def codegen(self):
self.process()
self.hand_coded_optimizations()
# sometimes there are more dimensions than len(self.lang.gid),
# so compact all the extra dimensions into the first one
# NOTE: this might create multiview shapetrackers
if len(self.lang.gid) and self.first_reduce > len(self.lang.gid):
num_to_merge = (self.first_reduce - len(self.lang.gid))+1
self.reshape_and_permute(lambda x: (prod(x[0:num_to_merge]),)+x[num_to_merge:], None)
if DEBUG >= 4: print("reshaped to", self.full_shape, "due to too many global dimensions")
self.limit_global_dims(len(self.lang.gid))
self.linearize()
prg, global_size, local_size = uops_to_cstyle(self.uops, self.bufs, self.lang)