mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
fix: don't use KITTENS_HOPPER for 4090 (#12954)
This commit is contained in:
@@ -46,9 +46,9 @@ __device__ static inline void arrive(int id) {
|
||||
#include "memory/memory.cuh"
|
||||
#include "shared/shared.cuh"
|
||||
#include "register/register.cuh"
|
||||
#include "mma/mma.cuh"
|
||||
|
||||
#ifdef KITTENS_HOPPER
|
||||
#include "mma/mma.cuh"
|
||||
|
||||
template<int n_reg> __device__ static inline void increase_registers() {
|
||||
static_assert(n_reg % 8 == 0, "n_reg must be a multiple of 8");
|
||||
@@ -93,4 +93,4 @@ __device__ static inline void sync() {
|
||||
using warp = group<1>; // scope used by most pre-Hopper GPUs, and also for most register operations.
|
||||
using warpgroup = group<4>; // special scope commonly used by Hopper and later.
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ from tinygrad.runtime.support.compiler_cuda import pretty_ptx, NVCCCompiler
|
||||
if __name__ == "__main__":
|
||||
code = (pathlib.Path(__file__).parent / "matmul.cu").read_text()
|
||||
device = Device["CUDA"]
|
||||
kitten_args = [f"-I{(pathlib.Path(__file__).parent / 'include').as_posix()}", "-std=c++20", "--expt-relaxed-constexpr", "-DKITTENS_HOPPER"]
|
||||
kitten_args = [f"-I{(pathlib.Path(__file__).parent / 'include').as_posix()}", "-std=c++20", "--expt-relaxed-constexpr"]
|
||||
lib = NVCCCompiler(device.compiler.arch, kitten_args).compile(code)
|
||||
kernel_name = lib.decode().split(".globl\t")[1].split("\n")[0]
|
||||
print("kernel name", kernel_name)
|
||||
|
||||
Reference in New Issue
Block a user