mirror of
https://github.com/ROCm/ROCm.git
synced 2026-02-21 03:00:39 -05:00
* Simplified `triton.kernel` API to achieve lower latency:
> `.data_ptr()` must now be passed as a kernel argument. No more implicit
conversion from torch.tensor
> compilation options are now constant attributes, i.e., opt.d('VAR')
becomes opt.VAR
> torch.device must now be passed explicitly to triton.kernel (no
longer inferred from torch.tensor arguments)
* C++ tests moved to `python/tests/`
* C++ tutorial created in `tutorials/`
* Python tutorial created in `python/tutorials/`
* Version changed to 1.0alpha
* No longer copying C++ headers into the Python package
* Added `python/triton/ops/` package for pre-written Triton ops
13 lines
279 B
Python
13 lines
279 B
Python
# TODO: torch needs to be imported first
|
|
# or pybind11 shows `munmap_chunk(): invalid pointer`
|
|
import torch
|
|
|
|
# libtriton resources
|
|
import atexit
|
|
import triton._C.libtriton as libtriton
|
|
def cleanup():
    """Invoke ``libtriton.cleanup()``.

    This function is registered with ``atexit`` below, so it is called
    automatically at normal interpreter termination.
    """
    libtriton.cleanup()


# Explicit registration; atexit.register returns the function unchanged,
# so ``cleanup`` stays bound to the function defined above.
atexit.register(cleanup)
|
|
|
|
from .kernel import *
|
|
from . import ops |