Files
ROCm/python/triton/language/__init__.py
peterbell10 deb2c71fb4 [FRONTEND] Add tl.expand_dims (#1614)
This exposes `semantic.expand_dims` in the public API and builds upon it
with support for expanding multiple dimensions at once, e.g.:
```python
tl.expand_dims(tl.arange(0, N), (0, -1))  # shape = [1, N, 1]
```

Compared to indexing with `None`, this API is useful because the
dimensions can be constexpr values rather than hard-coded into the
source. As a basic example:
```python
@triton.jit
def max_keepdim(value, dim):
    res = tl.max(value, dim)
    return tl.expand_dims(res, dim)
```
2023-05-04 09:46:24 -07:00

202 lines
2.8 KiB
Python

"""isort:skip_file"""
# Import order is significant here.
from . import math
from . import extra
from .standard import (
cdiv,
sigmoid,
softmax,
ravel,
swizzle2d,
zeros,
zeros_like,
)
from .core import (
abs,
advance,
arange,
argmin,
argmax,
atomic_add,
atomic_and,
atomic_cas,
atomic_max,
atomic_min,
atomic_or,
atomic_xchg,
atomic_xor,
bfloat16,
block_type,
broadcast,
broadcast_to,
cat,
constexpr,
cos,
debug_barrier,
device_assert,
device_print,
dot,
dtype,
exp,
expand_dims,
full,
fdiv,
float16,
float32,
float64,
float8e4,
float8e5,
function_type,
int1,
int16,
int32,
int64,
int8,
load,
log,
make_block_ptr,
max,
max_contiguous,
maximum,
min,
minimum,
multiple_of,
num_programs,
pi32_t,
pointer_type,
program_id,
reduce,
reshape,
sin,
sqrt,
static_assert,
static_print,
store,
sum,
static_range,
tensor,
trans,
triton,
uint16,
uint32,
uint64,
uint8,
umulhi,
view,
void,
where,
xor_sum,
)
from .random import (
pair_uniform_to_normal,
philox,
philox_impl,
rand,
rand4x,
randint,
randint4x,
randn,
randn4x,
uint32_to_uniform_float,
)
# Names exported by a star-import of this package.
#
# Every entry must be bound in this module by one of the imports above:
# a star-import raises AttributeError for any listed name that is not an
# attribute of the package. Keep this list in sync with the import blocks
# whenever a name is added or removed there.
__all__ = [
    "abs",
    "advance",
    "arange",
    "argmin",
    "argmax",
    "atomic_add",
    "atomic_and",
    "atomic_cas",
    "atomic_max",
    "atomic_min",
    "atomic_or",
    "atomic_xchg",
    "atomic_xor",
    "bfloat16",
    "block_type",
    "broadcast",
    "broadcast_to",
    "cat",
    "cdiv",
    "constexpr",
    "cos",
    "debug_barrier",
    "device_assert",
    "device_print",
    "dot",
    "dtype",
    "exp",
    "expand_dims",
    "extra",
    "fdiv",
    "float16",
    "float32",
    "float64",
    "float8e4",
    "float8e5",
    "full",
    "function_type",
    "int1",
    "int16",
    "int32",
    "int64",
    "int8",
    "math",
    "load",
    "log",
    "make_block_ptr",
    "max",
    "max_contiguous",
    "maximum",
    "min",
    "minimum",
    "multiple_of",
    "num_programs",
    "pair_uniform_to_normal",
    "philox",
    "philox_impl",
    "pi32_t",
    "pointer_type",
    "program_id",
    "rand",
    "rand4x",
    "randint",
    "randint4x",
    "randn",
    "randn4x",
    "ravel",
    "reduce",
    "reshape",
    "sigmoid",
    "sin",
    "softmax",
    "sqrt",
    "static_range",
    "static_assert",
    "static_print",
    "store",
    "sum",
    "swizzle2d",
    "tensor",
    "trans",
    "triton",
    "uint16",
    "uint32",
    "uint32_to_uniform_float",
    "uint64",
    "uint8",
    "umulhi",
    "view",
    "void",
    "where",
    "xor_sum",
    "zeros",
    "zeros_like",
]