mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
Rebase Triton to LLVM-15. (#1070)
This PR rebases Triton from LLVM-14 to LLVM-15. Most changes are mechanical, except for the analysis framework changes.
This commit is contained in:
@@ -11,7 +11,7 @@
|
||||
|
||||
// CHECK-LABEL: matmul_loop
|
||||
// There shouldn't be any aliasing with the dot op encoding.
|
||||
func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_ptr_init = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
|
||||
%b_ptr_init = tt.broadcast %B : (!tt.ptr<f16>) -> tensor<32x128x!tt.ptr<f16>, #BL>
|
||||
%a_mask = arith.constant dense<true> : tensor<128x32xi1, #AL>
|
||||
@@ -36,7 +36,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B
|
||||
}
|
||||
|
||||
// CHECK-LABEL: alloc
|
||||
func @alloc(%A : !tt.ptr<f16>) {
|
||||
func.func @alloc(%A : !tt.ptr<f16>) {
|
||||
// CHECK: %cst -> %cst
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #AL>
|
||||
@@ -46,7 +46,7 @@ func @alloc(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: convert
|
||||
func @convert(%A : !tt.ptr<f16>) {
|
||||
func.func @convert(%A : !tt.ptr<f16>) {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
// CHECK: %0 -> %0
|
||||
%cst1 = triton_gpu.convert_layout %cst0 : (tensor<16x16xf16, #AL>) -> tensor<16x16xf16, #A_SHARED>
|
||||
@@ -54,7 +54,7 @@ func @convert(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: trans
|
||||
func @trans(%A : !tt.ptr<f16>) {
|
||||
func.func @trans(%A : !tt.ptr<f16>) {
|
||||
// CHECK: %cst -> %cst
|
||||
%tensor = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED>
|
||||
// CHECK: %0 -> %cst
|
||||
@@ -63,7 +63,7 @@ func @trans(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: insert_slice_async
|
||||
func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
|
||||
%mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
|
||||
%other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
@@ -76,7 +76,7 @@ func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: insert_slice
|
||||
func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
|
||||
%mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
|
||||
%other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
@@ -90,7 +90,7 @@ func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: extract_slice
|
||||
func @extract_slice(%A : !tt.ptr<f16>) {
|
||||
func.func @extract_slice(%A : !tt.ptr<f16>) {
|
||||
// CHECK: %cst -> %cst
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED>
|
||||
%index = arith.constant 0 : index
|
||||
@@ -100,7 +100,7 @@ func @extract_slice(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: if_cat
|
||||
func @if_cat(%i1 : i1) {
|
||||
func.func @if_cat(%i1 : i1) {
|
||||
// CHECK: %cst -> %cst
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK: %cst_0 -> %cst_0
|
||||
@@ -119,7 +119,7 @@ func @if_cat(%i1 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: if_alias
|
||||
func @if_alias(%i1 : i1) {
|
||||
func.func @if_alias(%i1 : i1) {
|
||||
// CHECK: %cst -> %cst
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK-NEXT: %cst_0 -> %cst_0
|
||||
@@ -134,7 +134,7 @@ func @if_alias(%i1 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: for
|
||||
func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
// CHECK: %cst -> %cst
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: %cst_0 -> %cst_0
|
||||
@@ -154,7 +154,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.p
|
||||
}
|
||||
|
||||
// CHECK-LABEL: for_if
|
||||
func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
// CHECK: %cst -> %cst
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: %cst_0 -> %cst_0
|
||||
@@ -180,7 +180,7 @@ func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !t
|
||||
}
|
||||
|
||||
// CHECK-LABEL: for_if_for
|
||||
func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
// CHECK: %cst -> %cst
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: %cst_0 -> %cst_0
|
||||
|
||||
@@ -1,288 +1,288 @@
|
||||
// RUN: triton-opt %s -test-print-alignment -split-input-file 2>&1 | FileCheck %s
|
||||
// RUN: triton-opt %s -test-print-alignment -split-input-file -o %t 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: cast
|
||||
func @cast() {
|
||||
// CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [1]
|
||||
// CHECK-LABEL: @cast
|
||||
func.func @cast() {
|
||||
// CHECK: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1
|
||||
%cst = arith.constant 1 : i32
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1
|
||||
%0 = arith.extsi %cst : i32 to i64
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%cst_tensor = arith.constant dense<1> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%1 = tt.bitcast %cst_tensor : tensor<128xi32> -> tensor<128xi64>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: add
|
||||
func @add() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @add
|
||||
func.func @add() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%1 = arith.constant dense<1> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%2 = arith.addi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [127]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 127
|
||||
%3 = arith.constant dense<127> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
|
||||
%4 = arith.addi %1, %3 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: sub
|
||||
func @sub() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @sub
|
||||
func.func @sub() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%1 = arith.constant dense<1> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%2 = arith.subi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [129]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 129
|
||||
%3 = arith.constant dense<129> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
|
||||
%4 = arith.subi %3, %1 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: mul
|
||||
func @mul() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @mul
|
||||
func.func @mul() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%1 = arith.constant dense<1> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%2 = arith.muli %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
|
||||
%3 = arith.constant dense<128> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
|
||||
%4 = arith.muli %3, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [2]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 2
|
||||
%5 = arith.constant dense<2> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [256] ; Constancy: [128] ; ConstantValue: [256]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [256], constancy = [128], constant_value = 256
|
||||
%6 = arith.muli %4, %5 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: div
|
||||
func @div() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @div
|
||||
func.func @div() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%1 = arith.constant dense<1> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%2 = arith.divsi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%3 = arith.divui %1, %0 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
|
||||
%4 = arith.constant dense<64> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [16777216] ; Constancy: [64] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [16777216], constancy = [64], constant_value = <none>
|
||||
%5 = arith.divsi %0, %4 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%6 = arith.divsi %4, %0 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
|
||||
%7 = arith.divsi %4, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [66]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66
|
||||
%8 = arith.constant dense<66> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [2] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [2], constant_value = <none>
|
||||
%9 = arith.divui %0, %8 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [8192] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [8192], constancy = [1], constant_value = <none>
|
||||
%10 = tt.make_range {end = 8320 : i32, start = 8192 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [64] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [64], constant_value = <none>
|
||||
%11 = arith.divsi %10, %4 : tensor<128xi32>
|
||||
return
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: rem
|
||||
func @rem() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @rem
|
||||
func.func @rem() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
|
||||
%1 = arith.constant dense<1> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
|
||||
%2 = arith.remsi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%3 = arith.remui %1, %0 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
|
||||
%4 = arith.constant dense<64> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [64] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [64], divisibility = [64], constancy = [1], constant_value = <none>
|
||||
%5 = arith.remsi %0, %4 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [1], constant_value = <none>
|
||||
%6 = arith.remsi %4, %0 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [66]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66
|
||||
%7 = arith.constant dense<66> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [2] ; Divisibility: [2] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>
|
||||
%8 = arith.remui %0, %7 : tensor<128xi32>
|
||||
return
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: broadcast
|
||||
func @broadcast() {
|
||||
// CHECK: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
|
||||
// CHECK-LABEL: @broadcast
|
||||
func.func @broadcast() {
|
||||
// CHECK: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
|
||||
%0 = arith.constant dense<64> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [64, 1] ; Constancy: [128, 1] ; ConstantValue: [64]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [64, 1], constancy = [128, 1], constant_value = 64
|
||||
%1 = tt.expand_dims %0 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [64, 1] ; Constancy: [128, 128] ; ConstantValue: [64]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [64, 1], constancy = [128, 128], constant_value = 64
|
||||
%2 = tt.broadcast %1 : (tensor<128x1xi32>) -> tensor<128x128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: splat
|
||||
func @splat(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
|
||||
// CHECK: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 128] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @splat
|
||||
func.func @splat(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
|
||||
// CHECK: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>
|
||||
%0 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<128x128x!tt.ptr<f32>>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: cmp
|
||||
func @cmp() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @cmp
|
||||
func.func @cmp() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
|
||||
%1 = arith.constant dense<0> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
|
||||
%2 = arith.cmpi eq, %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
|
||||
%3 = arith.cmpi slt, %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%4 = arith.cmpi sle, %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
|
||||
%5 = arith.cmpi sge, %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
|
||||
%6 = arith.constant dense<8> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
|
||||
%7 = arith.cmpi sgt, %0, %6 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 0
|
||||
%8 = arith.cmpi sgt, %1, %6 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: logic
|
||||
func @logic() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @logic
|
||||
func.func @logic() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
|
||||
%1 = arith.constant dense<64> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [16777216] ; Constancy: [64] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [16777216], constancy = [64], constant_value = <none>
|
||||
%2 = arith.divsi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
|
||||
%3 = arith.constant dense<8> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [134217728] ; Constancy: [8] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [134217728], constancy = [8], constant_value = <none>
|
||||
%4 = arith.divsi %0, %3 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%5 = arith.andi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%6 = arith.ori %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%7 = arith.xori %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
|
||||
%8 = arith.andi %2, %4 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
|
||||
%9 = arith.ori %2, %4 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
|
||||
%10 = arith.xori %2, %4 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: select
|
||||
func @select() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @select
|
||||
func.func @select() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
|
||||
%1 = arith.constant dense<0> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
|
||||
%2 = arith.cmpi eq, %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
|
||||
%3 = arith.cmpi slt, %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [1] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0
|
||||
%4 = arith.constant 0 : i1
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
|
||||
%7 = tt.splat %4 : (i1) -> tensor<128xi1>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
|
||||
%5 = select %4, %3, %7 : tensor<128xi1>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
|
||||
%5 = arith.select %4, %3, %7 : tensor<128xi1>
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
|
||||
%8 = "triton_gpu.select"(%7, %3, %2) : (tensor<128xi1>, tensor<128xi1>, tensor<128xi1>) -> tensor<128xi1>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @shift() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
func.func @shift() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
|
||||
%1 = arith.constant dense<8> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4] ; Constancy: [128] ; ConstantValue: [4]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4
|
||||
%2 = arith.constant dense<4> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [274877906944] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [274877906944], constancy = [1], constant_value = <none>
|
||||
%3 = arith.shli %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [67108864] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [67108864], constancy = [1], constant_value = <none>
|
||||
%4 = arith.shrsi %0, %2 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
|
||||
%5 = arith.shli %1, %2 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @max_min() {
|
||||
// CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
func.func @max_min() {
|
||||
// CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [64], constancy = [1], constant_value = <none>
|
||||
%1 = tt.make_range {end = 192 : i32, start = 64 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%2 = arith.maxsi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%3 = arith.minsi %0, %1 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
|
||||
%4 = arith.constant dense<8> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4] ; Constancy: [128] ; ConstantValue: [4]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4
|
||||
%5 = arith.constant dense<4> : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [8]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 8
|
||||
%6 = arith.maxsi %4, %5 : tensor<128xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: for
|
||||
func @for() {
|
||||
// CHECK: Contiguity: [1, 1] ; Divisibility: [4611686018427387904, 4611686018427387904] ; Constancy: [128, 32] ; ConstantValue: [0]
|
||||
// CHECK-LABEL: @for
|
||||
func.func @for() {
|
||||
// CHECK: contiguity = [1, 1], divisibility = [4611686018427387904, 4611686018427387904], constancy = [128, 32], constant_value = 0
|
||||
%a_init = arith.constant dense<0> : tensor<128x32xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [1]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = 1
|
||||
%b_init = arith.constant dense<1> : tensor<128x32xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [4, 4] ; Constancy: [128, 32] ; ConstantValue: [4]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4
|
||||
%c_init = arith.constant dense<4> : tensor<128x32xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128
|
||||
%ub = arith.constant 128 : index
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [1] ; ConstantValue: [0]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0
|
||||
%lb = arith.constant 0 : index
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [16]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [1], constant_value = 16
|
||||
%step = arith.constant 16 : index
|
||||
%a, %b, %c = scf.for %iv = %lb to %ub step %step iter_args(%a = %a_init, %b = %b_init, %c = %c_init) -> (tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>) {
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>
|
||||
%t = arith.index_cast %iv : index to i32
|
||||
// CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [None]
|
||||
// CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [None]
|
||||
// CHECK: Contiguity: [1, 1] ; Divisibility: [4, 4] ; Constancy: [128, 32] ; ConstantValue: [4]
|
||||
// CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>
|
||||
// CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>
|
||||
// CHECK: contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4
|
||||
scf.yield %b, %a, %c : tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>
|
||||
}
|
||||
return
|
||||
@@ -290,53 +290,53 @@ func @for() {
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: permute_2d
|
||||
func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) {
|
||||
// CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 128] ; ConstantValue: [1]
|
||||
// CHECK-LABEL: @permute_2d
|
||||
func.func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) {
|
||||
// CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 128], constant_value = 1
|
||||
%cst = arith.constant dense<true> : tensor<128x128xi1>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>
|
||||
%cst_0 = arith.constant dense<0.000000e+00> : tensor<128x128xf32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%1 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [1073741824, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = <none>
|
||||
%2 = tt.expand_dims %0 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>
|
||||
%3 = tt.splat %arg1 : (i32) -> tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [17179869184, 16] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [17179869184, 16], constancy = [1, 1], constant_value = <none>
|
||||
%4 = arith.muli %2, %3 : tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>
|
||||
%5 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<128x1x!tt.ptr<f32>>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>
|
||||
%6 = tt.addptr %5, %4 : tensor<128x1x!tt.ptr<f32>>, tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>
|
||||
%7 = tt.expand_dims %1 {axis = 0 : i32}: (tensor<128xi32>) -> tensor<1x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = <none>
|
||||
%8 = tt.broadcast %6 : (tensor<128x1x!tt.ptr<f32>>) -> tensor<128x128x!tt.ptr<f32>>
|
||||
// CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [128, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [128, 1], constant_value = <none>
|
||||
%9 = tt.broadcast %7 : (tensor<1x128xi32>) -> tensor<128x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 16] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 16], constancy = [1, 1], constant_value = <none>
|
||||
%10 = tt.addptr %8, %9 : tensor<128x128x!tt.ptr<f32>>, tensor<128x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [1073741824, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = <none>
|
||||
%11 = tt.expand_dims %0 {axis = 1 : i32}: (tensor<128xi32>) -> tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>
|
||||
%12 = tt.splat %arg2 : (!tt.ptr<f32>) -> tensor<128x1x!tt.ptr<f32>>
|
||||
// CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = <none>
|
||||
%13 = tt.addptr %12, %11 : tensor<128x1x!tt.ptr<f32>>, tensor<128x1xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>
|
||||
%14 = tt.expand_dims %1 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = <none>
|
||||
%15 = tt.splat %arg3 : (i32) -> tensor<1x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 17179869184] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 17179869184], constancy = [1, 1], constant_value = <none>
|
||||
%16 = arith.muli %14, %15 : tensor<1x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 128], constant_value = <none>
|
||||
%17 = tt.broadcast %13 : (tensor<128x1x!tt.ptr<f32>>) -> tensor<128x128x!tt.ptr<f32>>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 17179869184] ; Constancy: [128, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 17179869184], constancy = [128, 1], constant_value = <none>
|
||||
%18 = tt.broadcast %16 : (tensor<1x128xi32>) -> tensor<128x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = <none>
|
||||
%19 = tt.addptr %17, %18 : tensor<128x128x!tt.ptr<f32>>, tensor<128x128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>
|
||||
%20 = tt.load %10, %cst, %cst_0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>
|
||||
tt.store %19, %20, %cst : tensor<128x128xf32>
|
||||
return
|
||||
@@ -347,29 +347,29 @@ func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {t
|
||||
module {
|
||||
|
||||
// This is a tiny test for verifying StoreOp-related alignment, It simply store a constant to a buffer.
|
||||
// CHECK-LABEL: store_constant_align
|
||||
func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) {
|
||||
// CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-LABEL: @store_constant_align
|
||||
func.func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) {
|
||||
// CHECK: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%pid = tt.get_program_id {axis = 0 : i32} : i32
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [128]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128
|
||||
%c128_i32 = arith.constant 128 : i32
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = <none>
|
||||
%1 = arith.muli %pid, %c128_i32 : i32
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
|
||||
%2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = <none>
|
||||
%3 = tt.splat %1 : (i32) -> tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [128], constancy = [1], constant_value = <none>
|
||||
%4 = arith.addi %3, %2 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [128], constant_value = <none>
|
||||
%5 = tt.splat %addr : (!tt.ptr<f32>) -> tensor<128x!tt.ptr<f32>>
|
||||
// CHECK-NEXT: Contiguity: [128] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [128], divisibility = [16], constancy = [1], constant_value = <none>
|
||||
%6 = tt.addptr %5, %4 : tensor<128x!tt.ptr<f32>>, tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [128] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [128], constant_value = <none>
|
||||
%9 = tt.splat %n : (i32) -> tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [16] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>
|
||||
%mask = arith.cmpi slt, %4, %9 : tensor<128xi32>
|
||||
// CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
|
||||
// CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%cst = arith.constant dense<0.0> : tensor<128xf32>
|
||||
tt.store %5, %cst, %mask : tensor<128xf32>
|
||||
return
|
||||
@@ -381,8 +381,8 @@ func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n:
|
||||
|
||||
// This IR is dumped from vecadd test.
|
||||
// Note, the hint {tt.divisibility = 16 : i32} for %n_elements affects the alignment of mask.
|
||||
// CHECK-LABEL: vecadd_mask_align_16
|
||||
func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) {
|
||||
// CHECK-LABEL: @vecadd_mask_align_16
|
||||
func.func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) {
|
||||
%c64_i32 = arith.constant 64 : i32
|
||||
%0 = tt.get_program_id {axis = 0 : i32} : i32
|
||||
%1 = arith.muli %0, %c64_i32 : i32
|
||||
@@ -394,13 +394,13 @@ func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %ar
|
||||
%7 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<64x!tt.ptr<f32>>
|
||||
%8 = tt.addptr %7, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
|
||||
%9 = tt.splat %n_elements : (i32) -> tensor<64xi32>
|
||||
// CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [16] ; ConstantValue: [None] ( %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : tensor<64xi32> )
|
||||
// CHECK: arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>
|
||||
%mask = arith.cmpi slt, %4, %9 : tensor<64xi32>
|
||||
%11 = tt.load %6, %mask {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
|
||||
%12 = tt.load %8, %mask {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
|
||||
%13 = arith.addf %11, %12 : tensor<64xf32>
|
||||
%14 = tt.splat %arg2 : (!tt.ptr<f32>) -> tensor<64x!tt.ptr<f32>>
|
||||
// CHECK: Contiguity: [64] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None] ( %{{.*}} = tt.addptr %{{.*}}, %{{.*}} : tensor<64x!tt.ptr<f32>>, tensor<64xi32> )
|
||||
// CHECK: tt.addptr %{{.*}} => contiguity = [64], divisibility = [16], constancy = [1], constant_value = <none>
|
||||
%15 = tt.addptr %14, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
|
||||
tt.store %15, %13, %mask : tensor<64xf32>
|
||||
return
|
||||
@@ -410,8 +410,8 @@ func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %ar
|
||||
|
||||
// This IR is dumped from vecadd test.
|
||||
// Note, there is no divisibility hint for %n_elements, Triton should assume its divisibility to be 1 by default.
|
||||
// CHECK-LABEL: vecadd_mask_align_1
|
||||
func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32) {
|
||||
// CHECK-LABEL: @vecadd_mask_align_1
|
||||
func.func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32) {
|
||||
%c64_i32 = arith.constant 64 : i32
|
||||
%0 = tt.get_program_id {axis = 0 : i32} : i32
|
||||
%1 = arith.muli %0, %c64_i32 : i32
|
||||
@@ -423,7 +423,7 @@ func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg
|
||||
%7 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<64x!tt.ptr<f32>>
|
||||
%8 = tt.addptr %7, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
|
||||
%9 = tt.splat %n_elements : (i32) -> tensor<64xi32>
|
||||
// CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] ( %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : tensor<64xi32> )
|
||||
// CHECK: arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
|
||||
%10 = arith.cmpi slt, %4, %9 : tensor<64xi32>
|
||||
%11 = tt.load %6, %10 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
|
||||
%12 = tt.load %8, %10 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
module attributes {"triton_gpu.num-warps" = 4 : i32} {
|
||||
|
||||
// CHECK-LABEL: matmul_loop
|
||||
func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_ptr_init = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
|
||||
%b_ptr_init = tt.broadcast %B : (!tt.ptr<f16>) -> tensor<32x128x!tt.ptr<f16>, #BL>
|
||||
|
||||
@@ -46,7 +46,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B
|
||||
|
||||
// Shared memory is available after a tensor's liveness range ends
|
||||
// CHECK-LABEL: reusable
|
||||
func @reusable(%A : !tt.ptr<f16>) {
|
||||
func.func @reusable(%A : !tt.ptr<f16>) {
|
||||
%cst1 = arith.constant dense<true> : tensor<128x32xi1, #AL>
|
||||
%cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL>
|
||||
%cst3 = arith.constant dense<true> : tensor<32x128xi1, #AL>
|
||||
@@ -78,7 +78,7 @@ func @reusable(%A : !tt.ptr<f16>) {
|
||||
// %cst1->%cst4
|
||||
// %cst3->%g->%h->%i
|
||||
// CHECK-LABEL: preallocate
|
||||
func @preallocate(%A : !tt.ptr<f16>) {
|
||||
func.func @preallocate(%A : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 512
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 1024, size = 512
|
||||
@@ -113,7 +113,7 @@ func @preallocate(%A : !tt.ptr<f16>) {
|
||||
|
||||
// Unused tensors are immediately released
|
||||
// CHECK-LABEL: unused
|
||||
func @unused(%A : !tt.ptr<f16>) {
|
||||
func.func @unused(%A : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 1024
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<32x16xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 0, size = 512
|
||||
@@ -128,7 +128,7 @@ func @unused(%A : !tt.ptr<f16>) {
|
||||
|
||||
// cst0 is alive through the entire function, it cannot be released before the end of the function
|
||||
// CHECK-LABEL: longlive
|
||||
func @longlive(%A : !tt.ptr<f16>) {
|
||||
func.func @longlive(%A : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 512
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 512, size = 512
|
||||
@@ -156,7 +156,7 @@ func @longlive(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: alloc
|
||||
func @alloc(%A : !tt.ptr<f16>) {
|
||||
func.func @alloc(%A : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 512
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #AL>
|
||||
@@ -167,7 +167,7 @@ func @alloc(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: scratch
|
||||
func @scratch() {
|
||||
func.func @scratch() {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
// CHECK: scratch offset = 0, size = 512
|
||||
%b = tt.reduce %cst0 {redOp = 1 : i32, axis = 0 : i32} : tensor<16x16xf16, #AL> -> tensor<16xf16, #sliceAd0>
|
||||
@@ -176,7 +176,7 @@ func @scratch() {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: trans
|
||||
func @trans(%A : !tt.ptr<f16>) {
|
||||
func.func @trans(%A : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 1024
|
||||
%tensor = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED>
|
||||
%b = tt.trans %tensor : (tensor<16x32xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED_T>
|
||||
@@ -184,7 +184,7 @@ func @trans(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: insert_slice_async
|
||||
func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
|
||||
%mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
|
||||
%other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
@@ -197,7 +197,7 @@ func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: extract_slice
|
||||
func @extract_slice(%A : !tt.ptr<f16>) {
|
||||
func.func @extract_slice(%A : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 512
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED>
|
||||
%index = arith.constant 0 : index
|
||||
@@ -209,7 +209,7 @@ func @extract_slice(%A : !tt.ptr<f16>) {
|
||||
// B0 -> (B1) -> B0
|
||||
// Memory used by B1 can be reused by B0.
|
||||
// CHECK-LABEL: if
|
||||
func @if(%i1 : i1) {
|
||||
func.func @if(%i1 : i1) {
|
||||
// CHECK: offset = 0, size = 512
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 512, size = 512
|
||||
@@ -233,7 +233,7 @@ func @if(%i1 : i1) {
|
||||
// B0 -> (B1) -> (B2) -> B0
|
||||
// Memory used by B0 cannot be reused by B1 or B2.
|
||||
// CHECK-LABEL: if_else
|
||||
func @if_else(%i1 : i1) {
|
||||
func.func @if_else(%i1 : i1) {
|
||||
// CHECK: offset = 0, size = 512
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 512, size = 512
|
||||
@@ -260,7 +260,7 @@ func @if_else(%i1 : i1) {
|
||||
// Block arguments and yields are memory aliases that do not trigger a new
|
||||
// allocation.
|
||||
// CHECK-LABEL: for
|
||||
func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
// CHECK: offset = 0, size = 8192
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 8192, size = 8192
|
||||
@@ -275,7 +275,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.p
|
||||
}
|
||||
|
||||
// CHECK-LABEL: for_if_slice
|
||||
func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
// CHECK: offset = 0, size = 8192
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 8192, size = 8192
|
||||
@@ -296,7 +296,7 @@ func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %
|
||||
|
||||
// c0 cannot be released in the loop
|
||||
// CHECK-LABEL: for_use_ancestor
|
||||
func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
// CHECK: offset = 0, size = 8192
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 8192, size = 8192
|
||||
@@ -316,7 +316,7 @@ func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16
|
||||
// a_shared_init, b_shared_init, and c_shared_init's liveness ranges are span over the entire function before cst2.
|
||||
// So they cannot be reused by cst0 and cst1, but can be reused by cst2.
|
||||
// CHECK-LABEL: for_if_for
|
||||
func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
|
||||
// CHECK: offset = 0, size = 8192
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: offset = 8192, size = 8192
|
||||
|
||||
@@ -14,7 +14,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} {
|
||||
|
||||
// CHECK-LABEL: matmul_loop
|
||||
// There shouldn't be any membar with the dot op encoding.
|
||||
func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_ptr_init = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
|
||||
%b_ptr_init = tt.broadcast %B : (!tt.ptr<f16>) -> tensor<32x128x!tt.ptr<f16>, #BL>
|
||||
|
||||
@@ -42,7 +42,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B
|
||||
}
|
||||
|
||||
// CHECK-LABEL: raw_single_block
|
||||
func @raw_single_block(%A : !tt.ptr<f16>) {
|
||||
func.func @raw_single_block(%A : !tt.ptr<f16>) {
|
||||
%cst1 = arith.constant dense<true> : tensor<128x32xi1, #AL>
|
||||
%cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL>
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
|
||||
@@ -54,7 +54,7 @@ func @raw_single_block(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: war_single_block
|
||||
func @war_single_block(%A : !tt.ptr<f16>) {
|
||||
func.func @war_single_block(%A : !tt.ptr<f16>) {
|
||||
%cst1 = arith.constant dense<true> : tensor<128x32xi1, #AL>
|
||||
%cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL>
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
|
||||
@@ -70,7 +70,7 @@ func @war_single_block(%A : !tt.ptr<f16>) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: scratch
|
||||
func @scratch() {
|
||||
func.func @scratch() {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK: Membar 1
|
||||
%a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED>
|
||||
@@ -81,7 +81,7 @@ func @scratch() {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: async_wait
|
||||
func @async_wait() {
|
||||
func.func @async_wait() {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
// CHECK: Membar 1
|
||||
%a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED>
|
||||
@@ -92,7 +92,7 @@ func @async_wait() {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: alloc
|
||||
func @alloc() {
|
||||
func.func @alloc() {
|
||||
%cst0 = triton_gpu.alloc_tensor : tensor<16x16xf16, #A_SHARED>
|
||||
%a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED>
|
||||
// CHECK: Membar 2
|
||||
@@ -101,7 +101,7 @@ func @alloc() {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: extract_slice
|
||||
func @extract_slice() {
|
||||
func.func @extract_slice() {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED>
|
||||
%index = arith.constant 0 : index
|
||||
%cst1 = tensor.extract_slice %cst0[%index, 0, 0][1, 16, 16][1, 1, 1] : tensor<1x16x16xf16, #A_SHARED> to tensor<16x16xf16, #A_SHARED>
|
||||
@@ -113,14 +113,14 @@ func @extract_slice() {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: trans
|
||||
func @trans() {
|
||||
func.func @trans() {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED>
|
||||
%b = tt.trans %cst0 : (tensor<16x32xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED_T>
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: insert_slice_async
|
||||
func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
|
||||
%mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
|
||||
%other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
@@ -135,7 +135,7 @@ func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: insert_slice
|
||||
func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
func.func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
%a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
|
||||
%mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
|
||||
%other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
|
||||
@@ -153,7 +153,7 @@ func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
|
||||
|
||||
// If branch inserted a barrier for %cst0 and %cst1, but else didn't, then the barrier should be inserted in the parent region
|
||||
// CHECK-LABEL: multi_blocks
|
||||
func @multi_blocks(%i1 : i1) {
|
||||
func.func @multi_blocks(%i1 : i1) {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
scf.if %i1 {
|
||||
@@ -174,7 +174,7 @@ func @multi_blocks(%i1 : i1) {
|
||||
|
||||
// Both branches inserted a barrier for %cst0 and %cst1, then the barrier doesn't need to be inserted in the parent region
|
||||
// CHECK-LABEL: multi_blocks_join_barrier
|
||||
func @multi_blocks_join_barrier(%i1 : i1) {
|
||||
func.func @multi_blocks_join_barrier(%i1 : i1) {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
scf.if %i1 {
|
||||
@@ -192,7 +192,7 @@ func @multi_blocks_join_barrier(%i1 : i1) {
|
||||
|
||||
// Read yielded tensor requires a barrier
|
||||
// CHECK-LABEL: multi_blocks_yield
|
||||
func @multi_blocks_yield(%i1 : i1) {
|
||||
func.func @multi_blocks_yield(%i1 : i1) {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%a = scf.if %i1 -> (tensor<32x16xf16, #A_SHARED>) {
|
||||
@@ -212,7 +212,7 @@ func @multi_blocks_yield(%i1 : i1) {
|
||||
|
||||
// Conservatively add a barrier as if the branch (%i1) is never taken
|
||||
// CHECK-LABEL: multi_blocks_noelse
|
||||
func @multi_blocks_noelse(%i1 : i1) {
|
||||
func.func @multi_blocks_noelse(%i1 : i1) {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
scf.if %i1 {
|
||||
@@ -226,7 +226,7 @@ func @multi_blocks_noelse(%i1 : i1) {
|
||||
|
||||
// Conservatively add a barrier as if the branch (%i2) is never taken
|
||||
// CHECK-LABEL: multi_blocks_nested_scf
|
||||
func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) {
|
||||
func.func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) {
|
||||
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
%cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
|
||||
scf.if %i1 {
|
||||
@@ -247,7 +247,7 @@ func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) {
|
||||
}
|
||||
|
||||
// CHECK-LABEL: for
|
||||
func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
%b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
%c_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
@@ -262,7 +262,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.p
|
||||
// Although a_shared and b_shared are synced before entering the loop,
|
||||
// they are reassociated with aliases (c_shared) and thus require a barrier.
|
||||
// CHECK-LABEL: for_alias
|
||||
func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
%b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: Membar 2
|
||||
@@ -282,7 +282,7 @@ func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :
|
||||
// Although cst2 is not an argument of scf.yield, its memory is reused by cst1.
|
||||
// So we need a barrier both before and after cst1
|
||||
// CHECK-LABEL: for_reuse
|
||||
func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
%b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: Membar 2
|
||||
@@ -302,7 +302,7 @@ func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :
|
||||
|
||||
|
||||
// CHECK-LABEL: for_reuse_nested
|
||||
func @for_reuse_nested(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
func.func @for_reuse_nested(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
|
||||
%a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
%b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
|
||||
// CHECK-NEXT: Membar 2
|
||||
|
||||
Reference in New Issue
Block a user