Rebase Triton to LLVM-15. (#1070)

This PR rebases Triton from LLVM-14 to LLVM-15. Most changes are mechanical, except for the analysis framework changes.
2026-04-05 03:01:17 -04:00 · 2023-02-16 15:40:53 +01:00
parent f21e76affe
commit fc7a8e3581
78 changed files with 807 additions and 741 deletions
--- a/test/Analysis/test-alias.mlir
+++ b/test/Analysis/test-alias.mlir
@@ -11,7 +11,7 @@

 // CHECK-LABEL: matmul_loop
 // There shouldn't be any aliasing with the dot op encoding.
-func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_ptr_init = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
  %b_ptr_init = tt.broadcast %B : (!tt.ptr<f16>) -> tensor<32x128x!tt.ptr<f16>, #BL>
  %a_mask = arith.constant dense<true> : tensor<128x32xi1, #AL>
@@ -36,7 +36,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B
 }

 // CHECK-LABEL: alloc
-func @alloc(%A : !tt.ptr<f16>) {
+func.func @alloc(%A : !tt.ptr<f16>) {
  // CHECK: %cst -> %cst
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #AL>
@@ -46,7 +46,7 @@ func @alloc(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: convert
-func @convert(%A : !tt.ptr<f16>) {
+func.func @convert(%A : !tt.ptr<f16>) {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
  // CHECK: %0 -> %0
  %cst1 = triton_gpu.convert_layout %cst0 : (tensor<16x16xf16, #AL>) -> tensor<16x16xf16, #A_SHARED>
@@ -54,7 +54,7 @@ func @convert(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: trans
-func @trans(%A : !tt.ptr<f16>) {
+func.func @trans(%A : !tt.ptr<f16>) {
  // CHECK: %cst -> %cst
  %tensor = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED>
  // CHECK: %0 -> %cst
@@ -63,7 +63,7 @@ func @trans(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: insert_slice_async
-func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
+func.func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
  %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
  %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
@@ -76,7 +76,7 @@ func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
 }

 // CHECK-LABEL: insert_slice
-func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
+func.func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
  %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
  %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
@@ -90,7 +90,7 @@ func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
 }

 // CHECK-LABEL: extract_slice
-func @extract_slice(%A : !tt.ptr<f16>) {
+func.func @extract_slice(%A : !tt.ptr<f16>) {
  // CHECK: %cst -> %cst
  %cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED>
  %index = arith.constant 0 : index
@@ -100,7 +100,7 @@ func @extract_slice(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: if_cat
-func @if_cat(%i1 : i1) {
+func.func @if_cat(%i1 : i1) {
  // CHECK: %cst -> %cst
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK: %cst_0 -> %cst_0
@@ -119,7 +119,7 @@ func @if_cat(%i1 : i1) {
 }

 // CHECK-LABEL: if_alias
-func @if_alias(%i1 : i1) {
+func.func @if_alias(%i1 : i1) {
  // CHECK: %cst -> %cst
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK-NEXT: %cst_0 -> %cst_0
@@ -134,7 +134,7 @@ func @if_alias(%i1 : i1) {
 }

 // CHECK-LABEL: for
-func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  // CHECK: %cst -> %cst
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: %cst_0 -> %cst_0
@@ -154,7 +154,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.p
 }

 // CHECK-LABEL: for_if
-func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
+func.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
  // CHECK: %cst -> %cst
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: %cst_0 -> %cst_0
@@ -180,7 +180,7 @@ func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !t
 }

 // CHECK-LABEL: for_if_for
-func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
+func.func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
  // CHECK: %cst -> %cst
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: %cst_0 -> %cst_0
--- a/test/Analysis/test-alignment.mlir
+++ b/test/Analysis/test-alignment.mlir
@@ -1,288 +1,288 @@
-// RUN: triton-opt %s -test-print-alignment -split-input-file 2>&1 | FileCheck %s
+// RUN: triton-opt %s -test-print-alignment -split-input-file -o %t 2>&1 | FileCheck %s

-// CHECK-LABEL: cast
-func @cast() {
-  // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [1]
+// CHECK-LABEL: @cast
+func.func @cast() {
+  // CHECK: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1
  %cst = arith.constant 1 : i32
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1
  %0 = arith.extsi %cst : i32 to i64
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %cst_tensor = arith.constant dense<1> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %1 = tt.bitcast %cst_tensor : tensor<128xi32> -> tensor<128xi64>
  return
 }

 // -----

-// CHECK-LABEL: add
-func @add() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @add
+func.func @add() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %1 = arith.constant dense<1> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1], constancy = [1], constant_value = <none>
  %2 = arith.addi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [127]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 127
  %3 = arith.constant dense<127> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
  %4 = arith.addi %1, %3 : tensor<128xi32>
  return
 }

 // -----

-// CHECK-LABEL: sub
-func @sub() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @sub
+func.func @sub() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %1 = arith.constant dense<1> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1], constancy = [1], constant_value = <none>
  %2 = arith.subi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [129]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 129
  %3 = arith.constant dense<129> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
  %4 = arith.subi %3, %1 : tensor<128xi32>
  return
 }

 // -----

-// CHECK-LABEL: mul
-func @mul() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @mul
+func.func @mul() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %1 = arith.constant dense<1> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %2 = arith.muli %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
  %3 = arith.constant dense<128> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
  %4 = arith.muli %3, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [2]
+  // CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 2
  %5 = arith.constant dense<2> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [256] ; Constancy: [128] ; ConstantValue: [256]
+  // CHECK-NEXT: contiguity = [1], divisibility = [256], constancy = [128], constant_value = 256
  %6 = arith.muli %4, %5 : tensor<128xi32>
  return
 }

 // -----

-// CHECK-LABEL: div
-func @div() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @div
+func.func @div() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %1 = arith.constant dense<1> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %2 = arith.divsi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %3 = arith.divui %1, %0 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
+  // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
  %4 = arith.constant dense<64> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16777216] ; Constancy: [64] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [16777216], constancy = [64], constant_value = <none>
  %5 = arith.divsi %0, %4 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %6 = arith.divsi %4, %0 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
+  // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
  %7 = arith.divsi %4, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [66]
+  // CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66
  %8 = arith.constant dense<66> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [2] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [2], constant_value = <none>
  %9 = arith.divui %0, %8 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [8192] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [8192], constancy = [1], constant_value = <none>
  %10 = tt.make_range {end = 8320 : i32, start = 8192 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [64] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [64], constant_value = <none>
  %11 = arith.divsi %10, %4 : tensor<128xi32>
-  return 
+  return
 }

 // -----

-// CHECK-LABEL: rem
-func @rem() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @rem
+func.func @rem() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1
  %1 = arith.constant dense<1> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
  %2 = arith.remsi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %3 = arith.remui %1, %0 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
+  // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
  %4 = arith.constant dense<64> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [64] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [64], divisibility = [64], constancy = [1], constant_value = <none>
  %5 = arith.remsi %0, %4 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [1], constant_value = <none>
  %6 = arith.remsi %4, %0 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [66]
+  // CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66
  %7 = arith.constant dense<66> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [2] ; Divisibility: [2] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>
  %8 = arith.remui %0, %7 : tensor<128xi32>
-  return 
+  return
 }

 // -----

-// CHECK-LABEL: broadcast
-func @broadcast() {
-  // CHECK: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
+// CHECK-LABEL: @broadcast
+func.func @broadcast() {
+  // CHECK: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
  %0 = arith.constant dense<64> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [64, 1] ; Constancy: [128, 1] ; ConstantValue: [64]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [64, 1], constancy = [128, 1], constant_value = 64
  %1 = tt.expand_dims %0 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [64, 1] ; Constancy: [128, 128] ; ConstantValue: [64]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [64, 1], constancy = [128, 128], constant_value = 64
  %2 = tt.broadcast %1 : (tensor<128x1xi32>) -> tensor<128x128xi32>
  return
 }

 // -----

-// CHECK-LABEL: splat
-func @splat(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
-  // CHECK: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 128] ; ConstantValue: [None]
+// CHECK-LABEL: @splat
+func.func @splat(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
+  // CHECK: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = <none>
  %0 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<128x128x!tt.ptr<f32>>
  return
 }

 // -----

-// CHECK-LABEL: cmp
-func @cmp() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @cmp
+func.func @cmp() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
  %1 = arith.constant dense<0> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
  %2 = arith.cmpi eq, %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
  %3 = arith.cmpi slt, %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %4 = arith.cmpi sle, %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
  %5 = arith.cmpi sge, %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
+  // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
  %6 = arith.constant dense<8> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
  %7 = arith.cmpi sgt, %0, %6 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 0
  %8 = arith.cmpi sgt, %1, %6 : tensor<128xi32>
  return
 }

 // -----

-// CHECK-LABEL: logic
-func @logic() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @logic
+func.func @logic() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64]
+  // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64
  %1 = arith.constant dense<64> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16777216] ; Constancy: [64] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [16777216], constancy = [64], constant_value = <none>
  %2 = arith.divsi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
+  // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
  %3 = arith.constant dense<8> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [134217728] ; Constancy: [8] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [134217728], constancy = [8], constant_value = <none>
  %4 = arith.divsi %0, %3 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %5 = arith.andi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %6 = arith.ori %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %7 = arith.xori %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
  %8 = arith.andi %2, %4 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
  %9 = arith.ori %2, %4 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = <none>
  %10 = arith.xori %2, %4 : tensor<128xi32>
  return
 }

 // -----

-// CHECK-LABEL: select
-func @select() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @select
+func.func @select() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
  %1 = arith.constant dense<0> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
  %2 = arith.cmpi eq, %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
  %3 = arith.cmpi slt, %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [1] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0
  %4 = arith.constant 0 : i1
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
  %7 = tt.splat %4 : (i1) -> tensor<128xi1>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0]
-  %5 = select %4, %3, %7 : tensor<128xi1>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0
+  %5 = arith.select %4, %3, %7 : tensor<128xi1>
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = <none>
  %8 = "triton_gpu.select"(%7, %3, %2) : (tensor<128xi1>, tensor<128xi1>, tensor<128xi1>) -> tensor<128xi1>
  return
 }

 // -----

-func @shift() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+func.func @shift() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
+  // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
  %1 = arith.constant dense<8> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4] ; Constancy: [128] ; ConstantValue: [4]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4
  %2 = arith.constant dense<4> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [274877906944] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [274877906944], constancy = [1], constant_value = <none>
  %3 = arith.shli %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [67108864] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [67108864], constancy = [1], constant_value = <none>
  %4 = arith.shrsi %0, %2 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128
  %5 = arith.shli %1, %2 : tensor<128xi32>
  return
 }

 // -----

-func @max_min() {
-  // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+func.func @max_min() {
+  // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [64], constancy = [1], constant_value = <none>
  %1 = tt.make_range {end = 192 : i32, start = 64 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %2 = arith.maxsi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %3 = arith.minsi %0, %1 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8]
+  // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8
  %4 = arith.constant dense<8> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4] ; Constancy: [128] ; ConstantValue: [4]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4
  %5 = arith.constant dense<4> : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [8]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 8
  %6 = arith.maxsi %4, %5 : tensor<128xi32>
  return
 }

 // -----

-// CHECK-LABEL: for
-func @for() {
-  // CHECK: Contiguity: [1, 1] ; Divisibility: [4611686018427387904, 4611686018427387904] ; Constancy: [128, 32] ; ConstantValue: [0]
+// CHECK-LABEL: @for
+func.func @for() {
+  // CHECK: contiguity = [1, 1], divisibility = [4611686018427387904, 4611686018427387904], constancy = [128, 32], constant_value = 0
  %a_init = arith.constant dense<0> : tensor<128x32xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [1]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = 1
  %b_init = arith.constant dense<1> : tensor<128x32xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [4, 4] ; Constancy: [128, 32] ; ConstantValue: [4]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4
  %c_init = arith.constant dense<4> : tensor<128x32xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128
  %ub = arith.constant 128 : index
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [1] ; ConstantValue: [0]
+  // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0
  %lb = arith.constant 0 : index
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [16]
+  // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [1], constant_value = 16
  %step = arith.constant 16 : index
  %a, %b, %c = scf.for %iv = %lb to %ub step %step iter_args(%a = %a_init, %b = %b_init, %c = %c_init) -> (tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>) {
-    // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None]
+    // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [1], constant_value = <none>
    %t = arith.index_cast %iv : index to i32
-    // CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [None]
-    // CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [None]
-    // CHECK: Contiguity: [1, 1] ; Divisibility: [4, 4] ; Constancy: [128, 32] ; ConstantValue: [4]
+    // CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>
+    // CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = <none>
+    // CHECK: contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4
    scf.yield %b, %a, %c : tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>
  }
  return
@@ -290,53 +290,53 @@ func @for() {

 // -----

-// CHECK-LABEL: permute_2d
-func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) {
-  // CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 128] ; ConstantValue: [1]
+// CHECK-LABEL: @permute_2d
+func.func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) {
+  // CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 128], constant_value = 1
  %cst = arith.constant dense<true> : tensor<128x128xi1>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>
  %cst_0 = arith.constant dense<0.000000e+00> : tensor<128x128xf32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %1 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [1073741824, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = <none>
  %2 = tt.expand_dims %0 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>
  %3 = tt.splat %arg1 : (i32) -> tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [17179869184, 16] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [17179869184, 16], constancy = [1, 1], constant_value = <none>
  %4 = arith.muli %2, %3 : tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>
  %5 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<128x1x!tt.ptr<f32>>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = <none>
  %6 = tt.addptr %5, %4 : tensor<128x1x!tt.ptr<f32>>, tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>
  %7 = tt.expand_dims %1 {axis = 0 : i32}: (tensor<128xi32>) -> tensor<1x128xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = <none>
  %8 = tt.broadcast %6 : (tensor<128x1x!tt.ptr<f32>>) -> tensor<128x128x!tt.ptr<f32>>
-  // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [128, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [128, 1], constant_value = <none>
  %9 = tt.broadcast %7 : (tensor<1x128xi32>) -> tensor<128x128xi32>
-  // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 16] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 16], constancy = [1, 1], constant_value = <none>
  %10 = tt.addptr %8, %9 : tensor<128x128x!tt.ptr<f32>>, tensor<128x128xi32>
-  // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [1073741824, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = <none>
  %11 = tt.expand_dims %0 {axis = 1 : i32}: (tensor<128xi32>) -> tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = <none>
  %12 = tt.splat %arg2 : (!tt.ptr<f32>) -> tensor<128x1x!tt.ptr<f32>>
-  // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = <none>
  %13 = tt.addptr %12, %11 : tensor<128x1x!tt.ptr<f32>>, tensor<128x1xi32>
-  // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = <none>
  %14 = tt.expand_dims %1 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = <none>
  %15 = tt.splat %arg3 : (i32) -> tensor<1x128xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 17179869184] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 17179869184], constancy = [1, 1], constant_value = <none>
  %16 = arith.muli %14, %15 : tensor<1x128xi32>
-  // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 128], constant_value = <none>
  %17 = tt.broadcast %13 : (tensor<128x1x!tt.ptr<f32>>) -> tensor<128x128x!tt.ptr<f32>>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 17179869184] ; Constancy: [128, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 17179869184], constancy = [128, 1], constant_value = <none>
  %18 = tt.broadcast %16 : (tensor<1x128xi32>) -> tensor<128x128xi32>
-  // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = <none>
  %19 = tt.addptr %17, %18 : tensor<128x128x!tt.ptr<f32>>, tensor<128x128xi32>
-  // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [1, 1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>
  %20 = tt.load %10, %cst, %cst_0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32>
  tt.store %19, %20, %cst : tensor<128x128xf32>
  return
@@ -347,29 +347,29 @@ func @permute_2d(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: i32 {t
 module {

 // This is a tiny test for verifying StoreOp-related alignment, It simply store a constant to a buffer.
-// CHECK-LABEL: store_constant_align
-func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) {
-  // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+// CHECK-LABEL: @store_constant_align
+func.func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) {
+  // CHECK: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %pid = tt.get_program_id {axis = 0 : i32} : i32
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [128]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128
  %c128_i32 = arith.constant 128 : i32
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = <none>
  %1 = arith.muli %pid, %c128_i32 : i32
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = <none>
  %2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [None]
+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = <none>
  %3 = tt.splat %1 : (i32) -> tensor<128xi32>
- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [None]
+ // CHECK-NEXT: contiguity = [128], divisibility = [128], constancy = [1], constant_value = <none>
  %4 = arith.addi %3, %2 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [128], constant_value = <none>
  %5 = tt.splat %addr : (!tt.ptr<f32>) -> tensor<128x!tt.ptr<f32>>
-  // CHECK-NEXT: Contiguity: [128] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [128], divisibility = [16], constancy = [1], constant_value = <none>
  %6 = tt.addptr %5, %4 : tensor<128x!tt.ptr<f32>>, tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [128] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [128], constant_value = <none>
  %9 = tt.splat %n : (i32) -> tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [16] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>
  %mask = arith.cmpi slt, %4, %9 : tensor<128xi32>
-  // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None]
+  // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %cst = arith.constant dense<0.0> : tensor<128xf32>
  tt.store %5, %cst, %mask : tensor<128xf32>
  return
@@ -381,8 +381,8 @@ func @store_constant_align(%addr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n:

 // This IR is dumped from vecadd test.
 // Note, the hint {tt.divisibility = 16 : i32} for %n_elements affects the alignment of mask.
-// CHECK-LABEL: vecadd_mask_align_16
-func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) {
+// CHECK-LABEL: @vecadd_mask_align_16
+func.func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) {
  %c64_i32 = arith.constant 64 : i32
  %0 = tt.get_program_id {axis = 0 : i32} : i32
  %1 = arith.muli %0, %c64_i32 : i32
@@ -394,13 +394,13 @@ func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %ar
  %7 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<64x!tt.ptr<f32>>
  %8 = tt.addptr %7, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  %9 = tt.splat %n_elements : (i32) -> tensor<64xi32>
-  // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [16] ; ConstantValue: [None] ( %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : tensor<64xi32> )
+  // CHECK: arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [16], constant_value = <none>
  %mask = arith.cmpi slt, %4, %9 : tensor<64xi32>
  %11 = tt.load %6, %mask {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
  %12 = tt.load %8, %mask {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
  %13 = arith.addf %11, %12 : tensor<64xf32>
  %14 = tt.splat %arg2 : (!tt.ptr<f32>) -> tensor<64x!tt.ptr<f32>>
-  // CHECK: Contiguity: [64] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None] ( %{{.*}} = tt.addptr %{{.*}}, %{{.*}} : tensor<64x!tt.ptr<f32>>, tensor<64xi32> )
+  // CHECK: tt.addptr %{{.*}} => contiguity = [64], divisibility = [16], constancy = [1], constant_value = <none>
  %15 = tt.addptr %14, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  tt.store %15, %13, %mask : tensor<64xf32>
  return
@@ -410,8 +410,8 @@ func @vecadd_mask_align_16(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %ar

 // This IR is dumped from vecadd test.
 // Note, there is no divisibility hint for %n_elements, Triton should assume its divisibility to be 1 by default.
-// CHECK-LABEL: vecadd_mask_align_1
-func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32) {
+// CHECK-LABEL: @vecadd_mask_align_1
+func.func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %n_elements: i32) {
  %c64_i32 = arith.constant 64 : i32
  %0 = tt.get_program_id {axis = 0 : i32} : i32
  %1 = arith.muli %0, %c64_i32 : i32
@@ -423,7 +423,7 @@ func @vecadd_mask_align_1(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %arg
  %7 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<64x!tt.ptr<f32>>
  %8 = tt.addptr %7, %4 : tensor<64x!tt.ptr<f32>>, tensor<64xi32>
  %9 = tt.splat %n_elements : (i32) -> tensor<64xi32>
-  // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] ( %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : tensor<64xi32> )
+  // CHECK: arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>
  %10 = arith.cmpi slt, %4, %9 : tensor<64xi32>
  %11 = tt.load %6, %10 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
  %12 = tt.load %8, %10 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32>
--- a/test/Analysis/test-allocation.mlir
+++ b/test/Analysis/test-allocation.mlir
@@ -13,7 +13,7 @@
 module attributes {"triton_gpu.num-warps" = 4 : i32} {

 // CHECK-LABEL: matmul_loop
-func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_ptr_init = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
  %b_ptr_init = tt.broadcast %B : (!tt.ptr<f16>) -> tensor<32x128x!tt.ptr<f16>, #BL>

@@ -46,7 +46,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B

 // Shared memory is available after a tensor's liveness range ends
 // CHECK-LABEL: reusable
-func @reusable(%A : !tt.ptr<f16>) {
+func.func @reusable(%A : !tt.ptr<f16>) {
  %cst1 = arith.constant dense<true> : tensor<128x32xi1, #AL>
  %cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL>
  %cst3 = arith.constant dense<true> : tensor<32x128xi1, #AL>
@@ -78,7 +78,7 @@ func @reusable(%A : !tt.ptr<f16>) {
 // %cst1->%cst4
 // %cst3->%g->%h->%i
 // CHECK-LABEL: preallocate
-func @preallocate(%A : !tt.ptr<f16>) {
+func.func @preallocate(%A : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 512
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK-NEXT: offset = 1024, size = 512
@@ -113,7 +113,7 @@ func @preallocate(%A : !tt.ptr<f16>) {

 // Unused tensors are immediately released
 // CHECK-LABEL: unused
-func @unused(%A : !tt.ptr<f16>) {
+func.func @unused(%A : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 1024
  %cst0 = arith.constant dense<0.000000e+00> : tensor<32x16xf16, #A_SHARED>
  // CHECK-NEXT: offset = 0, size = 512
@@ -128,7 +128,7 @@ func @unused(%A : !tt.ptr<f16>) {

 // cst0 is alive through the entire function, it cannot be released before the end of the function
 // CHECK-LABEL: longlive
-func @longlive(%A : !tt.ptr<f16>) {
+func.func @longlive(%A : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 512
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK-NEXT: offset = 512, size = 512
@@ -156,7 +156,7 @@ func @longlive(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: alloc
-func @alloc(%A : !tt.ptr<f16>) {
+func.func @alloc(%A : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 512
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #AL>
@@ -167,7 +167,7 @@ func @alloc(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: scratch
-func @scratch() {
+func.func @scratch() {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
  // CHECK: scratch offset = 0, size = 512
  %b = tt.reduce %cst0 {redOp = 1 : i32, axis = 0 : i32} : tensor<16x16xf16, #AL> -> tensor<16xf16, #sliceAd0>
@@ -176,7 +176,7 @@ func @scratch() {
 }

 // CHECK-LABEL: trans
-func @trans(%A : !tt.ptr<f16>) {
+func.func @trans(%A : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 1024
  %tensor = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED>
  %b = tt.trans %tensor : (tensor<16x32xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED_T>
@@ -184,7 +184,7 @@ func @trans(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: insert_slice_async
-func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
+func.func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
  %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
  %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
@@ -197,7 +197,7 @@ func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
 }

 // CHECK-LABEL: extract_slice
-func @extract_slice(%A : !tt.ptr<f16>) {
+func.func @extract_slice(%A : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 512
  %cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED>
  %index = arith.constant 0 : index
@@ -209,7 +209,7 @@ func @extract_slice(%A : !tt.ptr<f16>) {
 // B0 -> (B1) -> B0
 // Memory used by B1 can be reused by B0.
 // CHECK-LABEL: if
-func @if(%i1 : i1) {
+func.func @if(%i1 : i1) {
  // CHECK: offset = 0, size = 512
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK-NEXT: offset = 512, size = 512
@@ -233,7 +233,7 @@ func @if(%i1 : i1) {
 // B0 -> (B1) -> (B2) -> B0
 // Memory used by B0 cannot be reused by B1 or B2.
 // CHECK-LABEL: if_else
-func @if_else(%i1 : i1) {
+func.func @if_else(%i1 : i1) {
  // CHECK: offset = 0, size = 512
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK-NEXT: offset = 512, size = 512
@@ -260,7 +260,7 @@ func @if_else(%i1 : i1) {
 // Block arguments and yields are memory aliases that do not trigger a new
 // allocation.
 // CHECK-LABEL: for
-func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  // CHECK: offset = 0, size = 8192
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: offset = 8192, size = 8192
@@ -275,7 +275,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.p
 }

 // CHECK-LABEL: for_if_slice
-func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
+func.func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
  // CHECK: offset = 0, size = 8192
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: offset = 8192, size = 8192
@@ -296,7 +296,7 @@ func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %

 // c0 cannot be released in the loop
 // CHECK-LABEL: for_use_ancestor
-func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
+func.func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
  // CHECK: offset = 0, size = 8192
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: offset = 8192, size = 8192
@@ -316,7 +316,7 @@ func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16
 // a_shared_init, b_shared_init, and c_shared_init's liveness ranges are span over the entire function before cst2.
 // So they cannot be reused by cst0 and cst1, but can be reused by cst2.
 // CHECK-LABEL: for_if_for
-func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
+func.func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
  // CHECK: offset = 0, size = 8192
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: offset = 8192, size = 8192
--- a/test/Analysis/test-membar.mlir
+++ b/test/Analysis/test-membar.mlir
@@ -14,7 +14,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} {

 // CHECK-LABEL: matmul_loop
 // There shouldn't be any membar with the dot op encoding.
-func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_ptr_init = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
  %b_ptr_init = tt.broadcast %B : (!tt.ptr<f16>) -> tensor<32x128x!tt.ptr<f16>, #BL>

@@ -42,7 +42,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B
 }

 // CHECK-LABEL: raw_single_block
-func @raw_single_block(%A : !tt.ptr<f16>) {
+func.func @raw_single_block(%A : !tt.ptr<f16>) {
  %cst1 = arith.constant dense<true> : tensor<128x32xi1, #AL>
  %cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL>
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
@@ -54,7 +54,7 @@ func @raw_single_block(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: war_single_block
-func @war_single_block(%A : !tt.ptr<f16>) {
+func.func @war_single_block(%A : !tt.ptr<f16>) {
  %cst1 = arith.constant dense<true> : tensor<128x32xi1, #AL>
  %cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL>
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<128x32x!tt.ptr<f16>, #AL>
@@ -70,7 +70,7 @@ func @war_single_block(%A : !tt.ptr<f16>) {
 }

 // CHECK-LABEL: scratch
-func @scratch() {
+func.func @scratch() {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK: Membar 1
  %a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED>
@@ -81,7 +81,7 @@ func @scratch() {
 }

 // CHECK-LABEL: async_wait
-func @async_wait() {
+func.func @async_wait() {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  // CHECK: Membar 1
  %a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED>
@@ -92,7 +92,7 @@ func @async_wait() {
 }

 // CHECK-LABEL: alloc
-func @alloc() {
+func.func @alloc() {
  %cst0 = triton_gpu.alloc_tensor : tensor<16x16xf16, #A_SHARED>
  %a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED>
  // CHECK: Membar 2
@@ -101,7 +101,7 @@ func @alloc() {
 }

 // CHECK-LABEL: extract_slice
-func @extract_slice() {
+func.func @extract_slice() {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED>
  %index = arith.constant 0 : index
  %cst1 = tensor.extract_slice %cst0[%index, 0, 0][1, 16, 16][1, 1, 1] : tensor<1x16x16xf16, #A_SHARED> to tensor<16x16xf16, #A_SHARED>
@@ -113,14 +113,14 @@ func @extract_slice() {
 }

 // CHECK-LABEL: trans
-func @trans() {
+func.func @trans() {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED>
  %b = tt.trans %cst0 : (tensor<16x32xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED_T>
  return
 }

 // CHECK-LABEL: insert_slice_async
-func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
+func.func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
  %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
  %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
@@ -135,7 +135,7 @@ func @insert_slice_async(%A : !tt.ptr<f16>, %i1 : i1) {
 }

 // CHECK-LABEL: insert_slice
-func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
+func.func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {
  %a_ptr = tt.broadcast %A : (!tt.ptr<f16>) -> tensor<16x16x!tt.ptr<f16>, #AL>
  %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL>
  %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
@@ -153,7 +153,7 @@ func @insert_slice(%A : !tt.ptr<f16>, %i1 : i1) {

 // If branch inserted a barrier for %cst0 and %cst1, but else didn't, then the barrier should be inserted in the parent region
 // CHECK-LABEL: multi_blocks
-func @multi_blocks(%i1 : i1) {
+func.func @multi_blocks(%i1 : i1) {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  scf.if %i1 {
@@ -174,7 +174,7 @@ func @multi_blocks(%i1 : i1) {

 // Both branches inserted a barrier for %cst0 and %cst1, then the barrier doesn't need to be inserted in the parent region
 // CHECK-LABEL: multi_blocks_join_barrier
-func @multi_blocks_join_barrier(%i1 : i1) {
+func.func @multi_blocks_join_barrier(%i1 : i1) {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  scf.if %i1 {
@@ -192,7 +192,7 @@ func @multi_blocks_join_barrier(%i1 : i1) {

 // Read yielded tensor requires a barrier
 // CHECK-LABEL: multi_blocks_yield
-func @multi_blocks_yield(%i1 : i1) {
+func.func @multi_blocks_yield(%i1 : i1) {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %a = scf.if %i1 -> (tensor<32x16xf16, #A_SHARED>) {
@@ -212,7 +212,7 @@ func @multi_blocks_yield(%i1 : i1) {

 // Conservatively add a barrier as if the branch (%i1) is never taken
 // CHECK-LABEL: multi_blocks_noelse
-func @multi_blocks_noelse(%i1 : i1) {
+func.func @multi_blocks_noelse(%i1 : i1) {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  scf.if %i1 {
@@ -226,7 +226,7 @@ func @multi_blocks_noelse(%i1 : i1) {

 // Conservatively add a barrier as if the branch (%i2) is never taken
 // CHECK-LABEL: multi_blocks_nested_scf
-func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) {
+func.func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) {
  %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED>
  scf.if %i1 {
@@ -247,7 +247,7 @@ func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) {
 }

 // CHECK-LABEL: for
-func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  %c_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
@@ -262,7 +262,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.p
 // Although a_shared and b_shared are synced before entering the loop,
 // they are reassociated with aliases (c_shared) and thus require a barrier.
 // CHECK-LABEL: for_alias
-func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: Membar 2
@@ -282,7 +282,7 @@ func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :
 // Although cst2 is not an argument of scf.yield, its memory is reused by cst1.
 // So we need a barrier both before and after cst1
 // CHECK-LABEL: for_reuse
-func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: Membar 2
@@ -302,7 +302,7 @@ func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :


 // CHECK-LABEL: for_reuse_nested
-func @for_reuse_nested(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
+func.func @for_reuse_nested(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
  %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED>
  // CHECK-NEXT: Membar 2