mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-27 03:01:52 -04:00
This folds `tl.arange(x, x + 1)` into a constant. This shows up, for example, when autotuning sets one of the block sizes to 1. Co-authored-by: Philippe Tillet <phil@openai.com>
28 lines
1.2 KiB
MLIR
28 lines
1.2 KiB
MLIR
// RUN: triton-opt %s -split-input-file -canonicalize | FileCheck %s
|
|
|
|
// Both loads below produce results that are never used. The directives expect
// that, after -canonicalize, no load with isVolatile = false remains before the
// volatile one, and that the load with isVolatile = true is still present.
// CHECK-LABEL: dead_load
tt.func @dead_load(%ptr: tensor<32x128x!tt.ptr<f16>>) {
  // All-true mask and zero fallback value shared by both loads.
  %mask = arith.constant dense<true> : tensor<32x128xi1>
  %other = arith.constant dense<0.00e+00> : tensor<32x128xf16>
  // CHECK-NOT: tt.load {{.*}} isVolatile = false
  // CHECK: tt.load {{.*}} isVolatile = true
  %a = tt.load %ptr, %mask, %other {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<32x128xf16>
  %b = tt.load %ptr, %mask, %other {cache = 1 : i32, evict = 1 : i32, isVolatile = true} : tensor<32x128xf16>
  tt.return
}
|
|
|
|
|
|
// Single-element ranges (end = start + 1) are expected to canonicalize into
// constants: the range [0, 1) expanded and broadcast to 128x1 becomes
// dense<0>, and the range [1, 2) becomes dense<1>.
// CHECK-LABEL: make_range
tt.func @make_range() -> (tensor<128x1xi32>, tensor<1xi32>) {
  // The whole make_range -> expand_dims -> broadcast chain should collapse
  // into one constant of the broadcast result type.
  // CHECK-DAG: %[[c:.*]] = arith.constant dense<0> : tensor<128x1xi32>
  %a = tt.make_range {end = 1 : i32, start = 0 : i32} : tensor<1xi32>
  %b = tt.expand_dims %a {axis = 1 : i32} : (tensor<1xi32>) -> tensor<1x1xi32>
  %c = tt.broadcast %b : (tensor<1x1xi32>) -> tensor<128x1xi32>

  // A non-zero start exercises that the folded value is `start`, not 0.
  // CHECK-DAG: %[[d:.*]] = arith.constant dense<1> : tensor<1xi32>
  %d = tt.make_range {end = 2 : i32, start = 1 : i32} : tensor<1xi32>

  // The function must return the two captured constants, in this order.
  // CHECK-DAG: tt.return %[[c]], %[[d]] : tensor<128x1xi32>, tensor<1xi32>
  tt.return %c, %d : tensor<128x1xi32>, tensor<1xi32>
}
|