--- /dev/null
+// RUN: mlir-opt %s -test-math-polynomial-approximation="enable-avx2" \
+// RUN: -convert-arith-to-llvm \
+// RUN: -convert-vector-to-llvm="enable-x86vector" \
+// RUN: -convert-math-to-llvm \
+// RUN: -convert-std-to-llvm \
+// RUN: -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner \
+// RUN: -e main -entry-point-result=void -O0 \
+// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
+// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+// -------------------------------------------------------------------------- //
+// rsqrt.
+// -------------------------------------------------------------------------- //
+// Prints math.rsqrt of scalar and 8-lane vector f32 constants. Each printed
+// line is matched by the FileCheck directive written just above the
+// computation that produces it.
+func @rsqrt() {
+  // Sanity-check that the scalar rsqrt still works OK.
+  // CHECK: inf
+  %0 = arith.constant 0.0 : f32
+  %rsqrt_0 = math.rsqrt %0 : f32
+  vector.print %rsqrt_0 : f32
+  // CHECK: 0.707107
+  %two = arith.constant 2.0 : f32
+  %rsqrt_two = math.rsqrt %two : f32
+  vector.print %rsqrt_two : f32
+  // Check that the vectorized approximation is reasonably accurate.
+  // CHECK: 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107
+  %vec8 = arith.constant dense<2.0> : vector<8xf32>
+  %rsqrt_vec8 = math.rsqrt %vec8 : vector<8xf32>
+  vector.print %rsqrt_vec8 : vector<8xf32>
+  return
+}
+// Entry point selected by `-e main` on the RUN line; runs the rsqrt checks.
+func @main() {
+  call @rsqrt() : () -> ()
+  return
+}
+++ /dev/null
-// RUN: mlir-opt %s -test-math-polynomial-approximation="enable-avx2" \
-// RUN: -convert-arith-to-llvm \
-// RUN: -convert-vector-to-llvm="enable-x86vector" \
-// RUN: -convert-math-to-llvm \
-// RUN: -convert-std-to-llvm \
-// RUN: -reconcile-unrealized-casts \
-// RUN: | mlir-cpu-runner \
-// RUN: -e main -entry-point-result=void -O0 \
-// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
-// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
-// RUN: | FileCheck %s
-// -------------------------------------------------------------------------- //
-// rsqrt.
-// -------------------------------------------------------------------------- //
-func @rsqrt() {
- // Sanity-check that the scalar rsqrt still works OK.
- // CHECK: inf
- %0 = arith.constant 0.0 : f32
- %rsqrt_0 = math.rsqrt %0 : f32
- vector.print %rsqrt_0 : f32
- // CHECK: 0.707107
- %two = arith.constant 2.0: f32
- %rsqrt_two = math.rsqrt %two : f32
- vector.print %rsqrt_two : f32
- // Check that the vectorized approximation is reasonably accurate.
- // CHECK: 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107, 0.707107
- %vec8 = arith.constant dense<2.0> : vector<8xf32>
- %rsqrt_vec8 = math.rsqrt %vec8 : vector<8xf32>
- vector.print %rsqrt_vec8 : vector<8xf32>
- return
-func @main() {
- call @rsqrt(): () -> ()
- return
--- /dev/null
+// RUN: mlir-opt %s -convert-scf-to-std -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
+// Verify bare pointer memref calling convention. `simple_add1_add2_test`
+// gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second
+// one. 'main' calls 'simple_add1_add2_test' with {1, 1} and {2, 2} so {2, 2}
+// and {4, 4} are the expected outputs.
+// Adds 1.0 to every element of %arg0 and 2.0 to every element of %arg1, in
+// place. Both memrefs have a static shape, as exercised by the
+// `use-bare-ptr-memref-call-conv=1` option on the RUN line.
+func @simple_add1_add2_test(%arg0: memref<2xf32>, %arg1: memref<2xf32>) {
+  %c2 = arith.constant 2 : index
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 1.000000e+00 : f32
+  %cst_0 = arith.constant 2.000000e+00 : f32
+  scf.for %arg2 = %c0 to %c2 step %c1 {
+    // First memref: element + 1.0.
+    %0 = memref.load %arg0[%arg2] : memref<2xf32>
+    %1 = arith.addf %0, %cst : f32
+    memref.store %1, %arg0[%arg2] : memref<2xf32>
+    // Second memref: element + 2.0. The addend must be the value loaded from
+    // %arg1 (not the result computed for %arg0), otherwise the second
+    // argument is never actually read through its bare pointer.
+    %2 = memref.load %arg1[%arg2] : memref<2xf32>
+    %3 = arith.addf %2, %cst_0 : f32
+    memref.store %3, %arg1[%arg2] : memref<2xf32>
+  }
+  return
+}
+// External declarations.
+llvm.func @malloc(i64) -> !llvm.ptr<i8>
+llvm.func @free(!llvm.ptr<i8>)
+func private @printF32(%arg0: f32)
+func private @printComma()
+func private @printNewline()
+// Fills %a with {1, 1} and %b with {2, 2}, calls the function under test and
+// prints both buffers, one per line.
+func @main() {
+  %c2 = arith.constant 2 : index
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 1.000000e+00 : f32
+  %cst_0 = arith.constant 2.000000e+00 : f32
+  %a = memref.alloc() : memref<2xf32>
+  %b = memref.alloc() : memref<2xf32>
+  scf.for %i = %c0 to %c2 step %c1 {
+    memref.store %cst, %a[%i] : memref<2xf32>
+    memref.store %cst_0, %b[%i] : memref<2xf32>
+  }
+  call @simple_add1_add2_test(%a, %b) : (memref<2xf32>, memref<2xf32>) -> ()
+  // {1, 1} + 1.0
+  // CHECK: 2, 2
+  %l0 = memref.load %a[%c0] : memref<2xf32>
+  call @printF32(%l0) : (f32) -> ()
+  call @printComma() : () -> ()
+  %l1 = memref.load %a[%c1] : memref<2xf32>
+  call @printF32(%l1) : (f32) -> ()
+  call @printNewline() : () -> ()
+  // {2, 2} + 2.0
+  // CHECK-NEXT: 4, 4
+  %l2 = memref.load %b[%c0] : memref<2xf32>
+  call @printF32(%l2) : (f32) -> ()
+  call @printComma() : () -> ()
+  %l3 = memref.load %b[%c1] : memref<2xf32>
+  call @printF32(%l3) : (f32) -> ()
+  call @printNewline() : () -> ()
+  memref.dealloc %a : memref<2xf32>
+  memref.dealloc %b : memref<2xf32>
+  return
+}
+++ /dev/null
-// RUN: mlir-opt %s -convert-scf-to-std -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
-// Verify bare pointer memref calling convention. `simple_add1_add2_test`
-// gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second
-// one. 'main' calls 'simple_add1_add2_test' with {1, 1} and {2, 2} so {2, 2}
-// and {4, 4} are the expected outputs.
-func @simple_add1_add2_test(%arg0: memref<2xf32>, %arg1: memref<2xf32>) {
- %c2 = arith.constant 2 : index
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %cst = arith.constant 1.000000e+00 : f32
- %cst_0 = arith.constant 2.000000e+00 : f32
- scf.for %arg2 = %c0 to %c2 step %c1 {
- %0 = memref.load %arg0[%arg2] : memref<2xf32>
- %1 = arith.addf %0, %cst : f32
- memref.store %1, %arg0[%arg2] : memref<2xf32>
- // CHECK: 2, 2
- %2 = memref.load %arg1[%arg2] : memref<2xf32>
- %3 = arith.addf %1, %cst_0 : f32
- memref.store %3, %arg1[%arg2] : memref<2xf32>
- // CHECK-NEXT: 4, 4
- }
- return
-// External declarations.
-llvm.func @malloc(i64) -> !llvm.ptr<i8>
-llvm.func @free(!llvm.ptr<i8>)
-func private @printF32(%arg0: f32)
-func private @printComma()
-func private @printNewline()
-func @main()
- %c2 = arith.constant 2 : index
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %cst = arith.constant 1.000000e+00 : f32
- %cst_0 = arith.constant 2.000000e+00 : f32
- %a = memref.alloc() : memref<2xf32>
- %b = memref.alloc() : memref<2xf32>
- scf.for %i = %c0 to %c2 step %c1 {
- memref.store %cst, %a[%i] : memref<2xf32>
- memref.store %cst, %b[%i] : memref<2xf32>
- }
- call @simple_add1_add2_test(%a, %b) : (memref<2xf32>, memref<2xf32>) -> ()
- %l0 = memref.load %a[%c0] : memref<2xf32>
- call @printF32(%l0) : (f32) -> ()
- call @printComma() : () -> ()
- %l1 = memref.load %a[%c1] : memref<2xf32>
- call @printF32(%l1) : (f32) -> ()
- call @printNewline() : () -> ()
- %l2 = memref.load %b[%c0] : memref<2xf32>
- call @printF32(%l2) : (f32) -> ()
- call @printComma() : () -> ()
- %l3 = memref.load %b[%c1] : memref<2xf32>
- call @printF32(%l3) : (f32) -> ()
- call @printNewline() : () -> ()
- memref.dealloc %a : memref<2xf32>
- memref.dealloc %b : memref<2xf32>
- return
--- /dev/null
+// RUN: mlir-opt %s -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// External print helpers used by the tests below. They are declarations only;
+// presumably they are resolved at run time from the runner-utils shared
+// libraries listed on the RUN line (TODO confirm).
+func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
+func private @print_memref_i32(memref<*xi32>) attributes { llvm.emit_c_interface }
+func private @printNewline() -> ()
+// Private, mutable 1-D global with an initial value.
+memref.global "private" @gv0 : memref<4xf32> = dense<[0.0, 1.0, 2.0, 3.0]>
+// Prints @gv0, overwrites elements 0 and 2, and prints it again to show that
+// stores through memref.get_global are visible in the global.
+func @test1DMemref() {
+  %0 = memref.get_global @gv0 : memref<4xf32>
+  %U = memref.cast %0 : memref<4xf32> to memref<*xf32>
+  // CHECK: rank = 1
+  // CHECK: offset = 0
+  // CHECK: sizes = [4]
+  // CHECK: strides = [1]
+  // CHECK: [0, 1, 2, 3]
+  call @print_memref_f32(%U) : (memref<*xf32>) -> ()
+  call @printNewline() : () -> ()
+  // Overwrite some of the elements.
+  %c0 = arith.constant 0 : index
+  %c2 = arith.constant 2 : index
+  %fp0 = arith.constant 4.0 : f32
+  %fp1 = arith.constant 5.0 : f32
+  memref.store %fp0, %0[%c0] : memref<4xf32>
+  memref.store %fp1, %0[%c2] : memref<4xf32>
+  // CHECK: rank = 1
+  // CHECK: offset = 0
+  // CHECK: sizes = [4]
+  // CHECK: strides = [1]
+  // CHECK: [4, 1, 5, 3]
+  call @print_memref_f32(%U) : (memref<*xf32>) -> ()
+  call @printNewline() : () -> ()
+  return
+}
+// Constant 2-D global of i32.
+memref.global constant @gv1 : memref<3x2xi32> = dense<[[0, 1],[2, 3],[4, 5]]>
+// Prints the constant global @gv1 through an unranked memref cast.
+func @testConstantMemref() {
+  %0 = memref.get_global @gv1 : memref<3x2xi32>
+  %U = memref.cast %0 : memref<3x2xi32> to memref<*xi32>
+  // CHECK: rank = 2
+  // CHECK: offset = 0
+  // CHECK: sizes = [3, 2]
+  // CHECK: strides = [2, 1]
+  // CHECK: [0, 1]
+  // CHECK: [2, 3]
+  // CHECK: [4, 5]
+  call @print_memref_i32(%U) : (memref<*xi32>) -> ()
+  call @printNewline() : () -> ()
+  return
+}
+// Private, mutable 2-D global with an initial value.
+memref.global "private" @gv2 : memref<4x2xf32> = dense<[[0.0, 1.0], [2.0, 3.0], [4.0, 5.0], [6.0, 7.0]]>
+// Prints @gv2, overwrites the element at [0, 1], and prints it again.
+func @test2DMemref() {
+  %0 = memref.get_global @gv2 : memref<4x2xf32>
+  %U = memref.cast %0 : memref<4x2xf32> to memref<*xf32>
+  // CHECK: rank = 2
+  // CHECK: offset = 0
+  // CHECK: sizes = [4, 2]
+  // CHECK: strides = [2, 1]
+  // CHECK: [0, 1]
+  // CHECK: [2, 3]
+  // CHECK: [4, 5]
+  // CHECK: [6, 7]
+  call @print_memref_f32(%U) : (memref<*xf32>) -> ()
+  call @printNewline() : () -> ()
+  // Overwrite the 1.0 (at index [0, 1]) with 10.0
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %fp10 = arith.constant 10.0 : f32
+  memref.store %fp10, %0[%c0, %c1] : memref<4x2xf32>
+  // CHECK: rank = 2
+  // CHECK: offset = 0
+  // CHECK: sizes = [4, 2]
+  // CHECK: strides = [2, 1]
+  // CHECK: [0, 10]
+  // CHECK: [2, 3]
+  // CHECK: [4, 5]
+  // CHECK: [6, 7]
+  call @print_memref_f32(%U) : (memref<*xf32>) -> ()
+  call @printNewline() : () -> ()
+  return
+}
+// Rank-0 (scalar) global of i32, declared without an explicit visibility.
+memref.global @gv3 : memref<i32> = dense<11>
+// Prints the rank-0 global @gv3; sizes and strides are expected to be empty.
+func @testScalarMemref() {
+  %0 = memref.get_global @gv3 : memref<i32>
+  %U = memref.cast %0 : memref<i32> to memref<*xi32>
+  // CHECK: rank = 0
+  // CHECK: offset = 0
+  // CHECK: sizes = []
+  // CHECK: strides = []
+  // CHECK: [11]
+  call @print_memref_i32(%U) : (memref<*xi32>) -> ()
+  call @printNewline() : () -> ()
+  return
+}
+// Entry point selected by `-e main` on the RUN line. The call order must match
+// the order of the FileCheck directives in the file.
+func @main() -> () {
+  call @test1DMemref() : () -> ()
+  call @testConstantMemref() : () -> ()
+  call @test2DMemref() : () -> ()
+  call @testScalarMemref() : () -> ()
+  return
+}
+++ /dev/null
-// RUN: mlir-opt %s -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
-func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
-func private @print_memref_i32(memref<*xi32>) attributes { llvm.emit_c_interface }
-func private @printNewline() -> ()
-memref.global "private" @gv0 : memref<4xf32> = dense<[0.0, 1.0, 2.0, 3.0]>
-func @test1DMemref() {
- %0 = memref.get_global @gv0 : memref<4xf32>
- %U = memref.cast %0 : memref<4xf32> to memref<*xf32>
- // CHECK: rank = 1
- // CHECK: offset = 0
- // CHECK: sizes = [4]
- // CHECK: strides = [1]
- // CHECK: [0, 1, 2, 3]
- call @print_memref_f32(%U) : (memref<*xf32>) -> ()
- call @printNewline() : () -> ()
- // Overwrite some of the elements.
- %c0 = arith.constant 0 : index
- %c2 = arith.constant 2 : index
- %fp0 = arith.constant 4.0 : f32
- %fp1 = arith.constant 5.0 : f32
- memref.store %fp0, %0[%c0] : memref<4xf32>
- memref.store %fp1, %0[%c2] : memref<4xf32>
- // CHECK: rank = 1
- // CHECK: offset = 0
- // CHECK: sizes = [4]
- // CHECK: strides = [1]
- // CHECK: [4, 1, 5, 3]
- call @print_memref_f32(%U) : (memref<*xf32>) -> ()
- call @printNewline() : () -> ()
- return
-memref.global constant @gv1 : memref<3x2xi32> = dense<[[0, 1],[2, 3],[4, 5]]>
-func @testConstantMemref() {
- %0 = memref.get_global @gv1 : memref<3x2xi32>
- %U = memref.cast %0 : memref<3x2xi32> to memref<*xi32>
- // CHECK: rank = 2
- // CHECK: offset = 0
- // CHECK: sizes = [3, 2]
- // CHECK: strides = [2, 1]
- // CHECK: [0, 1]
- // CHECK: [2, 3]
- // CHECK: [4, 5]
- call @print_memref_i32(%U) : (memref<*xi32>) -> ()
- call @printNewline() : () -> ()
- return
-memref.global "private" @gv2 : memref<4x2xf32> = dense<[[0.0, 1.0], [2.0, 3.0], [4.0, 5.0], [6.0, 7.0]]>
-func @test2DMemref() {
- %0 = memref.get_global @gv2 : memref<4x2xf32>
- %U = memref.cast %0 : memref<4x2xf32> to memref<*xf32>
- // CHECK: rank = 2
- // CHECK: offset = 0
- // CHECK: sizes = [4, 2]
- // CHECK: strides = [2, 1]
- // CHECK: [0, 1]
- // CHECK: [2, 3]
- // CHECK: [4, 5]
- // CHECK: [6, 7]
- call @print_memref_f32(%U) : (memref<*xf32>) -> ()
- call @printNewline() : () -> ()
- // Overwrite the 1.0 (at index [0, 1]) with 10.0
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %fp10 = arith.constant 10.0 : f32
- memref.store %fp10, %0[%c0, %c1] : memref<4x2xf32>
- // CHECK: rank = 2
- // CHECK: offset = 0
- // CHECK: sizes = [4, 2]
- // CHECK: strides = [2, 1]
- // CHECK: [0, 10]
- // CHECK: [2, 3]
- // CHECK: [4, 5]
- // CHECK: [6, 7]
- call @print_memref_f32(%U) : (memref<*xf32>) -> ()
- call @printNewline() : () -> ()
- return
-memref.global @gv3 : memref<i32> = dense<11>
-func @testScalarMemref() {
- %0 = memref.get_global @gv3 : memref<i32>
- %U = memref.cast %0 : memref<i32> to memref<*xi32>
- // CHECK: rank = 0
- // CHECK: offset = 0
- // CHECK: sizes = []
- // CHECK: strides = []
- // CHECK: [11]
- call @print_memref_i32(%U) : (memref<*xi32>) -> ()
- call @printNewline() : () -> ()
- return
-func @main() -> () {
- call @test1DMemref() : () -> ()
- call @testConstantMemref() : () -> ()
- call @test2DMemref() : () -> ()
- call @testScalarMemref() : () -> ()
- return
--- /dev/null
+// RUN: mlir-opt %s -test-math-polynomial-approximation \
+// RUN: -convert-arith-to-llvm \
+// RUN: -convert-vector-to-llvm \
+// RUN: -convert-math-to-llvm \
+// RUN: -convert-std-to-llvm \
+// RUN: -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner \
+// RUN: -e main -entry-point-result=void -O0 \
+// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
+// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+// -------------------------------------------------------------------------- //
+// Tanh.
+// -------------------------------------------------------------------------- //
+// Prints math.tanh for a scalar, a 4-lane vector and an 8-lane vector of f32.
+func @tanh() {
+  // CHECK: 0.848284
+  %0 = arith.constant 1.25 : f32
+  %1 = math.tanh %0 : f32
+  vector.print %1 : f32
+  // CHECK: 0.244919, 0.635149, 0.761594, 0.848284
+  %2 = arith.constant dense<[0.25, 0.75, 1.0, 1.25]> : vector<4xf32>
+  %3 = math.tanh %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+  // CHECK: 0.099668, 0.197375, 0.291313, 0.379949, 0.462117, 0.53705, 0.604368, 0.664037
+  %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
+  %5 = math.tanh %4 : vector<8xf32>
+  vector.print %5 : vector<8xf32>
+  return
+}
+// -------------------------------------------------------------------------- //
+// Log.
+// -------------------------------------------------------------------------- //
+// Prints math.log for regular f32 inputs (scalar, 4-lane, 8-lane) and for the
+// special inputs 0, -1 and +inf, both as scalars and packed in one vector.
+func @log() {
+  // CHECK: 2.64704
+  %0 = arith.constant 14.112233 : f32
+  %1 = math.log %0 : f32
+  vector.print %1 : f32
+  // CHECK: -1.38629, -0.287682, 0, 0.223144
+  %2 = arith.constant dense<[0.25, 0.75, 1.0, 1.25]> : vector<4xf32>
+  %3 = math.log %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+  // CHECK: -2.30259, -1.60944, -1.20397, -0.916291, -0.693147, -0.510826, -0.356675, -0.223144
+  %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
+  %5 = math.log %4 : vector<8xf32>
+  vector.print %5 : vector<8xf32>
+  // CHECK: -inf
+  %zero = arith.constant 0.0 : f32
+  %log_zero = math.log %zero : f32
+  vector.print %log_zero : f32
+  // CHECK: nan
+  %neg_one = arith.constant -1.0 : f32
+  %log_neg_one = math.log %neg_one : f32
+  vector.print %log_neg_one : f32
+  // 0x7f800000 is the f32 bit pattern of +inf.
+  // CHECK: inf
+  %inf = arith.constant 0x7f800000 : f32
+  %log_inf = math.log %inf : f32
+  vector.print %log_inf : f32
+  // CHECK: -inf, nan, inf, 0.693147
+  %special_vec = arith.constant dense<[0.0, -1.0, 0x7f800000, 2.0]> : vector<4xf32>
+  %log_special_vec = math.log %special_vec : vector<4xf32>
+  vector.print %log_special_vec : vector<4xf32>
+  return
+}
+// Prints math.log2 for regular f32 inputs (scalar, 4-lane, 8-lane) and for
+// the special inputs 0, -1 and +inf, both as scalars and packed in one vector.
+func @log2() {
+  // CHECK: 3.81887
+  %0 = arith.constant 14.112233 : f32
+  %1 = math.log2 %0 : f32
+  vector.print %1 : f32
+  // CHECK: -2, -0.415037, 0, 0.321928
+  %2 = arith.constant dense<[0.25, 0.75, 1.0, 1.25]> : vector<4xf32>
+  %3 = math.log2 %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+  // CHECK: -3.32193, -2.32193, -1.73697, -1.32193, -1, -0.736966, -0.514573, -0.321928
+  %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
+  %5 = math.log2 %4 : vector<8xf32>
+  vector.print %5 : vector<8xf32>
+  // CHECK: -inf
+  %zero = arith.constant 0.0 : f32
+  %log_zero = math.log2 %zero : f32
+  vector.print %log_zero : f32
+  // CHECK: nan
+  %neg_one = arith.constant -1.0 : f32
+  %log_neg_one = math.log2 %neg_one : f32
+  vector.print %log_neg_one : f32
+  // 0x7f800000 is the f32 bit pattern of +inf.
+  // CHECK: inf
+  %inf = arith.constant 0x7f800000 : f32
+  %log_inf = math.log2 %inf : f32
+  vector.print %log_inf : f32
+  // CHECK: -inf, nan, inf, 1.58496
+  %special_vec = arith.constant dense<[0.0, -1.0, 0x7f800000, 3.0]> : vector<4xf32>
+  %log_special_vec = math.log2 %special_vec : vector<4xf32>
+  vector.print %log_special_vec : vector<4xf32>
+  return
+}
+// Prints math.log1p for regular f32 inputs (scalar, 4-lane, 8-lane) and for
+// the special inputs -1, values below -1 and +inf.
+func @log1p() {
+  // CHECK: 0.00995033
+  %0 = arith.constant 0.01 : f32
+  %1 = math.log1p %0 : f32
+  vector.print %1 : f32
+  // CHECK: -4.60517, -0.693147, 0, 1.38629
+  %2 = arith.constant dense<[-0.99, -0.5, 0.0, 3.0]> : vector<4xf32>
+  %3 = math.log1p %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+  // CHECK: 0.0953102, 0.182322, 0.262364, 0.336472, 0.405465, 0.470004, 0.530628, 0.587787
+  %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
+  %5 = math.log1p %4 : vector<8xf32>
+  vector.print %5 : vector<8xf32>
+  // CHECK: -inf
+  %neg_one = arith.constant -1.0 : f32
+  %log_neg_one = math.log1p %neg_one : f32
+  vector.print %log_neg_one : f32
+  // CHECK: nan
+  %neg_two = arith.constant -2.0 : f32
+  %log_neg_two = math.log1p %neg_two : f32
+  vector.print %log_neg_two : f32
+  // 0x7f800000 is the f32 bit pattern of +inf.
+  // CHECK: inf
+  %inf = arith.constant 0x7f800000 : f32
+  %log_inf = math.log1p %inf : f32
+  vector.print %log_inf : f32
+  // CHECK: -inf, nan, inf, 9.99995e-06
+  %special_vec = arith.constant dense<[-1.0, -1.1, 0x7f800000, 0.00001]> : vector<4xf32>
+  %log_special_vec = math.log1p %special_vec : vector<4xf32>
+  vector.print %log_special_vec : vector<4xf32>
+  return
+}
+// -------------------------------------------------------------------------- //
+// Erf.
+// -------------------------------------------------------------------------- //
+// Prints math.erf for a series of scalar f32 inputs, three 4-lane vectors
+// covering large, tiny and moderate magnitudes, and the special inputs
+// -inf, +inf and NaN.
+func @erf() {
+  // CHECK: -0.000274406
+  %val1 = arith.constant -2.431864e-4 : f32
+  %erfVal1 = math.erf %val1 : f32
+  vector.print %erfVal1 : f32
+  // CHECK: 0.742095
+  %val2 = arith.constant 0.79999 : f32
+  %erfVal2 = math.erf %val2 : f32
+  vector.print %erfVal2 : f32
+  // CHECK: 0.742101
+  %val3 = arith.constant 0.8 : f32
+  %erfVal3 = math.erf %val3 : f32
+  vector.print %erfVal3 : f32
+  // CHECK: 0.995322
+  %val4 = arith.constant 1.99999 : f32
+  %erfVal4 = math.erf %val4 : f32
+  vector.print %erfVal4 : f32
+  // CHECK: 0.995322
+  %val5 = arith.constant 2.0 : f32
+  %erfVal5 = math.erf %val5 : f32
+  vector.print %erfVal5 : f32
+  // CHECK: 1
+  %val6 = arith.constant 3.74999 : f32
+  %erfVal6 = math.erf %val6 : f32
+  vector.print %erfVal6 : f32
+  // CHECK: 1
+  %val7 = arith.constant 3.75 : f32
+  %erfVal7 = math.erf %val7 : f32
+  vector.print %erfVal7 : f32
+  // 0xff800000 is the f32 bit pattern of -inf.
+  // CHECK: -1
+  %negativeInf = arith.constant 0xff800000 : f32
+  %erfNegativeInf = math.erf %negativeInf : f32
+  vector.print %erfNegativeInf : f32
+  // CHECK: -1, -1, -0.913759, -0.731446
+  %vecVals1 = arith.constant dense<[-3.4028235e+38, -4.54318, -1.2130899, -7.8234202e-01]> : vector<4xf32>
+  %erfVecVals1 = math.erf %vecVals1 : vector<4xf32>
+  vector.print %erfVecVals1 : vector<4xf32>
+  // CHECK: -1.3264e-38, 0, 1.3264e-38, 0.121319
+  %vecVals2 = arith.constant dense<[-1.1754944e-38, 0.0, 1.1754944e-38, 1.0793410e-01]> : vector<4xf32>
+  %erfVecVals2 = math.erf %vecVals2 : vector<4xf32>
+  vector.print %erfVecVals2 : vector<4xf32>
+  // CHECK: 0.919477, 0.999069, 1, 1
+  %vecVals3 = arith.constant dense<[1.23578, 2.34093, 3.82342, 3.4028235e+38]> : vector<4xf32>
+  %erfVecVals3 = math.erf %vecVals3 : vector<4xf32>
+  vector.print %erfVecVals3 : vector<4xf32>
+  // 0x7f800000 is the f32 bit pattern of +inf.
+  // CHECK: 1
+  %inf = arith.constant 0x7f800000 : f32
+  %erfInf = math.erf %inf : f32
+  vector.print %erfInf : f32
+  // 0x7fc00000 is an f32 quiet-NaN bit pattern.
+  // CHECK: nan
+  %nan = arith.constant 0x7fc00000 : f32
+  %erfNan = math.erf %nan : f32
+  vector.print %erfNan : f32
+  return
+}
+// -------------------------------------------------------------------------- //
+// Exp.
+// -------------------------------------------------------------------------- //
+// Prints math.exp for regular f32 inputs, for a vector of large-magnitude
+// inputs, and for the special inputs 0, +inf and -inf.
+func @exp() {
+  // CHECK: 2.71828
+  %0 = arith.constant 1.0 : f32
+  %1 = math.exp %0 : f32
+  vector.print %1 : f32
+  // CHECK: 0.778802, 2.117, 2.71828, 3.85742
+  %2 = arith.constant dense<[-0.25, 0.75, 1.0, 1.35]> : vector<4xf32>
+  %3 = math.exp %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+  // CHECK: 1
+  %zero = arith.constant 0.0 : f32
+  %exp_zero = math.exp %zero : f32
+  vector.print %exp_zero : f32
+  // CHECK: 1.17549e-38, 1.38879e-11, 7.20049e+10, inf
+  %special_vec = arith.constant dense<[-89.0, -25.0, 25.0, 89.0]> : vector<4xf32>
+  %exp_special_vec = math.exp %special_vec : vector<4xf32>
+  vector.print %exp_special_vec : vector<4xf32>
+  // 0x7f800000 is the f32 bit pattern of +inf.
+  // CHECK: inf
+  %inf = arith.constant 0x7f800000 : f32
+  %exp_inf = math.exp %inf : f32
+  vector.print %exp_inf : f32
+  // 0xff800000 is the f32 bit pattern of -inf.
+  // CHECK: 0
+  %negative_inf = arith.constant 0xff800000 : f32
+  %exp_negative_inf = math.exp %negative_inf : f32
+  vector.print %exp_negative_inf : f32
+  return
+}
+// Prints math.expm1 for regular f32 inputs (scalar, 4-lane, 8-lane) and for
+// the special inputs -inf and +inf, both as scalars and packed in one vector.
+func @expm1() {
+  // CHECK: 1e-10
+  %0 = arith.constant 1.0e-10 : f32
+  %1 = math.expm1 %0 : f32
+  vector.print %1 : f32
+  // CHECK: -0.00995016, 0.0100502, 0.648721, 6.38905
+  %2 = arith.constant dense<[-0.01, 0.01, 0.5, 2.0]> : vector<4xf32>
+  %3 = math.expm1 %2 : vector<4xf32>
+  vector.print %3 : vector<4xf32>
+  // CHECK: -0.181269, 0, 0.221403, 0.491825, 0.822119, 1.22554, 1.71828, 2.32012
+  %4 = arith.constant dense<[-0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2]> : vector<8xf32>
+  %5 = math.expm1 %4 : vector<8xf32>
+  vector.print %5 : vector<8xf32>
+  // 0xff800000 is the f32 bit pattern of -inf.
+  // CHECK: -1
+  %neg_inf = arith.constant 0xff800000 : f32
+  %expm1_neg_inf = math.expm1 %neg_inf : f32
+  vector.print %expm1_neg_inf : f32
+  // 0x7f800000 is the f32 bit pattern of +inf.
+  // CHECK: inf
+  %inf = arith.constant 0x7f800000 : f32
+  %expm1_inf = math.expm1 %inf : f32
+  vector.print %expm1_inf : f32
+  // CHECK: -1, inf, 1e-10
+  %special_vec = arith.constant dense<[0xff800000, 0x7f800000, 1.0e-10]> : vector<3xf32>
+  %expm1_special_vec = math.expm1 %special_vec : vector<3xf32>
+  vector.print %expm1_special_vec : vector<3xf32>
+  return
+}
+// -------------------------------------------------------------------------- //
+// Sin.
+// -------------------------------------------------------------------------- //
+// Prints math.sin at 0, pi/4, pi/2, pi and 3*pi/2 as scalars, and for one
+// 3-lane vector holding 3*pi, 2*pi/3 and -pi/2.
+func @sin() {
+  // CHECK: 0
+  %0 = arith.constant 0.0 : f32
+  %sin_0 = math.sin %0 : f32
+  vector.print %sin_0 : f32
+  // CHECK: 0.707107
+  %pi_over_4 = arith.constant 0.78539816339 : f32
+  %sin_pi_over_4 = math.sin %pi_over_4 : f32
+  vector.print %sin_pi_over_4 : f32
+  // CHECK: 1
+  %pi_over_2 = arith.constant 1.57079632679 : f32
+  %sin_pi_over_2 = math.sin %pi_over_2 : f32
+  vector.print %sin_pi_over_2 : f32
+  // CHECK: 0
+  %pi = arith.constant 3.14159265359 : f32
+  %sin_pi = math.sin %pi : f32
+  vector.print %sin_pi : f32
+  // CHECK: -1
+  %pi_3_over_2 = arith.constant 4.71238898038 : f32
+  %sin_pi_3_over_2 = math.sin %pi_3_over_2 : f32
+  vector.print %sin_pi_3_over_2 : f32
+  // CHECK: 0, 0.866025, -1
+  %vec_x = arith.constant dense<[9.42477796077, 2.09439510239, -1.57079632679]> : vector<3xf32>
+  %sin_vec_x = math.sin %vec_x : vector<3xf32>
+  vector.print %sin_vec_x : vector<3xf32>
+  return
+}
+// -------------------------------------------------------------------------- //
+// Cos.
+// -------------------------------------------------------------------------- //
+// Prints math.cos at 0, pi/4, pi/2, pi and 3*pi/2 as scalars, and for one
+// 3-lane vector holding 3*pi, 2*pi/3 and -pi/2.
+func @cos() {
+  // CHECK: 1
+  %0 = arith.constant 0.0 : f32
+  %cos_0 = math.cos %0 : f32
+  vector.print %cos_0 : f32
+  // CHECK: 0.707107
+  %pi_over_4 = arith.constant 0.78539816339 : f32
+  %cos_pi_over_4 = math.cos %pi_over_4 : f32
+  vector.print %cos_pi_over_4 : f32
+  // CHECK: 0
+  %pi_over_2 = arith.constant 1.57079632679 : f32
+  %cos_pi_over_2 = math.cos %pi_over_2 : f32
+  vector.print %cos_pi_over_2 : f32
+  // CHECK: -1
+  %pi = arith.constant 3.14159265359 : f32
+  %cos_pi = math.cos %pi : f32
+  vector.print %cos_pi : f32
+  // CHECK: 0
+  %pi_3_over_2 = arith.constant 4.71238898038 : f32
+  %cos_pi_3_over_2 = math.cos %pi_3_over_2 : f32
+  vector.print %cos_pi_3_over_2 : f32
+  // CHECK: -1, -0.5, 0
+  %vec_x = arith.constant dense<[9.42477796077, 2.09439510239, -1.57079632679]> : vector<3xf32>
+  %cos_vec_x = math.cos %vec_x : vector<3xf32>
+  vector.print %cos_vec_x : vector<3xf32>
+  return
+}
+// Entry point selected by `-e main` on the RUN line. The call order must match
+// the order of the FileCheck directives in the file.
+func @main() {
+  call @tanh() : () -> ()
+  call @log() : () -> ()
+  call @log2() : () -> ()
+  call @log1p() : () -> ()
+  call @erf() : () -> ()
+  call @exp() : () -> ()
+  call @expm1() : () -> ()
+  call @sin() : () -> ()
+  call @cos() : () -> ()
+  return
+}
+++ /dev/null
-// RUN: mlir-opt %s -test-math-polynomial-approximation \
-// RUN: -convert-arith-to-llvm \
-// RUN: -convert-vector-to-llvm \
-// RUN: -convert-math-to-llvm \
-// RUN: -convert-std-to-llvm \
-// RUN: -reconcile-unrealized-casts \
-// RUN: | mlir-cpu-runner \
-// RUN: -e main -entry-point-result=void -O0 \
-// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
-// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
-// RUN: | FileCheck %s
-// -------------------------------------------------------------------------- //
-// Tanh.
-// -------------------------------------------------------------------------- //
-func @tanh() {
- // CHECK: 0.848284
- %0 = arith.constant 1.25 : f32
- %1 = math.tanh %0 : f32
- vector.print %1 : f32
- // CHECK: 0.244919, 0.635149, 0.761594, 0.848284
- %2 = arith.constant dense<[0.25, 0.75, 1.0, 1.25]> : vector<4xf32>
- %3 = math.tanh %2 : vector<4xf32>
- vector.print %3 : vector<4xf32>
- // CHECK: 0.099668, 0.197375, 0.291313, 0.379949, 0.462117, 0.53705, 0.604368, 0.664037
- %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
- %5 = math.tanh %4 : vector<8xf32>
- vector.print %5 : vector<8xf32>
- return
-// -------------------------------------------------------------------------- //
-// Log.
-// -------------------------------------------------------------------------- //
-func @log() {
- // CHECK: 2.64704
- %0 = arith.constant 14.112233 : f32
- %1 = math.log %0 : f32
- vector.print %1 : f32
- // CHECK: -1.38629, -0.287682, 0, 0.223144
- %2 = arith.constant dense<[0.25, 0.75, 1.0, 1.25]> : vector<4xf32>
- %3 = math.log %2 : vector<4xf32>
- vector.print %3 : vector<4xf32>
- // CHECK: -2.30259, -1.60944, -1.20397, -0.916291, -0.693147, -0.510826, -0.356675, -0.223144
- %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
- %5 = math.log %4 : vector<8xf32>
- vector.print %5 : vector<8xf32>
- // CHECK: -inf
- %zero = arith.constant 0.0 : f32
- %log_zero = math.log %zero : f32
- vector.print %log_zero : f32
- // CHECK: nan
- %neg_one = arith.constant -1.0 : f32
- %log_neg_one = math.log %neg_one : f32
- vector.print %log_neg_one : f32
- // CHECK: inf
- %inf = arith.constant 0x7f800000 : f32
- %log_inf = math.log %inf : f32
- vector.print %log_inf : f32
- // CHECK: -inf, nan, inf, 0.693147
- %special_vec = arith.constant dense<[0.0, -1.0, 0x7f800000, 2.0]> : vector<4xf32>
- %log_special_vec = math.log %special_vec : vector<4xf32>
- vector.print %log_special_vec : vector<4xf32>
- return
-func @log2() {
- // CHECK: 3.81887
- %0 = arith.constant 14.112233 : f32
- %1 = math.log2 %0 : f32
- vector.print %1 : f32
- // CHECK: -2, -0.415037, 0, 0.321928
- %2 = arith.constant dense<[0.25, 0.75, 1.0, 1.25]> : vector<4xf32>
- %3 = math.log2 %2 : vector<4xf32>
- vector.print %3 : vector<4xf32>
- // CHECK: -3.32193, -2.32193, -1.73697, -1.32193, -1, -0.736966, -0.514573, -0.321928
- %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
- %5 = math.log2 %4 : vector<8xf32>
- vector.print %5 : vector<8xf32>
- // CHECK: -inf
- %zero = arith.constant 0.0 : f32
- %log_zero = math.log2 %zero : f32
- vector.print %log_zero : f32
- // CHECK: nan
- %neg_one = arith.constant -1.0 : f32
- %log_neg_one = math.log2 %neg_one : f32
- vector.print %log_neg_one : f32
- // CHECK: inf
- %inf = arith.constant 0x7f800000 : f32
- %log_inf = math.log2 %inf : f32
- vector.print %log_inf : f32
- // CHECK: -inf, nan, inf, 1.58496
- %special_vec = arith.constant dense<[0.0, -1.0, 0x7f800000, 3.0]> : vector<4xf32>
- %log_special_vec = math.log2 %special_vec : vector<4xf32>
- vector.print %log_special_vec : vector<4xf32>
- return
-func @log1p() {
- // CHECK: 0.00995033
- %0 = arith.constant 0.01 : f32
- %1 = math.log1p %0 : f32
- vector.print %1 : f32
- // CHECK: -4.60517, -0.693147, 0, 1.38629
- %2 = arith.constant dense<[-0.99, -0.5, 0.0, 3.0]> : vector<4xf32>
- %3 = math.log1p %2 : vector<4xf32>
- vector.print %3 : vector<4xf32>
- // CHECK: 0.0953102, 0.182322, 0.262364, 0.336472, 0.405465, 0.470004, 0.530628, 0.587787
- %4 = arith.constant dense<[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]> : vector<8xf32>
- %5 = math.log1p %4 : vector<8xf32>
- vector.print %5 : vector<8xf32>
- // CHECK: -inf
- %neg_one = arith.constant -1.0 : f32
- %log_neg_one = math.log1p %neg_one : f32
- vector.print %log_neg_one : f32
- // CHECK: nan
- %neg_two = arith.constant -2.0 : f32
- %log_neg_two = math.log1p %neg_two : f32
- vector.print %log_neg_two : f32
- // CHECK: inf
- %inf = arith.constant 0x7f800000 : f32
- %log_inf = math.log1p %inf : f32
- vector.print %log_inf : f32
- // CHECK: -inf, nan, inf, 9.99995e-06
- %special_vec = arith.constant dense<[-1.0, -1.1, 0x7f800000, 0.00001]> : vector<4xf32>
- %log_special_vec = math.log1p %special_vec : vector<4xf32>
- vector.print %log_special_vec : vector<4xf32>
- return
-// -------------------------------------------------------------------------- //
-// Erf.
-// -------------------------------------------------------------------------- //
-func @erf() {
- // CHECK: -0.000274406
- %val1 = arith.constant -2.431864e-4 : f32
- %erfVal1 = math.erf %val1 : f32
- vector.print %erfVal1 : f32
- // CHECK: 0.742095
- %val2 = arith.constant 0.79999 : f32
- %erfVal2 = math.erf %val2 : f32
- vector.print %erfVal2 : f32
- // CHECK: 0.742101
- %val3 = arith.constant 0.8 : f32
- %erfVal3 = math.erf %val3 : f32
- vector.print %erfVal3 : f32
- // CHECK: 0.995322
- %val4 = arith.constant 1.99999 : f32
- %erfVal4 = math.erf %val4 : f32
- vector.print %erfVal4 : f32
- // CHECK: 0.995322
- %val5 = arith.constant 2.0 : f32
- %erfVal5 = math.erf %val5 : f32
- vector.print %erfVal5 : f32
- // CHECK: 1
- %val6 = arith.constant 3.74999 : f32
- %erfVal6 = math.erf %val6 : f32
- vector.print %erfVal6 : f32
- // CHECK: 1
- %val7 = arith.constant 3.75 : f32
- %erfVal7 = math.erf %val7 : f32
- vector.print %erfVal7 : f32
- // CHECK: -1
- %negativeInf = arith.constant 0xff800000 : f32
- %erfNegativeInf = math.erf %negativeInf : f32
- vector.print %erfNegativeInf : f32
- // CHECK: -1, -1, -0.913759, -0.731446
- %vecVals1 = arith.constant dense<[-3.4028235e+38, -4.54318, -1.2130899, -7.8234202e-01]> : vector<4xf32>
- %erfVecVals1 = math.erf %vecVals1 : vector<4xf32>
- vector.print %erfVecVals1 : vector<4xf32>
- // CHECK: -1.3264e-38, 0, 1.3264e-38, 0.121319
- %vecVals2 = arith.constant dense<[-1.1754944e-38, 0.0, 1.1754944e-38, 1.0793410e-01]> : vector<4xf32>
- %erfVecVals2 = math.erf %vecVals2 : vector<4xf32>
- vector.print %erfVecVals2 : vector<4xf32>
- // CHECK: 0.919477, 0.999069, 1, 1
- %vecVals3 = arith.constant dense<[1.23578, 2.34093, 3.82342, 3.4028235e+38]> : vector<4xf32>
- %erfVecVals3 = math.erf %vecVals3 : vector<4xf32>
- vector.print %erfVecVals3 : vector<4xf32>
- // CHECK: 1
- %inf = arith.constant 0x7f800000 : f32
- %erfInf = math.erf %inf : f32
- vector.print %erfInf : f32
- // CHECK: nan
- %nan = arith.constant 0x7fc00000 : f32
- %erfNan = math.erf %nan : f32
- vector.print %erfNan : f32
- return
-// -------------------------------------------------------------------------- //
-// Exp.
-// -------------------------------------------------------------------------- //
-func @exp() {
- // CHECK: 2.71828
- %0 = arith.constant 1.0 : f32
- %1 = math.exp %0 : f32
- vector.print %1 : f32
- // CHECK: 0.778802, 2.117, 2.71828, 3.85742
- %2 = arith.constant dense<[-0.25, 0.75, 1.0, 1.35]> : vector<4xf32>
- %3 = math.exp %2 : vector<4xf32>
- vector.print %3 : vector<4xf32>
- // CHECK: 1
- %zero = arith.constant 0.0 : f32
- %exp_zero = math.exp %zero : f32
- vector.print %exp_zero : f32
- // CHECK: 1.17549e-38, 1.38879e-11, 7.20049e+10, inf
- %special_vec = arith.constant dense<[-89.0, -25.0, 25.0, 89.0]> : vector<4xf32>
- %exp_special_vec = math.exp %special_vec : vector<4xf32>
- vector.print %exp_special_vec : vector<4xf32>
- // CHECK: inf
- %inf = arith.constant 0x7f800000 : f32
- %exp_inf = math.exp %inf : f32
- vector.print %exp_inf : f32
- // CHECK: 0
- %negative_inf = arith.constant 0xff800000 : f32
- %exp_negative_inf = math.exp %negative_inf : f32
- vector.print %exp_negative_inf : f32
- return
-func @expm1() {
- // CHECK: 1e-10
- %0 = arith.constant 1.0e-10 : f32
- %1 = math.expm1 %0 : f32
- vector.print %1 : f32
- // CHECK: -0.00995016, 0.0100502, 0.648721, 6.38905
- %2 = arith.constant dense<[-0.01, 0.01, 0.5, 2.0]> : vector<4xf32>
- %3 = math.expm1 %2 : vector<4xf32>
- vector.print %3 : vector<4xf32>
- // CHECK: -0.181269, 0, 0.221403, 0.491825, 0.822119, 1.22554, 1.71828, 2.32012
- %4 = arith.constant dense<[-0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2]> : vector<8xf32>
- %5 = math.expm1 %4 : vector<8xf32>
- vector.print %5 : vector<8xf32>
- // CHECK: -1
- %neg_inf = arith.constant 0xff800000 : f32
- %expm1_neg_inf = math.expm1 %neg_inf : f32
- vector.print %expm1_neg_inf : f32
- // CHECK: inf
- %inf = arith.constant 0x7f800000 : f32
- %expm1_inf = math.expm1 %inf : f32
- vector.print %expm1_inf : f32
- // CHECK: -1, inf, 1e-10
- %special_vec = arith.constant dense<[0xff800000, 0x7f800000, 1.0e-10]> : vector<3xf32>
- %log_special_vec = math.expm1 %special_vec : vector<3xf32>
- vector.print %log_special_vec : vector<3xf32>
- return
-// -------------------------------------------------------------------------- //
-// Sin.
-// -------------------------------------------------------------------------- //
-func @sin() {
- // CHECK: 0
- %0 = arith.constant 0.0 : f32
- %sin_0 = math.sin %0 : f32
- vector.print %sin_0 : f32
- // CHECK: 0.707107
- %pi_over_4 = arith.constant 0.78539816339 : f32
- %sin_pi_over_4 = math.sin %pi_over_4 : f32
- vector.print %sin_pi_over_4 : f32
- // CHECK: 1
- %pi_over_2 = arith.constant 1.57079632679 : f32
- %sin_pi_over_2 = math.sin %pi_over_2 : f32
- vector.print %sin_pi_over_2 : f32
- // CHECK: 0
- %pi = arith.constant 3.14159265359 : f32
- %sin_pi = math.sin %pi : f32
- vector.print %sin_pi : f32
- // CHECK: -1
- %pi_3_over_2 = arith.constant 4.71238898038 : f32
- %sin_pi_3_over_2 = math.sin %pi_3_over_2 : f32
- vector.print %sin_pi_3_over_2 : f32
- // CHECK: 0, 0.866025, -1
- %vec_x = arith.constant dense<[9.42477796077, 2.09439510239, -1.57079632679]> : vector<3xf32>
- %sin_vec_x = math.sin %vec_x : vector<3xf32>
- vector.print %sin_vec_x : vector<3xf32>
- return
-// -------------------------------------------------------------------------- //
-// cos.
-// -------------------------------------------------------------------------- //
-func @cos() {
- // CHECK: 1
- %0 = arith.constant 0.0 : f32
- %cos_0 = math.cos %0 : f32
- vector.print %cos_0 : f32
- // CHECK: 0.707107
- %pi_over_4 = arith.constant 0.78539816339 : f32
- %cos_pi_over_4 = math.cos %pi_over_4 : f32
- vector.print %cos_pi_over_4 : f32
- //// CHECK: 0
- %pi_over_2 = arith.constant 1.57079632679 : f32
- %cos_pi_over_2 = math.cos %pi_over_2 : f32
- vector.print %cos_pi_over_2 : f32
- /// CHECK: -1
- %pi = arith.constant 3.14159265359 : f32
- %cos_pi = math.cos %pi : f32
- vector.print %cos_pi : f32
- // CHECK: 0
- %pi_3_over_2 = arith.constant 4.71238898038 : f32
- %cos_pi_3_over_2 = math.cos %pi_3_over_2 : f32
- vector.print %cos_pi_3_over_2 : f32
- // CHECK: -1, -0.5, 0
- %vec_x = arith.constant dense<[9.42477796077, 2.09439510239, -1.57079632679]> : vector<3xf32>
- %cos_vec_x = math.cos %vec_x : vector<3xf32>
- vector.print %cos_vec_x : vector<3xf32>
- return
-func @main() {
- call @tanh(): () -> ()
- call @log(): () -> ()
- call @log2(): () -> ()
- call @log1p(): () -> ()
- call @erf(): () -> ()
- call @exp(): () -> ()
- call @expm1(): () -> ()
- call @sin(): () -> ()
- call @cos(): () -> ()
- return
--- /dev/null
+// RUN: mlir-opt %s -convert-scf-to-std -convert-memref-to-llvm -convert-arith-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+// External printer for unranked f32 memrefs. Only declared here, with the
+// C-interface attribute; presumably resolved from the runner-utils shared
+// library handed to the runner on the command line above (TODO confirm).
+func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
+// Test driver: fills a 2x3 f32 buffer with its row-major linear indices
+// (0..5), prints it, and then hands the same buffer to each of the four
+// reinterpret_cast test cases before freeing it.
+func @main() -> () {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ // Initialize input.
+ %input = memref.alloc() : memref<2x3xf32>
+ %dim_x = memref.dim %input, %c0 : memref<2x3xf32>
+ %dim_y = memref.dim %input, %c1 : memref<2x3xf32>
+ // Element (i, j) receives i * dim_y + j, converted index -> i64 -> f32.
+ scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) {
+ %prod = arith.muli %i, %dim_y : index
+ %val = arith.addi %prod, %j : index
+ %val_i64 = arith.index_cast %val : index to i64
+ %val_f32 = arith.sitofp %val_i64 : i64 to f32
+ memref.store %val_f32, %input[%i, %j] : memref<2x3xf32>
+ }
+ // The printer only accepts unranked memrefs, hence the cast.
+ %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
+ call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1]
+ // CHECK-NEXT: [0, 1, 2]
+ // CHECK-NEXT: [3, 4, 5]
+ // Test cases.
+ // The call order below fixes the order of the expected output in the
+ // callee bodies.
+ call @cast_ranked_memref_to_static_shape(%input) : (memref<2x3xf32>) -> ()
+ call @cast_ranked_memref_to_dynamic_shape(%input) : (memref<2x3xf32>) -> ()
+ call @cast_unranked_memref_to_static_shape(%input) : (memref<2x3xf32>) -> ()
+ call @cast_unranked_memref_to_dynamic_shape(%input) : (memref<2x3xf32>) -> ()
+ memref.dealloc %input : memref<2x3xf32>
+ return
+// Reinterprets the ranked 2x3 input as a 6x1 memref using compile-time
+// constant offset, sizes and strides, then prints the result.
+func @cast_ranked_memref_to_static_shape(%input : memref<2x3xf32>) {
+ %output = memref.reinterpret_cast %input to
+ offset: [0], sizes: [6, 1], strides: [1, 1]
+ : memref<2x3xf32> to memref<6x1xf32>
+ // Cast to unranked because that is the only type the printer accepts.
+ %unranked_output = memref.cast %output
+ : memref<6x1xf32> to memref<*xf32>
+ call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [6, 1] strides = [1, 1] data =
+ // CHECK-NEXT: [0],
+ // CHECK-NEXT: [1],
+ // CHECK-NEXT: [2],
+ // CHECK-NEXT: [3],
+ // CHECK-NEXT: [4],
+ // CHECK-NEXT: [5]
+ return
+// Reinterprets the ranked 2x3 input as a 1x6 memref whose offset, sizes and
+// strides are all supplied as SSA index values, so the result type is fully
+// dynamic; then prints the result.
+func @cast_ranked_memref_to_dynamic_shape(%input : memref<2x3xf32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c6 = arith.constant 6 : index
+ %output = memref.reinterpret_cast %input to
+ offset: [%c0], sizes: [%c1, %c6], strides: [%c6, %c1]
+ : memref<2x3xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
+ // Cast to unranked because that is the only type the printer accepts.
+ %unranked_output = memref.cast %output
+ : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<*xf32>
+ call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [1, 6] strides = [6, 1] data =
+ // CHECK-NEXT: [0, 1, 2, 3, 4, 5]
+ return
+// Same as the ranked static-shape case, except that the source of the
+// reinterpret_cast is first erased to an unranked memref.
+func @cast_unranked_memref_to_static_shape(%input : memref<2x3xf32>) {
+ %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
+ %output = memref.reinterpret_cast %unranked_input to
+ offset: [0], sizes: [6, 1], strides: [1, 1]
+ : memref<*xf32> to memref<6x1xf32>
+ // Cast back to unranked for printing.
+ %unranked_output = memref.cast %output
+ : memref<6x1xf32> to memref<*xf32>
+ call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [6, 1] strides = [1, 1] data =
+ // CHECK-NEXT: [0],
+ // CHECK-NEXT: [1],
+ // CHECK-NEXT: [2],
+ // CHECK-NEXT: [3],
+ // CHECK-NEXT: [4],
+ // CHECK-NEXT: [5]
+ return
+// Same as the ranked dynamic-shape case, except that the source of the
+// reinterpret_cast is first erased to an unranked memref.
+func @cast_unranked_memref_to_dynamic_shape(%input : memref<2x3xf32>) {
+ %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c6 = arith.constant 6 : index
+ %output = memref.reinterpret_cast %unranked_input to
+ offset: [%c0], sizes: [%c1, %c6], strides: [%c6, %c1]
+ : memref<*xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
+ // Cast back to unranked for printing.
+ %unranked_output = memref.cast %output
+ : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<*xf32>
+ call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [1, 6] strides = [6, 1] data =
+ // CHECK-NEXT: [0, 1, 2, 3, 4, 5]
+ return
--- /dev/null
+// RUN: mlir-opt %s -convert-scf-to-std -std-expand -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+// External printer for unranked f32 memrefs. Only declared here, with the
+// C-interface attribute; presumably resolved from the runner-utils shared
+// library handed to the runner on the command line above (TODO confirm).
+func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
+// Test driver: fills a 2x3 f32 buffer with its row-major linear indices
+// (0..5), prints it, builds a two-element shape buffer holding [3, 2], and
+// passes both to each of the four reshape test cases before freeing them.
+func @main() -> () {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ // Initialize input.
+ %input = memref.alloc() : memref<2x3xf32>
+ %dim_x = memref.dim %input, %c0 : memref<2x3xf32>
+ %dim_y = memref.dim %input, %c1 : memref<2x3xf32>
+ // Element (i, j) receives i * dim_y + j, converted index -> i64 -> f32.
+ scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) {
+ %prod = arith.muli %i, %dim_y : index
+ %val = arith.addi %prod, %j : index
+ %val_i64 = arith.index_cast %val : index to i64
+ %val_f32 = arith.sitofp %val_i64 : i64 to f32
+ memref.store %val_f32, %input[%i, %j] : memref<2x3xf32>
+ }
+ // The printer only accepts unranked memrefs, hence the cast.
+ %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
+ call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1]
+ // CHECK-NEXT: [0, 1, 2]
+ // CHECK-NEXT: [3, 4, 5]
+ // Initialize shape.
+ // The target shape is [3, 2]: 3 goes to position 0 and 2 to position 1.
+ %shape = memref.alloc() : memref<2xindex>
+ %c2 = arith.constant 2 : index
+ %c3 = arith.constant 3 : index
+ memref.store %c3, %shape[%c0] : memref<2xindex>
+ memref.store %c2, %shape[%c1] : memref<2xindex>
+ // Test cases.
+ // Every case receives the ranked input; the cases named "unranked" perform
+ // their own cast internally.
+ call @reshape_ranked_memref_to_ranked(%input, %shape)
+ : (memref<2x3xf32>, memref<2xindex>) -> ()
+ call @reshape_unranked_memref_to_ranked(%input, %shape)
+ : (memref<2x3xf32>, memref<2xindex>) -> ()
+ call @reshape_ranked_memref_to_unranked(%input, %shape)
+ : (memref<2x3xf32>, memref<2xindex>) -> ()
+ call @reshape_unranked_memref_to_unranked(%input, %shape)
+ : (memref<2x3xf32>, memref<2xindex>) -> ()
+ memref.dealloc %input : memref<2x3xf32>
+ memref.dealloc %shape : memref<2xindex>
+ return
+// Reshapes the ranked 2x3 input to the ranked type memref<?x?xf32> using a
+// statically sized shape operand, then prints the result.
+func @reshape_ranked_memref_to_ranked(%input : memref<2x3xf32>,
+ %shape : memref<2xindex>) {
+ %output = memref.reshape %input(%shape)
+ : (memref<2x3xf32>, memref<2xindex>) -> memref<?x?xf32>
+ // Cast to unranked because that is the only type the printer accepts.
+ %unranked_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
+ call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
+ // CHECK: [0, 1],
+ // CHECK: [2, 3],
+ // CHECK: [4, 5]
+ return
+// Reshapes an unranked view of the 2x3 input to the ranked type
+// memref<?x?xf32> using a statically sized shape operand, then prints the
+// result.
+func @reshape_unranked_memref_to_ranked(%input : memref<2x3xf32>,
+ %shape : memref<2xindex>) {
+ %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
+ // The reshape source must be the unranked value. Reshaping %input here
+ // would merely repeat the ranked-to-ranked case and leave the cast unused.
+ %output = memref.reshape %unranked_input(%shape)
+ : (memref<*xf32>, memref<2xindex>) -> memref<?x?xf32>
+ // Cast to unranked because that is the only type the printer accepts.
+ %unranked_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
+ call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
+ // CHECK: [0, 1],
+ // CHECK: [2, 3],
+ // CHECK: [4, 5]
+ return
+// Reshapes the ranked 2x3 input to an unranked memref. The shape operand is
+// first cast to a dynamically sized memref<?xindex>, so the result rank is
+// not part of the result type.
+func @reshape_ranked_memref_to_unranked(%input : memref<2x3xf32>,
+ %shape : memref<2xindex>) {
+ %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref<?xindex>
+ %output = memref.reshape %input(%dyn_size_shape)
+ : (memref<2x3xf32>, memref<?xindex>) -> memref<*xf32>
+ // The result is already unranked and can be printed directly.
+ call @print_memref_f32(%output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
+ // CHECK: [0, 1],
+ // CHECK: [2, 3],
+ // CHECK: [4, 5]
+ return
+// Reshapes an unranked view of the 2x3 input to an unranked memref. The
+// shape operand is first cast to a dynamically sized memref<?xindex>, so
+// neither the source rank nor the result rank is part of the op's types.
+func @reshape_unranked_memref_to_unranked(%input : memref<2x3xf32>,
+ %shape : memref<2xindex>) {
+ %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
+ %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref<?xindex>
+ // The reshape source must be the unranked value. Reshaping %input here
+ // would merely repeat the ranked-to-unranked case and leave the cast unused.
+ %output = memref.reshape %unranked_input(%dyn_size_shape)
+ : (memref<*xf32>, memref<?xindex>) -> memref<*xf32>
+ // The result is already unranked and can be printed directly.
+ call @print_memref_f32(%output) : (memref<*xf32>) -> ()
+ // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
+ // CHECK: [0, 1],
+ // CHECK: [2, 3],
+ // CHECK: [4, 5]
+ return
+++ /dev/null
-// RUN: mlir-opt %s -convert-scf-to-std -convert-memref-to-llvm -convert-arith-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
-// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
-// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
-// RUN: | FileCheck %s
-func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
-func @main() -> () {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- // Initialize input.
- %input = memref.alloc() : memref<2x3xf32>
- %dim_x = memref.dim %input, %c0 : memref<2x3xf32>
- %dim_y = memref.dim %input, %c1 : memref<2x3xf32>
- scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) {
- %prod = arith.muli %i, %dim_y : index
- %val = arith.addi %prod, %j : index
- %val_i64 = arith.index_cast %val : index to i64
- %val_f32 = arith.sitofp %val_i64 : i64 to f32
- memref.store %val_f32, %input[%i, %j] : memref<2x3xf32>
- }
- %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
- call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1]
- // CHECK-NEXT: [0, 1, 2]
- // CHECK-NEXT: [3, 4, 5]
- // Test cases.
- call @cast_ranked_memref_to_static_shape(%input) : (memref<2x3xf32>) -> ()
- call @cast_ranked_memref_to_dynamic_shape(%input) : (memref<2x3xf32>) -> ()
- call @cast_unranked_memref_to_static_shape(%input) : (memref<2x3xf32>) -> ()
- call @cast_unranked_memref_to_dynamic_shape(%input) : (memref<2x3xf32>) -> ()
- memref.dealloc %input : memref<2x3xf32>
- return
-func @cast_ranked_memref_to_static_shape(%input : memref<2x3xf32>) {
- %output = memref.reinterpret_cast %input to
- offset: [0], sizes: [6, 1], strides: [1, 1]
- : memref<2x3xf32> to memref<6x1xf32>
- %unranked_output = memref.cast %output
- : memref<6x1xf32> to memref<*xf32>
- call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [6, 1] strides = [1, 1] data =
- // CHECK-NEXT: [0],
- // CHECK-NEXT: [1],
- // CHECK-NEXT: [2],
- // CHECK-NEXT: [3],
- // CHECK-NEXT: [4],
- // CHECK-NEXT: [5]
- return
-func @cast_ranked_memref_to_dynamic_shape(%input : memref<2x3xf32>) {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c6 = arith.constant 6 : index
- %output = memref.reinterpret_cast %input to
- offset: [%c0], sizes: [%c1, %c6], strides: [%c6, %c1]
- : memref<2x3xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
- %unranked_output = memref.cast %output
- : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<*xf32>
- call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [1, 6] strides = [6, 1] data =
- // CHECK-NEXT: [0, 1, 2, 3, 4, 5]
- return
-func @cast_unranked_memref_to_static_shape(%input : memref<2x3xf32>) {
- %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
- %output = memref.reinterpret_cast %unranked_input to
- offset: [0], sizes: [6, 1], strides: [1, 1]
- : memref<*xf32> to memref<6x1xf32>
- %unranked_output = memref.cast %output
- : memref<6x1xf32> to memref<*xf32>
- call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [6, 1] strides = [1, 1] data =
- // CHECK-NEXT: [0],
- // CHECK-NEXT: [1],
- // CHECK-NEXT: [2],
- // CHECK-NEXT: [3],
- // CHECK-NEXT: [4],
- // CHECK-NEXT: [5]
- return
-func @cast_unranked_memref_to_dynamic_shape(%input : memref<2x3xf32>) {
- %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c6 = arith.constant 6 : index
- %output = memref.reinterpret_cast %unranked_input to
- offset: [%c0], sizes: [%c1, %c6], strides: [%c6, %c1]
- : memref<*xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
- %unranked_output = memref.cast %output
- : memref<?x?xf32, offset: ?, strides: [?, ?]> to memref<*xf32>
- call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [1, 6] strides = [6, 1] data =
- // CHECK-NEXT: [0, 1, 2, 3, 4, 5]
- return
+++ /dev/null
-// RUN: mlir-opt %s -convert-scf-to-std -std-expand -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
-// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
-// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
-// RUN: | FileCheck %s
-func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
-func @main() -> () {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- // Initialize input.
- %input = memref.alloc() : memref<2x3xf32>
- %dim_x = memref.dim %input, %c0 : memref<2x3xf32>
- %dim_y = memref.dim %input, %c1 : memref<2x3xf32>
- scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) {
- %prod = arith.muli %i, %dim_y : index
- %val = arith.addi %prod, %j : index
- %val_i64 = arith.index_cast %val : index to i64
- %val_f32 = arith.sitofp %val_i64 : i64 to f32
- memref.store %val_f32, %input[%i, %j] : memref<2x3xf32>
- }
- %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
- call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1]
- // CHECK-NEXT: [0, 1, 2]
- // CHECK-NEXT: [3, 4, 5]
- // Initialize shape.
- %shape = memref.alloc() : memref<2xindex>
- %c2 = arith.constant 2 : index
- %c3 = arith.constant 3 : index
- memref.store %c3, %shape[%c0] : memref<2xindex>
- memref.store %c2, %shape[%c1] : memref<2xindex>
- // Test cases.
- call @reshape_ranked_memref_to_ranked(%input, %shape)
- : (memref<2x3xf32>, memref<2xindex>) -> ()
- call @reshape_unranked_memref_to_ranked(%input, %shape)
- : (memref<2x3xf32>, memref<2xindex>) -> ()
- call @reshape_ranked_memref_to_unranked(%input, %shape)
- : (memref<2x3xf32>, memref<2xindex>) -> ()
- call @reshape_unranked_memref_to_unranked(%input, %shape)
- : (memref<2x3xf32>, memref<2xindex>) -> ()
- memref.dealloc %input : memref<2x3xf32>
- memref.dealloc %shape : memref<2xindex>
- return
-func @reshape_ranked_memref_to_ranked(%input : memref<2x3xf32>,
- %shape : memref<2xindex>) {
- %output = memref.reshape %input(%shape)
- : (memref<2x3xf32>, memref<2xindex>) -> memref<?x?xf32>
- %unranked_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
- call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
- // CHECK: [0, 1],
- // CHECK: [2, 3],
- // CHECK: [4, 5]
- return
-func @reshape_unranked_memref_to_ranked(%input : memref<2x3xf32>,
- %shape : memref<2xindex>) {
- %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
- %output = memref.reshape %input(%shape)
- : (memref<2x3xf32>, memref<2xindex>) -> memref<?x?xf32>
- %unranked_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
- call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
- // CHECK: [0, 1],
- // CHECK: [2, 3],
- // CHECK: [4, 5]
- return
-func @reshape_ranked_memref_to_unranked(%input : memref<2x3xf32>,
- %shape : memref<2xindex>) {
- %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref<?xindex>
- %output = memref.reshape %input(%dyn_size_shape)
- : (memref<2x3xf32>, memref<?xindex>) -> memref<*xf32>
- call @print_memref_f32(%output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
- // CHECK: [0, 1],
- // CHECK: [2, 3],
- // CHECK: [4, 5]
- return
-func @reshape_unranked_memref_to_unranked(%input : memref<2x3xf32>,
- %shape : memref<2xindex>) {
- %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
- %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref<?xindex>
- %output = memref.reshape %input(%dyn_size_shape)
- : (memref<2x3xf32>, memref<?xindex>) -> memref<*xf32>
- call @print_memref_f32(%output) : (memref<*xf32>) -> ()
- // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
- // CHECK: [0, 1],
- // CHECK: [2, 3],
- // CHECK: [4, 5]
- return
--- /dev/null
+// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-arith-to-llvm -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// Benchmark driver: multiplies two 16x16 all-ones matrices five times while
+// timing the loop, prints C[0, 0] for verification, and reports the achieved
+// floating-point rate through @print_flops.
+func @main() {
+ %A = memref.alloc() : memref<16x16xf32>
+ %B = memref.alloc() : memref<16x16xf32>
+ %C = memref.alloc() : memref<16x16xf32>
+ %cf1 = arith.constant 1.00000e+00 : f32
+ linalg.fill(%cf1, %A) : f32, memref<16x16xf32>
+ linalg.fill(%cf1, %B) : f32, memref<16x16xf32>
+ // Must equal the trip count of the timed loop below; otherwise the reported
+ // rate is wrong by that factor.
+ %reps = arith.constant 5 : index
+ %t_start = call @rtclock() : () -> f64
+ affine.for %arg0 = 0 to 5 {
+ // C is reset on every repetition, so the final C[0, 0] is 1 + 16 = 17
+ // regardless of the repetition count.
+ linalg.fill(%cf1, %C) : f32, memref<16x16xf32>
+ call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
+ }
+ %t_end = call @rtclock() : () -> f64
+ %t = arith.subf %t_end, %t_start : f64
+ %res = affine.load %C[0, 0]: memref<16x16xf32>
+ vector.print %res: f32
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %M = memref.dim %C, %c0 : memref<16x16xf32>
+ %N = memref.dim %C, %c1 : memref<16x16xf32>
+ %K = memref.dim %A, %c1 : memref<16x16xf32>
+ %f1 = arith.muli %M, %N : index
+ %f2 = arith.muli %f1, %K : index
+ // 2*M*N*K.
+ %f3 = arith.muli %c2, %f2 : index
+ %num_flops = arith.muli %reps, %f3 : index
+ // 5 * 2 * 16^3 = 40960 exceeds the signed 16-bit range, so widen to i64
+ // before converting to floating point.
+ %num_flops_i = arith.index_cast %num_flops : index to i64
+ %num_flops_f = arith.sitofp %num_flops_i : i64 to f64
+ %flops = arith.divf %num_flops_f, %t : f64
+ call @print_flops(%flops) : (f64) -> ()
+ memref.dealloc %A : memref<16x16xf32>
+ memref.dealloc %B : memref<16x16xf32>
+ memref.dealloc %C : memref<16x16xf32>
+ return
+// CHECK: 17
+// Naive matrix multiply-accumulate on 16x16 f32 operands:
+// arg2[i, j] += sum over k of arg0[i, k] * arg1[k, j].
+func @sgemm_naive(%arg0: memref<16x16xf32>, %arg1: memref<16x16xf32>, %arg2: memref<16x16xf32>) {
+ %c0 = arith.constant 0 : index
+ affine.for %arg3 = 0 to 16 {
+ affine.for %arg4 = 0 to 16 {
+ // One-element scratch buffer used as the accumulator for this
+ // (row, column) pair; it is seeded with the current output value.
+ %m = memref.alloc() : memref<1xf32>
+ %v = affine.load %arg2[%arg3, %arg4] : memref<16x16xf32>
+ affine.store %v, %m[%c0] : memref<1xf32>
+ affine.for %arg5 = 0 to 16 {
+ %3 = affine.load %arg0[%arg3, %arg5] : memref<16x16xf32>
+ %4 = affine.load %arg1[%arg5, %arg4] : memref<16x16xf32>
+ %5 = affine.load %m[0] : memref<1xf32>
+ %6 = arith.mulf %3, %4 : f32
+ %7 = arith.addf %6, %5 : f32
+ affine.store %7, %m[0] : memref<1xf32>
+ }
+ // Write the accumulated value back to the output and free the scratch.
+ %s = affine.load %m[%c0] : memref<1xf32>
+ affine.store %s, %arg2[%arg3, %arg4] : memref<16x16xf32>
+ memref.dealloc %m : memref<1xf32>
+ }
+ }
+ return
+// External helpers, declared only. @print_flops receives the computed rate
+// and @rtclock supplies the timestamps that @main subtracts; presumably both
+// come from the C runner-utils shared library on the command line above, and
+// the clock unit is presumably seconds (TODO confirm both).
+func private @print_flops(f64)
+func private @rtclock() -> f64
+++ /dev/null
-// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-arith-to-llvm -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
-func @main() {
- %A = memref.alloc() : memref<16x16xf32>
- %B = memref.alloc() : memref<16x16xf32>
- %C = memref.alloc() : memref<16x16xf32>
- %cf1 = arith.constant 1.00000e+00 : f32
- linalg.fill(%cf1, %A) : f32, memref<16x16xf32>
- linalg.fill(%cf1, %B) : f32, memref<16x16xf32>
- %reps = arith.constant 1 : index
- %t_start = call @rtclock() : () -> f64
- affine.for %arg0 = 0 to 5 {
- linalg.fill(%cf1, %C) : f32, memref<16x16xf32>
- call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
- }
- %t_end = call @rtclock() : () -> f64
- %t = arith.subf %t_end, %t_start : f64
- %res = affine.load %C[0, 0]: memref<16x16xf32>
- vector.print %res: f32
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c2 = arith.constant 2 : index
- %M = memref.dim %C, %c0 : memref<16x16xf32>
- %N = memref.dim %C, %c1 : memref<16x16xf32>
- %K = memref.dim %A, %c1 : memref<16x16xf32>
- %f1 = arith.muli %M, %N : index
- %f2 = arith.muli %f1, %K : index
- // 2*M*N*K.
- %f3 = arith.muli %c2, %f2 : index
- %num_flops = arith.muli %reps, %f3 : index
- %num_flops_i = arith.index_cast %num_flops : index to i16
- %num_flops_f = arith.sitofp %num_flops_i : i16 to f64
- %flops = arith.divf %num_flops_f, %t : f64
- call @print_flops(%flops) : (f64) -> ()
- memref.dealloc %A : memref<16x16xf32>
- memref.dealloc %B : memref<16x16xf32>
- memref.dealloc %C : memref<16x16xf32>
- return
-// CHECK: 17
-func @sgemm_naive(%arg0: memref<16x16xf32>, %arg1: memref<16x16xf32>, %arg2: memref<16x16xf32>) {
- %c0 = arith.constant 0 : index
- affine.for %arg3 = 0 to 16 {
- affine.for %arg4 = 0 to 16 {
- %m = memref.alloc() : memref<1xf32>
- %v = affine.load %arg2[%arg3, %arg4] : memref<16x16xf32>
- affine.store %v, %m[%c0] : memref<1xf32>
- affine.for %arg5 = 0 to 16 {
- %3 = affine.load %arg0[%arg3, %arg5] : memref<16x16xf32>
- %4 = affine.load %arg1[%arg5, %arg4] : memref<16x16xf32>
- %5 = affine.load %m[0] : memref<1xf32>
- %6 = arith.mulf %3, %4 : f32
- %7 = arith.addf %6, %5 : f32
- affine.store %7, %m[0] : memref<1xf32>
- }
- %s = affine.load %m[%c0] : memref<1xf32>
- affine.store %s, %arg2[%arg3, %arg4] : memref<16x16xf32>
- memref.dealloc %m : memref<1xf32>
- }
- }
- return
-func private @print_flops(f64)
-func private @rtclock() -> f64
--- /dev/null
+// RUN: mlir-opt %s -convert-linalg-to-loops \
+// RUN: -convert-scf-to-std \
+// RUN: -convert-arith-to-llvm \
+// RUN: -convert-linalg-to-llvm \
+// RUN: -convert-memref-to-llvm \
+// RUN: -convert-std-to-llvm \
+// RUN: -reconcile-unrealized-casts | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// CHECK: rank = 2
+// CHECK-SAME: sizes = [10, 3]
+// CHECK-SAME: strides = [3, 1]
+// CHECK-COUNT-10: [10, 10, 10]
+// CHECK: rank = 2
+// CHECK-SAME: sizes = [10, 3]
+// CHECK-SAME: strides = [3, 1]
+// CHECK-COUNT-10: [5, 5, 5]
+// CHECK: rank = 2
+// CHECK-SAME: sizes = [10, 3]
+// CHECK-SAME: strides = [3, 1]
+// CHECK-COUNT-10: [2, 2, 2]
+// CHECK: rank = 0
+// 122 is ASCII for 'z'.
+// CHECK: [z]
+// CHECK: rank = 2
+// CHECK-SAME: sizes = [4, 3]
+// CHECK-SAME: strides = [3, 1]
+// CHECK-COUNT-4: [1, 1, 1]
+// CHECK: rank = 2
+// CHECK-SAME: sizes = [4, 3]
+// CHECK-SAME: strides = [3, 1]
+// CHECK-COUNT-4: [1, 1, 1]
+// CHECK: rank = 2
+// CHECK-SAME: sizes = [4, 3]
+// CHECK-SAME: strides = [3, 1]
+// CHECK-COUNT-4: [1, 1, 1]
+// Test driver: exercises casts between ranked and unranked memrefs on one
+// 10x3 f32 buffer (filled with 10, then 5, then 2 and printed after each
+// fill), prints a rank-0 i8 memref, and then runs the remaining test cases.
+func @main() -> () {
+ %A = memref.alloc() : memref<10x3xf32, 0>
+ %f2 = arith.constant 2.00000e+00 : f32
+ %f5 = arith.constant 5.00000e+00 : f32
+ %f10 = arith.constant 10.00000e+00 : f32
+ // Fill through a dynamically shaped ranked view, print through an unranked
+ // view of the same allocation.
+ %V = memref.cast %A : memref<10x3xf32, 0> to memref<?x?xf32>
+ linalg.fill(%f10, %V) : f32, memref<?x?xf32, 0>
+ %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
+ call @print_memref_f32(%U) : (memref<*xf32>) -> ()
+ // Unranked -> ranked cast, fill, then ranked -> unranked cast for printing.
+ %V2 = memref.cast %U : memref<*xf32> to memref<?x?xf32>
+ linalg.fill(%f5, %V2) : f32, memref<?x?xf32, 0>
+ %U2 = memref.cast %V2 : memref<?x?xf32, 0> to memref<*xf32>
+ call @print_memref_f32(%U2) : (memref<*xf32>) -> ()
+ // Ranked -> unranked -> ranked round trip before the fill. The print goes
+ // through %V2 rather than %V4; both are casts of the same buffer.
+ %V3 = memref.cast %V2 : memref<?x?xf32> to memref<*xf32>
+ %V4 = memref.cast %V3 : memref<*xf32> to memref<?x?xf32>
+ linalg.fill(%f2, %V4) : f32, memref<?x?xf32, 0>
+ %U3 = memref.cast %V2 : memref<?x?xf32> to memref<*xf32>
+ call @print_memref_f32(%U3) : (memref<*xf32>) -> ()
+ // 122 is ASCII for 'z'.
+ %i8_z = arith.constant 122 : i8
+ %I8 = memref.alloc() : memref<i8>
+ memref.store %i8_z, %I8[]: memref<i8>
+ %U4 = memref.cast %I8 : memref<i8> to memref<*xi8>
+ call @print_memref_i8(%U4) : (memref<*xi8>) -> ()
+ // The i8 buffer is released through its unranked view.
+ memref.dealloc %U4 : memref<*xi8>
+ memref.dealloc %A : memref<10x3xf32, 0>
+ // The call order below fixes the order of the remaining expected output.
+ call @return_var_memref_caller() : () -> ()
+ call @return_two_var_memref_caller() : () -> ()
+ call @dim_op_of_unranked() : () -> ()
+ return
+// External printers for unranked i8 and f32 memrefs. Only declared here, with
+// the C-interface attribute; presumably resolved from the runner-utils shared
+// library handed to the runner on the command line above (TODO confirm).
+func private @print_memref_i8(memref<*xi8>) attributes { llvm.emit_c_interface }
+func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
+// Calls a function that returns two unranked memrefs and prints both
+// results, which are views of one stack-allocated 4x3 buffer.
+func @return_two_var_memref_caller() {
+ %0 = memref.alloca() : memref<4x3xf32>
+ // Note: despite its name, this constant holds 1.0, which is the fill value
+ // the expected output relies on.
+ %c0f32 = arith.constant 1.0 : f32
+ linalg.fill(%c0f32, %0) : f32, memref<4x3xf32>
+ %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
+ call @print_memref_f32(%1#0) : (memref<*xf32>) -> ()
+ call @print_memref_f32(%1#1) : (memref<*xf32>) -> ()
+ return
+ }
+ // Casts the ranked argument to an unranked memref and returns that same
+ // value twice.
+ func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) {
+ %0 = memref.cast %arg0 : memref<4x3xf32> to memref<*xf32>
+ return %0, %0 : memref<*xf32>, memref<*xf32>
+// Calls a function that returns a single unranked memref and prints the
+// result, which is a view of a stack-allocated 4x3 buffer.
+func @return_var_memref_caller() {
+ %0 = memref.alloca() : memref<4x3xf32>
+ // Note: despite its name, this constant holds 1.0, which is the fill value
+ // the expected output relies on.
+ %c0f32 = arith.constant 1.0 : f32
+ linalg.fill(%c0f32, %0) : f32, memref<4x3xf32>
+ %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
+ call @print_memref_f32(%1) : (memref<*xf32>) -> ()
+ return
+// Casts the ranked argument to an unranked memref and returns it.
+func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> {
+ %0 = memref.cast %arg0: memref<4x3xf32> to memref<*xf32>
+ return %0 : memref<*xf32>
+// External print helpers, declared only; presumably provided by the C
+// runner-utils shared library on the command line above (TODO confirm).
+func private @printU64(index) -> ()
+func private @printNewline() -> ()
+// Queries both dimensions of a 4x3 buffer through an unranked view and
+// prints each one on its own line.
+func @dim_op_of_unranked() {
+ %ranked = memref.alloca() : memref<4x3xf32>
+ %unranked = memref.cast %ranked: memref<4x3xf32> to memref<*xf32>
+ // Dimension 0 of the unranked view.
+ %c0 = arith.constant 0 : index
+ %dim_0 = memref.dim %unranked, %c0 : memref<*xf32>
+ call @printU64(%dim_0) : (index) -> ()
+ call @printNewline() : () -> ()
+ // CHECK: 4
+ // Dimension 1 of the unranked view.
+ %c1 = arith.constant 1 : index
+ %dim_1 = memref.dim %unranked, %c1 : memref<*xf32>
+ call @printU64(%dim_1) : (index) -> ()
+ call @printNewline() : () -> ()
+ // CHECK: 3
+ return
+++ /dev/null
-// RUN: mlir-opt %s -convert-linalg-to-loops \
-// RUN: -convert-scf-to-std \
-// RUN: -convert-arith-to-llvm \
-// RUN: -convert-linalg-to-llvm \
-// RUN: -convert-memref-to-llvm \
-// RUN: -convert-std-to-llvm \
-// RUN: -reconcile-unrealized-casts | \
-// RUN: mlir-cpu-runner -e main -entry-point-result=void \
-// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
-// CHECK: rank = 2
-// CHECK-SAME: sizes = [10, 3]
-// CHECK-SAME: strides = [3, 1]
-// CHECK-COUNT-10: [10, 10, 10]
-// CHECK: rank = 2
-// CHECK-SAME: sizes = [10, 3]
-// CHECK-SAME: strides = [3, 1]
-// CHECK-COUNT-10: [5, 5, 5]
-// CHECK: rank = 2
-// CHECK-SAME: sizes = [10, 3]
-// CHECK-SAME: strides = [3, 1]
-// CHECK-COUNT-10: [2, 2, 2]
-// CHECK: rank = 0
-// 122 is ASCII for 'z'.
-// CHECK: [z]
-// CHECK: rank = 2
-// CHECK-SAME: sizes = [4, 3]
-// CHECK-SAME: strides = [3, 1]
-// CHECK-COUNT-4: [1, 1, 1]
-// CHECK: rank = 2
-// CHECK-SAME: sizes = [4, 3]
-// CHECK-SAME: strides = [3, 1]
-// CHECK-COUNT-4: [1, 1, 1]
-// CHECK: rank = 2
-// CHECK-SAME: sizes = [4, 3]
-// CHECK-SAME: strides = [3, 1]
-// CHECK-COUNT-4: [1, 1, 1]
-func @main() -> () {
- %A = memref.alloc() : memref<10x3xf32, 0>
- %f2 = arith.constant 2.00000e+00 : f32
- %f5 = arith.constant 5.00000e+00 : f32
- %f10 = arith.constant 10.00000e+00 : f32
- %V = memref.cast %A : memref<10x3xf32, 0> to memref<?x?xf32>
- linalg.fill(%f10, %V) : f32, memref<?x?xf32, 0>
- %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
- call @print_memref_f32(%U) : (memref<*xf32>) -> ()
- %V2 = memref.cast %U : memref<*xf32> to memref<?x?xf32>
- linalg.fill(%f5, %V2) : f32, memref<?x?xf32, 0>
- %U2 = memref.cast %V2 : memref<?x?xf32, 0> to memref<*xf32>
- call @print_memref_f32(%U2) : (memref<*xf32>) -> ()
- %V3 = memref.cast %V2 : memref<?x?xf32> to memref<*xf32>
- %V4 = memref.cast %V3 : memref<*xf32> to memref<?x?xf32>
- linalg.fill(%f2, %V4) : f32, memref<?x?xf32, 0>
- %U3 = memref.cast %V2 : memref<?x?xf32> to memref<*xf32>
- call @print_memref_f32(%U3) : (memref<*xf32>) -> ()
- // 122 is ASCII for 'z'.
- %i8_z = arith.constant 122 : i8
- %I8 = memref.alloc() : memref<i8>
- memref.store %i8_z, %I8[]: memref<i8>
- %U4 = memref.cast %I8 : memref<i8> to memref<*xi8>
- call @print_memref_i8(%U4) : (memref<*xi8>) -> ()
- memref.dealloc %U4 : memref<*xi8>
- memref.dealloc %A : memref<10x3xf32, 0>
- call @return_var_memref_caller() : () -> ()
- call @return_two_var_memref_caller() : () -> ()
- call @dim_op_of_unranked() : () -> ()
- return
-func private @print_memref_i8(memref<*xi8>) attributes { llvm.emit_c_interface }
-func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
-func @return_two_var_memref_caller() {
- %0 = memref.alloca() : memref<4x3xf32>
- %c0f32 = arith.constant 1.0 : f32
- linalg.fill(%c0f32, %0) : f32, memref<4x3xf32>
- %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
- call @print_memref_f32(%1#0) : (memref<*xf32>) -> ()
- call @print_memref_f32(%1#1) : (memref<*xf32>) -> ()
- return
- }
- func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) {
- %0 = memref.cast %arg0 : memref<4x3xf32> to memref<*xf32>
- return %0, %0 : memref<*xf32>, memref<*xf32>
-func @return_var_memref_caller() {
- %0 = memref.alloca() : memref<4x3xf32>
- %c0f32 = arith.constant 1.0 : f32
- linalg.fill(%c0f32, %0) : f32, memref<4x3xf32>
- %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
- call @print_memref_f32(%1) : (memref<*xf32>) -> ()
- return
-func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> {
- %0 = memref.cast %arg0: memref<4x3xf32> to memref<*xf32>
- return %0 : memref<*xf32>
-func private @printU64(index) -> ()
-func private @printNewline() -> ()
-func @dim_op_of_unranked() {
- %ranked = memref.alloca() : memref<4x3xf32>
- %unranked = memref.cast %ranked: memref<4x3xf32> to memref<*xf32>
- %c0 = arith.constant 0 : index
- %dim_0 = memref.dim %unranked, %c0 : memref<*xf32>
- call @printU64(%dim_0) : (index) -> ()
- call @printNewline() : () -> ()
- // CHECK: 4
- %c1 = arith.constant 1 : index
- %dim_1 = memref.dim %unranked, %c1 : memref<*xf32>
- call @printU64(%dim_1) : (index) -> ()
- call @printNewline() : () -> ()
- // CHECK: 3
- return