From 55565752306e352e655bf8a4ba919c14d6b195c2 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 13 Aug 2020 14:59:58 -0700 Subject: [PATCH] Added std.floor operation to match std.ceil There should be an equivalent std.floor op to std.ceil. This includes matching lowerings for SPIRV, NVVM, ROCDL, and LLVM. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D85940 --- mlir/include/mlir/Dialect/StandardOps/IR/Ops.td | 33 ++++++++++++++++++++++ .../Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 5 +++- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 5 +++- .../Conversion/StandardToLLVM/StandardToLLVM.cpp | 2 ++ .../StandardToSPIRV/ConvertStandardToSPIRV.cpp | 1 + mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 15 ++++++++++ mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 15 ++++++++++ .../StandardToLLVM/standard-to-llvm.mlir | 21 ++++++++++++++ .../StandardToSPIRV/std-ops-to-spirv.mlir | 2 ++ mlir/test/IR/core-ops.mlir | 12 ++++++++ 10 files changed, 109 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 088f262..510d485 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -815,6 +815,39 @@ def CeilFOp : FloatUnaryOp<"ceilf"> { } //===----------------------------------------------------------------------===// +// FloorFOp +//===----------------------------------------------------------------------===// + +def FloorFOp : FloatUnaryOp<"floorf"> { + let summary = "floor of the specified value"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `std.floorf` ssa-use `:` type + ``` + + The `floorf` operation computes the floor of a given value. It takes one + operand and returns one result of the same type. This type may be a float + scalar type, a vector whose element type is float, or a tensor of floats. + It has no standard attributes. + + Example: + + ```mlir + // Scalar floor value. + %a = floorf %b : f64 + + // SIMD vector element-wise floor value. + %f = floorf %g : vector<4xf32> + + // Tensor element-wise floor value. + %x = floorf %y : tensor<4x?xf8> + ``` + }]; +} + +//===----------------------------------------------------------------------===// // CmpFOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 76c1668..d11cc51 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -137,7 +137,8 @@ struct LowerGpuOpsToNVVMOpsPass LLVMConversionTarget target(getContext()); target.addIllegalDialect(); target.addIllegalOp(); + LLVM::FFloorOp, LLVM::LogOp, LLVM::Log10Op, + LLVM::Log2Op>(); target.addIllegalOp(); target.addLegalDialect(); // TODO: Remove once we support replacing non-root ops. @@ -174,6 +175,8 @@ void mlir::populateGpuToNVVMConversionPatterns( "__nv_cos"); patterns.insert>(converter, "__nv_expf", "__nv_exp"); + patterns.insert>(converter, "__nv_floorf", + "__nv_floor"); patterns.insert>(converter, "__nv_logf", "__nv_log"); patterns.insert>(converter, "__nv_log10f", diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 697f807..40cf097 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -71,7 +71,8 @@ struct LowerGpuOpsToROCDLOpsPass LLVMConversionTarget target(getContext()); target.addIllegalDialect(); target.addIllegalOp(); + LLVM::FFloorOp, LLVM::LogOp, LLVM::Log10Op, + LLVM::Log2Op>(); target.addIllegalOp(); target.addLegalDialect(); // TODO: Remove once we support replacing non-root ops. @@ -104,6 +105,8 @@ void mlir::populateGpuToROCDLConversionPatterns( "__ocml_cos_f64"); patterns.insert>(converter, "__ocml_exp_f32", "__ocml_exp_f64"); + patterns.insert>(converter, "__ocml_floor_f32", + "__ocml_floor_f64"); patterns.insert>(converter, "__ocml_log_f32", "__ocml_log_f64"); patterns.insert>(converter, "__ocml_log10_f32", diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 4a06196..0ee1166 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1418,6 +1418,7 @@ using CosOpLowering = VectorConvertToLLVMPattern; using DivFOpLowering = VectorConvertToLLVMPattern; using ExpOpLowering = VectorConvertToLLVMPattern; using Exp2OpLowering = VectorConvertToLLVMPattern; +using FloorFOpLowering = VectorConvertToLLVMPattern; using Log10OpLowering = VectorConvertToLLVMPattern; using Log2OpLowering = VectorConvertToLLVMPattern; using LogOpLowering = VectorConvertToLLVMPattern; @@ -3285,6 +3286,7 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns( DivFOpLowering, ExpOpLowering, Exp2OpLowering, + FloorFOpLowering, GenericAtomicRMWOpLowering, LogOpLowering, Log10OpLowering, diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp index 268139f..6ae17c3 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp @@ -1076,6 +1076,7 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, + UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index df38df1..6b071a0 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -173,6 +173,21 @@ gpu.module @test_module { // ----- gpu.module @test_module { + // CHECK: llvm.func @__nv_floorf(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_floor(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_floor + func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = std.floorf %arg_f32 : f32 + // CHECK: llvm.call @__nv_floorf(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.floorf %arg_f64 : f64 + // CHECK: llvm.call @__nv_floor(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return %result32, %result64 : f32, f64 + } +} + +// ----- + +gpu.module @test_module { // CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float // CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_cos diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index a7565bb..b17d75f 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -115,6 +115,21 @@ gpu.module @test_module { // ----- gpu.module @test_module { + // CHECK: llvm.func @__ocml_floor_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_floor_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_floor + func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = std.floorf %arg_f32 : f32 + // CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.floorf %arg_f64 : f64 + // CHECK: llvm.call @__ocml_floor_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return %result32, %result64 : f32, f64 + } +} + +// ----- + +gpu.module @test_module { // CHECK: llvm.func @__ocml_cos_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_cos_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_cos diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir index c55950a..c736308 100644 --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -40,6 +40,27 @@ func @sine(%arg0 : f32) { // ----- +// CHECK-LABEL: func @ceilf( +// CHECK-SAME: !llvm.float +func @ceilf(%arg0 : f32) { + // CHECK: "llvm.intr.ceil"(%arg0) : (!llvm.float) -> !llvm.float + %0 = ceilf %arg0 : f32 + std.return +} + +// ----- + +// CHECK-LABEL: func @floorf( +// CHECK-SAME: !llvm.float +func @floorf(%arg0 : f32) { + // CHECK: "llvm.intr.floor"(%arg0) : (!llvm.float) -> !llvm.float + %0 = floorf %arg0 : f32 + std.return +} + +// ----- + + // CHECK-LABEL: func @rsqrt_double( // CHECK-SAME: !llvm.double func @rsqrt_double(%arg0 : f64) { diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir index e85f78f..1b83af1 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -65,6 +65,8 @@ func @float32_unary_scalar(%arg0: f32) { %8 = tanh %arg0 : f32 // CHECK: spv.GLSL.Sin %{{.*}}: f32 %9 = sin %arg0 : f32 + // CHECK: spv.GLSL.Floor %{{.*}}: f32 + %10 = floorf %arg0 : f32 return } diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 7447071..69e974b 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -554,6 +554,18 @@ func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) { // CHECK: = fptosi {{.*}} : f16 to i64 %162 = fptosi %half : f16 to i64 + // CHECK: floorf %arg1 : f32 + %163 = "std.floorf"(%f) : (f32) -> f32 + + // CHECK: %{{[0-9]+}} = floorf %arg1 : f32 + %164 = floorf %f : f32 + + // CHECK: %{{[0-9]+}} = floorf %cst_8 : vector<4xf32> + %165 = floorf %vcf32 : vector<4xf32> + + // CHECK: %{{[0-9]+}} = floorf %arg0 : tensor<4x4x?xf32> + %166 = floorf %t : tensor<4x4x?xf32> + return } -- 2.7.4