}
//===----------------------------------------------------------------------===//
+// FloorFOp
+//===----------------------------------------------------------------------===//
+
+def FloorFOp : FloatUnaryOp<"floorf"> {
+ let summary = "floor of the specified value";
+ let description = [{
+ Syntax:
+
+ ```
+ operation ::= ssa-id `=` `std.floorf` ssa-use `:` type
+ ```
+
+ The `floorf` operation computes the floor of a given value. It takes one
+ operand and returns one result of the same type. This type may be a float
+ scalar type, a vector whose element type is float, or a tensor of floats.
+ It has no standard attributes.
+
+ Example:
+
+ ```mlir
+ // Scalar floor value.
+ %a = floorf %b : f64
+
+ // SIMD vector element-wise floor value.
+ %f = floorf %g : vector<4xf32>
+
+ // Tensor element-wise floor value.
+ %x = floorf %y : tensor<4x?xf32>
+ ```
+ }];
+}
+
+//===----------------------------------------------------------------------===//
// CmpFOp
//===----------------------------------------------------------------------===//
LLVMConversionTarget target(getContext());
target.addIllegalDialect<gpu::GPUDialect>();
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::FAbsOp, LLVM::FCeilOp,
- LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op>();
+ LLVM::FFloorOp, LLVM::LogOp, LLVM::Log10Op,
+ LLVM::Log2Op>();
target.addIllegalOp<FuncOp>();
target.addLegalDialect<NVVM::NVVMDialect>();
// TODO: Remove once we support replacing non-root ops.
"__nv_cos");
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
"__nv_exp");
+ patterns.insert<OpToFuncCallLowering<FloorFOp>>(converter, "__nv_floorf",
+ "__nv_floor");
patterns.insert<OpToFuncCallLowering<LogOp>>(converter, "__nv_logf",
"__nv_log");
patterns.insert<OpToFuncCallLowering<Log10Op>>(converter, "__nv_log10f",
LLVMConversionTarget target(getContext());
target.addIllegalDialect<gpu::GPUDialect>();
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::FAbsOp, LLVM::FCeilOp,
- LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op>();
+ LLVM::FFloorOp, LLVM::LogOp, LLVM::Log10Op,
+ LLVM::Log2Op>();
target.addIllegalOp<FuncOp>();
target.addLegalDialect<ROCDL::ROCDLDialect>();
// TODO: Remove once we support replacing non-root ops.
"__ocml_cos_f64");
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__ocml_exp_f32",
"__ocml_exp_f64");
+ patterns.insert<OpToFuncCallLowering<FloorFOp>>(converter, "__ocml_floor_f32",
+ "__ocml_floor_f64");
patterns.insert<OpToFuncCallLowering<LogOp>>(converter, "__ocml_log_f32",
"__ocml_log_f64");
patterns.insert<OpToFuncCallLowering<Log10Op>>(converter, "__ocml_log10_f32",
using DivFOpLowering = VectorConvertToLLVMPattern<DivFOp, LLVM::FDivOp>;
using ExpOpLowering = VectorConvertToLLVMPattern<ExpOp, LLVM::ExpOp>;
using Exp2OpLowering = VectorConvertToLLVMPattern<Exp2Op, LLVM::Exp2Op>;
+using FloorFOpLowering = VectorConvertToLLVMPattern<FloorFOp, LLVM::FFloorOp>;
using Log10OpLowering = VectorConvertToLLVMPattern<Log10Op, LLVM::Log10Op>;
using Log2OpLowering = VectorConvertToLLVMPattern<Log2Op, LLVM::Log2Op>;
using LogOpLowering = VectorConvertToLLVMPattern<LogOp, LLVM::LogOp>;
DivFOpLowering,
ExpOpLowering,
Exp2OpLowering,
+ FloorFOpLowering,
GenericAtomicRMWOpLowering,
LogOpLowering,
Log10OpLowering,
UnaryAndBinaryOpPattern<CosOp, spirv::GLSLCosOp>,
UnaryAndBinaryOpPattern<DivFOp, spirv::FDivOp>,
UnaryAndBinaryOpPattern<ExpOp, spirv::GLSLExpOp>,
+ UnaryAndBinaryOpPattern<FloorFOp, spirv::GLSLFloorOp>,
UnaryAndBinaryOpPattern<LogOp, spirv::GLSLLogOp>,
UnaryAndBinaryOpPattern<MulFOp, spirv::FMulOp>,
UnaryAndBinaryOpPattern<MulIOp, spirv::IMulOp>,
// -----
gpu.module @test_module {
+ // Checks that std.floorf lowers to libdevice calls:
+ // __nv_floorf for f32 operands and __nv_floor for f64 operands.
+ // CHECK: llvm.func @__nv_floorf(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @__nv_floor(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_floor
+ func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ %result32 = std.floorf %arg_f32 : f32
+ // CHECK: llvm.call @__nv_floorf(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.floorf %arg_f64 : f64
+ // CHECK: llvm.call @__nv_floor(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return %result32, %result64 : f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
// CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_cos
// -----
gpu.module @test_module {
+ // Checks that std.floorf lowers to OCML library calls:
+ // __ocml_floor_f32 for f32 operands and __ocml_floor_f64 for f64 operands.
+ // CHECK: llvm.func @__ocml_floor_f32(!llvm.float) -> !llvm.float
+ // CHECK: llvm.func @__ocml_floor_f64(!llvm.double) -> !llvm.double
+ // CHECK-LABEL: func @gpu_floor
+ func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ %result32 = std.floorf %arg_f32 : f32
+ // CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
+ %result64 = std.floorf %arg_f64 : f64
+ // CHECK: llvm.call @__ocml_floor_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
+ std.return %result32, %result64 : f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
// CHECK: llvm.func @__ocml_cos_f32(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__ocml_cos_f64(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_cos
// -----
+// Checks that std.ceilf lowers to the LLVM ceil intrinsic.
+// CHECK-LABEL: func @ceilf(
+// CHECK-SAME: !llvm.float
+func @ceilf(%arg0 : f32) {
+ // CHECK: "llvm.intr.ceil"(%arg0) : (!llvm.float) -> !llvm.float
+ %0 = ceilf %arg0 : f32
+ std.return
+}
+
+// -----
+
+// Checks that std.floorf lowers to the LLVM floor intrinsic.
+// CHECK-LABEL: func @floorf(
+// CHECK-SAME: !llvm.float
+func @floorf(%arg0 : f32) {
+ // CHECK: "llvm.intr.floor"(%arg0) : (!llvm.float) -> !llvm.float
+ %0 = floorf %arg0 : f32
+ std.return
+}
+
+// -----
+
+
// CHECK-LABEL: func @rsqrt_double(
// CHECK-SAME: !llvm.double
func @rsqrt_double(%arg0 : f64) {
%8 = tanh %arg0 : f32
// CHECK: spv.GLSL.Sin %{{.*}}: f32
%9 = sin %arg0 : f32
+ // CHECK: spv.GLSL.Floor %{{.*}}: f32
+ %10 = floorf %arg0 : f32
return
}
// CHECK: = fptosi {{.*}} : f16 to i64
%162 = fptosi %half : f16 to i64
+ // CHECK: floorf %arg1 : f32
+ %163 = "std.floorf"(%f) : (f32) -> f32
+
+ // CHECK: %{{[0-9]+}} = floorf %arg1 : f32
+ %164 = floorf %f : f32
+
+ // CHECK: %{{[0-9]+}} = floorf %cst_8 : vector<4xf32>
+ %165 = floorf %vcf32 : vector<4xf32>
+
+ // CHECK: %{{[0-9]+}} = floorf %arg0 : tensor<4x4x?xf32>
+ %166 = floorf %t : tensor<4x4x?xf32>
+
return
}