From 39e1c1fa9ee03e91751e505d747275e58069e6de Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 27 Feb 2020 13:03:05 +0100 Subject: [PATCH] Add GPU lowerings for the different log ops. Summary: This adds GPU lowerings for log, log10 and log2. Reviewers: mravishankar, herhut Subscribers: jholewinski, mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, csigg, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D75239 --- .../Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 10 ++- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 10 ++- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 75 +++++++++++++++++----- mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 75 +++++++++++++++++----- 4 files changed, 136 insertions(+), 34 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index bb7e187..a2f16b1 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -681,8 +681,8 @@ public: populateGpuToNVVMConversionPatterns(converter, patterns); ConversionTarget target(getContext()); target.addIllegalDialect(); - target.addIllegalOp(); + target.addIllegalOp(); target.addIllegalOp(); target.addLegalDialect(); target.addLegalDialect(); @@ -719,6 +719,12 @@ void mlir::populateGpuToNVVMConversionPatterns( "__nv_cos"); patterns.insert>(converter, "__nv_expf", "__nv_exp"); + patterns.insert>(converter, "__nv_logf", + "__nv_log"); + patterns.insert>(converter, "__nv_log10f", + "__nv_log10"); + patterns.insert>(converter, "__nv_log2f", + "__nv_log2"); patterns.insert>(converter, "__nv_tanhf", "__nv_tanh"); } diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 0fd8be0..238821e 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -58,13 +58,19 @@ public: "__ocml_cos_f64"); patterns.insert>(converter, "__ocml_exp_f32", "__ocml_exp_f64"); + patterns.insert>(converter, "__ocml_log_f32", + "__ocml_log_f64"); + patterns.insert>( + converter, "__ocml_log10_f32", "__ocml_log10_f64"); + patterns.insert>(converter, "__ocml_log2_f32", + "__ocml_log2_f64"); patterns.insert>(converter, "__ocml_tanh_f32", "__ocml_tanh_f64"); ConversionTarget target(getContext()); target.addLegalDialect(); - target.addIllegalOp(); + target.addIllegalOp(); target.addDynamicallyLegalOp( gpu::filterIllegalLLVMIntrinsics({"tanh", "tanhf"}, m.getContext())); target.addIllegalOp(); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index b27ee03..4ae9d80 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -155,21 +155,6 @@ gpu.module @test_module { } // ----- - -gpu.module @test_module { - // CHECK: llvm.func @__nv_tanhf(!llvm.float) -> !llvm.float - // CHECK: llvm.func @__nv_tanh(!llvm.double) -> !llvm.double - // CHECK-LABEL: func @gpu_tanh - func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { - %result32 = std.tanh %arg_f32 : f32 - // CHECK: llvm.call @__nv_tanhf(%{{.*}}) : (!llvm.float) -> !llvm.float - %result64 = std.tanh %arg_f64 : f64 - // CHECK: llvm.call @__nv_tanh(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return - } -} - -// ----- gpu.module @test_module { // CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float // CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double @@ -187,6 +172,66 @@ gpu.module @test_module { // ----- +gpu.module @test_module { + // CHECK: llvm.func @__nv_logf(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_log(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_log + func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.log %arg_f32 : f32 + // CHECK: llvm.call @__nv_logf(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.log %arg_f64 : f64 + // CHECK: llvm.call @__nv_log(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + +gpu.module @test_module { + // CHECK: llvm.func @__nv_log10f(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_log10(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_log10 + func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.log10 %arg_f32 : f32 + // CHECK: llvm.call @__nv_log10f(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.log10 %arg_f64 : f64 + // CHECK: llvm.call @__nv_log10(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + +gpu.module @test_module { + // CHECK: llvm.func @__nv_log2f(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_log2(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_log2 + func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.log2 %arg_f32 : f32 + // CHECK: llvm.call @__nv_log2f(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.log2 %arg_f64 : f64 + // CHECK: llvm.call @__nv_log2(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + +gpu.module @test_module { + // CHECK: llvm.func @__nv_tanhf(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_tanh(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_tanh + func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.tanh %arg_f32 : f32 + // CHECK: llvm.call @__nv_tanhf(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.tanh %arg_f64 : f64 + // CHECK: llvm.call @__nv_tanh(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + // Test that we handled properly operation with SymbolTable other than module op gpu.module @test_module { "test.symbol_scope"() ({ diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index b733e9b..7400d4f 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -82,21 +82,6 @@ gpu.module @kernel_module { } // ----- - -gpu.module @kernel_module { - // CHECK: llvm.func @__ocml_tanh_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @__ocml_tanh_f64(!llvm.double) -> !llvm.double - // CHECK-LABEL: func @gpu_tanh - func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { - %result32 = std.tanh %arg_f32 : f32 - // CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float - %result64 = std.tanh %arg_f64 : f64 - // CHECK: llvm.call @__ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return - } -} - -// ----- gpu.module @kernel_module { // CHECK: llvm.func @__ocml_exp_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_exp_f64(!llvm.double) -> !llvm.double @@ -134,3 +119,63 @@ gpu.module @kernel_module { "test.finish" () : () -> () }) : () -> () } + +// ----- + +gpu.module @kernel_module { + // CHECK: llvm.func @__ocml_log_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_log_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_log + func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.log %arg_f32 : f32 + // CHECK: llvm.call @__ocml_log_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.log %arg_f64 : f64 + // CHECK: llvm.call @__ocml_log_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + +gpu.module @kernel_module { + // CHECK: llvm.func @__ocml_log10_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_log10_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_log10 + func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.log10 %arg_f32 : f32 + // CHECK: llvm.call @__ocml_log10_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.log10 %arg_f64 : f64 + // CHECK: llvm.call @__ocml_log10_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + +gpu.module @kernel_module { + // CHECK: llvm.func @__ocml_log2_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_log2_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_log2 + func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.log2 %arg_f32 : f32 + // CHECK: llvm.call @__ocml_log2_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.log2 %arg_f64 : f64 + // CHECK: llvm.call @__ocml_log2_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- + +gpu.module @kernel_module { + // CHECK: llvm.func @__ocml_tanh_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_tanh_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_tanh + func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.tanh %arg_f32 : f32 + // CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.tanh %arg_f64 : f64 + // CHECK: llvm.call @__ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} -- 2.7.4