From: Julian Gross Date: Tue, 28 Jan 2020 10:09:13 +0000 (+0100) Subject: Changed wrong ROCDL instructions in GPU lowering. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=addc27bc437d2fb1f31d88294b227ac32be63cc5;p=platform%2Fupstream%2Fllvm.git Changed wrong ROCDL instructions in GPU lowering. Summary: In the scope of the lowering phase from GPU to ROCDL, the instructions for the conversion patterns seem to be wrong. According to https://github.com/ROCm-Developer-Tools/HIP/blob/master/include/hip/hcc_detail/math_fwd.h the instructions need two underscores at the beginning instead of one. Reviewers: nicolasvasilache, herhut, rriddle Reviewed By: herhut, rriddle Subscribers: merge_guards_bot, mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, csigg, arpith-jacob, mgester, lucyrfox, herhut, liufengdb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73535 --- diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 602068d..d74fcdb 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -50,16 +50,16 @@ public: GPUIndexIntrinsicOpLowering>( converter); - patterns.insert>(converter, "_ocml_fabs_f32", - "_ocml_fabs_f64"); - patterns.insert>(converter, "_ocml_ceil_f32", - "_ocml_ceil_f64"); - patterns.insert>(converter, "_ocml_cos_f32", - "_ocml_cos_f64"); - patterns.insert>(converter, "_ocml_exp_f32", - "_ocml_exp_f64"); - patterns.insert>(converter, "_ocml_tanh_f32", - "_ocml_tanh_f64"); + patterns.insert>(converter, "__ocml_fabs_f32", + "__ocml_fabs_f64"); + patterns.insert>(converter, "__ocml_ceil_f32", + "__ocml_ceil_f64"); + patterns.insert>(converter, "__ocml_cos_f32", + "__ocml_cos_f64"); + patterns.insert>(converter, "__ocml_exp_f32", + "__ocml_exp_f64"); + patterns.insert>(converter, "__ocml_tanh_f32", + "__ocml_tanh_f64"); ConversionTarget 
target(getContext()); target.addLegalDialect(); diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index b2d9f73..b733e9b 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -39,14 +39,14 @@ gpu.module @kernel_module { // ----- gpu.module @kernel_module { - // CHECK: llvm.func @_ocml_fabs_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @_ocml_fabs_f64(!llvm.double) -> !llvm.double + // CHECK: llvm.func @__ocml_fabs_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_fabs_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_fabs func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) { %result32 = std.absf %arg_f32 : f32 - // CHECK: llvm.call @_ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.absf %arg_f64 : f64 - // CHECK: llvm.call @_ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + // CHECK: llvm.call @__ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double std.return } } @@ -54,14 +54,14 @@ gpu.module @kernel_module { // ----- gpu.module @kernel_module { - // CHECK: llvm.func @_ocml_ceil_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @_ocml_ceil_f64(!llvm.double) -> !llvm.double + // CHECK: llvm.func @__ocml_ceil_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_ceil_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_ceil func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) { %result32 = std.ceilf %arg_f32 : f32 - // CHECK: llvm.call @_ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.ceilf %arg_f64 : f64 - // CHECK: llvm.call @_ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + // CHECK: llvm.call @__ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double std.return } } @@ -69,14 
+69,14 @@ gpu.module @kernel_module { // ----- gpu.module @kernel_module { - // CHECK: llvm.func @_ocml_cos_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @_ocml_cos_f64(!llvm.double) -> !llvm.double + // CHECK: llvm.func @__ocml_cos_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_cos_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_cos func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) { %result32 = std.cos %arg_f32 : f32 - // CHECK: llvm.call @_ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.cos %arg_f64 : f64 - // CHECK: llvm.call @_ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + // CHECK: llvm.call @__ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double std.return } } @@ -84,30 +84,30 @@ gpu.module @kernel_module { // ----- gpu.module @kernel_module { - // CHECK: llvm.func @_ocml_tanh_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @_ocml_tanh_f64(!llvm.double) -> !llvm.double + // CHECK: llvm.func @__ocml_tanh_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_tanh_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_tanh func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { %result32 = std.tanh %arg_f32 : f32 - // CHECK: llvm.call @_ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.tanh %arg_f64 : f64 - // CHECK: llvm.call @_ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + // CHECK: llvm.call @__ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double std.return } } // ----- gpu.module @kernel_module { - // CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double + // CHECK: llvm.func @__ocml_exp_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_exp_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_exp func 
@gpu_exp(%arg_f32 : f32, %arg_f64 : f64) { %exp_f32 = std.exp %arg_f32 : f32 - // CHECK: llvm.call @_ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result_f32 = std.exp %exp_f32 : f32 - // CHECK: llvm.call @_ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.exp %arg_f64 : f64 - // CHECK: llvm.call @_ocml_exp_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (!llvm.double) -> !llvm.double std.return } } @@ -119,16 +119,16 @@ gpu.module @kernel_module { gpu.module @kernel_module { "test.symbol_scope"() ({ // CHECK: test.symbol_scope - // CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float - // CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double + // CHECK: llvm.func @__ocml_exp_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_exp_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_exp func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) { %exp_f32 = std.exp %arg_f32 : f32 - // CHECK: llvm.call @_ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result_f32 = std.exp %exp_f32 : f32 - // CHECK: llvm.call @_ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.exp %arg_f64 : f64 - // CHECK: llvm.call @_ocml_exp_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (!llvm.double) -> !llvm.double std.return } "test.finish" () : () -> ()