From: Victor Campos Date: Tue, 19 Nov 2019 09:55:16 +0000 (+0000) Subject: [ARM] Fix instruction selection for ARMISD::CMOV with f16 type X-Git-Tag: llvmorg-11-init~3240 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e478385e7708d0bcef43559651e6d62e387a507a;p=platform%2Fupstream%2Fllvm.git [ARM] Fix instruction selection for ARMISD::CMOV with f16 type Summary: In the cases where the CMOV (f16) SDNode is used with condition codes LT, LE, VC or NE, it is successfully selected into a VSEL instruction. In the remaining cases, however, instruction selection fails since VSEL does not support other condition codes. This patch handles such cases by using the single-precision version of the VMOV instruction. Reviewers: ostannard, dmgreen Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70667 --- diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 563fdda..de4377e 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1213,9 +1213,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MBBI = NewMI; return true; } + case ARM::VMOVHcc: case ARM::VMOVScc: case ARM::VMOVDcc: { - unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; + unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) .add(MI.getOperand(2)) diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index fdd961b..90be9a0 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2279,6 +2279,12 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), [(set (f32 SPR:$Sd), (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>; + +def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), + IIC_fpUNA16, + [(set (f16 HPR:$Sd), + (ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>; } // hasSideEffects //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll new file mode 100644 index 0000000..925fed5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmov_fp16.ll @@ -0,0 +1,261 @@ +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-THUMB,CHECK +; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-ARM,CHECK + +define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_ne: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ne i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_eq: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp eq i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_gt: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sgt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_ge: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sge i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_lt: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp slt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_le: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sle i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_hi: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_hi: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovhi.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ugt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_hs: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it hs +; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_hs: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovhs.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp uge i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_lo: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it lo +; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_lo: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovlo.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ult i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_ls: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it ls +; CHECK-THUMB-NEXT: vmovls.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_ls: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovls.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ule i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} +