From 70afe4f7e1f72607881d8ff4719149a0dbc94120 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 6 May 2019 13:35:49 +0000 Subject: [PATCH] [PowerPC] Fix erroneous condition for converting uint-to-fp vector conversion A condition for exiting the legalization of v4i32 conversion to v2f64 through extract/convert/build erroneously checks for the extract having type i32. This is not adequate as smaller extracts are actually legalized to i32 as well. Furthermore, an early exit is missing which means that we only check that both extracts are from the same vector if that check fails. As a result, both cases in the included test case fail - the first gets a select error and the second generates incorrect code. The culprit commit is r274535. llvm-svn: 360043 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +- llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll | 134 ++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 92bf07a..3b61f4d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12476,9 +12476,8 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); - if (Ext1.getValueType() != MVT::i32 || - Ext2.getValueType() != MVT::i32) - if (Ext1.getOperand(0) != Ext2.getOperand(0)) + if (Ext1.getOperand(0).getValueType() != MVT::v4i32 || + Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); diff --git a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll new file mode 100644 index 0000000..895cb07 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by
utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ +; RUN: -check-prefix=P9BE +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ +; RUN: -check-prefix=P9LE +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ +; RUN: -check-prefix=P8BE +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ +; RUN: -check-prefix=P8LE +define dso_local <2 x double> @test1(<8 x i16> %a) { +; P9BE-LABEL: test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: mtvsrwz f0, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: mtvsrwz f1, r3 +; P9BE-NEXT: xscvuxddp f0, f0 +; P9BE-NEXT: xscvuxddp f1, f1 +; P9BE-NEXT: xxmrghd v2, vs0, vs1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9LE-NEXT: mtvsrwz f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9LE-NEXT: mtvsrwz f1, r3 +; P9LE-NEXT: xscvuxddp f0, f0 +; P9LE-NEXT: xscvuxddp f1, f1 +; P9LE-NEXT: xxmrghd v2, vs1, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: test1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mfvsrd r3, v2 +; P8BE-NEXT: rldicl r4, r3, 16, 48 +; P8BE-NEXT: rldicl r3, r3, 32, 48 +; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P8BE-NEXT: mtvsrwz f0, r4 +; P8BE-NEXT: mtvsrwz f1, r3 +; P8BE-NEXT: xscvuxddp 
f0, f0 +; P8BE-NEXT: xscvuxddp f1, f1 +; P8BE-NEXT: xxmrghd v2, vs0, vs1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: mfvsrd r3, f0 +; P8LE-NEXT: clrldi r4, r3, 48 +; P8LE-NEXT: rldicl r3, r3, 48, 48 +; P8LE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8LE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P8LE-NEXT: mtvsrwz f0, r4 +; P8LE-NEXT: mtvsrwz f1, r3 +; P8LE-NEXT: xscvuxddp f0, f0 +; P8LE-NEXT: xscvuxddp f1, f1 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = uitofp i16 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 1 + %conv2 = uitofp i16 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) { +; P9BE-LABEL: test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxextractuw f0, v2, 0 +; P9BE-NEXT: xxextractuw f1, v3, 4 +; P9BE-NEXT: xscvuxddp f0, f0 +; P9BE-NEXT: xscvuxddp f1, f1 +; P9BE-NEXT: xxmrghd v2, vs0, vs1 +; P9BE-NEXT: blr +; +; P9LE-LABEL: test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxextractuw f0, v2, 12 +; P9LE-NEXT: xxextractuw f1, v3, 8 +; P9LE-NEXT: xscvuxddp f0, f0 +; P9LE-NEXT: xscvuxddp f1, f1 +; P9LE-NEXT: xxmrghd v2, vs1, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: test2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxsldwi vs0, v2, v2, 3 +; P8BE-NEXT: mfvsrwz r4, v3 +; P8BE-NEXT: mtvsrwz f1, r4 +; P8BE-NEXT: mfvsrwz r3, f0 +; P8BE-NEXT: xscvuxddp f1, f1 +; P8BE-NEXT: mtvsrwz f0, r3 +; P8BE-NEXT: xscvuxddp f0, f0 +; P8BE-NEXT: xxmrghd v2, vs0, vs1 +; P8BE-NEXT: blr +; +; P8LE-LABEL: test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: xxsldwi vs1, v3, v3, 1 +; P8LE-NEXT: mfvsrwz r3, f0 +; P8LE-NEXT: mfvsrwz r4, f1 +; P8LE-NEXT: mtvsrwz f0, r3 +; P8LE-NEXT: mtvsrwz f1, r4 +; P8LE-NEXT: xscvuxddp 
f0, f0 +; P8LE-NEXT: xscvuxddp f1, f1 +; P8LE-NEXT: xxmrghd v2, vs1, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %b, i32 1 + %conv2 = uitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} -- 2.7.4