From 85cf564c5168150d306d63f360409cd86f92f04c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 26 Aug 2016 18:52:31 +0000 Subject: [PATCH] AArch64: avoid assertion on illegal types in performFDivCombine. In the code to detect fixed-point conversions and make use of AArch64's special instructions, we weren't prepared for weird types. The fptosi direction got fixed recently, but not the similar sitofp code. llvm-svn: 279852 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 +++++-- llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll | 43 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fcbc1a7..6ed9e6f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7778,13 +7778,15 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, /// Fold a floating-point divide by power of two into fixed-point to /// floating-point conversion. static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue Op = N->getOperand(0); unsigned Opc = Op->getOpcode(); - if (!Op.getValueType().isVector() || + if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || + !Op.getOperand(0).getValueType().isSimple() || (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP)) return SDValue(); @@ -7821,10 +7823,13 @@ static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; break; case 4: - ResTy = MVT::v4i32; + ResTy = FloatBits == 32 ? 
MVT::v4i32 : MVT::v4i64; break; } + if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps()) + return SDValue(); + SDLoc DL(N); SDValue ConvInput = Op.getOperand(0); bool IsSigned = Opc == ISD::SINT_TO_FP; @@ -9883,7 +9888,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FP_TO_UINT: return performFpToIntCombine(N, DAG, DCI, Subtarget); case ISD::FDIV: - return performFDivCombine(N, DAG, Subtarget); + return performFDivCombine(N, DAG, DCI, Subtarget); case ISD::OR: return performORCombine(N, DCI, Subtarget); case ISD::SRL: diff --git a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll new file mode 100644 index 0000000..f2a2642 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll @@ -0,0 +1,43 @@ +; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s + +define <16 x double> @test_sitofp_fixed(<16 x i32> %in) { +; CHECK-LABEL: test_sitofp_fixed: + + ; First, extend each i32 to i64 +; CHECK-DAG: sshll2.2d [[BLOCK0_HI:v[0-9]+]], v0, #0 +; CHECK-DAG: sshll2.2d [[BLOCK1_HI:v[0-9]+]], v1, #0 +; CHECK-DAG: sshll2.2d [[BLOCK2_HI:v[0-9]+]], v2, #0 +; CHECK-DAG: sshll2.2d [[BLOCK3_HI:v[0-9]+]], v3, #0 +; CHECK-DAG: sshll.2d [[BLOCK0_LO:v[0-9]+]], v0, #0 +; CHECK-DAG: sshll.2d [[BLOCK1_LO:v[0-9]+]], v1, #0 +; CHECK-DAG: sshll.2d [[BLOCK2_LO:v[0-9]+]], v2, #0 +; CHECK-DAG: sshll.2d [[BLOCK3_LO:v[0-9]+]], v3, #0 + + ; Next, convert each to double. +; CHECK-DAG: scvtf.2d v0, [[BLOCK0_LO]] +; CHECK-DAG: scvtf.2d v1, [[BLOCK0_HI]] +; CHECK-DAG: scvtf.2d v2, [[BLOCK1_LO]] +; CHECK-DAG: scvtf.2d v3, [[BLOCK1_HI]] +; CHECK-DAG: scvtf.2d v4, [[BLOCK2_LO]] +; CHECK-DAG: scvtf.2d v5, [[BLOCK2_HI]] +; CHECK-DAG: scvtf.2d v6, [[BLOCK3_LO]] +; CHECK-DAG: scvtf.2d v7, [[BLOCK3_HI]] + +; CHECK: ret + %flt = sitofp <16 x i32> %in to <16 x double> + %res = fdiv <16 x double> %flt, <double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0> + ret <16 x double> %res +} + +; This one is small enough to satisfy isSimple, but still illegally large. 
+define <4 x double> @test_sitofp_fixed_shortish(<4 x i64> %in) { +; CHECK-LABEL: test_sitofp_fixed_shortish: + +; CHECK-DAG: scvtf.2d v0, v0 +; CHECK-DAG: scvtf.2d v1, v1 + +; CHECK: ret + %flt = sitofp <4 x i64> %in to <4 x double> + %res = fdiv <4 x double> %flt, <double 64.0, double 64.0, double 64.0, double 64.0> + ret <4 x double> %res +} -- 2.7.4