From 0148bf53f0a0b533eb742a7c8005a40328c48d66 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 13 Apr 2021 19:40:36 -0500 Subject: [PATCH] [PowerPC] Use correct node to get a super register from a subreg The VSX tablegen file has some rather egregious uses of COPY_TO_REGCLASS even in situations where it needs to use SUBREG_TO_REG. While this produces correct code, it often doesn't allow the register coalescer to coalesce copies and the resulting code ends up being suboptimal. This patch just changes over patterns that should use SUBREG_TO_REG. --- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 140 ++- llvm/test/CodeGen/PowerPC/fp-strict-round.ll | 78 +- .../CodeGen/PowerPC/handle-f16-storage-type.ll | 66 +- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 21 +- .../PowerPC/vector-constrained-fp-intrinsics.ll | 1064 +++++++++----------- 5 files changed, 614 insertions(+), 755 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 475098e..8673d6a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2198,7 +2198,7 @@ def VectorExtractions { def AlignValues { dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); - dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); + dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64); } // Integer extend helper dags 32 -> 64 @@ -2349,14 +2349,14 @@ def LoadFP { // FP merge dags (for f32 -> v4f32) def MrgFP { - dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC); - dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC); - dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC); - dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC); - dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $C, VSRC), 0)); - dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $D, VSRC), 0)); + dag LD32A = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), 
sub_64); + dag LD32B = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$B), sub_64); + dag LD32C = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$C), sub_64); + dag LD32D = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$D), sub_64); + dag AC = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), + (SUBREG_TO_REG (i64 1), $C, sub_64), 0)); + dag BD = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), + (SUBREG_TO_REG (i64 1), $D, sub_64), 0)); dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0)); dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3)); dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0)); @@ -2383,10 +2383,10 @@ def MrgWords { // For big endian, we merge hi doublewords of (A, C) and (B, D), convert // then merge. - dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), - (COPY_TO_REGCLASS f64:$C, VSRC), 0)); - dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), - (COPY_TO_REGCLASS f64:$D, VSRC), 0)); + dag AC = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$A, sub_64), + (SUBREG_TO_REG (i64 1), f64:$C, sub_64), 0)); + dag BD = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$B, sub_64), + (SUBREG_TO_REG (i64 1), f64:$D, sub_64), 0)); dag CVACS = (v4i32 (XVCVDPSXWS AC)); dag CVBDS = (v4i32 (XVCVDPSXWS BD)); dag CVACU = (v4i32 (XVCVDPUXWS AC)); @@ -2394,10 +2394,10 @@ def MrgWords { // For little endian, we merge hi doublewords of (D, B) and (C, A), convert // then merge. 
- dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), - (COPY_TO_REGCLASS f64:$B, VSRC), 0)); - dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), - (COPY_TO_REGCLASS f64:$A, VSRC), 0)); + dag DB = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$D, sub_64), + (SUBREG_TO_REG (i64 1), f64:$B, sub_64), 0)); + dag CA = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$C, sub_64), + (SUBREG_TO_REG (i64 1), f64:$A, sub_64), 0)); dag CVDBS = (v4i32 (XVCVDPSXWS DB)); dag CVCAS = (v4i32 (XVCVDPSXWS CA)); dag CVDBU = (v4i32 (XVCVDPUXWS DB)); @@ -2762,24 +2762,24 @@ def : Pat<(v2i64 immAllZerosV), // Build vectors of floating point converted to i32. def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A, DblToInt.A, DblToInt.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>; + (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS $A), sub_64), 1))>; def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A, DblToUInt.A, DblToUInt.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>; + (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS $A), sub_64), 1))>; def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), - (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), - (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>; + (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPSXDS $A), sub_64), + (SUBREG_TO_REG (i64 1), (XSCVDPSXDS $A), sub_64), 0))>; def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), - (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), - (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; + (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), + (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>; defm : ScalToVecWPermute< v4i32, FltToIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), 
(XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, FltToUIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), sub_64)>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), @@ -2825,8 +2825,8 @@ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), (v2f64 (XXPERMDI - (COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $B, VSRC), 0))>; + (SUBREG_TO_REG (i64 1), $A, sub_64), + (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>; // Using VMRGEW to assemble the final vector would be a lower latency // solution. However, we choose to go with the slightly higher latency // XXPERMDI for 2 reasons: @@ -2930,8 +2930,8 @@ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), // Little endian, available on all targets with VSX def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), (v2f64 (XXPERMDI - (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $A, VSRC), 0))>; + (SUBREG_TO_REG (i64 1), $B, sub_64), + (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; // Using VMRGEW to assemble the final vector would be a lower latency // solution. However, we choose to go with the slightly higher latency // XXPERMDI for 2 reasons: @@ -3012,12 +3012,12 @@ def : Pat<(PPCstore_scal_int_from_vsr // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
defm : ScalToVecWPermute< v4i32, DblToIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, DblToUIntLoad.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1), - (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), sub_64), 1), + (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToLongLoad.A, (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0), @@ -3193,15 +3193,13 @@ def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), // LIWZX - This instruction will be emitted for i32, f32, and when // zero-extending i32 to i64 (zext i32 -> i64). def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64))>; def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (XXSLDWIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + (v4i32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (XXSLDWIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + (v4f32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>; def : Pat i64). 
defm : ScalToVecWPermute< v2i64, (i64 (sextloadi32 xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2), + (XXPERMDIs (LIWAX xoaddr:$src), 2), (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>; defm : ScalToVecWPermute< v2i64, (i64 (zextloadi32 xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), + (XXPERMDIs (LIWZX xoaddr:$src), 2), (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; defm : ScalToVecWPermute< v4i32, (i32 (load xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), + (XXPERMDIs (LIWZX xoaddr:$src), 2), (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; defm : ScalToVecWPermute< v4f32, (f32 (load xoaddr:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2), + (XXPERMDIs (LIWZX xoaddr:$src), 2), (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; def : Pat; + (SUBREG_TO_REG (i64 1), (MTVSRD $A), sub_64), + (SUBREG_TO_REG (i64 1), (MTVSRD $B), sub_64), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (XXPERMDI - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC), - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>; + (SUBREG_TO_REG (i64 1), + (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), sub_64), + (SUBREG_TO_REG (i64 1), + (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), sub_64), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), - (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64), 1)>; } // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64 // Little endian pre-Power9 VSX subtarget that has direct moves. @@ -3612,16 +3610,16 @@ let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in { // Little endian integer vectors using direct moves. 
def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), (v2i64 (XXPERMDI - (COPY_TO_REGCLASS (MTVSRD $B), VSRC), - (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; + (SUBREG_TO_REG (i64 1), (MTVSRD $B), sub_64), + (SUBREG_TO_REG (i64 1), (MTVSRD $A), sub_64), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (XXPERMDI - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC), - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>; + (SUBREG_TO_REG (i64 1), + (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), sub_64), + (SUBREG_TO_REG (i64 1), + (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), sub_64), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), - (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; + (XXSPLTW (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64), 1)>; } // Any Power9 VSX subtarget. @@ -3797,9 +3795,9 @@ def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), (f32 (DFLOADf32 iaddrX4:$src))>; def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), - (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>; + (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddr:$src), sub_64)>; def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)), - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>; + (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; // Convert (Un)Signed DWord in memory -> QP def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))), @@ -3928,11 +3926,11 @@ defm : ScalToVecWPermute; defm : ScalToVecWPermute< v4i32, DblToIntLoadP9.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1), + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1), (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>; defm : ScalToVecWPermute< v4i32, DblToUIntLoadP9.A, - (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1), + (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1), (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 
iaddrX4:$A)), sub_64)>; defm : ScalToVecWPermute< v2i64, FltToLongLoadP9.A, @@ -4072,14 +4070,14 @@ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), - (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; + (v2i64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), - (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; + (v2i64 (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64))>; def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), - (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; + (v2f64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>; def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), - (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; + (v2f64 (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64))>; def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xaddrX4:$src)>; @@ -4259,19 +4257,19 @@ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), defm : ScalToVecWPermute< v2i64, (i64 (load iaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2), + (XXPERMDIs (DFLOADf64 iaddrX4:$src), 2), (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; defm : ScalToVecWPermute< v2i64, (i64 (load xaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2), + (XXPERMDIs (XFLOADf64 xaddrX4:$src), 2), (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>; defm : ScalToVecWPermute< v2f64, (f64 (load iaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2), + (XXPERMDIs (DFLOADf64 iaddrX4:$src), 2), (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>; defm : ScalToVecWPermute< 
v2f64, (f64 (load xaddrX4:$src)), - (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2), + (XXPERMDIs (XFLOADf64 xaddrX4:$src), 2), (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>; def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll index fa36f24..2e226a3 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -210,49 +210,46 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) { ; P8-NEXT: stdu r1, -176(r1) ; P8-NEXT: .cfi_def_cfa_offset 176 ; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset v29, -48 ; P8-NEXT: .cfi_offset v30, -32 ; P8-NEXT: .cfi_offset v31, -16 ; P8-NEXT: xxsldwi vs0, v2, v2, 3 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; P8-NEXT: li r3, 144 ; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; P8-NEXT: li r3, 160 +; P8-NEXT: xscvspdpn f1, vs0 ; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; P8-NEXT: vmr v31, v2 -; P8-NEXT: xscvspdpn f1, vs0 ; P8-NEXT: bl nearbyintf ; P8-NEXT: nop ; P8-NEXT: xxsldwi vs0, v31, v31, 1 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: li r3, 128 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xxlor v30, f1, f1 ; P8-NEXT: xscvspdpn f1, vs0 ; P8-NEXT: bl nearbyintf ; P8-NEXT: nop -; P8-NEXT: li r3, 128 ; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload -; P8-NEXT: xxmrghd vs0, vs1, vs0 +; P8-NEXT: xxmrghd vs0, vs1, v30 ; P8-NEXT: xscvspdpn f1, v31 -; P8-NEXT: xvcvdpsp v30, vs0 +; P8-NEXT: xvcvdpsp v29, vs0 ; P8-NEXT: bl nearbyintf ; P8-NEXT: nop ; P8-NEXT: xxswapd vs0, v31 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: li r3, 128 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xxlor v30, f1, f1 ; P8-NEXT: xscvspdpn f1, vs0 ; P8-NEXT: bl 
nearbyintf ; P8-NEXT: nop -; P8-NEXT: li r3, 128 ; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs0, v30, vs1 ; P8-NEXT: li r3, 160 ; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 144 -; P8-NEXT: xxmrghd vs0, vs0, vs1 -; P8-NEXT: xvcvdpsp v2, vs0 -; P8-NEXT: vmrgew v2, v2, v30 ; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 128 +; P8-NEXT: xvcvdpsp v2, vs0 +; P8-NEXT: vmrgew v2, v2, v29 +; P8-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; P8-NEXT: addi r1, r1, 176 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 @@ -265,41 +262,40 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) { ; P9-NEXT: stdu r1, -80(r1) ; P9-NEXT: .cfi_def_cfa_offset 80 ; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: .cfi_offset v29, -48 ; P9-NEXT: .cfi_offset v30, -32 ; P9-NEXT: .cfi_offset v31, -16 ; P9-NEXT: xxsldwi vs0, v2, v2, 3 -; P9-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill +; P9-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill ; P9-NEXT: xscvspdpn f1, vs0 +; P9-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill ; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill ; P9-NEXT: vmr v31, v2 ; P9-NEXT: bl nearbyintf ; P9-NEXT: nop ; P9-NEXT: xxsldwi vs0, v31, v31, 1 -; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscpsgndp v30, f1, f1 ; P9-NEXT: xscvspdpn f1, vs0 ; P9-NEXT: bl nearbyintf ; P9-NEXT: nop -; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload ; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P9-NEXT: xxmrghd vs0, vs1, vs0 +; P9-NEXT: xxmrghd vs0, vs1, v30 ; P9-NEXT: xscvspdpn f1, v31 -; P9-NEXT: xvcvdpsp v30, vs0 +; P9-NEXT: xvcvdpsp v29, vs0 ; P9-NEXT: bl nearbyintf ; P9-NEXT: nop ; P9-NEXT: xxswapd vs0, v31 -; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscpsgndp v30, f1, f1 ; P9-NEXT: xscvspdpn f1, vs0 ; P9-NEXT: 
bl nearbyintf ; P9-NEXT: nop -; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload ; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P9-NEXT: xxmrghd vs0, v30, vs1 ; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload -; P9-NEXT: xxmrghd vs0, vs0, vs1 -; P9-NEXT: xvcvdpsp v2, vs0 -; P9-NEXT: vmrgew v2, v2, v30 ; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload +; P9-NEXT: xvcvdpsp v2, vs0 +; P9-NEXT: vmrgew v2, v2, v29 +; P9-NEXT: lxv v29, 32(r1) # 16-byte Folded Reload ; P9-NEXT: addi r1, r1, 80 ; P9-NEXT: ld r0, 16(r1) ; P9-NEXT: mtlr r0 @@ -319,26 +315,27 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) { ; P8-NEXT: stdu r1, -160(r1) ; P8-NEXT: .cfi_def_cfa_offset 160 ; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: .cfi_offset v30, -32 ; P8-NEXT: .cfi_offset v31, -16 +; P8-NEXT: li r3, 128 +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; P8-NEXT: li r3, 144 ; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; P8-NEXT: vmr v31, v2 ; P8-NEXT: xxlor f1, v31, v31 ; P8-NEXT: bl nearbyint ; P8-NEXT: nop -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: li r3, 128 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: xxlor v30, f1, f1 ; P8-NEXT: xxswapd vs1, v31 ; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; P8-NEXT: bl nearbyint ; P8-NEXT: nop -; P8-NEXT: li r3, 128 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 144 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: xxmrghd v2, v30, vs1 ; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; P8-NEXT: xxmrghd v2, vs0, vs1 +; P8-NEXT: li r3, 128 +; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; P8-NEXT: addi r1, r1, 160 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 @@ -351,22 +348,23 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) { ; P9-NEXT: stdu r1, -64(r1) ; P9-NEXT: .cfi_def_cfa_offset 64 ; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: 
.cfi_offset v30, -32 ; P9-NEXT: .cfi_offset v31, -16 ; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill ; P9-NEXT: vmr v31, v2 ; P9-NEXT: xscpsgndp f1, v31, v31 +; P9-NEXT: stxv v30, 32(r1) # 16-byte Folded Spill ; P9-NEXT: bl nearbyint ; P9-NEXT: nop -; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill +; P9-NEXT: xscpsgndp v30, f1, f1 ; P9-NEXT: xxswapd vs1, v31 ; P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; P9-NEXT: bl nearbyint ; P9-NEXT: nop -; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload -; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload ; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P9-NEXT: xxmrghd v2, vs0, vs1 +; P9-NEXT: xxmrghd v2, v30, vs1 +; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload +; P9-NEXT: lxv v30, 32(r1) # 16-byte Folded Reload ; P9-NEXT: addi r1, r1, 64 ; P9-NEXT: ld r0, 16(r1) ; P9-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll index ab19afa..d7edb0e 100644 --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -640,46 +640,46 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; P8-LABEL: test_extend32_vec4: ; P8: # %bb.0: ; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8-NEXT: std r0, 16(r1) ; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: li r4, 48 +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill ; P8-NEXT: mr r30, r3 ; P8-NEXT: lhz r3, 6(r3) +; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 64 +; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 80 +; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop -; P8-NEXT: li r3, 80 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill ; P8-NEXT: lhz r3, 2(r30) +; P8-NEXT: xxlor vs63, f1, f1 ; P8-NEXT: bl 
__gnu_h2f_ieee ; P8-NEXT: nop -; P8-NEXT: li r3, 64 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill ; P8-NEXT: lhz r3, 4(r30) +; P8-NEXT: xxlor vs62, f1, f1 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop -; P8-NEXT: li r3, 48 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill ; P8-NEXT: lhz r3, 0(r30) +; P8-NEXT: xxlor vs61, f1, f1 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop -; P8-NEXT: li r3, 80 ; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs0, vs63, vs62 +; P8-NEXT: li r3, 80 +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: xxmrghd vs1, vs61, vs1 +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 64 -; P8-NEXT: lxvd2x vs2, r1, r3 # 16-byte Folded Reload +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 48 -; P8-NEXT: xxmrghd vs0, vs0, vs2 -; P8-NEXT: lxvd2x vs2, r1, r3 # 16-byte Folded Reload -; P8-NEXT: xxmrghd vs1, vs2, vs1 ; P8-NEXT: xvcvdpsp vs34, vs0 +; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload ; P8-NEXT: xvcvdpsp vs35, vs1 ; P8-NEXT: vmrgew v2, v2, v3 ; P8-NEXT: addi r1, r1, 112 ; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; @@ -749,43 +749,43 @@ define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { ; P8-LABEL: test_extend64_vec4: ; P8: # %bb.0: ; P8-NEXT: mflr r0 -; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8-NEXT: std r0, 16(r1) ; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: li r4, 48 +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill ; P8-NEXT: mr r30, r3 ; P8-NEXT: lhz r3, 6(r3) +; P8-NEXT: stxvd2x vs61, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 64 +; P8-NEXT: stxvd2x vs62, r1, r4 # 16-byte Folded Spill +; P8-NEXT: li r4, 80 +; P8-NEXT: stxvd2x vs63, r1, r4 # 16-byte Folded Spill ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop 
-; P8-NEXT: li r3, 80 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill ; P8-NEXT: lhz r3, 4(r30) +; P8-NEXT: xxlor vs63, f1, f1 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop -; P8-NEXT: li r3, 64 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill ; P8-NEXT: lhz r3, 2(r30) +; P8-NEXT: xxlor vs62, f1, f1 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop -; P8-NEXT: li r3, 48 -; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill ; P8-NEXT: lhz r3, 0(r30) +; P8-NEXT: xxlor vs61, f1, f1 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop ; P8-NEXT: li r3, 80 +; P8-NEXT: xxmrghd vs35, vs63, vs62 ; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs34, vs61, vs1 +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 64 -; P8-NEXT: lxvd2x vs2, r1, r3 # 16-byte Folded Reload +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload ; P8-NEXT: li r3, 48 -; P8-NEXT: xxmrghd vs35, vs0, vs2 -; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload -; P8-NEXT: xxmrghd vs34, vs0, vs1 +; P8-NEXT: lxvd2x vs61, r1, r3 # 16-byte Folded Reload ; P8-NEXT: addi r1, r1, 112 ; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 93d0d29..42d9a8b 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -67,14 +67,13 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; ; P9BE-LABEL: test_pre_inc_disable_1: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: lxsd v5, 0(r5) ; P9BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha ; P9BE-NEXT: xxlxor v3, v3, v3 ; 
P9BE-NEXT: li r6, 0 ; P9BE-NEXT: addi r5, r5, .LCPI0_0@toc@l ; P9BE-NEXT: lxvx v2, 0, r5 ; P9BE-NEXT: addis r5, r2, .LCPI0_1@toc@ha -; P9BE-NEXT: xxlor v5, vs0, vs0 ; P9BE-NEXT: addi r5, r5, .LCPI0_1@toc@l ; P9BE-NEXT: lxvx v4, 0, r5 ; P9BE-NEXT: li r5, 4 @@ -87,10 +86,8 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; P9BE-NEXT: .p2align 4 ; P9BE-NEXT: .LBB0_1: # %for.cond1.preheader ; P9BE-NEXT: # -; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: lxsd v1, 0(r3) ; P9BE-NEXT: add r7, r3, r4 -; P9BE-NEXT: xxlor v1, vs0, vs0 -; P9BE-NEXT: lfdx f0, r3, r4 ; P9BE-NEXT: vperm v6, v3, v1, v4 ; P9BE-NEXT: vperm v1, v3, v1, v2 ; P9BE-NEXT: xvnegsp v1, v1 @@ -102,14 +99,14 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig ; P9BE-NEXT: vadduwm v1, v1, v6 ; P9BE-NEXT: xxspltw v6, v1, 1 ; P9BE-NEXT: vadduwm v1, v1, v6 -; P9BE-NEXT: xxlor v6, vs0, vs0 +; P9BE-NEXT: lxsdx v6, r3, r4 +; P9BE-NEXT: vextuwlx r3, r5, v1 ; P9BE-NEXT: vperm v7, v3, v6, v4 ; P9BE-NEXT: vperm v6, v3, v6, v2 -; P9BE-NEXT: vextuwlx r3, r5, v1 -; P9BE-NEXT: xvnegsp v6, v6 ; P9BE-NEXT: add r6, r3, r6 -; P9BE-NEXT: xvnegsp v1, v7 ; P9BE-NEXT: add r3, r7, r4 +; P9BE-NEXT: xvnegsp v6, v6 +; P9BE-NEXT: xvnegsp v1, v7 ; P9BE-NEXT: vabsduw v6, v6, v5 ; P9BE-NEXT: vabsduw v1, v1, v0 ; P9BE-NEXT: vadduwm v1, v1, v6 @@ -205,17 +202,15 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc ; ; P9BE-LABEL: test_pre_inc_disable_2: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: lxsd v2, 0(r3) ; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; P9BE-NEXT: lxsd v1, 0(r4) ; P9BE-NEXT: xxlxor v3, v3, v3 ; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; P9BE-NEXT: lxvx v4, 0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI1_1@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI1_1@toc@l -; P9BE-NEXT: xxlor v2, vs0, vs0 -; P9BE-NEXT: lfd f0, 0(r4) ; P9BE-NEXT: lxvx v0, 0, r3 -; P9BE-NEXT: xxlor v1, vs0, vs0 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vperm v5, 
v3, v2, v4 ; P9BE-NEXT: vperm v2, v3, v2, v0 diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 21fc855..dc543aa 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -261,41 +261,42 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_frem_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: addis 4, 2, .LCPI6_1@toc@ha +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI6_0@toc@ha ; PC64LE-NEXT: lfs 31, .LCPI6_1@toc@l(4) ; PC64LE-NEXT: lfs 1, .LCPI6_0@toc@l(3) ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI6_2@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI6_2@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: xxmrghd 34, 1, 63 +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_frem_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill ; PC64LE9-NEXT: std 0, 16(1) ; 
PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI6_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI6_1@toc@ha ; PC64LE9-NEXT: lfs 31, .LCPI6_1@toc@l(3) @@ -303,18 +304,17 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 { ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI6_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfs 1, .LCPI6_2@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: xxmrghd 34, 1, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: @@ -432,10 +432,10 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: addis 4, 2, .LCPI8_1@toc@ha -; PC64LE-NEXT: stfd 31, 88(1) # 8-byte Folded Spill -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha ; PC64LE-NEXT: lfs 31, .LCPI8_1@toc@l(4) @@ -443,31 +443,27 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; 
PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI8_2@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI8_2@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 3, 2, .LCPI8_3@toc@ha ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI8_3@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfs 1, .LCPI8_3@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -476,10 +472,10 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI8_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI8_1@toc@ha ; PC64LE9-NEXT: lfs 31, .LCPI8_1@toc@l(3) @@ -487,27 +483,25 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI8_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 
1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfs 1, .LCPI8_2@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI8_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfs 1, .LCPI8_3@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -528,47 +522,43 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 { ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: addis 4, 2, .LCPI9_1@toc@ha ; PC64LE-NEXT: stfd 31, 88(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lfs 31, .LCPI9_1@toc@l(4) ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; PC64LE-NEXT: lfs 31, .LCPI9_1@toc@l(4) ; PC64LE-NEXT: lfs 1, .LCPI9_0@toc@l(3) ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI9_2@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI9_2@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 3, 2, .LCPI9_3@toc@ha ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: # kill: 
def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI9_3@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfs 1, .LCPI9_3@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI9_4@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI9_4@toc@l(3) ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 35, 1, 62 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -581,41 +571,39 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 { ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI9_0@toc@ha ; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI9_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI9_1@toc@ha +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 31, .LCPI9_1@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI9_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; 
PC64LE9-NEXT: lfs 1, .LCPI9_2@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI9_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfs 1, .LCPI9_3@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI9_4@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfs 1, .LCPI9_4@toc@l(3) ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 62 ; PC64LE9-NEXT: vmr 2, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1490,41 +1478,42 @@ define <2 x double> @constrained_vector_pow_v2f64() #0 { ; PC64LE-LABEL: constrained_vector_pow_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: addis 4, 2, .LCPI31_1@toc@ha +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI31_0@toc@ha ; PC64LE-NEXT: lfs 31, .LCPI31_1@toc@l(4) ; PC64LE-NEXT: lfd 1, .LCPI31_0@toc@l(3) ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 
1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI31_2@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI31_2@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: xxmrghd 34, 1, 63 +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_pow_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI31_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI31_1@toc@ha ; PC64LE9-NEXT: lfs 31, .LCPI31_1@toc@l(3) @@ -1532,18 +1521,17 @@ define <2 x double> @constrained_vector_pow_v2f64() #0 { ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI31_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfd 1, .LCPI31_2@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: xxmrghd 34, 1, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload ; 
PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr entry: @@ -1661,10 +1649,10 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: addis 4, 2, .LCPI33_1@toc@ha -; PC64LE-NEXT: stfd 31, 88(1) # 8-byte Folded Spill -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI33_0@toc@ha ; PC64LE-NEXT: lfs 31, .LCPI33_1@toc@l(4) @@ -1672,31 +1660,27 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 { ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI33_2@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI33_2@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 3, 2, .LCPI33_3@toc@ha ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI33_3@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI33_3@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -1705,10 +1689,10 @@ define <3 x double> 
@constrained_vector_pow_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI33_0@toc@ha -; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 1, .LCPI33_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI33_1@toc@ha ; PC64LE9-NEXT: lfs 31, .LCPI33_1@toc@l(3) @@ -1716,27 +1700,25 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 { ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI33_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfd 1, .LCPI33_2@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI33_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfd 1, .LCPI33_3@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -1757,47 +1739,43 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 { ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: addis 4, 2, .LCPI34_1@toc@ha ; PC64LE-NEXT: 
stfd 31, 88(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lfs 31, .LCPI34_1@toc@l(4) ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI34_0@toc@ha -; PC64LE-NEXT: lfs 31, .LCPI34_1@toc@l(4) ; PC64LE-NEXT: lfd 1, .LCPI34_0@toc@l(3) ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI34_2@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI34_2@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 3, 2, .LCPI34_3@toc@ha ; PC64LE-NEXT: fmr 2, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI34_3@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI34_3@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: fmr 2, 31 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI34_4@toc@ha +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI34_4@toc@l(3) ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 35, 1, 62 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 
62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 96 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1810,41 +1788,39 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 { ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI34_0@toc@ha ; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI34_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI34_1@toc@ha +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfs 31, .LCPI34_1@toc@l(3) ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI34_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfd 1, .LCPI34_2@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI34_3@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: fmr 2, 31 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfd 1, .LCPI34_3@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI34_4@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: fmr 2, 31 ; PC64LE9-NEXT: lfd 1, .LCPI34_4@toc@l(3) ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 62 ; PC64LE9-NEXT: vmr 2, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def 
$vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1905,23 +1881,23 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) -; PC64LE-NEXT: addis 3, 2, .LCPI36_0@toc@ha +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: li 4, 3 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: addis 3, 2, .LCPI36_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI36_0@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: li 4, 3 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI36_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 +; PC64LE-NEXT: li 4, 3 ; PC64LE-NEXT: lfd 1, .LCPI36_1@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: xxmrghd 34, 1, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1934,19 +1910,19 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 { ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI36_0@toc@ha ; PC64LE9-NEXT: li 4, 3 +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI36_0@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI36_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: lfd 1, .LCPI36_1@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: 
xxmrghd 34, 1, 0 +; PC64LE9-NEXT: xxmrghd 34, 1, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2060,38 +2036,34 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: li 4, 3 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI38_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI38_0@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: li 4, 3 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI38_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 +; PC64LE-NEXT: li 4, 3 ; PC64LE-NEXT: lfs 1, .LCPI38_1@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 3, 2, .LCPI38_2@toc@ha ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: li 4, 3 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI38_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 ; PC64LE-NEXT: lfd 1, .LCPI38_2@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -2100,34 +2072,32 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: 
addis 3, 2, .LCPI38_0@toc@ha ; PC64LE9-NEXT: li 4, 3 -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI38_0@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI38_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: lfs 1, .LCPI38_1@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI38_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: li 4, 3 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 ; PC64LE9-NEXT: lfd 1, .LCPI38_2@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -2146,45 +2116,41 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: li 4, 3 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI39_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI39_0@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: li 4, 3 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI39_1@toc@ha +; 
PC64LE-NEXT: xxlor 63, 1, 1 +; PC64LE-NEXT: li 4, 3 ; PC64LE-NEXT: lfd 1, .LCPI39_1@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: addis 3, 2, .LCPI39_2@toc@ha ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: li 4, 3 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI39_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 ; PC64LE-NEXT: lfd 1, .LCPI39_2@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: li 4, 3 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI39_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 +; PC64LE-NEXT: li 4, 3 ; PC64LE-NEXT: lfd 1, .LCPI39_3@toc@l(3) ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 2, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 35, 1, 62 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2197,37 +2163,35 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 { ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI39_0@toc@ha ; PC64LE9-NEXT: li 4, 3 -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI39_0@toc@l(3) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI39_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 
; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: lfd 1, .LCPI39_1@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI39_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: li 4, 3 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 ; PC64LE9-NEXT: lfd 1, .LCPI39_2@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI39_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: lfd 1, .LCPI39_3@toc@l(3) ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 62 ; PC64LE9-NEXT: vmr 2, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2284,21 +2248,21 @@ define <2 x double> @constrained_vector_sin_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI41_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI41_0@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI41_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI41_1@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; 
PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: xxmrghd 34, 63, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2310,18 +2274,18 @@ define <2 x double> @constrained_vector_sin_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI41_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI41_0@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI41_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI41_1@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: xxmrghd 34, 63, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2425,35 +2389,31 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI43_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI43_0@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI43_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI43_1@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: 
addis 3, 2, .LCPI43_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI43_2@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -2462,31 +2422,29 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI43_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI43_0@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI43_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI43_1@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI43_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI43_2@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; 
PC64LE9-NEXT: blr @@ -2504,41 +2462,37 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI44_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI44_0@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI44_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI44_1@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI44_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI44_2@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI44_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI44_3@toc@l(3) ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 3, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2550,34 +2504,32 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 { ; 
PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI44_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI44_0@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI44_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI44_1@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI44_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI44_2@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI44_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI44_3@toc@l(3) ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: vmr 3, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2633,21 +2585,21 @@ define <2 x double> @constrained_vector_cos_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI46_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI46_0@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; 
PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI46_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI46_1@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: xxmrghd 34, 63, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2659,18 +2611,18 @@ define <2 x double> @constrained_vector_cos_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI46_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI46_0@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI46_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI46_1@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: xxmrghd 34, 63, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2774,35 +2726,31 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI48_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI48_0@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: 
# kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI48_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI48_1@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI48_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI48_2@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -2811,31 +2759,29 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI48_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI48_0@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI48_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI48_1@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI48_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI48_2@toc@l(3) ; PC64LE9-NEXT: bl cos ; 
PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -2853,41 +2799,37 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI49_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI49_0@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI49_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI49_1@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI49_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI49_2@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI49_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI49_3@toc@l(3) ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 3, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; 
PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2899,34 +2841,32 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI49_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI49_0@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI49_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI49_1@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI49_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI49_2@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI49_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI49_3@toc@l(3) ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: vmr 3, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2982,21 
+2922,21 @@ define <2 x double> @constrained_vector_exp_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI51_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI51_0@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI51_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI51_1@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: xxmrghd 34, 63, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3008,18 +2948,18 @@ define <2 x double> @constrained_vector_exp_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI51_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI51_0@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI51_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI51_1@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: xxmrghd 34, 63, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3123,35 +3063,31 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 { ; PC64LE: # %bb.0: # %entry 
; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI53_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI53_0@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI53_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI53_1@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI53_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI53_2@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -3160,31 +3096,29 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI53_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI53_0@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI53_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 
63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI53_1@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI53_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI53_2@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -3202,41 +3136,37 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI54_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI54_0@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI54_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI54_1@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI54_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI54_2@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: 
stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI54_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI54_3@toc@l(3) ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 3, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3248,34 +3178,32 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI54_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI54_0@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI54_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI54_1@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI54_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI54_2@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI54_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI54_3@toc@l(3) ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: 
def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: vmr 3, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3331,21 +3259,21 @@ define <2 x double> @constrained_vector_exp2_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI56_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI56_0@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI56_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI56_1@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: xxmrghd 34, 1, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3357,18 +3285,18 @@ define <2 x double> @constrained_vector_exp2_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI56_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI56_0@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI56_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI56_1@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 
16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: xxmrghd 34, 1, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3472,35 +3400,31 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI58_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI58_0@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI58_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI58_1@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI58_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI58_2@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -3509,31 +3433,29 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 
2, .LCPI58_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI58_0@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI58_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI58_1@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI58_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI58_2@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -3551,41 +3473,37 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI59_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI59_0@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI59_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI59_1@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def 
$vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI59_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI59_2@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI59_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI59_3@toc@l(3) ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 2, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 35, 1, 62 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3597,34 +3515,32 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI59_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI59_0@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI59_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI59_1@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI59_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, 
.LCPI59_2@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI59_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI59_3@toc@l(3) ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 62 ; PC64LE9-NEXT: vmr 2, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3680,21 +3596,21 @@ define <2 x double> @constrained_vector_log_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI61_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI61_0@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI61_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI61_1@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: xxmrghd 34, 63, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3706,18 +3622,18 @@ define <2 x double> @constrained_vector_log_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI61_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 
16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI61_0@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI61_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI61_1@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: xxmrghd 34, 63, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3821,35 +3737,31 @@ define <3 x double> @constrained_vector_log_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI63_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI63_0@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI63_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI63_1@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI63_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI63_2@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 
1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -3858,31 +3770,29 @@ define <3 x double> @constrained_vector_log_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI63_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI63_0@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI63_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI63_1@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI63_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI63_2@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -3900,41 +3810,37 @@ define <4 x double> @constrained_vector_log_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI64_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI64_0@toc@l(3) ; 
PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI64_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI64_1@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI64_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI64_2@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI64_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI64_3@toc@l(3) ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 3, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3946,34 +3852,32 @@ define <4 x double> @constrained_vector_log_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI64_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI64_0@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI64_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 
16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI64_1@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI64_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI64_2@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI64_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI64_3@toc@l(3) ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: vmr 3, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4029,21 +3933,21 @@ define <2 x double> @constrained_vector_log10_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI66_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI66_0@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI66_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI66_1@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; 
PC64LE-NEXT: xxmrghd 34, 63, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4055,18 +3959,18 @@ define <2 x double> @constrained_vector_log10_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI66_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI66_0@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI66_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI66_1@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: xxmrghd 34, 63, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4170,35 +4074,31 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI68_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI68_0@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI68_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI68_1@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, 
.LCPI68_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI68_2@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -4207,31 +4107,29 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI68_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI68_0@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI68_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI68_1@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI68_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI68_2@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; 
PC64LE9-NEXT: blr @@ -4249,41 +4147,37 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI69_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI69_0@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI69_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI69_1@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI69_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI69_2@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI69_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI69_3@toc@l(3) ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 3, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4295,34 +4189,32 @@ define <4 x double> @constrained_vector_log10_v4f64() 
#0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI69_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI69_0@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI69_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI69_1@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI69_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI69_2@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI69_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI69_3@toc@l(3) ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: vmr 3, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4378,21 +4270,21 @@ define <2 x double> @constrained_vector_log2_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI71_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI71_0@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: 
li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI71_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI71_1@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: xxmrghd 34, 63, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4404,18 +4296,18 @@ define <2 x double> @constrained_vector_log2_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI71_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI71_0@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI71_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI71_1@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: xxmrghd 34, 63, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4519,35 +4411,31 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI73_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI73_0@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 
48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI73_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI73_1@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI73_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI73_2@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -4556,31 +4444,29 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI73_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI73_0@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI73_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI73_1@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI73_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI73_2@toc@l(3) ; 
PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -4598,41 +4484,37 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI74_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI74_0@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI74_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI74_1@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI74_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI74_2@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI74_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI74_3@toc@l(3) ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 3, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 
3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 34, 62, 1 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 0, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4644,34 +4526,32 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI74_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI74_0@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI74_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI74_1@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI74_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI74_2@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI74_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI74_3@toc@l(3) ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 ; PC64LE9-NEXT: vmr 3, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 0, 1 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; 
PC64LE9-NEXT: mtlr 0 @@ -4902,21 +4782,21 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 +; PC64LE-NEXT: xxmrghd 34, 1, 63 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4928,18 +4808,18 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 +; PC64LE9-NEXT: xxmrghd 34, 1, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5043,35 +4923,31 @@ define <3 x 
double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 0, 1 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 63, 1 ; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxswapd 0, 63 -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: fmr 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -5080,31 +4956,29 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: stdu 1, -48(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 
killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 0, 1 +; PC64LE9-NEXT: xxmrghd 63, 63, 1 ; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -5122,41 +4996,37 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) ; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha ; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha +; PC64LE-NEXT: xxlor 63, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha -; PC64LE-NEXT: xxmrghd 63, 1, 0 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 ; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3) ; 
PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha +; PC64LE-NEXT: xxlor 62, 1, 1 ; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3) ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: vmr 2, 31 ; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxmrghd 35, 1, 62 ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: addi 1, 1, 80 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5168,34 +5038,32 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { ; PC64LE9-NEXT: std 0, 16(1) ; PC64LE9-NEXT: stdu 1, -64(1) ; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 ; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha ; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 ; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 62, 1, 
1 ; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3) ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 1, 62 ; PC64LE9-NEXT: vmr 2, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 -- 2.7.4