// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
DAGCombinerInfo &DCI) const {
+ // Delay VSX load for LE combine until after LegalizeOps to prioritize other
+ // load combines.
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Chain;
MVT VecTy = N->getValueType(0).getSimpleVT();
- // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
- // aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
- VecTy.getScalarSizeInBits() <= 32) {
- return SDValue();
- }
-
SDValue LoadOps[] = { Chain, Base };
SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
DAG.getVTList(MVT::v2f64, MVT::Other),
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
DAGCombinerInfo &DCI) const {
+ // Delay VSX store for LE combine until after LegalizeOps to prioritize other
+ // store combines.
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Chain;
SDValue Src = N->getOperand(SrcOpnd);
MVT VecTy = Src.getValueType().getSimpleVT();
- // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
- // aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
- VecTy.getScalarSizeInBits() <= 32) {
- return SDValue();
- }
-
// All stores are done as v2f64 and possible bit cast.
if (VecTy != MVT::v2f64) {
Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
; CHECK-LE-NEXT: addis r4, r2, .LCPI0_1@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-NEXT: addi r4, r4, .LCPI0_1@toc@l
-; CHECK-LE-NEXT: lvx v2, 0, r3
-; CHECK-LE-NEXT: lvx v3, 0, r4
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-NEXT: lxvd2x vs1, 0, r4
; CHECK-LE-NEXT: addi r4, r1, 48
; CHECK-LE-NEXT: addi r3, r1, 32
-; CHECK-LE-NEXT: stvx v2, 0, r4
-; CHECK-LE-NEXT: stvx v3, 0, r3
+; CHECK-LE-NEXT: stxvd2x vs0, 0, r4
+; CHECK-LE-NEXT: stxvd2x vs1, 0, r3
; CHECK-LE-NEXT: bl test
; CHECK-LE-NEXT: nop
; CHECK-LE-NEXT: lwa r3, 32(r1)
; CHECK-LE-NEXT: stdux r1, r1, r0
; CHECK-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-NEXT: lvx v2, 0, r3
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: addi r3, r1, 32
-; CHECK-LE-NEXT: stvx v2, 0, r3
+; CHECK-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-NEXT: bl test1
; CHECK-LE-NEXT: nop
; CHECK-LE-NEXT: lwa r3, 32(r1)
; CHECK-OPT-NEXT: entry:
; CHECK-OPT-NEXT: %Arr2 = alloca [64 x i16], align 2
; CHECK-OPT: store <16 x i16> [[TMP0:%.*]], <16 x i16>* [[TMP0:%.*]], align 2
-
+; CHECK-LE-LABEL: test_Array:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mflr r0
+; CHECK-LE-NEXT: std r0, 16(r1)
+; CHECK-LE-NEXT: stdu r1, -176(r1)
+; CHECK-LE-NEXT: addis r4, r2, Arr1@toc@ha
+; CHECK-LE-NEXT: li r3, 0
+; CHECK-LE-NEXT: li r6, 65
+; CHECK-LE-NEXT: addi r5, r1, 46
+; CHECK-LE-NEXT: addi r4, r4, Arr1@toc@l
+; CHECK-LE-NEXT: stw r3, 44(r1)
+; CHECK-LE-NEXT: addi r4, r4, -1
+; CHECK-LE-NEXT: mtctr r6
+; CHECK-LE-NEXT: bdz .LBB2_2
+; CHECK-LE-NEXT: .p2align 5
+; CHECK-LE-NEXT: .LBB2_1: # %for.body
+; CHECK-LE-NEXT: #
+; CHECK-LE-NEXT: lbz r6, 1(r4)
+; CHECK-LE-NEXT: addi r7, r5, 2
+; CHECK-LE-NEXT: addi r4, r4, 1
+; CHECK-LE-NEXT: addi r3, r3, 1
+; CHECK-LE-NEXT: sth r6, 2(r5)
+; CHECK-LE-NEXT: mr r5, r7
+; CHECK-LE-NEXT: bdnz .LBB2_1
+; CHECK-LE-NEXT: .LBB2_2: # %for.cond.cleanup
+; CHECK-LE-NEXT: addi r3, r1, 48
+; CHECK-LE-NEXT: bl test_arr
+; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: addi r1, r1, 176
+; CHECK-LE-NEXT: ld r0, 16(r1)
+; CHECK-LE-NEXT: mtlr r0
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: test_Array:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mflr r0
+; CHECK-BE-NEXT: std r0, 16(r1)
+; CHECK-BE-NEXT: stdu r1, -256(r1)
+; CHECK-BE-NEXT: addis r5, r2, Arr1@toc@ha
+; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: addi r5, r5, Arr1@toc@l
+; CHECK-BE-NEXT: addi r4, r1, 126
+; CHECK-BE-NEXT: li r6, 65
+; CHECK-BE-NEXT: stw r3, 124(r1)
+; CHECK-BE-NEXT: addi r5, r5, -1
+; CHECK-BE-NEXT: mtctr r6
+; CHECK-BE-NEXT: bdz .LBB2_2
+; CHECK-BE-NEXT: .LBB2_1: # %for.body
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: lbz r6, 1(r5)
+; CHECK-BE-NEXT: addi r5, r5, 1
+; CHECK-BE-NEXT: addi r3, r3, 1
+; CHECK-BE-NEXT: sth r6, 2(r4)
+; CHECK-BE-NEXT: addi r4, r4, 2
+; CHECK-BE-NEXT: bdnz .LBB2_1
+; CHECK-BE-NEXT: .LBB2_2: # %for.cond.cleanup
+; CHECK-BE-NEXT: addi r3, r1, 128
+; CHECK-BE-NEXT: bl test_arr
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: addi r1, r1, 256
+; CHECK-BE-NEXT: ld r0, 16(r1)
+; CHECK-BE-NEXT: mtlr r0
+; CHECK-BE-NEXT: blr
entry:
%Arr2 = alloca [64 x i16], align 2
%i = alloca i32, align 4
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8: lfiwzx f0, 0, r3
; CHECK-P8: ld r4, .LC0@toc@l(r4)
-; CHECK-P8: xxspltw v2, vs0, 1
-; CHECK-P8: stvx v2, 0, r4
+; CHECK-P8: xxspltw vs0, vs0, 1
+; CHECK-P8: xxswapd vs0, vs0
+; CHECK-P8: stxvd2x vs0, 0, r4
; CHECK-P8: lis r4, 1024
; CHECK-P8: lfiwax f0, 0, r3
; CHECK-P8: addis r3, r2, .LC1@toc@ha
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: mullw 4, 4, 5
; CHECK-NEXT: vsplth 2, 2, 3
-; CHECK-NEXT: stvx 2, 0, 3
+; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: neg 4, 4
; CHECK-NEXT: mtvsrd 35, 4
+; CHECK-NEXT: stxvd2x 0, 0, 3
; CHECK-NEXT: vsplth 3, 3, 3
-; CHECK-NEXT: stvx 3, 0, 3
+; CHECK-NEXT: xxswapd 1, 35
+; CHECK-NEXT: stxvd2x 1, 0, 3
bb:
br i1 undef, label %bb22, label %bb3
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI5_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
;
; P8LE-LABEL: fromDiffMemConsDi:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI7_0@toc@ha
-; P8LE-NEXT: addi r3, r4, .LCPI7_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: addi r4, r4, .LCPI7_0@toc@l
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: xxswapd v3, vs1
+; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
; P8LE-NEXT: sldi r4, r4, 2
; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
+; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
+; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: addi r3, r5, .LCPI9_0@toc@l
-; P8LE-NEXT: lvx v3, 0, r3
+; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI16_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
;
; P8LE-LABEL: fromDiffMemConsDConvftoi:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha
-; P8LE-NEXT: addi r3, r4, .LCPI18_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: xxswapd v3, vs1
+; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspsxws v2, v2
; P8LE-NEXT: blr
entry:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI25_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI25_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI37_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI37_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
;
; P8LE-LABEL: fromDiffMemConsDui:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI39_0@toc@ha
-; P8LE-NEXT: addi r3, r4, .LCPI39_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: addi r4, r4, .LCPI39_0@toc@l
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: xxswapd v3, vs1
+; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
; P8LE-NEXT: sldi r4, r4, 2
; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
+; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
+; P8LE-NEXT: lxvd2x vs1, 0, r4
; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: addi r3, r5, .LCPI41_0@toc@l
-; P8LE-NEXT: lvx v3, 0, r3
+; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI48_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI48_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
;
; P8LE-LABEL: fromDiffMemConsDConvftoui:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha
-; P8LE-NEXT: addi r3, r4, .LCPI50_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: xxswapd v3, vs1
+; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspuxws v2, v2
; P8LE-NEXT: blr
entry:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI57_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI57_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: blr
entry:
ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT: xxswapd v4, vs0
; CHECK-P7-NEXT: vperm v2, v3, v2, v4
; CHECK-P7-NEXT: blr
;
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT: xxswapd v4, vs0
; CHECK-P7-NEXT: vperm v2, v3, v2, v4
; CHECK-P7-NEXT: blr
;
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI5_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI5_0@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT: xxswapd v4, vs0
; CHECK-P7-NEXT: vperm v2, v3, v2, v4
; CHECK-P7-NEXT: blr
;
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI7_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI7_0@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT: xxswapd v4, vs0
; CHECK-P7-NEXT: vperm v2, v3, v2, v4
; CHECK-P7-NEXT: blr
;
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI9_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI9_0@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT: xxswapd v4, vs0
; CHECK-P7-NEXT: vperm v2, v3, v2, v4
; CHECK-P7-NEXT: blr
;
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI11_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI11_0@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT: xxswapd v4, vs0
; CHECK-P7-NEXT: vperm v2, v3, v2, v4
; CHECK-P7-NEXT: blr
;
; CHECK-P7-NEXT: xxlxor v4, v4, v4
; CHECK-P7-NEXT: std r3, -16(r1)
; CHECK-P7-NEXT: addis r3, r2, .LCPI12_0@toc@ha
-; CHECK-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-P7-NEXT: addi r3, r3, .LCPI12_0@toc@l
-; CHECK-P7-NEXT: lvx v3, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P7-NEXT: lxvd2x vs1, 0, r3
; CHECK-P7-NEXT: xxswapd v2, vs0
+; CHECK-P7-NEXT: xxswapd v3, vs1
; CHECK-P7-NEXT: vperm v2, v2, v4, v3
; CHECK-P7-NEXT: blr
;
; CHECK-P7-NEXT: stw r3, -16(r1)
; CHECK-P7-NEXT: addi r3, r1, -16
; CHECK-P7-NEXT: addi r4, r4, .LCPI14_0@toc@l
-; CHECK-P7-NEXT: lvx v3, 0, r3
-; CHECK-P7-NEXT: lvx v2, 0, r4
+; CHECK-P7-NEXT: lxvd2x vs1, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P7-NEXT: xxswapd v3, vs1
+; CHECK-P7-NEXT: xxswapd v2, vs0
; CHECK-P7-NEXT: vperm v2, v3, v3, v2
; CHECK-P7-NEXT: blr
;
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vmrgow v2, v3, v2
; CHECK-P8-NEXT: blr
;
; CHECK-P7-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P7-NEXT: addis r4, r2, .LCPI15_1@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI15_0@toc@l
-; CHECK-P7-NEXT: lvx v3, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
; CHECK-P7-NEXT: addi r3, r4, .LCPI15_1@toc@l
-; CHECK-P7-NEXT: lvx v4, 0, r3
+; CHECK-P7-NEXT: lxvd2x vs1, 0, r3
+; CHECK-P7-NEXT: xxswapd v3, vs0
+; CHECK-P7-NEXT: xxswapd v4, vs1
; CHECK-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-P7-NEXT: blr
;
define <2 x i64> @testSplati64_1(<2 x i64>* nocapture readonly %ptr) #0 {
; CHECK-P8-LABEL: testSplati64_1:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT: xxspltd v2, vs0, 1
+; CHECK-P8-NEXT: addi r3, r3, 8
+; CHECK-P8-NEXT: lxvdsx v2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testSplati64_1:
;
; CHECK-P7-LABEL: testSplati64_1:
; CHECK-P7: # %bb.0: # %entry
-; CHECK-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-P7-NEXT: xxspltd v2, vs0, 1
+; CHECK-P7-NEXT: addi r3, r3, 8
+; CHECK-P7-NEXT: lxvdsx v2, 0, r3
; CHECK-P7-NEXT: blr
;
; P8-AIX-LABEL: testSplati64_1:
; CHECK-P8-NEXT: lbzx r3, 0, r3
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: vspltb v2, v2, 7
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: testByteSplat:
; CHECK-P7-NEXT: lvx v3, 0, r3
; CHECK-P7-NEXT: vperm v2, v3, v3, v2
; CHECK-P7-NEXT: vspltb v2, v2, 15
-; CHECK-P7-NEXT: stvx v2, 0, r3
+; CHECK-P7-NEXT: xxswapd vs0, v2
+; CHECK-P7-NEXT: stxvd2x vs0, 0, r3
; CHECK-P7-NEXT: blr
;
; P8-AIX-LABEL: testByteSplat:
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI0_1@toc@ha
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT: lvx 2, 0, 3
-; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI0_1@toc@l
-; CHECK-LE-P8-NEXT: lvx 3, 0, 3
+; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI0_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT: xxswapd 34, 0
+; CHECK-LE-P8-NEXT: xxswapd 35, 1
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI1_1@toc@ha
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI1_0@toc@l
-; CHECK-LE-P8-NEXT: lvx 2, 0, 3
-; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI1_1@toc@l
-; CHECK-LE-P8-NEXT: lvx 3, 0, 3
+; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI1_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT: xxswapd 34, 0
+; CHECK-LE-P8-NEXT: xxswapd 35, 1
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI2_1@toc@ha
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT: lvx 2, 0, 3
-; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI2_1@toc@l
-; CHECK-LE-P8-NEXT: lvx 3, 0, 3
+; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI2_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT: xxswapd 34, 0
+; CHECK-LE-P8-NEXT: xxswapd 35, 1
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha
; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI3_1@toc@ha
; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l
-; CHECK-LE-P8-NEXT: lvx 2, 0, 3
-; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI3_1@toc@l
-; CHECK-LE-P8-NEXT: lvx 3, 0, 3
+; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI3_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT: xxswapd 34, 0
+; CHECK-LE-P8-NEXT: xxswapd 35, 1
; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2
; CHECK-LE-P8-NEXT: blr
;
; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1
; CHECK-NEXT: li r4, 20
; CHECK-NEXT: addi r3, r3, .LCPI16_0@toc@l
-; CHECK-NEXT: lvx v3, 0, r3
+; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: li r3, 16
+; CHECK-NEXT: xxswapd vs35, vs0
; CHECK-NEXT: vperm v3, v2, v2, v3
; CHECK-NEXT: xxswapd vs0, vs35
; CHECK-NEXT: stxvd2x vs0, 0, r5
; CHECK-P8-LABEL: testArray_01:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addi r3, r3, 32
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-P8-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-P8-NEXT: addi r3, r3, 32
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: std r4, 40(r1)
; CHECK-P8-NEXT: std r5, 48(r1)
; CHECK-P8-NEXT: std r6, 56(r1)
-; CHECK-P8-NEXT: lvx v2, r12, r11
+; CHECK-P8-NEXT: lxvd2x vs0, r12, r11
; CHECK-P8-NEXT: std r7, 64(r1)
; CHECK-P8-NEXT: std r8, 72(r1)
; CHECK-P8-NEXT: std r9, 80(r1)
; CHECK-P8-NEXT: std r10, 88(r1)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: std r8, -8(r1)
; CHECK-P8-NEXT: std r7, -16(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: std r6, -8(r1)
; CHECK-P8-NEXT: std r5, -16(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addi r4, r1, 48
; CHECK-P8-NEXT: std r6, 56(r1)
; CHECK-P8-NEXT: std r5, 48(r1)
-; CHECK-P8-NEXT: lvx v31, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: std r9, 80(r1)
; CHECK-P8-NEXT: std r10, 88(r1)
; CHECK-P8-NEXT: addi r7, r1, 32
-; CHECK-P8-NEXT: lvx v2, r7, r11
+; CHECK-P8-NEXT: lxvd2x vs0, r7, r11
; CHECK-P8-NEXT: std r3, 32(r1)
; CHECK-P8-NEXT: std r4, 40(r1)
; CHECK-P8-NEXT: std r5, 48(r1)
; CHECK-P8-NEXT: std r6, 56(r1)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addi r5, r1, -16
; CHECK-P8-NEXT: std r4, -8(r1)
; CHECK-P8-NEXT: std r3, -16(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addi r5, r1, -16
; CHECK-P8-NEXT: std r4, -8(r1)
; CHECK-P8-NEXT: std r3, -16(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: std r8, -8(r1)
; CHECK-P8-NEXT: std r7, -16(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
; CHECK-P8-NEXT: std r5, 112(r1)
; CHECK-P8-NEXT: std r6, 120(r1)
; CHECK-P8-NEXT: std r7, 128(r1)
-; CHECK-P8-NEXT: addi r3, r11, .LCPI17_0@toc@l
+; CHECK-P8-NEXT: addi r11, r11, .LCPI17_0@toc@l
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r11
; CHECK-P8-NEXT: std r8, 136(r1)
; CHECK-P8-NEXT: std r9, 144(r1)
; CHECK-P8-NEXT: std r10, 152(r1)
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: blt cr0, .LBB17_2
; CHECK-P8-NEXT: # %bb.1: # %if.end
; CHECK-P8-NEXT: addi r30, r1, 104
-; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: lxvd2x vs0, 0, r30
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: b .LBB17_3
-; CHECK-P8-NEXT: .LBB17_2:
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: .LBB17_3: # %cleanup
+; CHECK-P8-NEXT: vmr v3, v2
+; CHECK-P8-NEXT: .LBB17_2: # %cleanup
+; CHECK-P8-NEXT: vmr v2, v3
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __divkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-LABEL: testLdNSt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addi r3, r3, 4
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r4, 8
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl sqrtf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
;
; CHECK-P8-LABEL: qpCpsgn:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addi r3, r1, -16
-; CHECK-P8-NEXT: addi r4, r1, -32
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, 0, r4
-; CHECK-P8-NEXT: lbz r3, -1(r1)
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: addi r4, r1, -16
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: addi r3, r1, -32
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lbz r4, -1(r1)
; CHECK-P8-NEXT: lbz r6, -17(r1)
-; CHECK-P8-NEXT: rlwimi r6, r3, 0, 0, 24
+; CHECK-P8-NEXT: rlwimi r6, r4, 0, 0, 24
; CHECK-P8-NEXT: stb r6, -17(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: stvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r5
; CHECK-P8-NEXT: blr
fp128* nocapture %res) {
entry:
;
; CHECK-P8-LABEL: qpAbs:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: clrlwi r5, r5, 25
; CHECK-P8-NEXT: stb r5, -1(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: stvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
;
; CHECK-P8-LABEL: qpNAbs:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -32
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -17(r1)
; CHECK-P8-NEXT: clrlwi r5, r5, 25
; CHECK-P8-NEXT: stb r5, -17(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: xori r5, r5, 128
; CHECK-P8-NEXT: stb r5, -1(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: stvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
;
; CHECK-P8-LABEL: qpNeg:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: xori r5, r5, 128
; CHECK-P8-NEXT: stb r5, -1(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: stvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl sinf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl cosf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl logf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl log10f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl log2f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl fminf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl fmaxf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl powf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl expf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl exp2f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lwz r3, 0(r4)
; CHECK-P8-NEXT: mr r30, r5
; CHECK-P8-NEXT: mr r5, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __powikf2
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addis r4, r2, b@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a@toc@l
; CHECK-P8-NEXT: addi r4, r4, b@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl fmodf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl ceilf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl floorf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl truncf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl roundf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl lroundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl llroundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl rintf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl lrintf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl llrintf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl nearbyintf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v4, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r6
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v4, vs2
; CHECK-P8-NEXT: bl fmaf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __gtkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __ltkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __gekf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __lekf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __eqkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: cntlzw r3, r3
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __gtkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __ltkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __gekf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __lekf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __nekf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: cntlzw r3, r3
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT: lvx v30, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v30, vs1
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: vmr v3, v30
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __gtkf2
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT: lvx v30, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v30, vs1
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: vmr v3, v30
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __ltkf2
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT: lvx v30, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v30, vs1
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: vmr v3, v30
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __gekf2
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT: lvx v30, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v30, vs1
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: vmr v3, v30
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __lekf2
; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT: lvx v30, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v30, vs1
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: vmr v3, v30
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __eqkf2
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatdikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r4, r5
; CHECK-P8-NEXT: bl __floattikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatdikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatdikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: iselgt r3, r4, r3
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatundikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r4, r5
; CHECK-P8-NEXT: bl __floatuntikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatundikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatundikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: clrldi r3, r4, 63
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatdikf
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mr r3, r30
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatundikf
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: mr r3, r30
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: clrldi r3, r3, 32
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: clrldi r3, r3, 32
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r3, r4
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: clrldi r3, r3, 32
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __trunckfdf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4)
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __trunckfdf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stfd f1, 0(r30)
; CHECK-P8-NEXT: addis r4, r2, .LC7@toc@ha
; CHECK-P8-NEXT: mr r29, r3
; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4)
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __trunckfdf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: sldi r3, r30, 3
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __trunckfdf2
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __trunckfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4)
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __trunckfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stfs f1, 0(r30)
; CHECK-P8-NEXT: mr r29, r3
; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 48
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __trunckfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: sldi r3, r30, 2
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __trunckfsf2
; CHECK-P8-NEXT: bl __extenddfkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3)
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: bl __extenddfkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3)
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: mr r29, r3
; CHECK-P8-NEXT: bl __extenddfkf2
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: sldi r3, r30, 4
-; CHECK-P8-NEXT: stvx v2, r29, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r29, r3
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl __extenddfkf2
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: bl __extendsfkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3)
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: bl __extendsfkf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3)
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: mr r29, r3
; CHECK-P8-NEXT: bl __extendsfkf2
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: sldi r3, r30, 4
-; CHECK-P8-NEXT: stvx v2, r29, r3
+; CHECK-P8-NEXT: stxvd2x vs0, r29, r3
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl __extendsfkf2
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: bl __floatdikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: bl __floatdikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: clrldi r3, r3, 32
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: bl __floatundikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: clrldi r3, r3, 32
; CHECK-P8-NEXT: bl __floatunsikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mffprd r3, f0
; CHECK-P8-NEXT: bl __floatundikf
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfti
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixunskfti
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r6
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT: lvx v31, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v31, vs2
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stvx v2, 0, r30
-; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r6
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs1
+; CHECK-P8-NEXT: xxswapd v3, vs2
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stvx v2, 0, r30
-; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: mr r29, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: lvx v3, 0, r29
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: li r7, 64
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r6
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs1
+; CHECK-P8-NEXT: xxswapd v3, vs2
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: addi r3, r1, 48
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 63(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 63(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r3, 64
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
-; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 96
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -80(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: mr r29, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: lvx v3, 0, r29
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: addi r3, r1, 32
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 47(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 47(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r6
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs1
+; CHECK-P8-NEXT: xxswapd v3, vs2
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stvx v2, 0, r30
-; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: mr r29, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: lvx v3, 0, r29
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: li r7, 64
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: lvx v3, 0, r5
; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r6
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT: lvx v31, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs1
+; CHECK-P8-NEXT: xxswapd v3, vs2
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: vmr v2, v31
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: addi r3, r1, 48
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 63(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 63(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r3, 64
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
-; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: addi r1, r1, 96
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -80(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: mr r29, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: lvx v3, 0, r29
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: addi r3, r1, 32
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 47(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 47(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 80
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: blr
entry:
ret fp128 0xL00000000000000004001400000000000
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: sldi r4, r4, 4
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: add r4, r3, r4
; CHECK-P8-NEXT: addi r4, r4, -16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 64
-; CHECK-P8-NEXT: vmr v21, v4
; CHECK-P8-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 80
-; CHECK-P8-NEXT: vmr v22, v5
+; CHECK-P8-NEXT: vmr v22, v4
; CHECK-P8-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 96
-; CHECK-P8-NEXT: vmr v23, v6
+; CHECK-P8-NEXT: vmr v23, v5
; CHECK-P8-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 112
-; CHECK-P8-NEXT: vmr v24, v7
+; CHECK-P8-NEXT: vmr v24, v6
; CHECK-P8-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 128
-; CHECK-P8-NEXT: vmr v25, v8
+; CHECK-P8-NEXT: vmr v25, v7
; CHECK-P8-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 144
-; CHECK-P8-NEXT: vmr v26, v9
+; CHECK-P8-NEXT: vmr v26, v8
; CHECK-P8-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 160
-; CHECK-P8-NEXT: vmr v27, v10
+; CHECK-P8-NEXT: vmr v27, v9
; CHECK-P8-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 176
-; CHECK-P8-NEXT: vmr v28, v11
+; CHECK-P8-NEXT: vmr v28, v10
; CHECK-P8-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 192
+; CHECK-P8-NEXT: vmr v29, v11
; CHECK-P8-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: li r3, 208
; CHECK-P8-NEXT: vmr v30, v12
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: addi r3, r1, 448
; CHECK-P8-NEXT: vmr v31, v13
-; CHECK-P8-NEXT: lvx v29, 0, r3
-; CHECK-P8-NEXT: bl __addkf3
-; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: vmr v3, v21
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v21, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v22
; CHECK-P8-NEXT: vmr v3, v28
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: vmr v3, v29
+; CHECK-P8-NEXT: bl __addkf3
+; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v30
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v31
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: vmr v3, v29
+; CHECK-P8-NEXT: vmr v3, v21
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: li r3, 208
; CHECK-P8-NEXT: .cfi_offset v31, -32
; CHECK-P8-NEXT: li r3, 48
; CHECK-P8-NEXT: add r4, r7, r9
-; CHECK-P8-NEXT: vmr v4, v2
+; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: stfd f31, 72(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: lwz r3, 176(r1)
; CHECK-P8-NEXT: add r3, r4, r3
; CHECK-P8-NEXT: clrldi r3, r3, 32
; CHECK-P8-NEXT: std r3, 0(r6)
-; CHECK-P8-NEXT: lvx v3, 0, r8
-; CHECK-P8-NEXT: vmr v2, v3
-; CHECK-P8-NEXT: vmr v3, v4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r8
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxlor v2, vs0, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: fmr f1, f31
; CHECK-P8-NEXT: .cfi_offset f31, -8
; CHECK-P8-NEXT: .cfi_offset v31, -32
; CHECK-P8-NEXT: add r4, r4, r6
-; CHECK-P8-NEXT: vmr v4, v2
+; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: li r9, 48
; CHECK-P8-NEXT: stfd f31, 72(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: add r4, r4, r7
; CHECK-P8-NEXT: add r4, r4, r8
; CHECK-P8-NEXT: clrldi r4, r4, 32
; CHECK-P8-NEXT: std r4, 0(r3)
-; CHECK-P8-NEXT: lvx v3, 0, r5
-; CHECK-P8-NEXT: vmr v2, v3
-; CHECK-P8-NEXT: vmr v3, v4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxlor v2, vs0, vs0
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: fmr f1, f31
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: ld r4, 184(r1)
; CHECK-P8-NEXT: li r3, 48
-; CHECK-P8-NEXT: stvx v2, 0, r9
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r5
; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill
; CHECK-P8-NEXT: mr r3, r10
-; CHECK-P8-NEXT: stvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v31, 0, r9
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r9
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r9
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: .cfi_offset v31, -32
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: xxswapd vs1, v3
; CHECK-P8-NEXT: li r6, 48
-; CHECK-P8-NEXT: stvx v2, 0, r4
-; CHECK-P8-NEXT: stvx v3, 0, r7
; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: mr r3, r5
; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill
-; CHECK-P8-NEXT: lvx v31, 0, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r7
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v31, vs0
; CHECK-P8-NEXT: bl __floatsikf
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl truncf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl rintf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl nearbyintf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl roundf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl floorf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl ceilf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfdi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfdi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfdi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfdi
; CHECK-P8-NEXT: mr r29, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfdi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: sldi r4, r30, 3
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixunskfdi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixunskfdi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixunskfdi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixunskfdi
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
; CHECK-P8-NEXT: mr r29, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixunskfdi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: sldi r4, r30, 3
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixunskfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixunskfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixunskfsi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixunskfsi
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: sth r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: sth r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: extsw r3, r3
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stb r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: mr r30, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-P8-NEXT: addi r4, r4, 32
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stb r3, 0(r30)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: mr r30, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: bl __fixkfsi
; CHECK-P8-NEXT: mr r30, r5
; CHECK-P8-NEXT: bl __trunctfkf2
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: stvx v2, 0, r30
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l
-; CHECK-NEXT: lvx v3, 0, r3
+; CHECK-NEXT: lxvd2x vs0, 0, r3
+; CHECK-NEXT: xxswapd vs35, vs0
; CHECK-NEXT: xxland vs34, vs34, vs35
; CHECK-NEXT: blr
entry:
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
+; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: addi r3, r1, 32
-; CHECK-P8-NEXT: stvx v4, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 47(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 47(r1)
-; CHECK-P8-NEXT: lvx v4, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v4, vs0
; CHECK-P8-NEXT: bl fmaf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: bl fmaf128
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: addi r3, r1, 32
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 47(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 47(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
+; CHECK-P8-NEXT: xxswapd vs0, v4
; CHECK-P8-NEXT: addi r3, r1, 32
-; CHECK-P8-NEXT: stvx v4, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 47(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 47(r1)
-; CHECK-P8-NEXT: lvx v4, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v4, vs0
; CHECK-P8-NEXT: bl fmaf128
; CHECK-P8-NEXT: nop
+; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: addi r3, r1, 48
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, 63(r1)
; CHECK-P8-NEXT: xori r4, r4, 128
; CHECK-P8-NEXT: stb r4, 63(r1)
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; LE-NEXT: ld 3, -8(1)
; LE-NEXT: addi 3, 3, 15
; LE-NEXT: rldicr 3, 3, 0, 59
-; LE-NEXT: addi 4, 3, 31
-; LE-NEXT: addi 5, 3, 16
+; LE-NEXT: addi 4, 3, 16
+; LE-NEXT: std 4, -8(1)
+; LE-NEXT: ld 4, -8(1)
+; LE-NEXT: lxvd2x 0, 0, 3
+; LE-NEXT: addi 4, 4, 15
; LE-NEXT: rldicr 4, 4, 0, 59
-; LE-NEXT: std 5, -8(1)
+; LE-NEXT: xxswapd 34, 0
; LE-NEXT: addi 5, 4, 16
-; LE-NEXT: lvx 2, 0, 3
; LE-NEXT: std 5, -8(1)
-; LE-NEXT: lvx 3, 0, 4
+; LE-NEXT: lxvd2x 1, 0, 4
+; LE-NEXT: xxswapd 35, 1
; LE-NEXT: blr
%args = alloca i8*, align 4
%x = va_arg i8** %args, <8 x i32>
; P8: # %bb.0: # %entry
; P8-NEXT: addi r4, r4, 12
; P8-NEXT: lfiwzx f0, 0, r4
-; P8-NEXT: xxspltw v2, vs0, 1
-; P8-NEXT: stvx v2, 0, r3
+; P8-NEXT: xxspltw vs0, vs0, 1
+; P8-NEXT: xxswapd vs0, vs0
+; P8-NEXT: stxvd2x vs0, 0, r3
; P8-NEXT: blr
;
; P7-LABEL: test2:
; P8: # %bb.0: # %entry
; P8-NEXT: addi r4, r4, 12
; P8-NEXT: lfiwzx f0, 0, r4
-; P8-NEXT: xxspltw v2, vs0, 1
-; P8-NEXT: stvx v2, 0, r3
+; P8-NEXT: xxspltw vs0, vs0, 1
+; P8-NEXT: xxswapd vs0, vs0
+; P8-NEXT: stxvd2x vs0, 0, r3
; P8-NEXT: blr
;
; P7-LABEL: test3:
; P8-NEXT: lhzx r4, 0, r4
; P8-NEXT: mtvsrwz v2, r4
; P8-NEXT: vsplth v2, v2, 3
-; P8-NEXT: stvx v2, 0, r3
+; P8-NEXT: xxswapd vs0, v2
+; P8-NEXT: stxvd2x vs0, 0, r3
; P8-NEXT: blr
;
; P7-LABEL: test7:
; P8-NEXT: lbzx r4, 0, r4
; P8-NEXT: mtvsrwz v2, r4
; P8-NEXT: vspltb v2, v2, 7
-; P8-NEXT: stvx v2, 0, r3
+; P8-NEXT: xxswapd vs0, v2
+; P8-NEXT: stxvd2x vs0, 0, r3
; P8-NEXT: blr
;
; P7-LABEL: test8:
;
; P8-LABEL: unadjusted_lxvwsx_v16i8:
; P8: # %bb.0: # %entry
-; P8-NEXT: lvx v2, 0, r3
+; P8-NEXT: lxvd2x vs0, 0, r3
+; P8-NEXT: xxswapd v2, vs0
; P8-NEXT: xxspltw v2, v2, 3
; P8-NEXT: blr
;
;
; P8-LABEL: adjusted_lxvwsx_v16i8:
; P8: # %bb.0: # %entry
-; P8-NEXT: lvx v2, 0, r3
+; P8-NEXT: lxvd2x vs0, 0, r3
+; P8-NEXT: xxswapd v2, vs0
; P8-NEXT: xxspltw v2, v2, 2
; P8-NEXT: blr
;
;
; P8-LABEL: adjusted_lxvwsx_v16i8_2:
; P8: # %bb.0: # %entry
-; P8-NEXT: lvx v2, 0, r3
+; P8-NEXT: lxvd2x vs0, 0, r3
+; P8-NEXT: xxswapd v2, vs0
; P8-NEXT: xxspltw v2, v2, 1
; P8-NEXT: blr
;
;
; P8-LABEL: adjusted_lxvwsx_v16i8_3:
; P8: # %bb.0: # %entry
-; P8-NEXT: lvx v2, 0, r3
+; P8-NEXT: lxvd2x vs0, 0, r3
+; P8-NEXT: xxswapd v2, vs0
; P8-NEXT: xxspltw v2, v2, 0
; P8-NEXT: blr
;
; CHECK-P8-LABEL: load_swap10:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap10:
; CHECK-P8-LABEL: load_swap11:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap11:
; CHECK-P8-LABEL: load_swap20:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap20:
; CHECK-P8-LABEL: load_swap21:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap21:
; CHECK-P8-LABEL: load_swap30:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap30:
; CHECK-P8-LABEL: load_swap31:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap31:
; CHECK-P8-LABEL: load_swap50:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r4, r2, .LCPI9_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI9_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap50:
; CHECK-P8-LABEL: load_swap51:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap51:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store10:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store11:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store20:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI16_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI16_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store21:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI17_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI17_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store30:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI18_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI18_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store31:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store50:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store51:
; LE-PWR8-LABEL: testUnalignedLdSt:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: addis r3, r2, f@toc@ha
-; LE-PWR8-NEXT: li r4, 59
-; LE-PWR8-NEXT: li r5, 43
+; LE-PWR8-NEXT: li r4, 43
+; LE-PWR8-NEXT: li r5, 59
; LE-PWR8-NEXT: addi r3, r3, f@toc@l
; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
; LE-PWR8-NEXT: li r4, 11
; LE-PWR8-NEXT: li r5, 27
; LE-PWR8-NEXT: lxvd2x vs2, r3, r4
; LE-PWR8-NEXT: lxvd2x vs3, r3, r5
-; LE-PWR8-NEXT: li r4, 51
-; LE-PWR8-NEXT: li r5, 67
+; LE-PWR8-NEXT: li r4, 67
+; LE-PWR8-NEXT: li r5, 51
; LE-PWR8-NEXT: stxvd2x vs1, r3, r4
; LE-PWR8-NEXT: li r4, 35
; LE-PWR8-NEXT: stxvd2x vs0, r3, r5
; LE-PWR8-LABEL: testUnalignedLdStPair:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
-; LE-PWR8-NEXT: li r4, 27
-; LE-PWR8-NEXT: li r5, 11
-; LE-PWR8-NEXT: li r6, 19
-; LE-PWR8-NEXT: li r8, 35
+; LE-PWR8-NEXT: li r4, 11
+; LE-PWR8-NEXT: li r5, 27
; LE-PWR8-NEXT: addi r3, r3, g@toc@l
; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
-; LE-PWR8-NEXT: ldx r5, r3, r5
-; LE-PWR8-NEXT: ldx r7, r3, r6
-; LE-PWR8-NEXT: stdx r7, r3, r4
-; LE-PWR8-NEXT: stdx r5, r3, r6
-; LE-PWR8-NEXT: stxvd2x vs0, r3, r8
+; LE-PWR8-NEXT: lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT: li r4, 35
+; LE-PWR8-NEXT: li r5, 19
+; LE-PWR8-NEXT: stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT: stxvd2x vs0, r3, r5
; LE-PWR8-NEXT: blr
;
; BE-PWR9-LABEL: testUnalignedLdStPair:
; CHECK-NEXT: xvcvdpsp 35, 1
; CHECK-NEXT: vmrgew 2, 2, 3
; CHECK-NEXT: .loc 1 3 9 is_stmt 0
-; CHECK-NEXT: stvx 2, 0, 4
+; CHECK-NEXT: xxswapd 0, 34
+; CHECK-NEXT: stxvd2x 0, 0, 4
; CHECK-NEXT: .loc 1 4 1 is_stmt 1
; CHECK-NEXT: blr
entry:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob11PtrPlus0:
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha
; CHECK-P8-LE-NEXT: li r4, 3
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob11PtrPlus3:
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha
; CHECK-P8-LE-NEXT: li r4, 4
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob11PtrPlus4:
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha
; CHECK-P8-LE-NEXT: li r4, 16
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob11PtrPlus16:
; CHECK-P8-LE-NEXT: addis r4, r2, GlobLd11@toc@ha
; CHECK-P8-LE-NEXT: sldi r3, r3, 4
; CHECK-P8-LE-NEXT: addi r4, r4, GlobLd11@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r4, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r4, r3
; CHECK-P8-LE-NEXT: addis r4, r2, GlobSt11@toc@ha
; CHECK-P8-LE-NEXT: addi r4, r4, GlobSt11@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r4, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r4, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob11PtrPlusVar:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob12PtrPlus0:
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha
; CHECK-P8-LE-NEXT: li r4, 3
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob12PtrPlus3:
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha
; CHECK-P8-LE-NEXT: li r4, 4
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob12PtrPlus4:
; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha
; CHECK-P8-LE-NEXT: li r4, 16
; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha
; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob12PtrPlus16:
; CHECK-P8-LE-NEXT: addis r4, r2, GlobLd12@toc@ha
; CHECK-P8-LE-NEXT: sldi r3, r3, 4
; CHECK-P8-LE-NEXT: addi r4, r4, GlobLd12@toc@l
-; CHECK-P8-LE-NEXT: lvx v2, r4, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r4, r3
; CHECK-P8-LE-NEXT: addis r4, r2, GlobSt12@toc@ha
; CHECK-P8-LE-NEXT: addi r4, r4, GlobSt12@toc@l
-; CHECK-P8-LE-NEXT: stvx v2, r4, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r4, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: testGlob12PtrPlusVar:
ret <1 x i128> %ret
; CHECK-LE-LABEL: @call_v1i128_increment_by_one
-; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: xxswapd 34, [[VAL]]
; CHECK-LE: bl v1i128_increment_by_one
; CHECK-LE: blr
ret <1 x i128> %ret
; CHECK-LE-LABEL: @call_v1i128_increment_by_val
-; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lxvd2x [[VAL1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE-DAG: lxvd2x [[VAL2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE-DAG: xxswapd 34, [[VAL1]]
+; CHECK-LE: xxswapd 35, [[VAL2]]
; CHECK-LE: bl v1i128_increment_by_val
; CHECK-LE: blr
; LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; LE-NEXT: xxlxor 37, 37, 37
; LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
-; LE-NEXT: lvx 4, 0, 3
+; LE-NEXT: lxvd2x 0, 0, 3
+; LE-NEXT: xxswapd 36, 0
; LE-NEXT: xxland 34, 34, 36
; LE-NEXT: xxland 35, 35, 36
; LE-NEXT: vcmpequw 2, 2, 5
; LE-NEXT: vmrghh 4, 1, 4
; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; LE-NEXT: vmrghh 3, 3, 6
+; LE-NEXT: lxvd2x 2, 0, 3
; LE-NEXT: vmrghh 5, 0, 5
; LE-NEXT: xxmrglw 0, 36, 34
; LE-NEXT: vspltish 4, 15
; LE-NEXT: xxmrglw 1, 37, 35
-; LE-NEXT: lvx 3, 0, 3
+; LE-NEXT: xxswapd 35, 2
; LE-NEXT: xxmrgld 34, 1, 0
; LE-NEXT: xxlor 34, 34, 35
; LE-NEXT: vslh 2, 2, 4
; CHECK-P8-NEXT: addis 3, 2, .LCPI12_0@toc@ha
; CHECK-P8-NEXT: addis 4, 2, .LCPI12_1@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI12_0@toc@l
-; CHECK-P8-NEXT: xvmulsp 1, 35, 0
-; CHECK-P8-NEXT: lvx 3, 0, 3
+; CHECK-P8-NEXT: lxvd2x 1, 0, 3
; CHECK-P8-NEXT: addi 3, 4, .LCPI12_1@toc@l
-; CHECK-P8-NEXT: lvx 4, 0, 3
-; CHECK-P8-NEXT: xvmaddasp 35, 1, 0
-; CHECK-P8-NEXT: xvmulsp 0, 0, 36
+; CHECK-P8-NEXT: lxvd2x 3, 0, 3
+; CHECK-P8-NEXT: xxswapd 1, 1
+; CHECK-P8-NEXT: xvmulsp 2, 35, 0
+; CHECK-P8-NEXT: xxswapd 35, 3
+; CHECK-P8-NEXT: xvmaddasp 1, 2, 0
; CHECK-P8-NEXT: xvmulsp 0, 0, 35
+; CHECK-P8-NEXT: xvmulsp 0, 0, 1
; CHECK-P8-NEXT: xvmulsp 34, 34, 0
; CHECK-P8-NEXT: blr
;
; CHECK-P8-NEXT: addis 3, 2, .LCPI25_0@toc@ha
; CHECK-P8-NEXT: addis 4, 2, .LCPI25_1@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI25_0@toc@l
-; CHECK-P8-NEXT: xvmulsp 1, 34, 0
-; CHECK-P8-NEXT: lvx 2, 0, 3
+; CHECK-P8-NEXT: lxvd2x 1, 0, 3
; CHECK-P8-NEXT: addi 3, 4, .LCPI25_1@toc@l
-; CHECK-P8-NEXT: lvx 3, 0, 3
-; CHECK-P8-NEXT: xvmaddasp 34, 1, 0
-; CHECK-P8-NEXT: xvmulsp 0, 1, 35
-; CHECK-P8-NEXT: xvmulsp 34, 0, 34
+; CHECK-P8-NEXT: lxvd2x 3, 0, 3
+; CHECK-P8-NEXT: xxswapd 1, 1
+; CHECK-P8-NEXT: xvmulsp 2, 34, 0
+; CHECK-P8-NEXT: xxswapd 34, 3
+; CHECK-P8-NEXT: xvmaddasp 1, 2, 0
+; CHECK-P8-NEXT: xvmulsp 0, 2, 34
+; CHECK-P8-NEXT: xvmulsp 34, 0, 1
; CHECK-P8-NEXT: blr
; CHECK-P8-NEXT: .LBB25_2:
; CHECK-P8-NEXT: xvsqrtsp 34, 34
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT: xxswapd 35, 1
+; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: xxspltw 0, 0, 0
; CHECK-NEXT: xvdivsp 0, 35, 0
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 1
; CHECK-NEXT: xvmulsp 1, 34, 35
; CHECK-NEXT: xvmulsp 34, 1, 0
; CHECK-NEXT: blr
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l
-; CHECK-NEXT: lvx 4, 0, 3
+; CHECK-NEXT: xxswapd 1, 1
; CHECK-NEXT: xxspltw 0, 0, 0
-; CHECK-NEXT: xvresp 1, 0
-; CHECK-NEXT: xvmaddasp 35, 0, 1
-; CHECK-NEXT: xvmulsp 0, 34, 36
-; CHECK-NEXT: xvnmsubasp 1, 1, 35
-; CHECK-NEXT: xvmulsp 34, 0, 1
+; CHECK-NEXT: xvresp 2, 0
+; CHECK-NEXT: xvmaddasp 1, 0, 2
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: xvnmsubasp 2, 2, 1
+; CHECK-NEXT: xvmulsp 0, 34, 35
+; CHECK-NEXT: xvmulsp 34, 0, 2
; CHECK-NEXT: blr
%ins = insertelement <4 x float> undef, float %a, i32 0
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI24_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI24_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI24_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI24_1@toc@l
+; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminub 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vaddubm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI25_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI25_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vaddubs 2, 2, 3
; CHECK-NEXT: blr
%a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI26_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI26_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vaddubs 2, 2, 3
; CHECK-NEXT: blr
%a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI27_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI27_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI27_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI27_1@toc@l
+; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminuh 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduhm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI28_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI28_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduhs 2, 2, 3
; CHECK-NEXT: blr
%a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI29_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI29_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduhs 2, 2, 3
; CHECK-NEXT: blr
%a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI30_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI30_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI30_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI30_1@toc@l
+; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminuw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI31_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI31_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduws 2, 2, 3
; CHECK-NEXT: blr
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI32_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI32_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduws 2, 2, 3
; CHECK-NEXT: blr
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
-; P8LE-NEXT: lxsiwzx v4, 0, r3
+; P8LE-NEXT: lxsiwzx v3, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; P8LE-NEXT: lvx v3, 0, r4
-; P8LE-NEXT: vperm v2, v2, v4, v3
+; P8LE-NEXT: lxvd2x vs0, 0, r4
+; P8LE-NEXT: xxswapd v4, vs0
+; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test1:
; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addi r4, r4, .LCPI1_0@toc@l
-; P8LE-NEXT: lxsiwzx v4, 0, r3
-; P8LE-NEXT: lvx v3, 0, r4
-; P8LE-NEXT: vperm v2, v2, v4, v3
+; P8LE-NEXT: lxsiwzx v3, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r4
+; P8LE-NEXT: xxswapd v4, vs0
+; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test2:
; P8LE-LABEL: s2v_test3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; P8LE-NEXT: sldi r5, r7, 2
; P8LE-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; P8LE-NEXT: lxsiwzx v3, r3, r5
-; P8LE-NEXT: lvx v4, 0, r4
+; P8LE-NEXT: lxvd2x vs0, 0, r4
+; P8LE-NEXT: sldi r4, r7, 2
+; P8LE-NEXT: lxsiwzx v3, r3, r4
+; P8LE-NEXT: xxswapd v4, vs0
; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addi r4, r4, .LCPI3_0@toc@l
-; P8LE-NEXT: lxsiwzx v4, 0, r3
-; P8LE-NEXT: lvx v3, 0, r4
-; P8LE-NEXT: vperm v2, v2, v4, v3
+; P8LE-NEXT: lxsiwzx v3, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r4
+; P8LE-NEXT: xxswapd v4, vs0
+; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test4:
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
-; P8LE-NEXT: lxsiwzx v4, 0, r5
+; P8LE-NEXT: lxsiwzx v3, 0, r5
; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
-; P8LE-NEXT: lvx v3, 0, r3
-; P8LE-NEXT: vperm v2, v2, v4, v3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v4, vs0
+; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test5:
; P8LE-LABEL: s2v_test_f1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
-; P8LE-NEXT: lxsiwzx v4, 0, r3
+; P8LE-NEXT: lxsiwzx v3, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI5_0@toc@l
-; P8LE-NEXT: lvx v3, 0, r4
-; P8LE-NEXT: vperm v2, v2, v4, v3
+; P8LE-NEXT: lxvd2x vs0, 0, r4
+; P8LE-NEXT: xxswapd v4, vs0
+; P8LE-NEXT: vperm v2, v2, v3, v4
; P8LE-NEXT: blr
;
; P8BE-LABEL: s2v_test_f1:
; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
; CHECK-NEXT: vcmpgtsw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vsubuwm 2, 3, 2
; CHECK-NEXT: blr
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
; CHECK-NEXT: vcmpgtsw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsraw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsrw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsraw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: std 0, 16(1)
; CHECK-PWR8-NEXT: stdu 1, -48(1)
-; CHECK-PWR8-NEXT: lvx 2, 0, 3
+; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3
; CHECK-PWR8-NEXT: mr 30, 4
+; CHECK-PWR8-NEXT: xxswapd 2, 0
; CHECK-PWR8-NEXT: bl __fixkfdi
; CHECK-PWR8-NEXT: nop
; CHECK-PWR8-NEXT: std 3, 0(30)
; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: std 0, 16(1)
; CHECK-PWR8-NEXT: stdu 1, -48(1)
-; CHECK-PWR8-NEXT: lvx 2, 0, 3
+; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3
; CHECK-PWR8-NEXT: mr 30, 4
+; CHECK-PWR8-NEXT: xxswapd 2, 0
; CHECK-PWR8-NEXT: bl __fixkfsi
; CHECK-PWR8-NEXT: nop
; CHECK-PWR8-NEXT: stw 3, 0(30)
; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: std 0, 16(1)
; CHECK-PWR8-NEXT: stdu 1, -48(1)
-; CHECK-PWR8-NEXT: lvx 2, 0, 3
+; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3
; CHECK-PWR8-NEXT: mr 30, 4
+; CHECK-PWR8-NEXT: xxswapd 2, 0
; CHECK-PWR8-NEXT: bl __fixunskfdi
; CHECK-PWR8-NEXT: nop
; CHECK-PWR8-NEXT: std 3, 0(30)
; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: std 0, 16(1)
; CHECK-PWR8-NEXT: stdu 1, -48(1)
-; CHECK-PWR8-NEXT: lvx 2, 0, 3
+; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3
; CHECK-PWR8-NEXT: mr 30, 4
+; CHECK-PWR8-NEXT: xxswapd 2, 0
; CHECK-PWR8-NEXT: bl __fixunskfsi
; CHECK-PWR8-NEXT: nop
; CHECK-PWR8-NEXT: stw 3, 0(30)
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI0_0@toc@l
-; CHECK-LE-P7-NEXT: lvx v3, 0, r4
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
; CHECK-LE-P7-NEXT: stw r3, -32(r1)
; CHECK-LE-P7-NEXT: addi r3, r1, -32
-; CHECK-LE-P7-NEXT: lvx v4, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v3, vs0
+; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v4, vs1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI1_0@toc@l
-; CHECK-LE-P7-NEXT: lvx v3, 0, r4
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
; CHECK-LE-P7-NEXT: stw r3, -32(r1)
; CHECK-LE-P7-NEXT: addi r3, r1, -32
-; CHECK-LE-P7-NEXT: lvx v4, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v3, vs0
+; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v4, vs1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test2:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-LE-P7-NEXT: lvx v3, 0, r4
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
; CHECK-LE-P7-NEXT: stw r3, -32(r1)
; CHECK-LE-P7-NEXT: addi r3, r1, -32
-; CHECK-LE-P7-NEXT: lvx v4, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v3, vs0
+; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v4, vs1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test3:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-LE-P7-NEXT: addi r3, r1, -4
; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI3_0@toc@l
-; CHECK-LE-P7-NEXT: lvx v3, 0, r4
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
; CHECK-LE-P7-NEXT: stw r3, -32(r1)
; CHECK-LE-P7-NEXT: addi r3, r1, -32
-; CHECK-LE-P7-NEXT: lvx v4, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v3, vs0
+; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT: xxswapd v4, vs1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
; CHECK-LE-P8-LABEL: test4:
; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P8-NEXT: lvx v4, 0, r3
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT: blr
;
; CHECK-P8-NEXT: addis 3, 2, .LC0@toc@ha
; CHECK-P8-NEXT: ld 3, .LC0@toc@l(3)
; CHECK-P8-NEXT: addi 3, 3, 32
-; CHECK-P8-NEXT: lvx 2, 0, 3
+; CHECK-P8-NEXT: lxvd2x 0, 0, 3
+; CHECK-P8-NEXT: xxswapd 34, 0
; CHECK-P8-NEXT: blr
entry:
%0 = load <4 x i32>, <4 x i32>* getelementptr inbounds ([10 x <4 x i32>], [10 x <4 x i32>]* @vec_arr, i64 0, i64 2), align 16
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: addis 6, 2, .LCPI4_0@toc@ha
; PPC64LE-NEXT: mtfprwz 0, 3
-; PPC64LE-NEXT: mtfprwz 1, 4
-; PPC64LE-NEXT: addi 3, 6, .LCPI4_0@toc@l
-; PPC64LE-NEXT: addis 4, 2, .LCPI4_2@toc@ha
-; PPC64LE-NEXT: lvx 2, 0, 3
-; PPC64LE-NEXT: mtvsrwz 36, 5
; PPC64LE-NEXT: addis 3, 2, .LCPI4_1@toc@ha
-; PPC64LE-NEXT: xxmrghw 35, 1, 0
+; PPC64LE-NEXT: addi 6, 6, .LCPI4_0@toc@l
+; PPC64LE-NEXT: mtfprwz 2, 4
; PPC64LE-NEXT: addi 3, 3, .LCPI4_1@toc@l
-; PPC64LE-NEXT: vperm 2, 4, 3, 2
-; PPC64LE-NEXT: vspltisw 3, -11
-; PPC64LE-NEXT: lvx 4, 0, 3
+; PPC64LE-NEXT: addis 4, 2, .LCPI4_2@toc@ha
+; PPC64LE-NEXT: lxvd2x 1, 0, 6
+; PPC64LE-NEXT: mtvsrwz 36, 5
+; PPC64LE-NEXT: xxmrghw 34, 2, 0
+; PPC64LE-NEXT: lxvd2x 0, 0, 3
; PPC64LE-NEXT: addi 3, 4, .LCPI4_2@toc@l
; PPC64LE-NEXT: addis 4, 2, .LCPI4_4@toc@ha
-; PPC64LE-NEXT: lvx 5, 0, 3
-; PPC64LE-NEXT: addis 3, 2, .LCPI4_3@toc@ha
; PPC64LE-NEXT: addi 4, 4, .LCPI4_4@toc@l
+; PPC64LE-NEXT: xxswapd 35, 1
+; PPC64LE-NEXT: lxvd2x 1, 0, 3
+; PPC64LE-NEXT: addis 3, 2, .LCPI4_3@toc@ha
; PPC64LE-NEXT: addi 3, 3, .LCPI4_3@toc@l
-; PPC64LE-NEXT: vsrw 3, 3, 3
-; PPC64LE-NEXT: vsubuwm 2, 2, 4
-; PPC64LE-NEXT: lvx 4, 0, 3
+; PPC64LE-NEXT: vperm 2, 4, 2, 3
+; PPC64LE-NEXT: vspltisw 3, -11
+; PPC64LE-NEXT: xxswapd 36, 0
+; PPC64LE-NEXT: xxswapd 37, 1
+; PPC64LE-NEXT: lxvd2x 0, 0, 3
+; PPC64LE-NEXT: lxvd2x 1, 0, 4
; PPC64LE-NEXT: addis 3, 2, .LCPI4_5@toc@ha
; PPC64LE-NEXT: addi 3, 3, .LCPI4_5@toc@l
+; PPC64LE-NEXT: vsrw 3, 3, 3
+; PPC64LE-NEXT: vsubuwm 2, 2, 4
+; PPC64LE-NEXT: xxswapd 36, 0
+; PPC64LE-NEXT: lxvd2x 0, 0, 3
; PPC64LE-NEXT: vmuluwm 2, 2, 5
-; PPC64LE-NEXT: lvx 5, 0, 4
+; PPC64LE-NEXT: xxswapd 37, 1
; PPC64LE-NEXT: xxland 32, 34, 35
; PPC64LE-NEXT: vslw 2, 2, 4
; PPC64LE-NEXT: vsrw 4, 0, 5
-; PPC64LE-NEXT: xxlor 0, 36, 34
-; PPC64LE-NEXT: lvx 2, 0, 3
-; PPC64LE-NEXT: xxland 35, 0, 35
+; PPC64LE-NEXT: xxlor 1, 36, 34
+; PPC64LE-NEXT: xxswapd 34, 0
+; PPC64LE-NEXT: xxland 35, 1, 35
; PPC64LE-NEXT: vcmpgtuw 2, 3, 2
; PPC64LE-NEXT: xxswapd 0, 34
; PPC64LE-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: vadduhm 2, 2, 3
-; CHECK-P8-NEXT: vspltish 4, 1
+; CHECK-P8-NEXT: vspltish 3, 1
; CHECK-P8-NEXT: addi 3, 3, .LCPI6_0@toc@l
-; CHECK-P8-NEXT: lvx 3, 0, 3
-; CHECK-P8-NEXT: vadduhm 2, 2, 3
-; CHECK-P8-NEXT: vsrah 2, 2, 4
+; CHECK-P8-NEXT: lxvd2x 0, 0, 3
+; CHECK-P8-NEXT: xxswapd 36, 0
+; CHECK-P8-NEXT: vadduhm 2, 2, 4
+; CHECK-P8-NEXT: vsrah 2, 2, 3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: test_v8i16_sign_negative:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; CHECK-P7-NEXT: vadduhm 2, 2, 3
-; CHECK-P7-NEXT: vspltish 4, 1
+; CHECK-P7-NEXT: vspltish 3, 1
; CHECK-P7-NEXT: addi 3, 3, .LCPI6_0@toc@l
-; CHECK-P7-NEXT: lvx 3, 0, 3
-; CHECK-P7-NEXT: vadduhm 2, 2, 3
-; CHECK-P7-NEXT: vsrah 2, 2, 4
+; CHECK-P7-NEXT: lxvd2x 0, 0, 3
+; CHECK-P7-NEXT: xxswapd 36, 0
+; CHECK-P7-NEXT: vadduhm 2, 2, 4
+; CHECK-P7-NEXT: vsrah 2, 2, 3
; CHECK-P7-NEXT: blr
entry:
%add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 -1, i16 1, i16 1, i16 1, i16 1>
; CHECK_LE-NEXT: xxswapd 35, 34
; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
-; CHECK_LE-NEXT: vcmpequw 2, 2, 3
-; CHECK_LE-NEXT: lvx 3, 0, 3
+; CHECK_LE-NEXT: lxvd2x 0, 0, 3
; CHECK_LE-NEXT: addi 3, 1, -16
+; CHECK_LE-NEXT: vcmpequw 2, 2, 3
+; CHECK_LE-NEXT: xxswapd 35, 0
; CHECK_LE-NEXT: vperm 3, 2, 2, 3
-; CHECK_LE-NEXT: xxland 34, 35, 34
-; CHECK_LE-NEXT: stvx 2, 0, 3
+; CHECK_LE-NEXT: xxland 0, 35, 34
+; CHECK_LE-NEXT: xxswapd 0, 0
+; CHECK_LE-NEXT: stxvd2x 0, 0, 3
; CHECK_LE-NEXT: ld 3, -16(1)
; CHECK_LE-NEXT: blr
;
; CHECK_LE-NEXT: xxswapd 35, 34
; CHECK_LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK_LE-NEXT: addi 3, 3, .LCPI1_0@toc@l
-; CHECK_LE-NEXT: vcmpequw 2, 2, 3
-; CHECK_LE-NEXT: lvx 3, 0, 3
+; CHECK_LE-NEXT: lxvd2x 0, 0, 3
; CHECK_LE-NEXT: addi 3, 1, -16
+; CHECK_LE-NEXT: vcmpequw 2, 2, 3
+; CHECK_LE-NEXT: xxswapd 35, 0
; CHECK_LE-NEXT: vperm 3, 2, 2, 3
-; CHECK_LE-NEXT: xxland 34, 35, 34
-; CHECK_LE-NEXT: stvx 2, 0, 3
+; CHECK_LE-NEXT: xxland 0, 35, 34
+; CHECK_LE-NEXT: xxswapd 0, 0
+; CHECK_LE-NEXT: stxvd2x 0, 0, 3
; CHECK_LE-NEXT: ld 3, -16(1)
; CHECK_LE-NEXT: blr
;
;
; CHECK_LE-LABEL: cmpeq:
; CHECK_LE: # %bb.0: # %entry
-; CHECK_LE-NEXT: vcmpequw 2, 2, 3
; CHECK_LE-NEXT: addis 3, 2, .LCPI2_0@toc@ha
+; CHECK_LE-NEXT: vcmpequw 2, 2, 3
; CHECK_LE-NEXT: addi 3, 3, .LCPI2_0@toc@l
-; CHECK_LE-NEXT: lvx 3, 0, 3
+; CHECK_LE-NEXT: lxvd2x 0, 0, 3
+; CHECK_LE-NEXT: xxswapd 35, 0
; CHECK_LE-NEXT: vperm 3, 2, 2, 3
; CHECK_LE-NEXT: xxland 34, 35, 34
; CHECK_LE-NEXT: blr
;
; CHECK_LE-LABEL: cmpne:
; CHECK_LE: # %bb.0: # %entry
-; CHECK_LE-NEXT: vcmpequw 2, 2, 3
; CHECK_LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha
+; CHECK_LE-NEXT: vcmpequw 2, 2, 3
; CHECK_LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
-; CHECK_LE-NEXT: lvx 3, 0, 3
+; CHECK_LE-NEXT: lxvd2x 0, 0, 3
+; CHECK_LE-NEXT: xxswapd 35, 0
; CHECK_LE-NEXT: xxlnor 34, 34, 34
; CHECK_LE-NEXT: vperm 3, 2, 2, 3
; CHECK_LE-NEXT: xxlor 34, 35, 34
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI0_2@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI0_1@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI0_3@toc@ha
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l
-; CHECK-P8-NEXT: addi r6, r6, .LCPI0_2@toc@l
-; CHECK-P8-NEXT: addi r4, r4, .LCPI0_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: addis r5, r2, .LCPI0_3@toc@ha
-; CHECK-P8-NEXT: lvx v5, 0, r6
-; CHECK-P8-NEXT: lvx v1, 0, r4
+; CHECK-P8-NEXT: addi r4, r4, .LCPI0_3@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: addi r5, r6, .LCPI0_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: addi r5, r5, .LCPI0_3@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI0_1@toc@ha
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: addi r5, r5, .LCPI0_1@toc@l
+; CHECK-P8-NEXT: lxvd2x vs4, 0, r5
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: li r5, 32
-; CHECK-P8-NEXT: vperm v2, v4, v3, v2
-; CHECK-P8-NEXT: vperm v5, v4, v3, v5
-; CHECK-P8-NEXT: vperm v0, v4, v3, v0
-; CHECK-P8-NEXT: vperm v3, v4, v3, v1
-; CHECK-P8-NEXT: xvcvuxddp vs0, v2
-; CHECK-P8-NEXT: xvcvuxddp vs1, v5
+; CHECK-P8-NEXT: xxswapd v5, vs2
+; CHECK-P8-NEXT: xxswapd v1, vs4
+; CHECK-P8-NEXT: vperm v0, v4, v2, v0
+; CHECK-P8-NEXT: vperm v3, v4, v2, v3
+; CHECK-P8-NEXT: vperm v5, v4, v2, v5
+; CHECK-P8-NEXT: vperm v2, v4, v2, v1
; CHECK-P8-NEXT: xvcvuxddp vs2, v0
-; CHECK-P8-NEXT: xvcvuxddp vs3, v3
+; CHECK-P8-NEXT: xvcvuxddp vs0, v3
+; CHECK-P8-NEXT: xvcvuxddp vs1, v5
+; CHECK-P8-NEXT: xvcvuxddp vs3, v2
+; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
-; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: addi r5, r6, .LCPI1_1@toc@l
-; CHECK-P8-NEXT: lvx v5, 0, r5
-; CHECK-P8-NEXT: vperm v2, v4, v3, v2
-; CHECK-P8-NEXT: vperm v3, v4, v3, v5
-; CHECK-P8-NEXT: xvcvuxddp vs0, v2
-; CHECK-P8-NEXT: xvcvuxddp vs1, v3
+; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: li r4, 16
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v5, vs2
+; CHECK-P8-NEXT: vperm v3, v4, v2, v3
+; CHECK-P8-NEXT: vperm v2, v4, v2, v5
+; CHECK-P8-NEXT: xvcvuxddp vs0, v3
+; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-LABEL: test2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: vperm v2, v4, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxddp vs0, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l
+; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: addi r5, r6, .LCPI3_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI3_4@toc@ha
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: addis r5, r2, .LCPI3_3@toc@ha
-; CHECK-P8-NEXT: lvx v4, 0, r6
-; CHECK-P8-NEXT: addis r6, r2, .LCPI3_4@toc@ha
+; CHECK-P8-NEXT: addi r5, r5, .LCPI3_4@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: addi r5, r5, .LCPI3_3@toc@l
-; CHECK-P8-NEXT: lvx v5, 0, r5
-; CHECK-P8-NEXT: addi r5, r6, .LCPI3_4@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r5
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs4, 0, r5
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v5, vs3
; CHECK-P8-NEXT: li r5, 32
-; CHECK-P8-NEXT: vperm v4, v3, v3, v4
-; CHECK-P8-NEXT: vperm v5, v3, v3, v5
-; CHECK-P8-NEXT: vperm v3, v3, v3, v0
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: xxswapd v0, vs4
+; CHECK-P8-NEXT: vperm v3, v2, v2, v3
+; CHECK-P8-NEXT: vperm v4, v2, v2, v4
+; CHECK-P8-NEXT: vperm v5, v2, v2, v5
+; CHECK-P8-NEXT: vperm v2, v2, v2, v0
; CHECK-P8-NEXT: xxswapd v0, vs0
-; CHECK-P8-NEXT: vsld v2, v2, v0
+; CHECK-P8-NEXT: vsld v3, v3, v0
; CHECK-P8-NEXT: vsld v4, v4, v0
; CHECK-P8-NEXT: vsld v5, v5, v0
-; CHECK-P8-NEXT: vsld v3, v3, v0
-; CHECK-P8-NEXT: vsrad v2, v2, v0
+; CHECK-P8-NEXT: vsld v2, v2, v0
; CHECK-P8-NEXT: vsrad v3, v3, v0
+; CHECK-P8-NEXT: vsrad v2, v2, v0
; CHECK-P8-NEXT: vsrad v4, v4, v0
; CHECK-P8-NEXT: vsrad v5, v5, v0
-; CHECK-P8-NEXT: xvcvsxddp vs2, v3
-; CHECK-P8-NEXT: xvcvsxddp vs0, v2
+; CHECK-P8-NEXT: xvcvsxddp vs2, v2
+; CHECK-P8-NEXT: xvcvsxddp vs0, v3
; CHECK-P8-NEXT: xvcvsxddp vs1, v5
; CHECK-P8-NEXT: xvcvsxddp vs3, v4
; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI4_2@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
+; CHECK-P8-NEXT: addi r4, r6, .LCPI4_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: addi r5, r6, .LCPI4_2@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v4, 0, r5
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: vperm v3, v3, v3, v4
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: vperm v3, v2, v2, v3
+; CHECK-P8-NEXT: vperm v2, v2, v2, v4
; CHECK-P8-NEXT: xxswapd v4, vs0
-; CHECK-P8-NEXT: vsld v2, v2, v4
; CHECK-P8-NEXT: vsld v3, v3, v4
-; CHECK-P8-NEXT: vsrad v2, v2, v4
+; CHECK-P8-NEXT: vsld v2, v2, v4
; CHECK-P8-NEXT: vsrad v3, v3, v4
-; CHECK-P8-NEXT: xvcvsxddp vs0, v2
-; CHECK-P8-NEXT: xvcvsxddp vs1, v3
+; CHECK-P8-NEXT: vsrad v2, v2, v4
+; CHECK-P8-NEXT: xvcvsxddp vs0, v3
+; CHECK-P8-NEXT: xvcvsxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
; CHECK-P8-LABEL: stest2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: vperm v2, v2, v2, v3
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vsld v2, v2, v3
; CHECK-P8-NEXT: vsrad v2, v2, v3
define void @test8i8(<8 x i8>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx v2, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
+; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vpkuhum v2, v2, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stfdx f0, 0, r3
define void @test4i8(<4 x i8>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx v2, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
+; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vpkuhum v2, v2, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-LABEL: test4i8w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-NEXT: lvx v3, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-NEXT: lvx v2, 0, r5
-; CHECK-NEXT: vperm v2, v3, v3, v2
+; CHECK-NEXT: lxvd2x vs1, 0, r5
+; CHECK-NEXT: xxswapd v2, vs0
+; CHECK-NEXT: xxswapd v3, vs1
+; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx v2, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
+; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vpkuhum v2, v2, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprd r4, f0
define void @test4i16(<4 x i16>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx v2, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
+; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vpkuwum v2, v2, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stfdx f0, 0, r3
define void @test2i16(<2 x i16>* nocapture %Sink, <2 x i32>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx v2, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
+; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vpkuwum v2, v2, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
define void @test2i16d(<2 x i16>* nocapture %Sink, <2 x i64>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test2i16d:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-NEXT: addi r4, r5, .LCPI6_0@toc@l
-; CHECK-NEXT: lvx v3, 0, r4
+; CHECK-NEXT: lxvd2x vs0, 0, r4
+; CHECK-NEXT: addi r5, r5, .LCPI6_0@toc@l
+; CHECK-NEXT: lxvd2x vs1, 0, r5
; CHECK-NEXT: xxswapd v2, vs0
+; CHECK-NEXT: xxswapd v3, vs1
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
define dso_local <8 x i8> @test8x32(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) {
; CHECK-LABEL: test8x32:
; CHECK: # %bb.0:
+; CHECK-NEXT: addis r11, r2, .LCPI0_0@toc@ha
; CHECK-NEXT: rldimi r3, r4, 32, 0
; CHECK-NEXT: rldimi r5, r6, 32, 0
-; CHECK-NEXT: addis r11, r2, .LCPI0_0@toc@ha
-; CHECK-NEXT: rldimi r7, r8, 32, 0
-; CHECK-NEXT: rldimi r9, r10, 32, 0
; CHECK-NEXT: mtfprd f0, r3
; CHECK-NEXT: addi r3, r11, .LCPI0_0@toc@l
+; CHECK-NEXT: rldimi r7, r8, 32, 0
+; CHECK-NEXT: rldimi r9, r10, 32, 0
+; CHECK-NEXT: lxvd2x vs3, 0, r3
; CHECK-NEXT: mtfprd f1, r5
-; CHECK-NEXT: lvx v4, 0, r3
; CHECK-NEXT: mtfprd f2, r7
-; CHECK-NEXT: mtfprd f3, r9
+; CHECK-NEXT: mtfprd f4, r9
; CHECK-NEXT: xxmrghd v2, vs1, vs0
-; CHECK-NEXT: xxmrghd v3, vs3, vs2
+; CHECK-NEXT: xxswapd v4, vs3
+; CHECK-NEXT: xxmrghd v3, vs4, vs2
; CHECK-NEXT: vperm v2, v3, v2, v4
; CHECK-NEXT: blr
;
; CHECK: # %bb.0:
; CHECK-NEXT: addis r7, r2, .LCPI1_0@toc@ha
; CHECK-NEXT: mtfprd f0, r5
+; CHECK-NEXT: addi r5, r7, .LCPI1_0@toc@l
; CHECK-NEXT: mtfprd f1, r6
+; CHECK-NEXT: lxvd2x vs3, 0, r5
; CHECK-NEXT: mtfprd f2, r3
-; CHECK-NEXT: addi r3, r7, .LCPI1_0@toc@l
-; CHECK-NEXT: mtfprd f3, r4
+; CHECK-NEXT: mtfprd f4, r4
; CHECK-NEXT: xxmrghd v2, vs1, vs0
-; CHECK-NEXT: lvx v4, 0, r3
-; CHECK-NEXT: xxmrghd v3, vs3, vs2
+; CHECK-NEXT: xxmrghd v3, vs4, vs2
+; CHECK-NEXT: xxswapd v4, vs3
; CHECK-NEXT: vperm v2, v2, v3, v4
; CHECK-NEXT: blr
;
define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: v2si64_cmp:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vperm 3, 2, 2, 3
; CHECK-NEXT: xxland 34, 35, 34
; CHECK-NEXT: blr
;
; LE-LABEL: test1:
; LE: # %bb.0:
-; LE-NEXT: lvx 2, 0, 3
+; LE-NEXT: lxvd2x 0, 0, 3
+; LE-NEXT: xxswapd 34, 0
; LE-NEXT: vspltisb 3, -1
; LE-NEXT: vslw 3, 3, 3
-; LE-NEXT: xxland 34, 34, 35
-; LE-NEXT: stvx 2, 0, 3
-; LE-NEXT: lvx 2, 0, 4
-; LE-NEXT: xxlandc 34, 34, 35
-; LE-NEXT: stvx 2, 0, 4
-; LE-NEXT: lvx 2, 0, 5
-; LE-NEXT: xvabssp 34, 34
-; LE-NEXT: stvx 2, 0, 5
+; LE-NEXT: xxland 0, 34, 35
+; LE-NEXT: xxswapd 0, 0
+; LE-NEXT: stxvd2x 0, 0, 3
+; LE-NEXT: lxvd2x 0, 0, 4
+; LE-NEXT: xxswapd 34, 0
+; LE-NEXT: xxlandc 0, 34, 35
+; LE-NEXT: xxswapd 0, 0
+; LE-NEXT: stxvd2x 0, 0, 4
+; LE-NEXT: lxvd2x 0, 0, 5
+; LE-NEXT: xxswapd 34, 0
+; LE-NEXT: xvabssp 0, 34
+; LE-NEXT: xxswapd 0, 0
+; LE-NEXT: stxvd2x 0, 0, 5
; LE-NEXT: blr
%tmp = load <4 x i32>, <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
%tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v3, r3, r4
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT: xscvspdpn f3, v3
+; CHECK-P8-NEXT: lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xscvspdpn f1, vs1
+; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT: xscvspdpn f3, v2
+; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT: xscvspdpn f5, v3
+; CHECK-P8-NEXT: xscvspdpn f2, vs2
; CHECK-P8-NEXT: xscvspdpn f4, vs4
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvdpsxws f2, f2
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
+; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: xscvspdpn f7, vs7
+; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: xscvdpsxws f4, f4
; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtvsrd v2, r3
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: mffprwz r3, f2
-; CHECK-P8-NEXT: xscvdpsxws f2, f4
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvdpsxws f4, f5
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghh v2, v4, v2
-; CHECK-P8-NEXT: mffprwz r4, f2
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: mtvsrd v3, r3
-; CHECK-P8-NEXT: mffprwz r3, f3
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: vmrghh v3, v3, v4
+; CHECK-P8-NEXT: xscvdpsxws f0, f6
+; CHECK-P8-NEXT: xscvdpsxws f2, f7
+; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: mffprwz r4, f4
; CHECK-P8-NEXT: mtvsrd v4, r3
-; CHECK-P8-NEXT: mffprwz r3, f4
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mtvsrd v5, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: vmrghh v5, v0, v5
+; CHECK-P8-NEXT: mffprwz r4, f5
+; CHECK-P8-NEXT: vmrghh v2, v2, v4
+; CHECK-P8-NEXT: vmrghh v3, v3, v5
+; CHECK-P8-NEXT: mtvsrd v4, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r3, f0
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: mtvsrd v0, r3
+; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT: mtvsrd v1, r3
-; CHECK-P8-NEXT: vmrghh v4, v4, v1
-; CHECK-P8-NEXT: xxmrglw vs1, v4, v5
+; CHECK-P8-NEXT: vmrghh v4, v4, v0
+; CHECK-P8-NEXT: vmrghh v5, v5, v1
+; CHECK-P8-NEXT: xxmrglw vs1, v5, v4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: blr
;
define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v5, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v3, r4, r5
-; CHECK-P8-NEXT: lvx v2, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r6
; CHECK-P8-NEXT: li r6, 48
-; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3
-; CHECK-P8-NEXT: xscvspdpn f1, v5
-; CHECK-P8-NEXT: lvx v4, r4, r6
-; CHECK-P8-NEXT: xxswapd vs3, v5
-; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1
-; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3
-; CHECK-P8-NEXT: xxswapd vs8, v3
+; CHECK-P8-NEXT: lxvd2x vs3, r4, r6
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
+; CHECK-P8-NEXT: xscvspdpn f4, vs1
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xscvspdpn f1, vs2
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 3
+; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 1
+; CHECK-P8-NEXT: xscvspdpn f7, v2
+; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 3
+; CHECK-P8-NEXT: xscvspdpn f5, vs3
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: xscvspdpn f8, vs8
+; CHECK-P8-NEXT: xscvdpsxws f4, f4
+; CHECK-P8-NEXT: xscvspdpn f9, vs9
; CHECK-P8-NEXT: xscvspdpn f2, v3
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvdpsxws f1, f5
+; CHECK-P8-NEXT: xscvdpsxws f6, f6
+; CHECK-P8-NEXT: mffprwz r4, f0
+; CHECK-P8-NEXT: xscvdpsxws f0, f7
+; CHECK-P8-NEXT: xxsldwi vs7, v0, v0, 3
+; CHECK-P8-NEXT: mtvsrd v2, r4
+; CHECK-P8-NEXT: mffprwz r4, f4
+; CHECK-P8-NEXT: xscvdpsxws f4, f8
; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r4, f6
+; CHECK-P8-NEXT: xscvdpsxws f6, f9
+; CHECK-P8-NEXT: xscvspdpn f10, v4
+; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r4, f0
; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f4, v2
-; CHECK-P8-NEXT: xscvdpsxws f5, f7
-; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvspdpn f3, v0
; CHECK-P8-NEXT: mtvsrd v3, r4
-; CHECK-P8-NEXT: mffprwz r4, f3
-; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f6, v4
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvdpsxws f1, f8
-; CHECK-P8-NEXT: xxswapd vs8, v4
+; CHECK-P8-NEXT: mffprwz r4, f4
+; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
+; CHECK-P8-NEXT: vmrghh v2, v2, v1
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: mtvsrd v1, r4
+; CHECK-P8-NEXT: mffprwz r4, f6
+; CHECK-P8-NEXT: xxsldwi vs6, v4, v4, 1
; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: vmrghh v3, v3, v1
; CHECK-P8-NEXT: mtvsrd v1, r4
-; CHECK-P8-NEXT: mffprwz r4, f5
-; CHECK-P8-NEXT: xxswapd vs5, v2
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvdpsxws f4, f4
-; CHECK-P8-NEXT: vmrghh v3, v0, v3
-; CHECK-P8-NEXT: mtvsrd v0, r4
; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvdpsxws f6, f6
-; CHECK-P8-NEXT: xscvspdpn f1, vs5
-; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT: mtvsrd v6, r4
+; CHECK-P8-NEXT: xscvdpsxws f1, f10
+; CHECK-P8-NEXT: vmrghh v4, v5, v1
+; CHECK-P8-NEXT: xscvspdpn f4, vs4
+; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r4, f5
+; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: xxsldwi vs2, v0, v0, 1
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghh v2, v5, v1
-; CHECK-P8-NEXT: vmrghh v5, v6, v0
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mffprwz r4, f4
-; CHECK-P8-NEXT: xscvdpsxws f2, f3
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: mtvsrd v1, r4
-; CHECK-P8-NEXT: mffprwz r4, f6
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: mtvsrd v6, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
+; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: xscvspdpn f7, vs7
+; CHECK-P8-NEXT: xscvdpsxws f1, f4
; CHECK-P8-NEXT: mtvsrd v7, r4
-; CHECK-P8-NEXT: mffprwz r4, f2
-; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
-; CHECK-P8-NEXT: xscvdpsxws f0, f5
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvspdpn f1, vs2
-; CHECK-P8-NEXT: xscvdpsxws f3, f7
-; CHECK-P8-NEXT: mtvsrd v8, r4
+; CHECK-P8-NEXT: mffprwz r4, f3
+; CHECK-P8-NEXT: xscvdpsxws f3, f6
+; CHECK-P8-NEXT: mtvsrd v0, r4
; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: xscvdpsxws f0, f8
+; CHECK-P8-NEXT: xscvspdpn f0, vs2
+; CHECK-P8-NEXT: mtvsrd v8, r4
+; CHECK-P8-NEXT: mffprwz r4, f1
+; CHECK-P8-NEXT: xscvdpsxws f1, f7
; CHECK-P8-NEXT: mtvsrd v9, r4
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: mffprwz r4, f3
-; CHECK-P8-NEXT: vmrghh v0, v0, v7
-; CHECK-P8-NEXT: mtvsrd v7, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: xxmrglw vs0, v2, v3
-; CHECK-P8-NEXT: vmrghh v4, v8, v4
+; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: vmrghh v6, v6, v8
; CHECK-P8-NEXT: mtvsrd v8, r4
; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: vmrghh v1, v1, v9
-; CHECK-P8-NEXT: xxmrglw vs1, v0, v5
+; CHECK-P8-NEXT: vmrghh v5, v5, v9
; CHECK-P8-NEXT: mtvsrd v9, r4
-; CHECK-P8-NEXT: vmrghh v7, v8, v7
-; CHECK-P8-NEXT: vmrghh v6, v6, v9
+; CHECK-P8-NEXT: mffprwz r4, f0
+; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT: vmrghh v7, v7, v8
+; CHECK-P8-NEXT: xxmrglw vs1, v6, v4
+; CHECK-P8-NEXT: mtvsrd v8, r4
+; CHECK-P8-NEXT: vmrghh v1, v1, v9
+; CHECK-P8-NEXT: vmrghh v0, v0, v8
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT: xxmrglw vs2, v1, v4
-; CHECK-P8-NEXT: stvx v2, 0, r3
-; CHECK-P8-NEXT: xxmrglw vs3, v6, v7
+; CHECK-P8-NEXT: xxmrglw vs2, v7, v5
+; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: xxmrglw vs3, v0, v1
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT: stvx v3, r3, r5
+; CHECK-P8-NEXT: xxswapd vs0, v3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v3, r3, r4
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT: xscvspdpn f3, v3
+; CHECK-P8-NEXT: lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xscvspdpn f1, vs1
+; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT: xscvspdpn f3, v2
+; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT: xscvspdpn f5, v3
+; CHECK-P8-NEXT: xscvspdpn f2, vs2
; CHECK-P8-NEXT: xscvspdpn f4, vs4
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvdpsxws f2, f2
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
+; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: xscvspdpn f7, vs7
+; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: xscvdpsxws f4, f4
; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtvsrd v2, r3
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: mffprwz r3, f2
-; CHECK-P8-NEXT: xscvdpsxws f2, f4
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvdpsxws f4, f5
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghh v2, v4, v2
-; CHECK-P8-NEXT: mffprwz r4, f2
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: mtvsrd v3, r3
-; CHECK-P8-NEXT: mffprwz r3, f3
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: vmrghh v3, v3, v4
+; CHECK-P8-NEXT: xscvdpsxws f0, f6
+; CHECK-P8-NEXT: xscvdpsxws f2, f7
+; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: mffprwz r4, f4
; CHECK-P8-NEXT: mtvsrd v4, r3
-; CHECK-P8-NEXT: mffprwz r3, f4
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mtvsrd v5, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: vmrghh v5, v0, v5
+; CHECK-P8-NEXT: mffprwz r4, f5
+; CHECK-P8-NEXT: vmrghh v2, v2, v4
+; CHECK-P8-NEXT: vmrghh v3, v3, v5
+; CHECK-P8-NEXT: mtvsrd v4, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r3, f0
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: mtvsrd v0, r3
+; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT: mtvsrd v1, r3
-; CHECK-P8-NEXT: vmrghh v4, v4, v1
-; CHECK-P8-NEXT: xxmrglw vs1, v4, v5
+; CHECK-P8-NEXT: vmrghh v4, v4, v0
+; CHECK-P8-NEXT: vmrghh v5, v5, v1
+; CHECK-P8-NEXT: xxmrglw vs1, v5, v4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: blr
;
define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v5, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v3, r4, r5
-; CHECK-P8-NEXT: lvx v2, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r6
; CHECK-P8-NEXT: li r6, 48
-; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3
-; CHECK-P8-NEXT: xscvspdpn f1, v5
-; CHECK-P8-NEXT: lvx v4, r4, r6
-; CHECK-P8-NEXT: xxswapd vs3, v5
-; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1
-; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3
-; CHECK-P8-NEXT: xxswapd vs8, v3
+; CHECK-P8-NEXT: lxvd2x vs3, r4, r6
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
+; CHECK-P8-NEXT: xscvspdpn f4, vs1
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xscvspdpn f1, vs2
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 3
+; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 1
+; CHECK-P8-NEXT: xscvspdpn f7, v2
+; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 3
+; CHECK-P8-NEXT: xscvspdpn f5, vs3
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: xscvspdpn f8, vs8
+; CHECK-P8-NEXT: xscvdpsxws f4, f4
+; CHECK-P8-NEXT: xscvspdpn f9, vs9
; CHECK-P8-NEXT: xscvspdpn f2, v3
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvdpsxws f1, f5
+; CHECK-P8-NEXT: xscvdpsxws f6, f6
+; CHECK-P8-NEXT: mffprwz r4, f0
+; CHECK-P8-NEXT: xscvdpsxws f0, f7
+; CHECK-P8-NEXT: xxsldwi vs7, v0, v0, 3
+; CHECK-P8-NEXT: mtvsrd v2, r4
+; CHECK-P8-NEXT: mffprwz r4, f4
+; CHECK-P8-NEXT: xscvdpsxws f4, f8
; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r4, f6
+; CHECK-P8-NEXT: xscvdpsxws f6, f9
+; CHECK-P8-NEXT: xscvspdpn f10, v4
+; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r4, f0
; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f4, v2
-; CHECK-P8-NEXT: xscvdpsxws f5, f7
-; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvspdpn f3, v0
; CHECK-P8-NEXT: mtvsrd v3, r4
-; CHECK-P8-NEXT: mffprwz r4, f3
-; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f6, v4
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvdpsxws f1, f8
-; CHECK-P8-NEXT: xxswapd vs8, v4
+; CHECK-P8-NEXT: mffprwz r4, f4
+; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
+; CHECK-P8-NEXT: vmrghh v2, v2, v1
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: mtvsrd v1, r4
+; CHECK-P8-NEXT: mffprwz r4, f6
+; CHECK-P8-NEXT: xxsldwi vs6, v4, v4, 1
; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: vmrghh v3, v3, v1
; CHECK-P8-NEXT: mtvsrd v1, r4
-; CHECK-P8-NEXT: mffprwz r4, f5
-; CHECK-P8-NEXT: xxswapd vs5, v2
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvdpsxws f4, f4
-; CHECK-P8-NEXT: vmrghh v3, v0, v3
-; CHECK-P8-NEXT: mtvsrd v0, r4
; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvdpsxws f6, f6
-; CHECK-P8-NEXT: xscvspdpn f1, vs5
-; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT: mtvsrd v6, r4
+; CHECK-P8-NEXT: xscvdpsxws f1, f10
+; CHECK-P8-NEXT: vmrghh v4, v5, v1
+; CHECK-P8-NEXT: xscvspdpn f4, vs4
+; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r4, f5
+; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: mtvsrd v1, r4
; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: xxsldwi vs2, v0, v0, 1
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghh v2, v5, v1
-; CHECK-P8-NEXT: vmrghh v5, v6, v0
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mffprwz r4, f4
-; CHECK-P8-NEXT: xscvdpsxws f2, f3
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: mtvsrd v1, r4
-; CHECK-P8-NEXT: mffprwz r4, f6
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: mtvsrd v6, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
+; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: xscvspdpn f7, vs7
+; CHECK-P8-NEXT: xscvdpsxws f1, f4
; CHECK-P8-NEXT: mtvsrd v7, r4
-; CHECK-P8-NEXT: mffprwz r4, f2
-; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
-; CHECK-P8-NEXT: xscvdpsxws f0, f5
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xscvspdpn f1, vs2
-; CHECK-P8-NEXT: xscvdpsxws f3, f7
-; CHECK-P8-NEXT: mtvsrd v8, r4
+; CHECK-P8-NEXT: mffprwz r4, f3
+; CHECK-P8-NEXT: xscvdpsxws f3, f6
+; CHECK-P8-NEXT: mtvsrd v0, r4
; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: xscvdpsxws f0, f8
+; CHECK-P8-NEXT: xscvspdpn f0, vs2
+; CHECK-P8-NEXT: mtvsrd v8, r4
+; CHECK-P8-NEXT: mffprwz r4, f1
+; CHECK-P8-NEXT: xscvdpsxws f1, f7
; CHECK-P8-NEXT: mtvsrd v9, r4
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
; CHECK-P8-NEXT: mffprwz r4, f3
-; CHECK-P8-NEXT: vmrghh v0, v0, v7
-; CHECK-P8-NEXT: mtvsrd v7, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: xxmrglw vs0, v2, v3
-; CHECK-P8-NEXT: vmrghh v4, v8, v4
+; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: vmrghh v6, v6, v8
; CHECK-P8-NEXT: mtvsrd v8, r4
; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: vmrghh v1, v1, v9
-; CHECK-P8-NEXT: xxmrglw vs1, v0, v5
+; CHECK-P8-NEXT: vmrghh v5, v5, v9
; CHECK-P8-NEXT: mtvsrd v9, r4
-; CHECK-P8-NEXT: vmrghh v7, v8, v7
-; CHECK-P8-NEXT: vmrghh v6, v6, v9
+; CHECK-P8-NEXT: mffprwz r4, f0
+; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT: vmrghh v7, v7, v8
+; CHECK-P8-NEXT: xxmrglw vs1, v6, v4
+; CHECK-P8-NEXT: mtvsrd v8, r4
+; CHECK-P8-NEXT: vmrghh v1, v1, v9
+; CHECK-P8-NEXT: vmrghh v0, v0, v8
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT: xxmrglw vs2, v1, v4
-; CHECK-P8-NEXT: stvx v2, 0, r3
-; CHECK-P8-NEXT: xxmrglw vs3, v6, v7
+; CHECK-P8-NEXT: xxmrglw vs2, v7, v5
+; CHECK-P8-NEXT: xxswapd vs1, v2
+; CHECK-P8-NEXT: xxmrglw vs3, v0, v1
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT: stvx v3, r3, r5
+; CHECK-P8-NEXT: xxswapd vs0, v3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v2, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
; CHECK-P8-NEXT: xxmrghw vs3, v3, v3
; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: lvx v4, r4, r7
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: li r4, 112
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT: xxmrglw vs1, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs2, v2, v2
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw vs5, v4, v4
-; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
-; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
-; CHECK-P8-NEXT: xxmrghw vs4, v3, v3
+; CHECK-P8-NEXT: xxmrglw vs0, v3, v3
; CHECK-P8-NEXT: xvcvspdp vs3, vs3
+; CHECK-P8-NEXT: xxmrghw vs4, v3, v3
; CHECK-P8-NEXT: xxmrglw vs6, v2, v2
; CHECK-P8-NEXT: xxmrghw vs7, v2, v2
; CHECK-P8-NEXT: xvcvspdp vs5, vs5
-; CHECK-P8-NEXT: xvcvspdp vs0, vs0
; CHECK-P8-NEXT: xvcvspdp vs1, vs1
; CHECK-P8-NEXT: xvcvspdp vs2, vs2
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
; CHECK-P8-NEXT: xvcvspdp vs4, vs4
; CHECK-P8-NEXT: xvcvspdp vs6, vs6
; CHECK-P8-NEXT: xvcvspdp vs7, vs7
; CHECK-P8-NEXT: xvcvdpuxds v3, vs3
; CHECK-P8-NEXT: xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT: xvcvdpuxds v4, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v0, vs4
-; CHECK-P8-NEXT: xvcvdpuxds v1, vs2
+; CHECK-P8-NEXT: xvcvdpuxds v1, vs0
; CHECK-P8-NEXT: xvcvdpuxds v6, vs6
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: xvcvdpuxds v7, vs7
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v2, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
; CHECK-P8-NEXT: xxmrghw vs3, v3, v3
; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: lvx v4, r4, r7
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: li r4, 112
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT: xxmrglw vs1, v2, v2
+; CHECK-P8-NEXT: xxmrghw vs2, v2, v2
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw vs5, v4, v4
-; CHECK-P8-NEXT: xxmrglw vs0, v2, v2
-; CHECK-P8-NEXT: xxmrghw vs1, v2, v2
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: xxmrglw vs2, v3, v3
-; CHECK-P8-NEXT: xxmrghw vs4, v3, v3
+; CHECK-P8-NEXT: xxmrglw vs0, v3, v3
; CHECK-P8-NEXT: xvcvspdp vs3, vs3
+; CHECK-P8-NEXT: xxmrghw vs4, v3, v3
; CHECK-P8-NEXT: xxmrglw vs6, v2, v2
; CHECK-P8-NEXT: xxmrghw vs7, v2, v2
; CHECK-P8-NEXT: xvcvspdp vs5, vs5
-; CHECK-P8-NEXT: xvcvspdp vs0, vs0
; CHECK-P8-NEXT: xvcvspdp vs1, vs1
; CHECK-P8-NEXT: xvcvspdp vs2, vs2
+; CHECK-P8-NEXT: xvcvspdp vs0, vs0
; CHECK-P8-NEXT: xvcvspdp vs4, vs4
; CHECK-P8-NEXT: xvcvspdp vs6, vs6
; CHECK-P8-NEXT: xvcvspdp vs7, vs7
; CHECK-P8-NEXT: xvcvdpuxds v3, vs3
; CHECK-P8-NEXT: xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT: xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT: xvcvdpuxds v4, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT: xvcvdpuxds v4, vs2
; CHECK-P8-NEXT: xvcvdpuxds v0, vs4
-; CHECK-P8-NEXT: xvcvdpuxds v1, vs2
+; CHECK-P8-NEXT: xvcvdpuxds v1, vs0
; CHECK-P8-NEXT: xvcvdpuxds v6, vs6
; CHECK-P8-NEXT: xxswapd vs0, v3
; CHECK-P8-NEXT: xvcvdpuxds v7, vs7
define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v3, r3, r4
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT: xscvspdpn f3, v3
+; CHECK-P8-NEXT: lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xscvspdpn f1, vs1
+; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT: xscvspdpn f3, v2
+; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT: xscvspdpn f5, v3
+; CHECK-P8-NEXT: xscvspdpn f2, vs2
; CHECK-P8-NEXT: xscvspdpn f4, vs4
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvdpsxws f2, f2
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
+; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: xscvspdpn f7, vs7
+; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: xscvdpsxws f4, f4
; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtvsrd v2, r3
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: mffprwz r3, f2
-; CHECK-P8-NEXT: xscvdpsxws f2, f4
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvdpsxws f4, f5
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghb v2, v4, v2
-; CHECK-P8-NEXT: mffprwz r4, f2
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: mtvsrd v3, r3
-; CHECK-P8-NEXT: mffprwz r3, f3
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: vmrghb v3, v3, v4
+; CHECK-P8-NEXT: xscvdpsxws f0, f6
+; CHECK-P8-NEXT: xscvdpsxws f2, f7
+; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: mffprwz r4, f4
; CHECK-P8-NEXT: mtvsrd v4, r3
-; CHECK-P8-NEXT: mffprwz r3, f4
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mtvsrd v5, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: vmrghb v5, v0, v5
-; CHECK-P8-NEXT: mtvsrd v1, r3
+; CHECK-P8-NEXT: mffprwz r4, f5
+; CHECK-P8-NEXT: vmrghb v2, v2, v4
+; CHECK-P8-NEXT: vmrghb v3, v3, v5
+; CHECK-P8-NEXT: mtvsrd v4, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r3, f0
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: mtvsrd v0, r3
+; CHECK-P8-NEXT: mtvsrd v1, r4
+; CHECK-P8-NEXT: vmrghb v4, v4, v0
+; CHECK-P8-NEXT: vmrghb v5, v5, v1
; CHECK-P8-NEXT: vmrglh v2, v3, v2
-; CHECK-P8-NEXT: vmrghb v4, v4, v1
-; CHECK-P8-NEXT: vmrglh v3, v4, v5
+; CHECK-P8-NEXT: vmrglh v3, v5, v4
; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: mffprd r3, f0
define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v4, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: li r5, 32
-; CHECK-P8-NEXT: lvx v3, r3, r4
-; CHECK-P8-NEXT: lvx v2, r3, r5
-; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3
-; CHECK-P8-NEXT: xxswapd vs2, v4
-; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1
-; CHECK-P8-NEXT: xscvspdpn f1, v4
-; CHECK-P8-NEXT: xscvspdpn f3, v3
-; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT: lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xxswapd vs7, v3
+; CHECK-P8-NEXT: lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT: xxswapd v3, vs2
+; CHECK-P8-NEXT: xscvspdpn f5, vs2
+; CHECK-P8-NEXT: xxswapd v5, vs3
+; CHECK-P8-NEXT: xscvspdpn f6, vs3
+; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT: xscvspdpn f8, v2
+; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3
+; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT: xscvspdpn f3, v5
+; CHECK-P8-NEXT: xxswapd v7, vs4
+; CHECK-P8-NEXT: xscvspdpn f1, vs1
+; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: xscvspdpn f2, vs2
-; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f4, vs4
-; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f6, vs6
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
+; CHECK-P8-NEXT: xscvdpsxws f8, f8
+; CHECK-P8-NEXT: xscvspdpn f9, vs9
; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: mffprwz r3, f0
+; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1
; CHECK-P8-NEXT: xscvdpsxws f2, f2
-; CHECK-P8-NEXT: xscvdpsxws f4, f4
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
-; CHECK-P8-NEXT: xscvspdpn f9, vs9
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: xxswapd vs0, v2
-; CHECK-P8-NEXT: mffprwz r5, f2
-; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: mffprwz r3, f5
; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: mtvsrd v4, r5
-; CHECK-P8-NEXT: mffprwz r5, f4
-; CHECK-P8-NEXT: xscvdpsxws f1, f6
-; CHECK-P8-NEXT: vmrghb v3, v4, v3
-; CHECK-P8-NEXT: mtvsrd v4, r5
-; CHECK-P8-NEXT: mffprwz r5, f3
-; CHECK-P8-NEXT: xscvdpsxws f3, f7
-; CHECK-P8-NEXT: xscvdpsxws f4, f8
+; CHECK-P8-NEXT: xscvspdpn f1, v3
+; CHECK-P8-NEXT: mtvsrd v0, r4
+; CHECK-P8-NEXT: mtvsrd v3, r3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: mtvsrd v5, r4
-; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: lvx v0, r3, r4
-; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-P8-NEXT: xscvspdpn f5, v2
-; CHECK-P8-NEXT: mffprwz r4, f3
-; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3
-; CHECK-P8-NEXT: mtvsrd v1, r3
-; CHECK-P8-NEXT: mffprwz r3, f4
-; CHECK-P8-NEXT: xxswapd vs4, v0
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: mtvsrd v7, r3
+; CHECK-P8-NEXT: mffprwz r3, f8
+; CHECK-P8-NEXT: vmrghb v2, v4, v0
+; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvdpsxws f5, f9
+; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v0
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvdpsxws f6, f9
-; CHECK-P8-NEXT: xscvspdpn f4, vs4
+; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3
+; CHECK-P8-NEXT: xscvspdpn f7, vs4
+; CHECK-P8-NEXT: xxsldwi vs4, v7, v7, 3
+; CHECK-P8-NEXT: mtvsrd v1, r3
+; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvdpsxws f5, f5
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: mtvsrd v0, r4
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1
+; CHECK-P8-NEXT: mffprwz r3, f5
+; CHECK-P8-NEXT: xscvdpsxws f5, f6
+; CHECK-P8-NEXT: xxsldwi vs6, v7, v7, 1
+; CHECK-P8-NEXT: xscvspdpn f2, vs2
+; CHECK-P8-NEXT: xscvspdpn f1, v7
; CHECK-P8-NEXT: xscvdpsxws f3, f3
-; CHECK-P8-NEXT: mtvsrd v6, r4
-; CHECK-P8-NEXT: mffprwz r4, f6
-; CHECK-P8-NEXT: xscvdpsxws f4, f4
+; CHECK-P8-NEXT: xscvspdpn f4, vs4
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghb v2, v6, v1
-; CHECK-P8-NEXT: mtvsrd v1, r4
-; CHECK-P8-NEXT: mffprwz r4, f5
-; CHECK-P8-NEXT: mtvsrd v6, r3
-; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: vmrghb v4, v5, v4
-; CHECK-P8-NEXT: mtvsrd v5, r5
-; CHECK-P8-NEXT: vmrghb v0, v6, v1
+; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: mtvsrd v6, r4
+; CHECK-P8-NEXT: mtvsrd v8, r3
+; CHECK-P8-NEXT: xscvdpsxws f7, f7
+; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: mffprwz r3, f5
+; CHECK-P8-NEXT: mffprwz r4, f3
+; CHECK-P8-NEXT: vmrghb v4, v4, v1
+; CHECK-P8-NEXT: vmrghb v5, v0, v8
+; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mtvsrd v1, r4
+; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: xscvdpsxws f0, f4
+; CHECK-P8-NEXT: xscvdpsxws f2, f6
+; CHECK-P8-NEXT: vmrghb v3, v3, v6
; CHECK-P8-NEXT: mtvsrd v6, r3
-; CHECK-P8-NEXT: mffprwz r3, f3
-; CHECK-P8-NEXT: vmrghb v5, v5, v7
-; CHECK-P8-NEXT: vmrghb v1, v1, v6
-; CHECK-P8-NEXT: mtvsrd v6, r4
-; CHECK-P8-NEXT: mffprwz r4, f4
-; CHECK-P8-NEXT: mtvsrd v7, r3
+; CHECK-P8-NEXT: mtvsrd v7, r4
+; CHECK-P8-NEXT: mffprwz r3, f7
+; CHECK-P8-NEXT: mffprwz r4, f1
+; CHECK-P8-NEXT: vmrghb v0, v0, v6
+; CHECK-P8-NEXT: vmrghb v1, v1, v7
+; CHECK-P8-NEXT: mtvsrd v6, r3
+; CHECK-P8-NEXT: mtvsrd v7, r4
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: mtvsrd v8, r4
-; CHECK-P8-NEXT: mtvsrd v9, r3
-; CHECK-P8-NEXT: vmrghb v7, v8, v7
-; CHECK-P8-NEXT: vmrghb v6, v6, v9
-; CHECK-P8-NEXT: vmrglh v3, v4, v3
-; CHECK-P8-NEXT: vmrglh v2, v5, v2
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: mtvsrd v8, r3
+; CHECK-P8-NEXT: mtvsrd v9, r4
+; CHECK-P8-NEXT: vmrghb v6, v6, v8
+; CHECK-P8-NEXT: vmrghb v7, v7, v9
+; CHECK-P8-NEXT: vmrglh v2, v4, v2
+; CHECK-P8-NEXT: vmrglh v3, v5, v3
; CHECK-P8-NEXT: vmrglh v4, v1, v0
-; CHECK-P8-NEXT: vmrglh v5, v6, v7
-; CHECK-P8-NEXT: xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT: vmrglh v5, v7, v6
+; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
; CHECK-P8-NEXT: xxmrglw vs1, v5, v4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: blr
define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v2, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v3, r3, r4
-; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT: xxswapd vs1, v2
-; CHECK-P8-NEXT: xscvspdpn f2, v2
-; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT: xscvspdpn f3, v3
+; CHECK-P8-NEXT: lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xscvspdpn f1, vs1
+; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT: xscvspdpn f3, v2
+; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT: xscvspdpn f5, v3
+; CHECK-P8-NEXT: xscvspdpn f2, vs2
; CHECK-P8-NEXT: xscvspdpn f4, vs4
-; CHECK-P8-NEXT: xscvspdpn f5, vs5
-; CHECK-P8-NEXT: xscvdpsxws f2, f2
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
+; CHECK-P8-NEXT: xscvdpsxws f3, f3
+; CHECK-P8-NEXT: xscvspdpn f7, vs7
+; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: xscvdpsxws f4, f4
; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: xxswapd vs0, v3
-; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT: mffprwz r4, f3
; CHECK-P8-NEXT: mtvsrd v2, r3
-; CHECK-P8-NEXT: xscvspdpn f0, vs0
; CHECK-P8-NEXT: mffprwz r3, f2
-; CHECK-P8-NEXT: xscvdpsxws f2, f4
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: xscvdpsxws f4, f5
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghb v2, v4, v2
-; CHECK-P8-NEXT: mffprwz r4, f2
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: mtvsrd v3, r3
-; CHECK-P8-NEXT: mffprwz r3, f3
-; CHECK-P8-NEXT: mtvsrd v4, r4
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: vmrghb v3, v3, v4
+; CHECK-P8-NEXT: xscvdpsxws f0, f6
+; CHECK-P8-NEXT: xscvdpsxws f2, f7
+; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: mffprwz r4, f4
; CHECK-P8-NEXT: mtvsrd v4, r3
-; CHECK-P8-NEXT: mffprwz r3, f4
-; CHECK-P8-NEXT: mtvsrd v0, r4
-; CHECK-P8-NEXT: mtvsrd v5, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: vmrghb v5, v0, v5
-; CHECK-P8-NEXT: mtvsrd v1, r3
+; CHECK-P8-NEXT: mffprwz r4, f5
+; CHECK-P8-NEXT: vmrghb v2, v2, v4
+; CHECK-P8-NEXT: vmrghb v3, v3, v5
+; CHECK-P8-NEXT: mtvsrd v4, r3
+; CHECK-P8-NEXT: mtvsrd v5, r4
+; CHECK-P8-NEXT: mffprwz r3, f0
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: mtvsrd v0, r3
+; CHECK-P8-NEXT: mtvsrd v1, r4
+; CHECK-P8-NEXT: vmrghb v4, v4, v0
+; CHECK-P8-NEXT: vmrghb v5, v5, v1
; CHECK-P8-NEXT: vmrglh v2, v3, v2
-; CHECK-P8-NEXT: vmrghb v4, v4, v1
-; CHECK-P8-NEXT: vmrglh v3, v4, v5
+; CHECK-P8-NEXT: vmrglh v3, v5, v4
; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: mffprd r3, f0
define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: lvx v4, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: li r5, 32
-; CHECK-P8-NEXT: lvx v3, r3, r4
-; CHECK-P8-NEXT: lvx v2, r3, r5
-; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3
-; CHECK-P8-NEXT: xxswapd vs2, v4
-; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1
-; CHECK-P8-NEXT: xscvspdpn f1, v4
-; CHECK-P8-NEXT: xscvspdpn f3, v3
-; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT: lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT: li r4, 32
+; CHECK-P8-NEXT: lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xxswapd vs7, v3
+; CHECK-P8-NEXT: lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT: xxswapd v3, vs2
+; CHECK-P8-NEXT: xscvspdpn f5, vs2
+; CHECK-P8-NEXT: xxswapd v5, vs3
+; CHECK-P8-NEXT: xscvspdpn f6, vs3
+; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT: xscvspdpn f8, v2
+; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3
+; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT: xscvspdpn f3, v5
+; CHECK-P8-NEXT: xxswapd v7, vs4
+; CHECK-P8-NEXT: xscvspdpn f1, vs1
+; CHECK-P8-NEXT: xscvdpsxws f0, f0
; CHECK-P8-NEXT: xscvspdpn f2, vs2
-; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1
-; CHECK-P8-NEXT: xscvspdpn f4, vs4
-; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3
-; CHECK-P8-NEXT: xscvspdpn f6, vs6
+; CHECK-P8-NEXT: xscvdpsxws f5, f5
+; CHECK-P8-NEXT: xscvdpsxws f8, f8
+; CHECK-P8-NEXT: xscvspdpn f9, vs9
; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: xscvspdpn f7, vs7
-; CHECK-P8-NEXT: xscvdpsxws f0, f0
+; CHECK-P8-NEXT: mffprwz r3, f0
+; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1
; CHECK-P8-NEXT: xscvdpsxws f2, f2
-; CHECK-P8-NEXT: xscvdpsxws f4, f4
-; CHECK-P8-NEXT: xscvspdpn f8, vs8
-; CHECK-P8-NEXT: xscvdpsxws f3, f3
-; CHECK-P8-NEXT: xscvspdpn f9, vs9
-; CHECK-P8-NEXT: mffprwz r4, f0
-; CHECK-P8-NEXT: xxswapd vs0, v2
-; CHECK-P8-NEXT: mffprwz r5, f2
-; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: xscvspdpn f0, vs0
+; CHECK-P8-NEXT: mffprwz r3, f5
; CHECK-P8-NEXT: mffprwz r4, f1
-; CHECK-P8-NEXT: mtvsrd v4, r5
-; CHECK-P8-NEXT: mffprwz r5, f4
-; CHECK-P8-NEXT: xscvdpsxws f1, f6
-; CHECK-P8-NEXT: vmrghb v3, v4, v3
-; CHECK-P8-NEXT: mtvsrd v4, r5
-; CHECK-P8-NEXT: mffprwz r5, f3
-; CHECK-P8-NEXT: xscvdpsxws f3, f7
-; CHECK-P8-NEXT: xscvdpsxws f4, f8
+; CHECK-P8-NEXT: xscvspdpn f1, v3
+; CHECK-P8-NEXT: mtvsrd v0, r4
+; CHECK-P8-NEXT: mtvsrd v3, r3
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: mtvsrd v5, r4
-; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: lvx v0, r3, r4
-; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-P8-NEXT: xscvspdpn f5, v2
-; CHECK-P8-NEXT: mffprwz r4, f3
-; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3
-; CHECK-P8-NEXT: mtvsrd v1, r3
-; CHECK-P8-NEXT: mffprwz r3, f4
-; CHECK-P8-NEXT: xxswapd vs4, v0
-; CHECK-P8-NEXT: xscvspdpn f1, vs1
-; CHECK-P8-NEXT: mtvsrd v7, r3
+; CHECK-P8-NEXT: mffprwz r3, f8
+; CHECK-P8-NEXT: vmrghb v2, v4, v0
+; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: xscvdpsxws f5, f9
+; CHECK-P8-NEXT: mtvsrd v4, r3
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1
-; CHECK-P8-NEXT: xscvspdpn f2, v0
-; CHECK-P8-NEXT: xscvspdpn f3, vs3
-; CHECK-P8-NEXT: xscvdpsxws f6, f9
-; CHECK-P8-NEXT: xscvspdpn f4, vs4
+; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3
+; CHECK-P8-NEXT: xscvspdpn f7, vs4
+; CHECK-P8-NEXT: xxsldwi vs4, v7, v7, 3
+; CHECK-P8-NEXT: mtvsrd v1, r3
+; CHECK-P8-NEXT: mffprwz r4, f1
; CHECK-P8-NEXT: xscvspdpn f0, vs0
-; CHECK-P8-NEXT: xscvdpsxws f5, f5
-; CHECK-P8-NEXT: xscvdpsxws f1, f1
-; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: mtvsrd v0, r4
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1
+; CHECK-P8-NEXT: mffprwz r3, f5
+; CHECK-P8-NEXT: xscvdpsxws f5, f6
+; CHECK-P8-NEXT: xxsldwi vs6, v7, v7, 1
+; CHECK-P8-NEXT: xscvspdpn f2, vs2
+; CHECK-P8-NEXT: xscvspdpn f1, v7
; CHECK-P8-NEXT: xscvdpsxws f3, f3
-; CHECK-P8-NEXT: mtvsrd v6, r4
-; CHECK-P8-NEXT: mffprwz r4, f6
-; CHECK-P8-NEXT: xscvdpsxws f4, f4
+; CHECK-P8-NEXT: xscvspdpn f4, vs4
+; CHECK-P8-NEXT: xscvspdpn f6, vs6
; CHECK-P8-NEXT: xscvdpsxws f0, f0
-; CHECK-P8-NEXT: vmrghb v2, v6, v1
-; CHECK-P8-NEXT: mtvsrd v1, r4
-; CHECK-P8-NEXT: mffprwz r4, f5
-; CHECK-P8-NEXT: mtvsrd v6, r3
-; CHECK-P8-NEXT: mffprwz r3, f1
-; CHECK-P8-NEXT: vmrghb v4, v5, v4
-; CHECK-P8-NEXT: mtvsrd v5, r5
-; CHECK-P8-NEXT: vmrghb v0, v6, v1
+; CHECK-P8-NEXT: xscvdpsxws f2, f2
+; CHECK-P8-NEXT: mtvsrd v6, r4
+; CHECK-P8-NEXT: mtvsrd v8, r3
+; CHECK-P8-NEXT: xscvdpsxws f7, f7
+; CHECK-P8-NEXT: xscvdpsxws f1, f1
+; CHECK-P8-NEXT: mffprwz r3, f5
+; CHECK-P8-NEXT: mffprwz r4, f3
+; CHECK-P8-NEXT: vmrghb v4, v4, v1
+; CHECK-P8-NEXT: vmrghb v5, v0, v8
+; CHECK-P8-NEXT: mtvsrd v0, r3
; CHECK-P8-NEXT: mtvsrd v1, r4
+; CHECK-P8-NEXT: mffprwz r3, f0
; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: xscvdpsxws f0, f4
+; CHECK-P8-NEXT: xscvdpsxws f2, f6
+; CHECK-P8-NEXT: vmrghb v3, v3, v6
; CHECK-P8-NEXT: mtvsrd v6, r3
-; CHECK-P8-NEXT: mffprwz r3, f3
-; CHECK-P8-NEXT: vmrghb v5, v5, v7
-; CHECK-P8-NEXT: vmrghb v1, v1, v6
-; CHECK-P8-NEXT: mtvsrd v6, r4
-; CHECK-P8-NEXT: mffprwz r4, f4
-; CHECK-P8-NEXT: mtvsrd v7, r3
+; CHECK-P8-NEXT: mtvsrd v7, r4
+; CHECK-P8-NEXT: mffprwz r3, f7
+; CHECK-P8-NEXT: mffprwz r4, f1
+; CHECK-P8-NEXT: vmrghb v0, v0, v6
+; CHECK-P8-NEXT: vmrghb v1, v1, v7
+; CHECK-P8-NEXT: mtvsrd v6, r3
+; CHECK-P8-NEXT: mtvsrd v7, r4
; CHECK-P8-NEXT: mffprwz r3, f0
-; CHECK-P8-NEXT: mtvsrd v8, r4
-; CHECK-P8-NEXT: mtvsrd v9, r3
-; CHECK-P8-NEXT: vmrghb v7, v8, v7
-; CHECK-P8-NEXT: vmrghb v6, v6, v9
-; CHECK-P8-NEXT: vmrglh v3, v4, v3
-; CHECK-P8-NEXT: vmrglh v2, v5, v2
+; CHECK-P8-NEXT: mffprwz r4, f2
+; CHECK-P8-NEXT: mtvsrd v8, r3
+; CHECK-P8-NEXT: mtvsrd v9, r4
+; CHECK-P8-NEXT: vmrghb v6, v6, v8
+; CHECK-P8-NEXT: vmrghb v7, v7, v9
+; CHECK-P8-NEXT: vmrglh v2, v4, v2
+; CHECK-P8-NEXT: vmrglh v3, v5, v3
; CHECK-P8-NEXT: vmrglh v4, v1, v0
-; CHECK-P8-NEXT: vmrglh v5, v6, v7
-; CHECK-P8-NEXT: xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT: vmrglh v5, v7, v6
+; CHECK-P8-NEXT: xxmrglw vs0, v3, v2
; CHECK-P8-NEXT: xxmrglw vs1, v5, v4
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: blr
; CHECK-P8-NEXT: vmrghh v7, v9, v7
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxmrglw vs2, v1, v0
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxmrglw vs3, v7, v6
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT: stvx v3, r3, r5
+; CHECK-P8-NEXT: xxswapd vs0, v3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-NEXT: vmrghh v7, v9, v7
; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P8-NEXT: xxmrglw vs2, v1, v0
-; CHECK-P8-NEXT: stvx v2, 0, r3
+; CHECK-P8-NEXT: xxswapd vs1, v2
; CHECK-P8-NEXT: xxmrglw vs3, v7, v6
; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT: stvx v3, r3, r5
+; CHECK-P8-NEXT: xxswapd vs0, v3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT: xxswapd vs3, vs3
-; CHECK-P8-NEXT: xxswapd vs0, vs0
-; CHECK-P8-NEXT: xxswapd vs1, vs1
-; CHECK-P8-NEXT: xxswapd vs2, vs2
-; CHECK-P8-NEXT: xxmrgld vs4, vs1, vs0
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxmrgld vs1, vs2, vs3
-; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs3
+; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT: xxmrgld vs2, vs3, vs2
; CHECK-P8-NEXT: xvcvdpuxws v2, vs4
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
; CHECK-P8-NEXT: xvcvdpuxws v4, vs1
; CHECK-P8-NEXT: xvcvdpuxws v5, vs2
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, 0, r3
+; CHECK-P8-NEXT: stxvd2x v2, r3, r5
+; CHECK-P8-NEXT: stxvd2x v3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
-; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: li r7, 16
-; CHECK-P8-NEXT: li r9, 80
-; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT: li r7, 64
+; CHECK-P8-NEXT: li r8, 80
+; CHECK-P8-NEXT: lxvd2x vs8, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
-; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT: lxvd2x vs5, r4, r8
+; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT: li r7, 96
; CHECK-P8-NEXT: li r8, 112
-; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
-; CHECK-P8-NEXT: xxswapd vs0, vs0
-; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: lxvd2x vs5, r4, r7
+; CHECK-P8-NEXT: li r7, 16
; CHECK-P8-NEXT: lxvd2x vs6, r4, r8
-; CHECK-P8-NEXT: xxswapd vs2, vs2
-; CHECK-P8-NEXT: xxswapd vs3, vs3
-; CHECK-P8-NEXT: xxswapd vs4, vs4
-; CHECK-P8-NEXT: xxswapd vs5, vs5
-; CHECK-P8-NEXT: xxmrgld vs8, vs1, vs0
-; CHECK-P8-NEXT: xxswapd vs6, vs6
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxswapd vs1, vs7
-; CHECK-P8-NEXT: xxmrgld vs7, vs4, vs3
-; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5
-; CHECK-P8-NEXT: xvcvdpuxws v2, vs8
+; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT: lxvd2x vs7, r4, r7
+; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT: xxmrgld vs2, vs2, vs3
+; CHECK-P8-NEXT: xxmrghd vs3, vs5, vs6
; CHECK-P8-NEXT: xvcvdpuxws v3, vs0
-; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5
-; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-P8-NEXT: xvcvdpuxws v4, vs7
-; CHECK-P8-NEXT: xvcvdpuxws v5, vs3
-; CHECK-P8-NEXT: xvcvdpuxws v0, vs4
+; CHECK-P8-NEXT: xxmrgld vs0, vs5, vs6
+; CHECK-P8-NEXT: xvcvdpuxws v4, vs1
+; CHECK-P8-NEXT: xxmrghd vs1, vs8, vs7
+; CHECK-P8-NEXT: xvcvdpuxws v5, vs2
+; CHECK-P8-NEXT: xxmrgld vs2, vs8, vs7
+; CHECK-P8-NEXT: xvcvdpuxws v2, vs4
+; CHECK-P8-NEXT: xvcvdpuxws v0, vs3
; CHECK-P8-NEXT: xvcvdpuxws v1, vs0
-; CHECK-P8-NEXT: xvcvdpuxws v6, vs5
-; CHECK-P8-NEXT: xvcvdpuxws v7, vs1
+; CHECK-P8-NEXT: xvcvdpuxws v6, vs1
+; CHECK-P8-NEXT: xvcvdpuxws v7, vs2
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
; CHECK-P8-NEXT: vmrgew v4, v1, v0
; CHECK-P8-NEXT: vmrgew v5, v7, v6
-; CHECK-P8-NEXT: stvx v2, r3, r7
-; CHECK-P8-NEXT: stvx v3, r3, r5
-; CHECK-P8-NEXT: stvx v4, r3, r6
-; CHECK-P8-NEXT: stvx v5, 0, r3
+; CHECK-P8-NEXT: stxvd2x v4, r3, r6
+; CHECK-P8-NEXT: stxvd2x v3, r3, r5
+; CHECK-P8-NEXT: stxvd2x v2, r3, r7
+; CHECK-P8-NEXT: stxvd2x v5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
; CHECK-P8-NEXT: lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT: xxswapd vs3, vs3
-; CHECK-P8-NEXT: xxswapd vs0, vs0
-; CHECK-P8-NEXT: xxswapd vs1, vs1
-; CHECK-P8-NEXT: xxswapd vs2, vs2
-; CHECK-P8-NEXT: xxmrgld vs4, vs1, vs0
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxmrgld vs1, vs2, vs3
-; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs3
+; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT: xxmrgld vs2, vs3, vs2
; CHECK-P8-NEXT: xvcvdpsxws v2, vs4
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
; CHECK-P8-NEXT: xvcvdpsxws v4, vs1
; CHECK-P8-NEXT: xvcvdpsxws v5, vs2
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, 0, r3
+; CHECK-P8-NEXT: stxvd2x v2, r3, r5
+; CHECK-P8-NEXT: stxvd2x v3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: li r6, 48
-; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: li r7, 16
-; CHECK-P8-NEXT: li r9, 80
-; CHECK-P8-NEXT: lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT: li r7, 64
+; CHECK-P8-NEXT: li r8, 80
+; CHECK-P8-NEXT: lxvd2x vs8, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
-; CHECK-P8-NEXT: li r8, 96
; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT: lxvd2x vs5, r4, r8
+; CHECK-P8-NEXT: lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT: li r7, 96
; CHECK-P8-NEXT: li r8, 112
-; CHECK-P8-NEXT: lxvd2x vs4, r4, r9
-; CHECK-P8-NEXT: xxswapd vs0, vs0
-; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: lxvd2x vs5, r4, r7
+; CHECK-P8-NEXT: li r7, 16
; CHECK-P8-NEXT: lxvd2x vs6, r4, r8
-; CHECK-P8-NEXT: xxswapd vs2, vs2
-; CHECK-P8-NEXT: xxswapd vs3, vs3
-; CHECK-P8-NEXT: xxswapd vs4, vs4
-; CHECK-P8-NEXT: xxswapd vs5, vs5
-; CHECK-P8-NEXT: xxmrgld vs8, vs1, vs0
-; CHECK-P8-NEXT: xxswapd vs6, vs6
-; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT: xxswapd vs1, vs7
-; CHECK-P8-NEXT: xxmrgld vs7, vs4, vs3
-; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5
-; CHECK-P8-NEXT: xvcvdpsxws v2, vs8
+; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT: lxvd2x vs7, r4, r7
+; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT: xxmrgld vs2, vs2, vs3
+; CHECK-P8-NEXT: xxmrghd vs3, vs5, vs6
; CHECK-P8-NEXT: xvcvdpsxws v3, vs0
-; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5
-; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1
-; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs1
-; CHECK-P8-NEXT: xvcvdpsxws v4, vs7
-; CHECK-P8-NEXT: xvcvdpsxws v5, vs3
-; CHECK-P8-NEXT: xvcvdpsxws v0, vs4
+; CHECK-P8-NEXT: xxmrgld vs0, vs5, vs6
+; CHECK-P8-NEXT: xvcvdpsxws v4, vs1
+; CHECK-P8-NEXT: xxmrghd vs1, vs8, vs7
+; CHECK-P8-NEXT: xvcvdpsxws v5, vs2
+; CHECK-P8-NEXT: xxmrgld vs2, vs8, vs7
+; CHECK-P8-NEXT: xvcvdpsxws v2, vs4
+; CHECK-P8-NEXT: xvcvdpsxws v0, vs3
; CHECK-P8-NEXT: xvcvdpsxws v1, vs0
-; CHECK-P8-NEXT: xvcvdpsxws v6, vs5
-; CHECK-P8-NEXT: xvcvdpsxws v7, vs1
+; CHECK-P8-NEXT: xvcvdpsxws v6, vs1
+; CHECK-P8-NEXT: xvcvdpsxws v7, vs2
; CHECK-P8-NEXT: vmrgew v2, v3, v2
; CHECK-P8-NEXT: vmrgew v3, v5, v4
; CHECK-P8-NEXT: vmrgew v4, v1, v0
; CHECK-P8-NEXT: vmrgew v5, v7, v6
-; CHECK-P8-NEXT: stvx v2, r3, r7
-; CHECK-P8-NEXT: stvx v3, r3, r5
-; CHECK-P8-NEXT: stvx v4, r3, r6
-; CHECK-P8-NEXT: stvx v5, 0, r3
+; CHECK-P8-NEXT: stxvd2x v4, r3, r6
+; CHECK-P8-NEXT: stxvd2x v3, r3, r5
+; CHECK-P8-NEXT: stxvd2x v2, r3, r7
+; CHECK-P8-NEXT: stxvd2x v5, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: xvcvspuxws v3, v3
-; CHECK-P8-NEXT: xvcvspuxws v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: xvcvspuxws vs1, vs1
+; CHECK-P8-NEXT: xvcvspuxws vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v5, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
-; CHECK-P8-NEXT: lvx v4, r4, r7
-; CHECK-P8-NEXT: xvcvspuxws v5, v5
-; CHECK-P8-NEXT: xvcvspuxws v2, v2
-; CHECK-P8-NEXT: xvcvspuxws v3, v3
-; CHECK-P8-NEXT: xvcvspuxws v4, v4
-; CHECK-P8-NEXT: stvx v5, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, r3, r6
-; CHECK-P8-NEXT: stvx v4, r3, r7
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT: xvcvspuxws vs3, vs3
+; CHECK-P8-NEXT: xvcvspuxws vs0, vs0
+; CHECK-P8-NEXT: xvcvspuxws vs1, vs1
+; CHECK-P8-NEXT: xvcvspuxws vs2, vs2
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: xvcvspsxws v3, v3
-; CHECK-P8-NEXT: xvcvspsxws v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: xvcvspsxws vs1, vs1
+; CHECK-P8-NEXT: xvcvspsxws vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v5, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
-; CHECK-P8-NEXT: lvx v4, r4, r7
-; CHECK-P8-NEXT: xvcvspsxws v5, v5
-; CHECK-P8-NEXT: xvcvspsxws v2, v2
-; CHECK-P8-NEXT: xvcvspsxws v3, v3
-; CHECK-P8-NEXT: xvcvspsxws v4, v4
-; CHECK-P8-NEXT: stvx v5, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, r3, r6
-; CHECK-P8-NEXT: stvx v4, r3, r7
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT: xvcvspsxws vs3, vs3
+; CHECK-P8-NEXT: xvcvspsxws vs0, vs0
+; CHECK-P8-NEXT: xvcvspsxws vs1, vs1
+; CHECK-P8-NEXT: xvcvspsxws vs2, vs2
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vmrglh v4, v3, v2
; CHECK-P8-NEXT: vmrghh v2, v3, v2
-; CHECK-P8-NEXT: xvcvuxwsp v3, v4
-; CHECK-P8-NEXT: xvcvuxwsp v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r4
+; CHECK-P8-NEXT: xvcvuxwsp vs0, v4
+; CHECK-P8-NEXT: xvcvuxwsp vs1, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT: addis r6, r2, .LCPI3_1@toc@ha
-; CHECK-P8-NEXT: xxlxor v3, v3, v3
-; CHECK-P8-NEXT: lvx v4, 0, r4
+; CHECK-P8-NEXT: li r6, 16
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P8-NEXT: addi r6, r6, .LCPI3_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v0, 0, r6
-; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v5, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT: addi r4, r5, .LCPI3_1@toc@l
+; CHECK-P8-NEXT: xxswapd v2, vs1
+; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT: xxswapd v5, vs2
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: vperm v1, v3, v4, v2
-; CHECK-P8-NEXT: vperm v2, v3, v5, v2
-; CHECK-P8-NEXT: vperm v5, v3, v5, v0
-; CHECK-P8-NEXT: vperm v3, v3, v4, v0
-; CHECK-P8-NEXT: xvcvuxwsp v4, v1
-; CHECK-P8-NEXT: xvcvuxwsp v2, v2
-; CHECK-P8-NEXT: xvcvuxwsp v5, v5
-; CHECK-P8-NEXT: xvcvuxwsp v3, v3
-; CHECK-P8-NEXT: stvx v4, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r6
-; CHECK-P8-NEXT: stvx v5, r3, r4
-; CHECK-P8-NEXT: stvx v3, r3, r5
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: vperm v1, v4, v2, v3
+; CHECK-P8-NEXT: vperm v3, v4, v5, v3
+; CHECK-P8-NEXT: vperm v5, v4, v5, v0
+; CHECK-P8-NEXT: vperm v2, v4, v2, v0
+; CHECK-P8-NEXT: xvcvuxwsp vs0, v1
+; CHECK-P8-NEXT: xvcvuxwsp vs1, v3
+; CHECK-P8-NEXT: xvcvuxwsp vs2, v5
+; CHECK-P8-NEXT: xvcvuxwsp vs3, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: xxswapd vs2, vs2
+; CHECK-P8-NEXT: xxswapd vs3, vs3
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-NEXT: vslw v2, v2, v3
; CHECK-P8-NEXT: vsraw v4, v4, v3
; CHECK-P8-NEXT: vsraw v2, v2, v3
-; CHECK-P8-NEXT: xvcvsxwsp v3, v4
-; CHECK-P8-NEXT: xvcvsxwsp v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r4
+; CHECK-P8-NEXT: xvcvsxwsp vs0, v4
+; CHECK-P8-NEXT: xvcvsxwsp vs1, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: vspltisw v5, 8
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v3, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r5
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: vadduwm v5, v5, v5
+; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vmrglh v4, v2, v2
; CHECK-P8-NEXT: vmrglh v0, v3, v3
; CHECK-P8-NEXT: vmrghh v3, v3, v3
; CHECK-P8-NEXT: vmrghh v2, v2, v2
-; CHECK-P8-NEXT: vadduwm v5, v5, v5
; CHECK-P8-NEXT: vslw v4, v4, v5
; CHECK-P8-NEXT: vslw v0, v0, v5
; CHECK-P8-NEXT: vslw v3, v3, v5
; CHECK-P8-NEXT: vsraw v0, v0, v5
; CHECK-P8-NEXT: vsraw v3, v3, v5
; CHECK-P8-NEXT: vsraw v2, v2, v5
-; CHECK-P8-NEXT: xvcvsxwsp v4, v4
-; CHECK-P8-NEXT: xvcvsxwsp v5, v0
-; CHECK-P8-NEXT: xvcvsxwsp v3, v3
-; CHECK-P8-NEXT: xvcvsxwsp v2, v2
-; CHECK-P8-NEXT: stvx v4, 0, r3
-; CHECK-P8-NEXT: stvx v5, r3, r6
-; CHECK-P8-NEXT: stvx v3, r3, r4
-; CHECK-P8-NEXT: stvx v2, r3, r5
+; CHECK-P8-NEXT: xvcvsxwsp vs0, v4
+; CHECK-P8-NEXT: xvcvsxwsp vs1, v0
+; CHECK-P8-NEXT: xvcvsxwsp vs2, v3
+; CHECK-P8-NEXT: xvcvsxwsp vs3, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: xxswapd vs2, vs2
+; CHECK-P8-NEXT: xxswapd vs3, vs3
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxddp v2, v2
; CHECK-P8-NEXT: blr
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-P8-NEXT: mtvsrd v2, r4
+; CHECK-P8-NEXT: xxlxor v2, v2, v2
; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l
-; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r5
-; CHECK-P8-NEXT: lvx v5, 0, r4
+; CHECK-P8-NEXT: mtvsrd v4, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: vperm v3, v4, v2, v3
-; CHECK-P8-NEXT: vperm v2, v4, v2, v5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: addi r5, r6, .LCPI1_1@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v5, vs1
+; CHECK-P8-NEXT: vperm v3, v2, v4, v3
+; CHECK-P8-NEXT: vperm v2, v2, v4, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_2@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha
-; CHECK-P8-NEXT: lvx v5, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r4
-; CHECK-P8-NEXT: lvx v1, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v5, vs1
+; CHECK-P8-NEXT: xxswapd v0, vs2
+; CHECK-P8-NEXT: xxswapd v1, vs3
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: vperm v5, v4, v2, v5
; CHECK-P8-NEXT: vperm v0, v4, v2, v0
define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT: lvx v4, 0, r4
-; CHECK-P8-NEXT: xxlxor v3, v3, v3
-; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l
+; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P8-NEXT: lvx v5, 0, r6
-; CHECK-P8-NEXT: li r6, 16
-; CHECK-P8-NEXT: lvx v2, 0, r5
-; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-P8-NEXT: lvx v0, r4, r6
-; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha
-; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l
-; CHECK-P8-NEXT: lvx v1, 0, r5
-; CHECK-P8-NEXT: li r5, 96
-; CHECK-P8-NEXT: lvx v8, 0, r4
-; CHECK-P8-NEXT: vperm v6, v3, v4, v2
+; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: li r5, 16
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r6
+; CHECK-P8-NEXT: addis r6, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs3, r4, r5
+; CHECK-P8-NEXT: addi r4, r6, .LCPI3_1@toc@l
+; CHECK-P8-NEXT: addis r6, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT: addi r4, r6, .LCPI3_3@toc@l
+; CHECK-P8-NEXT: li r6, 96
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v5, vs2
; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: vperm v7, v3, v4, v5
-; CHECK-P8-NEXT: vperm v2, v3, v0, v2
-; CHECK-P8-NEXT: vperm v9, v3, v0, v1
-; CHECK-P8-NEXT: vperm v5, v3, v0, v5
-; CHECK-P8-NEXT: vperm v0, v3, v0, v8
-; CHECK-P8-NEXT: vperm v1, v3, v4, v1
-; CHECK-P8-NEXT: vperm v3, v3, v4, v8
-; CHECK-P8-NEXT: xvcvuxddp vs1, v2
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: xxswapd v1, vs4
+; CHECK-P8-NEXT: xxswapd v8, vs0
+; CHECK-P8-NEXT: vperm v6, v4, v2, v3
+; CHECK-P8-NEXT: vperm v7, v4, v2, v5
+; CHECK-P8-NEXT: vperm v3, v4, v0, v3
+; CHECK-P8-NEXT: vperm v9, v4, v0, v1
+; CHECK-P8-NEXT: vperm v5, v4, v0, v5
+; CHECK-P8-NEXT: vperm v0, v4, v0, v8
+; CHECK-P8-NEXT: vperm v1, v4, v2, v1
+; CHECK-P8-NEXT: vperm v2, v4, v2, v8
+; CHECK-P8-NEXT: xvcvuxddp vs1, v3
; CHECK-P8-NEXT: xvcvuxddp vs4, v9
; CHECK-P8-NEXT: xvcvuxddp vs2, v5
; CHECK-P8-NEXT: xvcvuxddp vs3, v0
; CHECK-P8-NEXT: xvcvuxddp vs0, v7
-; CHECK-P8-NEXT: xvcvuxddp vs5, v3
+; CHECK-P8-NEXT: xvcvuxddp vs5, v2
; CHECK-P8-NEXT: xvcvuxddp vs6, v6
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xvcvuxddp vs7, v1
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs5, vs5
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
; CHECK-P8-NEXT: li r4, 80
-; CHECK-P8-NEXT: li r5, 64
+; CHECK-P8-NEXT: li r6, 64
; CHECK-P8-NEXT: xxswapd vs2, vs7
; CHECK-P8-NEXT: xxswapd vs3, vs6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v4, 0, r5
+; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
; CHECK-P8-NEXT: xxswapd v4, vs0
define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
-; CHECK-P8-NEXT: addis r5, r2, .LCPI6_2@toc@ha
; CHECK-P8-NEXT: addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-P8-NEXT: addis r6, r2, .LCPI6_3@toc@ha
-; CHECK-P8-NEXT: addi r5, r5, .LCPI6_2@toc@l
+; CHECK-P8-NEXT: addis r5, r2, .LCPI6_2@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P8-NEXT: addi r6, r6, .LCPI6_3@toc@l
-; CHECK-P8-NEXT: lvx v4, 0, r5
+; CHECK-P8-NEXT: addi r5, r5, .LCPI6_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_4@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v5, 0, r6
-; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT: addi r4, r4, .LCPI6_3@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_4@toc@l
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r5
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: xxswapd v5, vs2
+; CHECK-P8-NEXT: xxswapd v0, vs3
; CHECK-P8-NEXT: vperm v3, v2, v2, v3
; CHECK-P8-NEXT: vperm v4, v2, v2, v4
; CHECK-P8-NEXT: vperm v5, v2, v2, v5
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI7_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-P8-NEXT: lvx v4, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: addi r5, r5, .LCPI7_0@toc@l
; CHECK-P8-NEXT: addi r6, r6, .LCPI7_2@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI7_3@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r6
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r6
; CHECK-P8-NEXT: addis r6, r2, .LCPI7_4@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI7_3@toc@l
; CHECK-P8-NEXT: addi r6, r6, .LCPI7_4@toc@l
-; CHECK-P8-NEXT: lvx v5, 0, r5
-; CHECK-P8-NEXT: lvx v0, 0, r6
-; CHECK-P8-NEXT: li r6, 16
-; CHECK-P8-NEXT: addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-P8-NEXT: lvx v7, r4, r6
-; CHECK-P8-NEXT: addi r5, r5, .LCPI7_1@toc@l
-; CHECK-P8-NEXT: vperm v1, v4, v4, v2
+; CHECK-P8-NEXT: xxswapd v2, vs1
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: li r5, 16
+; CHECK-P8-NEXT: lxvd2x vs4, 0, r6
+; CHECK-P8-NEXT: li r6, 96
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: xxswapd v4, vs2
+; CHECK-P8-NEXT: addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-P8-NEXT: addi r4, r4, .LCPI7_1@toc@l
+; CHECK-P8-NEXT: xxswapd v5, vs3
+; CHECK-P8-NEXT: xxswapd v0, vs4
+; CHECK-P8-NEXT: xxswapd v6, vs0
+; CHECK-P8-NEXT: vperm v1, v2, v2, v3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: vperm v6, v4, v4, v3
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT: li r5, 96
-; CHECK-P8-NEXT: vperm v8, v4, v4, v5
-; CHECK-P8-NEXT: vperm v4, v4, v4, v0
-; CHECK-P8-NEXT: vperm v5, v7, v7, v5
+; CHECK-P8-NEXT: vperm v7, v2, v2, v4
+; CHECK-P8-NEXT: vperm v8, v2, v2, v5
+; CHECK-P8-NEXT: vperm v2, v2, v2, v0
; CHECK-P8-NEXT: xxswapd v9, vs0
-; CHECK-P8-NEXT: vperm v0, v7, v7, v0
-; CHECK-P8-NEXT: vperm v2, v7, v7, v2
-; CHECK-P8-NEXT: vperm v3, v7, v7, v3
+; CHECK-P8-NEXT: vperm v5, v6, v6, v5
+; CHECK-P8-NEXT: vperm v0, v6, v6, v0
+; CHECK-P8-NEXT: vperm v3, v6, v6, v3
+; CHECK-P8-NEXT: vperm v4, v6, v6, v4
; CHECK-P8-NEXT: vsld v1, v1, v9
-; CHECK-P8-NEXT: vsld v6, v6, v9
+; CHECK-P8-NEXT: vsld v6, v7, v9
; CHECK-P8-NEXT: vsld v5, v5, v9
; CHECK-P8-NEXT: vsld v0, v0, v9
-; CHECK-P8-NEXT: vsld v2, v2, v9
; CHECK-P8-NEXT: vsld v3, v3, v9
+; CHECK-P8-NEXT: vsld v4, v4, v9
; CHECK-P8-NEXT: vsrad v5, v5, v9
; CHECK-P8-NEXT: vsrad v0, v0, v9
; CHECK-P8-NEXT: vsld v7, v8, v9
-; CHECK-P8-NEXT: vsld v4, v4, v9
-; CHECK-P8-NEXT: vsrad v2, v2, v9
+; CHECK-P8-NEXT: vsld v2, v2, v9
; CHECK-P8-NEXT: vsrad v3, v3, v9
+; CHECK-P8-NEXT: vsrad v4, v4, v9
; CHECK-P8-NEXT: xvcvsxddp vs2, v5
; CHECK-P8-NEXT: xvcvsxddp vs3, v0
; CHECK-P8-NEXT: vsrad v1, v1, v9
; CHECK-P8-NEXT: vsrad v6, v6, v9
; CHECK-P8-NEXT: vsrad v7, v7, v9
-; CHECK-P8-NEXT: vsrad v4, v4, v9
-; CHECK-P8-NEXT: xvcvsxddp vs1, v2
+; CHECK-P8-NEXT: vsrad v2, v2, v9
+; CHECK-P8-NEXT: xvcvsxddp vs1, v3
; CHECK-P8-NEXT: xxswapd vs2, vs2
-; CHECK-P8-NEXT: xvcvsxddp vs4, v3
+; CHECK-P8-NEXT: xvcvsxddp vs4, v4
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xvcvsxddp vs0, v7
-; CHECK-P8-NEXT: xvcvsxddp vs5, v4
+; CHECK-P8-NEXT: xvcvsxddp vs5, v2
; CHECK-P8-NEXT: xvcvsxddp vs6, v1
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
; CHECK-P8-NEXT: li r4, 80
; CHECK-P8-NEXT: xvcvsxddp vs7, v6
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT: li r5, 64
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT: li r6, 64
; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs4, vs4
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxswapd vs2, vs7
-; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v2, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw v5, v3, v3
; CHECK-P8-NEXT: xxmrghw v3, v3, v3
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: li r7, 32
; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
-; CHECK-P8-NEXT: lvx v0, r4, r7
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r7
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
-; CHECK-P8-NEXT: xxmrghw v5, v3, v3
+; CHECK-P8-NEXT: xxmrghw v0, v3, v3
+; CHECK-P8-NEXT: xxswapd v5, vs0
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: xxmrglw v3, v3, v3
; CHECK-P8-NEXT: xvcvuxwdp vs0, v4
-; CHECK-P8-NEXT: lvx v4, 0, r4
-; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: xvcvuxwdp vs1, v5
-; CHECK-P8-NEXT: xxmrghw v5, v0, v0
-; CHECK-P8-NEXT: xxmrglw v0, v0, v0
+; CHECK-P8-NEXT: xxswapd v4, vs1
+; CHECK-P8-NEXT: xvcvuxwdp vs1, v0
+; CHECK-P8-NEXT: xxmrghw v0, v5, v5
+; CHECK-P8-NEXT: xxmrglw v5, v5, v5
; CHECK-P8-NEXT: xvcvuxwdp vs2, v2
; CHECK-P8-NEXT: xxmrglw v2, v4, v4
; CHECK-P8-NEXT: xvcvuxwdp vs3, v3
; CHECK-P8-NEXT: xxmrghw v3, v4, v4
-; CHECK-P8-NEXT: xvcvuxwdp vs4, v5
-; CHECK-P8-NEXT: xvcvuxwdp vs5, v0
+; CHECK-P8-NEXT: xvcvuxwdp vs4, v0
+; CHECK-P8-NEXT: xvcvuxwdp vs5, v5
; CHECK-P8-NEXT: xvcvuxwdp vs6, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xvcvuxwdp vs7, v3
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: li r6, 32
-; CHECK-P8-NEXT: lvx v2, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxmrglw v5, v3, v3
; CHECK-P8-NEXT: xxmrghw v3, v3, v3
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
; CHECK-P8-NEXT: li r6, 48
; CHECK-P8-NEXT: li r7, 32
; CHECK-P8-NEXT: li r8, 64
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
-; CHECK-P8-NEXT: lvx v0, r4, r7
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v3, vs1
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r7
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xxmrglw v4, v2, v2
-; CHECK-P8-NEXT: xxmrghw v5, v3, v3
+; CHECK-P8-NEXT: xxmrghw v0, v3, v3
+; CHECK-P8-NEXT: xxswapd v5, vs0
; CHECK-P8-NEXT: xxmrghw v2, v2, v2
; CHECK-P8-NEXT: xxmrglw v3, v3, v3
; CHECK-P8-NEXT: xvcvsxwdp vs0, v4
-; CHECK-P8-NEXT: lvx v4, 0, r4
-; CHECK-P8-NEXT: li r4, 112
-; CHECK-P8-NEXT: xvcvsxwdp vs1, v5
-; CHECK-P8-NEXT: xxmrghw v5, v0, v0
-; CHECK-P8-NEXT: xxmrglw v0, v0, v0
+; CHECK-P8-NEXT: xxswapd v4, vs1
+; CHECK-P8-NEXT: xvcvsxwdp vs1, v0
+; CHECK-P8-NEXT: xxmrghw v0, v5, v5
+; CHECK-P8-NEXT: xxmrglw v5, v5, v5
; CHECK-P8-NEXT: xvcvsxwdp vs2, v2
; CHECK-P8-NEXT: xxmrglw v2, v4, v4
; CHECK-P8-NEXT: xvcvsxwdp vs3, v3
; CHECK-P8-NEXT: xxmrghw v3, v4, v4
-; CHECK-P8-NEXT: xvcvsxwdp vs4, v5
-; CHECK-P8-NEXT: xvcvsxwdp vs5, v0
+; CHECK-P8-NEXT: xvcvsxwdp vs4, v0
+; CHECK-P8-NEXT: xvcvsxwdp vs5, v5
; CHECK-P8-NEXT: xvcvsxwdp vs6, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xvcvsxwdp vs7, v3
; CHECK-P8-NEXT: xxsldwi v4, vs2, vs2, 3
; CHECK-P8-NEXT: vpkudum v2, v3, v2
; CHECK-P8-NEXT: vpkudum v3, v4, v5
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, 0, r3
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P8-NEXT: xxsldwi v7, vs7, vs7, 3
; CHECK-P8-NEXT: vpkudum v4, v1, v0
; CHECK-P8-NEXT: vpkudum v5, v6, v7
-; CHECK-P8-NEXT: stvx v2, r3, r7
-; CHECK-P8-NEXT: stvx v3, r3, r5
-; CHECK-P8-NEXT: stvx v4, r3, r6
-; CHECK-P8-NEXT: stvx v5, 0, r3
+; CHECK-P8-NEXT: xxswapd vs2, v2
+; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: xxswapd vs0, v4
+; CHECK-P8-NEXT: xxswapd vs3, v5
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-NEXT: xxsldwi v4, vs2, vs2, 3
; CHECK-P8-NEXT: vpkudum v2, v3, v2
; CHECK-P8-NEXT: vpkudum v3, v4, v5
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, 0, r3
+; CHECK-P8-NEXT: xxswapd vs0, v2
+; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P8-NEXT: xxsldwi v7, vs7, vs7, 3
; CHECK-P8-NEXT: vpkudum v4, v1, v0
; CHECK-P8-NEXT: vpkudum v5, v6, v7
-; CHECK-P8-NEXT: stvx v2, r3, r7
-; CHECK-P8-NEXT: stvx v3, r3, r5
-; CHECK-P8-NEXT: stvx v4, r3, r6
-; CHECK-P8-NEXT: stvx v5, 0, r3
+; CHECK-P8-NEXT: xxswapd vs2, v2
+; CHECK-P8-NEXT: xxswapd vs1, v3
+; CHECK-P8-NEXT: xxswapd vs0, v4
+; CHECK-P8-NEXT: xxswapd vs3, v5
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxwsp v2, v2
; CHECK-P8-NEXT: blr
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI2_1@toc@ha
-; CHECK-P8-NEXT: mtvsrd v2, r4
+; CHECK-P8-NEXT: xxlxor v2, v2, v2
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: addi r4, r6, .LCPI2_1@toc@l
-; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r5
-; CHECK-P8-NEXT: lvx v5, 0, r4
+; CHECK-P8-NEXT: mtvsrd v4, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: vperm v3, v4, v2, v3
-; CHECK-P8-NEXT: vperm v2, v4, v2, v5
-; CHECK-P8-NEXT: xvcvuxwsp v3, v3
-; CHECK-P8-NEXT: xvcvuxwsp v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: addi r5, r6, .LCPI2_1@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v5, vs1
+; CHECK-P8-NEXT: vperm v3, v2, v4, v3
+; CHECK-P8-NEXT: vperm v2, v2, v4, v5
+; CHECK-P8-NEXT: xvcvuxwsp vs0, v3
+; CHECK-P8-NEXT: xvcvuxwsp vs1, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_2@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha
-; CHECK-P8-NEXT: lvx v5, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r4
-; CHECK-P8-NEXT: lvx v1, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: li r5, 32
-; CHECK-P8-NEXT: vperm v5, v4, v2, v5
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v5, vs1
+; CHECK-P8-NEXT: xxswapd v0, vs2
+; CHECK-P8-NEXT: xxswapd v1, vs3
; CHECK-P8-NEXT: vperm v3, v4, v2, v3
+; CHECK-P8-NEXT: vperm v5, v4, v2, v5
; CHECK-P8-NEXT: vperm v0, v4, v2, v0
; CHECK-P8-NEXT: vperm v2, v4, v2, v1
-; CHECK-P8-NEXT: xvcvuxwsp v4, v5
-; CHECK-P8-NEXT: xvcvuxwsp v3, v3
-; CHECK-P8-NEXT: xvcvuxwsp v5, v0
-; CHECK-P8-NEXT: xvcvuxwsp v2, v2
-; CHECK-P8-NEXT: stvx v4, r3, r5
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v5, r3, r4
+; CHECK-P8-NEXT: xvcvuxwsp vs0, v3
+; CHECK-P8-NEXT: xvcvuxwsp vs1, v5
+; CHECK-P8-NEXT: xvcvuxwsp vs2, v0
+; CHECK-P8-NEXT: xvcvuxwsp vs3, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: xxswapd vs2, vs2
+; CHECK-P8-NEXT: xxswapd vs3, vs3
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: stvx v2, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: mtvsrwz v3, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vspltisw v3, 12
; CHECK-P8-NEXT: vadduwm v3, v3, v3
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: mtvsrd v3, r4
-; CHECK-P8-NEXT: vspltisw v5, 12
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI6_1@toc@l
-; CHECK-P8-NEXT: lvx v4, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
-; CHECK-P8-NEXT: vadduwm v4, v5, v5
+; CHECK-P8-NEXT: vspltisw v4, 12
+; CHECK-P8-NEXT: vadduwm v4, v4, v4
; CHECK-P8-NEXT: vslw v2, v2, v4
; CHECK-P8-NEXT: vslw v3, v3, v4
; CHECK-P8-NEXT: vsraw v2, v2, v4
; CHECK-P8-NEXT: vsraw v3, v3, v4
-; CHECK-P8-NEXT: xvcvsxwsp v2, v2
-; CHECK-P8-NEXT: xvcvsxwsp v3, v3
-; CHECK-P8-NEXT: stvx v2, 0, r3
-; CHECK-P8-NEXT: stvx v3, r3, r4
+; CHECK-P8-NEXT: xvcvsxwsp vs0, v2
+; CHECK-P8-NEXT: xvcvsxwsp vs1, v3
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P8-NEXT: vspltisw v1, 12
; CHECK-P8-NEXT: addi r4, r4, .LCPI7_0@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI7_2@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI7_3@toc@ha
-; CHECK-P8-NEXT: lvx v4, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI7_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI7_3@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI7_1@toc@l
-; CHECK-P8-NEXT: lvx v5, 0, r4
-; CHECK-P8-NEXT: lvx v0, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v4, vs1
+; CHECK-P8-NEXT: xxswapd v5, vs2
+; CHECK-P8-NEXT: xxswapd v0, vs3
; CHECK-P8-NEXT: vperm v3, v2, v2, v3
; CHECK-P8-NEXT: vperm v4, v2, v2, v4
; CHECK-P8-NEXT: vperm v5, v2, v2, v5
; CHECK-P8-NEXT: vsraw v4, v4, v0
; CHECK-P8-NEXT: vsraw v5, v5, v0
; CHECK-P8-NEXT: vsraw v2, v2, v0
-; CHECK-P8-NEXT: xvcvsxwsp v3, v3
-; CHECK-P8-NEXT: xvcvsxwsp v4, v4
-; CHECK-P8-NEXT: xvcvsxwsp v5, v5
-; CHECK-P8-NEXT: xvcvsxwsp v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v4, r3, r5
-; CHECK-P8-NEXT: stvx v5, r3, r4
+; CHECK-P8-NEXT: xvcvsxwsp vs0, v3
+; CHECK-P8-NEXT: xvcvsxwsp vs1, v4
+; CHECK-P8-NEXT: xvcvsxwsp vs2, v5
+; CHECK-P8-NEXT: xvcvsxwsp vs3, v2
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: xxswapd vs2, vs2
+; CHECK-P8-NEXT: xxswapd vs3, vs3
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: stvx v2, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P8-NEXT: mtvsrwz v2, r3
; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-P8-NEXT: xvcvuxddp v2, v2
; CHECK-P8-NEXT: blr
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-P8-NEXT: mtvsrwz v2, r4
+; CHECK-P8-NEXT: xxlxor v2, v2, v2
; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l
-; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l
-; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r5
-; CHECK-P8-NEXT: lvx v5, 0, r4
+; CHECK-P8-NEXT: mtvsrwz v4, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: vperm v3, v4, v2, v3
-; CHECK-P8-NEXT: vperm v2, v4, v2, v5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: addi r5, r6, .LCPI1_1@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v5, vs1
+; CHECK-P8-NEXT: vperm v3, v2, v4, v3
+; CHECK-P8-NEXT: vperm v2, v2, v4, v5
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI2_2@toc@ha
-; CHECK-P8-NEXT: mtvsrd v2, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha
+; CHECK-P8-NEXT: xxlxor v2, v2, v2
; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l
-; CHECK-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-P8-NEXT: lvx v3, 0, r5
-; CHECK-P8-NEXT: addi r5, r6, .LCPI2_2@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r4
+; CHECK-P8-NEXT: addi r6, r6, .LCPI2_2@toc@l
+; CHECK-P8-NEXT: mtvsrd v4, r4
; CHECK-P8-NEXT: li r4, 48
-; CHECK-P8-NEXT: lvx v5, 0, r5
-; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha
-; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l
-; CHECK-P8-NEXT: lvx v1, 0, r5
-; CHECK-P8-NEXT: vperm v0, v4, v2, v0
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI2_3@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r6
+; CHECK-P8-NEXT: addis r6, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT: addi r5, r5, .LCPI2_3@toc@l
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT: addi r5, r6, .LCPI2_1@toc@l
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v5, vs1
; CHECK-P8-NEXT: li r5, 32
-; CHECK-P8-NEXT: vperm v3, v4, v2, v3
-; CHECK-P8-NEXT: vperm v5, v4, v2, v5
-; CHECK-P8-NEXT: vperm v2, v4, v2, v1
-; CHECK-P8-NEXT: xvcvuxddp vs2, v0
+; CHECK-P8-NEXT: xxswapd v0, vs2
+; CHECK-P8-NEXT: xxswapd v1, vs3
+; CHECK-P8-NEXT: vperm v3, v2, v4, v3
+; CHECK-P8-NEXT: vperm v5, v2, v4, v5
+; CHECK-P8-NEXT: vperm v0, v2, v4, v0
+; CHECK-P8-NEXT: vperm v2, v2, v4, v1
; CHECK-P8-NEXT: xvcvuxddp vs0, v3
; CHECK-P8-NEXT: xvcvuxddp vs1, v5
+; CHECK-P8-NEXT: xvcvuxddp vs2, v0
; CHECK-P8-NEXT: xvcvuxddp vs3, v2
-; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: xxswapd vs1, vs1
+; CHECK-P8-NEXT: xxswapd vs2, vs2
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: stxvd2x vs2, r3, r4
; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xxlxor v4, v4, v4
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI3_2@toc@ha
-; CHECK-P8-NEXT: lvx v5, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_4@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_2@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_4@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI3_6@toc@ha
-; CHECK-P8-NEXT: lvx v1, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_7@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_6@toc@l
+; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_7@toc@l
-; CHECK-P8-NEXT: vperm v3, v4, v2, v3
-; CHECK-P8-NEXT: lvx v6, 0, r4
+; CHECK-P8-NEXT: xxswapd v5, vs1
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addis r4, r2, .LCPI3_5@toc@ha
-; CHECK-P8-NEXT: lvx v7, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI3_3@toc@ha
-; CHECK-P8-NEXT: vperm v5, v4, v2, v5
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_5@toc@l
+; CHECK-P8-NEXT: xxswapd v0, vs2
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_3@toc@l
-; CHECK-P8-NEXT: vperm v0, v4, v2, v0
-; CHECK-P8-NEXT: lvx v8, 0, r4
-; CHECK-P8-NEXT: lvx v9, 0, r5
-; CHECK-P8-NEXT: vperm v1, v4, v2, v1
+; CHECK-P8-NEXT: xxswapd v1, vs3
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: vperm v3, v4, v2, v3
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: li r5, 96
+; CHECK-P8-NEXT: xxswapd v6, vs0
+; CHECK-P8-NEXT: xxswapd v7, vs1
+; CHECK-P8-NEXT: vperm v5, v4, v2, v5
+; CHECK-P8-NEXT: xxswapd v8, vs2
+; CHECK-P8-NEXT: xxswapd v9, vs3
; CHECK-P8-NEXT: vperm v6, v4, v2, v6
; CHECK-P8-NEXT: vperm v7, v4, v2, v7
; CHECK-P8-NEXT: vperm v8, v4, v2, v8
+; CHECK-P8-NEXT: vperm v0, v4, v2, v0
+; CHECK-P8-NEXT: vperm v1, v4, v2, v1
; CHECK-P8-NEXT: vperm v2, v4, v2, v9
-; CHECK-P8-NEXT: xvcvuxddp vs0, v0
-; CHECK-P8-NEXT: xvcvuxddp vs1, v1
; CHECK-P8-NEXT: xvcvuxddp vs2, v6
; CHECK-P8-NEXT: xvcvuxddp vs3, v7
; CHECK-P8-NEXT: xvcvuxddp vs4, v8
+; CHECK-P8-NEXT: xvcvuxddp vs0, v0
+; CHECK-P8-NEXT: xvcvuxddp vs1, v1
; CHECK-P8-NEXT: xvcvuxddp vs5, v2
; CHECK-P8-NEXT: xvcvuxddp vs6, v3
-; CHECK-P8-NEXT: xxswapd vs0, vs0
-; CHECK-P8-NEXT: xvcvuxddp vs7, v5
-; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: xxswapd vs2, vs2
+; CHECK-P8-NEXT: xvcvuxddp vs7, v5
; CHECK-P8-NEXT: xxswapd vs3, vs3
; CHECK-P8-NEXT: xxswapd vs4, vs4
-; CHECK-P8-NEXT: xxswapd vs5, vs5
+; CHECK-P8-NEXT: xxswapd vs0, vs0
+; CHECK-P8-NEXT: xxswapd vs1, vs1
; CHECK-P8-NEXT: stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: li r4, 80
+; CHECK-P8-NEXT: xxswapd vs5, vs5
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r5
; CHECK-P8-NEXT: li r5, 64
-; CHECK-P8-NEXT: xxswapd vs2, vs7
; CHECK-P8-NEXT: xxswapd vs3, vs6
; CHECK-P8-NEXT: stxvd2x vs4, r3, r4
; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd vs2, vs7
; CHECK-P8-NEXT: stxvd2x vs1, r3, r5
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: stxvd2x vs5, r3, r4
; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: li r4, 16
-; CHECK-P8-NEXT: lvx v4, 0, r5
+; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
; CHECK-P8-NEXT: xxswapd v4, vs0
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_2@toc@ha
; CHECK-P8-NEXT: mtvsrd v3, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
; CHECK-P8-NEXT: addi r6, r6, .LCPI6_2@toc@l
-; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_3@toc@ha
-; CHECK-P8-NEXT: lvx v4, 0, r6
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r6
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_4@toc@ha
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_3@toc@l
-; CHECK-P8-NEXT: lvx v5, 0, r5
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r5
; CHECK-P8-NEXT: addi r5, r6, .LCPI6_4@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r5
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: xxswapd v2, vs0
+; CHECK-P8-NEXT: xxswapd v4, vs1
+; CHECK-P8-NEXT: addis r5, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT: addi r4, r5, .LCPI6_1@toc@l
; CHECK-P8-NEXT: li r5, 32
+; CHECK-P8-NEXT: xxswapd v5, vs2
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: li r4, 48
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v4, v3, v3, v4
; CHECK-P8-NEXT: vperm v5, v3, v3, v5
; CHECK-P8-NEXT: vperm v3, v3, v3, v0
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r4, r2, .LCPI7_0@toc@ha
; CHECK-P8-NEXT: addis r5, r2, .LCPI7_2@toc@ha
-; CHECK-P8-NEXT: addis r6, r2, .LCPI7_3@toc@ha
; CHECK-P8-NEXT: addi r4, r4, .LCPI7_0@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI7_2@toc@l
-; CHECK-P8-NEXT: addi r6, r6, .LCPI7_3@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI7_4@toc@ha
-; CHECK-P8-NEXT: lvx v4, 0, r5
-; CHECK-P8-NEXT: addis r5, r2, .LCPI7_5@toc@ha
-; CHECK-P8-NEXT: lvx v5, 0, r6
-; CHECK-P8-NEXT: addis r6, r2, .LCPI7_1@toc@ha
-; CHECK-P8-NEXT: addi r4, r4, .LCPI7_4@toc@l
-; CHECK-P8-NEXT: addi r5, r5, .LCPI7_5@toc@l
-; CHECK-P8-NEXT: addi r6, r6, .LCPI7_1@toc@l
-; CHECK-P8-NEXT: lvx v0, 0, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI7_6@toc@ha
-; CHECK-P8-NEXT: lvx v1, 0, r5
-; CHECK-P8-NEXT: addis r5, r2, .LCPI7_7@toc@ha
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r6
-; CHECK-P8-NEXT: addi r4, r4, .LCPI7_6@toc@l
-; CHECK-P8-NEXT: addi r5, r5, .LCPI7_7@toc@l
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI7_4@toc@ha
+; CHECK-P8-NEXT: addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P8-NEXT: addi r5, r5, .LCPI7_4@toc@l
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI7_5@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI7_6@toc@ha
+; CHECK-P8-NEXT: addi r4, r4, .LCPI7_5@toc@l
+; CHECK-P8-NEXT: addi r5, r5, .LCPI7_6@toc@l
+; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: xxswapd v4, vs1
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI7_7@toc@ha
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT: addis r5, r2, .LCPI7_8@toc@ha
+; CHECK-P8-NEXT: addi r4, r4, .LCPI7_7@toc@l
+; CHECK-P8-NEXT: addi r5, r5, .LCPI7_8@toc@l
+; CHECK-P8-NEXT: xxswapd v5, vs2
+; CHECK-P8-NEXT: xxswapd v0, vs3
+; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT: addis r4, r2, .LCPI7_1@toc@ha
; CHECK-P8-NEXT: vperm v3, v2, v2, v3
-; CHECK-P8-NEXT: lvx v6, 0, r4
-; CHECK-P8-NEXT: addis r4, r2, .LCPI7_8@toc@ha
-; CHECK-P8-NEXT: lvx v7, 0, r5
-; CHECK-P8-NEXT: vperm v4, v2, v2, v4
; CHECK-P8-NEXT: li r5, 96
-; CHECK-P8-NEXT: addi r4, r4, .LCPI7_8@toc@l
+; CHECK-P8-NEXT: xxswapd v1, vs0
+; CHECK-P8-NEXT: xxswapd v6, vs1
+; CHECK-P8-NEXT: addi r4, r4, .LCPI7_1@toc@l
+; CHECK-P8-NEXT: vperm v4, v2, v2, v4
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: li r4, 112
+; CHECK-P8-NEXT: xxswapd v7, vs2
+; CHECK-P8-NEXT: xxswapd v8, vs3
; CHECK-P8-NEXT: vperm v5, v2, v2, v5
-; CHECK-P8-NEXT: xxswapd v9, vs0
-; CHECK-P8-NEXT: lvx v8, 0, r4
; CHECK-P8-NEXT: vperm v0, v2, v2, v0
-; CHECK-P8-NEXT: li r4, 112
+; CHECK-P8-NEXT: xxswapd v9, vs0
; CHECK-P8-NEXT: vperm v1, v2, v2, v1
; CHECK-P8-NEXT: vperm v6, v2, v2, v6
; CHECK-P8-NEXT: vperm v7, v2, v2, v7
; CHECK-P8-LABEL: test8elt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: xvcvuxwsp v3, v3
-; CHECK-P8-NEXT: xvcvuxwsp v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: xvcvuxwsp vs1, vs1
+; CHECK-P8-NEXT: xvcvuxwsp vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v5, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
-; CHECK-P8-NEXT: lvx v4, r4, r7
-; CHECK-P8-NEXT: xvcvuxwsp v5, v5
-; CHECK-P8-NEXT: xvcvuxwsp v2, v2
-; CHECK-P8-NEXT: xvcvuxwsp v3, v3
-; CHECK-P8-NEXT: xvcvuxwsp v4, v4
-; CHECK-P8-NEXT: stvx v5, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, r3, r6
-; CHECK-P8-NEXT: stvx v4, r3, r7
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT: xvcvuxwsp vs3, vs3
+; CHECK-P8-NEXT: xvcvuxwsp vs0, vs0
+; CHECK-P8-NEXT: xvcvuxwsp vs1, vs1
+; CHECK-P8-NEXT: xvcvuxwsp vs2, vs2
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: li r5, 16
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: xvcvsxwsp v3, v3
-; CHECK-P8-NEXT: xvcvsxwsp v2, v2
-; CHECK-P8-NEXT: stvx v3, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
+; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: xvcvsxwsp vs1, vs1
+; CHECK-P8-NEXT: xvcvsxwsp vs0, vs0
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs1, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P8-NEXT: li r5, 16
; CHECK-P8-NEXT: li r6, 32
; CHECK-P8-NEXT: li r7, 48
-; CHECK-P8-NEXT: lvx v5, 0, r4
-; CHECK-P8-NEXT: lvx v2, r4, r5
-; CHECK-P8-NEXT: lvx v3, r4, r6
-; CHECK-P8-NEXT: lvx v4, r4, r7
-; CHECK-P8-NEXT: xvcvsxwsp v5, v5
-; CHECK-P8-NEXT: xvcvsxwsp v2, v2
-; CHECK-P8-NEXT: xvcvsxwsp v3, v3
-; CHECK-P8-NEXT: xvcvsxwsp v4, v4
-; CHECK-P8-NEXT: stvx v5, 0, r3
-; CHECK-P8-NEXT: stvx v2, r3, r5
-; CHECK-P8-NEXT: stvx v3, r3, r6
-; CHECK-P8-NEXT: stvx v4, r3, r7
+; CHECK-P8-NEXT: lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT: lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT: lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT: lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT: xvcvsxwsp vs3, vs3
+; CHECK-P8-NEXT: xvcvsxwsp vs0, vs0
+; CHECK-P8-NEXT: xvcvsxwsp vs1, vs1
+; CHECK-P8-NEXT: xvcvsxwsp vs2, vs2
+; CHECK-P8-NEXT: stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT: stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT: stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT: stxvd2x vs3, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: test16elt_signed:
define void @VPKUDUM_unary(<2 x i64>* %A) {
; CHECK-LABEL: VPKUDUM_unary:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx 2, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 34, 0
; CHECK-NEXT: vpkudum 2, 2, 2
-; CHECK-NEXT: stvx 2, 0, 3
+; CHECK-NEXT: xxswapd 0, 34
+; CHECK-NEXT: stxvd2x 0, 0, 3
; CHECK-NEXT: blr
entry:
%tmp = load <2 x i64>, <2 x i64>* %A
define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
; CHECK-LABEL: VPKUDUM:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx 2, 0, 3
-; CHECK-NEXT: lvx 3, 0, 4
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 1, 0, 4
+; CHECK-NEXT: xxswapd 34, 0
+; CHECK-NEXT: xxswapd 35, 1
; CHECK-NEXT: vpkudum 2, 3, 2
-; CHECK-NEXT: stvx 2, 0, 3
+; CHECK-NEXT: xxswapd 0, 34
+; CHECK-NEXT: stxvd2x 0, 0, 3
; CHECK-NEXT: blr
entry:
%tmp = load <2 x i64>, <2 x i64>* %A
; PC64LE-NEXT: addi 3, 3, .LCPI2_0@toc@l
; PC64LE-NEXT: xxsldwi 4, 35, 35, 1
; PC64LE-NEXT: xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI7_0@toc@l
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: addi 3, 3, .LCPI12_0@toc@l
; PC64LE-NEXT: xxsldwi 4, 35, 35, 1
; PC64LE-NEXT: xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
; PC64LE-NEXT: addi 3, 3, .LCPI17_0@toc@l
; PC64LE-NEXT: xxsldwi 4, 35, 35, 1
; PC64LE-NEXT: xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
; PC64LE-NEXT: addi 3, 3, .LCPI22_0@toc@l
; PC64LE-NEXT: xxsldwi 4, 35, 35, 1
; PC64LE-NEXT: xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
; PC64LE-NEXT: addis 3, 2, .LCPI27_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI27_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: xssqrtsp 0, 0
; PC64LE-NEXT: xssqrtsp 1, 1
; PC64LE-NEXT: xssqrtsp 2, 2
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI32_0@toc@l
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI37_0@toc@l
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI42_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI47_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI52_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI57_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI62_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI67_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI72_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: addis 3, 2, .LCPI77_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI77_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: xsrdpic 0, 0
; PC64LE-NEXT: xsrdpic 1, 1
; PC64LE-NEXT: xsrdpic 2, 2
; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI82_0@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI87_0@toc@l
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: addi 3, 3, .LCPI92_0@toc@l
; PC64LE-NEXT: xscvdpspn 34, 31
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: li 3, 64
; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 0, 1
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l
-; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
; PC64LE-NEXT: xscvdpsxws 1, 1
; PC64LE-NEXT: xscvdpsxws 2, 2
; PC64LE-NEXT: mffprwz 4, 0
+; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mffprwz 5, 1
-; PC64LE-NEXT: mtfprwz 0, 4
-; PC64LE-NEXT: mtfprwz 1, 5
-; PC64LE-NEXT: mffprwz 4, 2
-; PC64LE-NEXT: xxmrghw 35, 1, 0
-; PC64LE-NEXT: mtvsrwz 36, 4
-; PC64LE-NEXT: vperm 2, 4, 3, 2
+; PC64LE-NEXT: mtfprwz 1, 4
+; PC64LE-NEXT: mtfprwz 3, 5
+; PC64LE-NEXT: xxswapd 35, 0
+; PC64LE-NEXT: mffprwz 3, 2
+; PC64LE-NEXT: xxmrghw 34, 3, 1
+; PC64LE-NEXT: mtvsrwz 36, 3
+; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; PC64LE-NEXT: xscvdpsxws 1, 2
; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
; PC64LE-NEXT: xscvdpsxws 2, 3
-; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: mffprwz 4, 0
+; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mffprwz 5, 1
-; PC64LE-NEXT: mtfprwz 0, 4
-; PC64LE-NEXT: mtfprwz 1, 5
-; PC64LE-NEXT: mffprwz 4, 2
-; PC64LE-NEXT: xxmrghw 35, 1, 0
-; PC64LE-NEXT: mtvsrwz 36, 4
-; PC64LE-NEXT: vperm 2, 4, 3, 2
+; PC64LE-NEXT: mtfprwz 1, 4
+; PC64LE-NEXT: mtfprwz 3, 5
+; PC64LE-NEXT: xxswapd 35, 0
+; PC64LE-NEXT: mffprwz 3, 2
+; PC64LE-NEXT: xxmrghw 34, 3, 1
+; PC64LE-NEXT: mtvsrwz 36, 3
+; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
-; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
; PC64LE-NEXT: xscvdpuxws 1, 1
; PC64LE-NEXT: xscvdpuxws 2, 2
; PC64LE-NEXT: mffprwz 4, 0
+; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mffprwz 5, 1
-; PC64LE-NEXT: mtfprwz 0, 4
-; PC64LE-NEXT: mtfprwz 1, 5
-; PC64LE-NEXT: mffprwz 4, 2
-; PC64LE-NEXT: xxmrghw 35, 1, 0
-; PC64LE-NEXT: mtvsrwz 36, 4
-; PC64LE-NEXT: vperm 2, 4, 3, 2
+; PC64LE-NEXT: mtfprwz 1, 4
+; PC64LE-NEXT: mtfprwz 3, 5
+; PC64LE-NEXT: xxswapd 35, 0
+; PC64LE-NEXT: mffprwz 3, 2
+; PC64LE-NEXT: xxmrghw 34, 3, 1
+; PC64LE-NEXT: mtvsrwz 36, 3
+; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; PC64LE-NEXT: xscvdpuxws 1, 2
; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l
; PC64LE-NEXT: xscvdpuxws 2, 3
-; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: mffprwz 4, 0
+; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: mffprwz 5, 1
-; PC64LE-NEXT: mtfprwz 0, 4
-; PC64LE-NEXT: mtfprwz 1, 5
-; PC64LE-NEXT: mffprwz 4, 2
-; PC64LE-NEXT: xxmrghw 35, 1, 0
-; PC64LE-NEXT: mtvsrwz 36, 4
-; PC64LE-NEXT: vperm 2, 4, 3, 2
+; PC64LE-NEXT: mtfprwz 1, 4
+; PC64LE-NEXT: mtfprwz 3, 5
+; PC64LE-NEXT: xxswapd 35, 0
+; PC64LE-NEXT: mffprwz 3, 2
+; PC64LE-NEXT: xxmrghw 34, 3, 1
+; PC64LE-NEXT: mtvsrwz 36, 3
+; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; PC64LE-NEXT: xsrsp 1, 2
; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l
; PC64LE-NEXT: xsrsp 2, 3
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: xxmrghw 35, 1, 0
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
; PC64LE-NEXT: addis 3, 2, .LCPI137_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI137_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: xsrdpip 0, 0
; PC64LE-NEXT: xsrdpip 1, 1
; PC64LE-NEXT: xsrdpip 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI141_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI141_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: xsrdpim 0, 0
; PC64LE-NEXT: xsrdpim 1, 1
; PC64LE-NEXT: xsrdpim 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI145_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI145_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: xsrdpi 0, 0
; PC64LE-NEXT: xsrdpi 1, 1
; PC64LE-NEXT: xsrdpi 2, 2
; PC64LE-NEXT: addis 3, 2, .LCPI149_0@toc@ha
; PC64LE-NEXT: xxsldwi 2, 34, 34, 1
; PC64LE-NEXT: addi 3, 3, .LCPI149_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: xscvspdpn 0, 0
; PC64LE-NEXT: xscvspdpn 1, 1
; PC64LE-NEXT: xscvspdpn 2, 2
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: xsrdpiz 0, 0
; PC64LE-NEXT: xsrdpiz 1, 1
; PC64LE-NEXT: xsrdpiz 2, 2
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI155_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI155_0@toc@l
-; PC64LE-NEXT: lvx 3, 0, 3
+; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI155_1@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI155_1@toc@l
+; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: vperm 2, 2, 2, 3
; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
; PC64LE-NEXT: addis 3, 2, .LCPI161_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI161_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: mffprwz 4, 0
; PC64LE-NEXT: mffprwz 5, 1
; PC64LE-NEXT: mtfprwa 0, 4
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: mtfprwa 1, 5
; PC64LE-NEXT: mfvsrwz 4, 34
; PC64LE-NEXT: xscvsxdsp 0, 0
; PC64LE-NEXT: mtfprd 1, 4
; PC64LE-NEXT: addi 3, 6, .LCPI163_0@toc@l
; PC64LE-NEXT: xscvsxdsp 0, 0
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvsxdsp 1, 1
; PC64LE-NEXT: mtfprd 2, 5
; PC64LE-NEXT: xscvsxdsp 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
;
; PC64LE-NEXT: addis 3, 2, .LCPI173_0@toc@ha
; PC64LE-NEXT: xxlxor 36, 36, 36
; PC64LE-NEXT: addi 3, 3, .LCPI173_0@toc@l
-; PC64LE-NEXT: lvx 3, 0, 3
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: xxswapd 35, 0
; PC64LE-NEXT: vperm 2, 4, 2, 3
; PC64LE-NEXT: xvcvuxddp 34, 34
; PC64LE-NEXT: blr
; PC64LE-NEXT: xxsldwi 1, 34, 34, 1
; PC64LE-NEXT: addis 3, 2, .LCPI179_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI179_0@toc@l
-; PC64LE-NEXT: lvx 4, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 3
; PC64LE-NEXT: mffprwz 4, 0
; PC64LE-NEXT: mffprwz 5, 1
; PC64LE-NEXT: mtfprwz 0, 4
+; PC64LE-NEXT: xxswapd 36, 3
; PC64LE-NEXT: mtfprwz 1, 5
; PC64LE-NEXT: mfvsrwz 4, 34
; PC64LE-NEXT: xscvuxdsp 0, 0
; PC64LE-NEXT: mtfprd 1, 4
; PC64LE-NEXT: addi 3, 6, .LCPI181_0@toc@l
; PC64LE-NEXT: xscvuxdsp 0, 0
-; PC64LE-NEXT: lvx 4, 0, 3
; PC64LE-NEXT: xscvuxdsp 1, 1
; PC64LE-NEXT: mtfprd 2, 5
; PC64LE-NEXT: xscvuxdsp 2, 2
; PC64LE-NEXT: xscvdpspn 0, 0
; PC64LE-NEXT: xscvdpspn 1, 1
; PC64LE-NEXT: xscvdpspn 34, 2
+; PC64LE-NEXT: lxvd2x 2, 0, 3
; PC64LE-NEXT: xxmrghw 35, 1, 0
+; PC64LE-NEXT: xxswapd 36, 2
; PC64LE-NEXT: vperm 2, 2, 3, 4
; PC64LE-NEXT: blr
;
;
; CHECK-P8-LE-LABEL: ld_0_vector:
; CHECK-P8-LE: # %bb.0: # %entry
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_0_vector:
; CHECK-P8-LE-LABEL: ld_unalign16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: addi r3, r3, 1
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_unalign16_vector:
; CHECK-P8-LE-LABEL: ld_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: addi r3, r3, 8
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r4, 1
; CHECK-P8-LE-NEXT: ori r4, r4, 34463
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_unalign32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r4, 1525
; CHECK-P8-LE-NEXT: ori r4, r4, 56600
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_align32_vector:
; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24
; CHECK-P8-LE-NEXT: oris r4, r4, 54437
; CHECK-P8-LE-NEXT: ori r4, r4, 4097
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_unalign64_vector:
; CHECK-P8-LE-NEXT: lis r4, 3725
; CHECK-P8-LE-NEXT: ori r4, r4, 19025
; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_align64_vector:
;
; CHECK-P8-LE-LABEL: ld_reg_vector:
; CHECK-P8-LE: # %bb.0: # %entry
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_reg_vector:
; CHECK-P8-LE-LABEL: ld_or_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: or r3, r4, r3
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_or_vector:
; CHECK-P8-LE-LABEL: ld_or2_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_or2_vector:
; CHECK-P8-LE-LABEL: ld_not_disjoint16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: ori r3, r3, 6
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_not_disjoint16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51
; CHECK-P8-LE-NEXT: ori r3, r3, 6
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_disjoint_unalign16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51
; CHECK-P8-LE-NEXT: ori r3, r3, 24
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_disjoint_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: ori r3, r3, 34463
; CHECK-P8-LE-NEXT: oris r3, r3, 1
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_not_disjoint32_vector:
; CHECK-P8-LE-NEXT: lis r4, 1
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 43
; CHECK-P8-LE-NEXT: ori r4, r4, 34463
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_disjoint_unalign32_vector:
; CHECK-P8-LE-NEXT: lis r5, 15258
; CHECK-P8-LE-NEXT: and r3, r3, r4
; CHECK-P8-LE-NEXT: ori r4, r5, 41712
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_disjoint_align32_vector:
; CHECK-P8-LE-NEXT: oris r4, r4, 54437
; CHECK-P8-LE-NEXT: ori r4, r4, 4097
; CHECK-P8-LE-NEXT: or r3, r3, r4
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_not_disjoint64_vector:
; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24
; CHECK-P8-LE-NEXT: oris r4, r4, 54437
; CHECK-P8-LE-NEXT: ori r4, r4, 4097
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_disjoint_unalign64_vector:
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 23
; CHECK-P8-LE-NEXT: ori r4, r4, 19025
; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT: lvx v2, r3, r4
+; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_disjoint_align64_vector:
; CHECK-P8-LE-LABEL: ld_cst_unalign16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: li r3, 255
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_cst_unalign16_vector:
; CHECK-P8-LE-LABEL: ld_cst_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: li r3, 4080
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_cst_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r3, 1
; CHECK-P8-LE-NEXT: ori r3, r3, 34463
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_cst_unalign32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r3, 152
; CHECK-P8-LE-NEXT: ori r3, r3, 38428
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_cst_align32_vector:
; CHECK-P8-LE-NEXT: rldic r3, r3, 35, 24
; CHECK-P8-LE-NEXT: oris r3, r3, 54437
; CHECK-P8-LE-NEXT: ori r3, r3, 4097
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_cst_unalign64_vector:
; CHECK-P8-LE-NEXT: lis r3, 3725
; CHECK-P8-LE-NEXT: ori r3, r3, 19025
; CHECK-P8-LE-NEXT: rldic r3, r3, 12, 24
-; CHECK-P8-LE-NEXT: lvx v2, 0, r3
+; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd v2, vs0
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: ld_cst_align64_vector:
;
; CHECK-P8-LE-LABEL: st_0_vector:
; CHECK-P8-LE: # %bb.0: # %entry
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_0_vector:
;
; CHECK-P8-LE-LABEL: st_unalign16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: addi r3, r3, 1
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_unalign16_vector:
;
; CHECK-P8-LE-LABEL: st_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: addi r3, r3, 8
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_align16_vector:
;
; CHECK-P8-LE-LABEL: st_unalign32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: lis r4, 1
; CHECK-P8-LE-NEXT: ori r4, r4, 34463
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_unalign32_vector:
;
; CHECK-P8-LE-LABEL: st_align32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: lis r4, 1525
; CHECK-P8-LE-NEXT: ori r4, r4, 56600
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_align32_vector:
; CHECK-P8-LE-LABEL: st_unalign64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: li r4, 29
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24
; CHECK-P8-LE-NEXT: oris r4, r4, 54437
; CHECK-P8-LE-NEXT: ori r4, r4, 4097
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_unalign64_vector:
; CHECK-P8-LE-LABEL: st_align64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r4, 3725
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: ori r4, r4, 19025
; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_align64_vector:
;
; CHECK-P8-LE-LABEL: st_reg_vector:
; CHECK-P8-LE: # %bb.0: # %entry
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_reg_vector:
;
; CHECK-P8-LE-LABEL: st_or1_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: or r3, r4, r3
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_or1_vector:
;
; CHECK-P8-LE-LABEL: st_or2_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_or2_vector:
;
; CHECK-P8-LE-LABEL: st_not_disjoint16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: ori r3, r3, 6
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_not_disjoint16_vector:
;
; CHECK-P8-LE-LABEL: st_disjoint_unalign16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51
; CHECK-P8-LE-NEXT: ori r3, r3, 6
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_disjoint_unalign16_vector:
;
; CHECK-P8-LE-LABEL: st_disjoint_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51
; CHECK-P8-LE-NEXT: ori r3, r3, 24
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_disjoint_align16_vector:
;
; CHECK-P8-LE-LABEL: st_not_disjoint32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: ori r3, r3, 34463
; CHECK-P8-LE-NEXT: oris r3, r3, 1
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_not_disjoint32_vector:
;
; CHECK-P8-LE-LABEL: st_disjoint_unalign32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: lis r4, 1
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 43
; CHECK-P8-LE-NEXT: ori r4, r4, 34463
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_disjoint_unalign32_vector:
;
; CHECK-P8-LE-LABEL: st_disjoint_align32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: lis r4, -15264
; CHECK-P8-LE-NEXT: lis r5, 15258
; CHECK-P8-LE-NEXT: and r3, r3, r4
; CHECK-P8-LE-NEXT: ori r4, r5, 41712
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_disjoint_align32_vector:
; CHECK-P8-LE-LABEL: st_not_disjoint64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: li r4, 29
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24
; CHECK-P8-LE-NEXT: oris r4, r4, 54437
; CHECK-P8-LE-NEXT: ori r4, r4, 4097
; CHECK-P8-LE-NEXT: or r3, r3, r4
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_not_disjoint64_vector:
; CHECK-P8-LE-LABEL: st_disjoint_unalign64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: li r4, 29
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 23
; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24
; CHECK-P8-LE-NEXT: oris r4, r4, 54437
; CHECK-P8-LE-NEXT: ori r4, r4, 4097
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_disjoint_unalign64_vector:
; CHECK-P8-LE-LABEL: st_disjoint_align64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r4, 3725
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 23
; CHECK-P8-LE-NEXT: ori r4, r4, 19025
; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT: stvx v2, r3, r4
+; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_disjoint_align64_vector:
;
; CHECK-P8-LE-LABEL: st_cst_unalign16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: li r3, 255
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_cst_unalign16_vector:
;
; CHECK-P8-LE-LABEL: st_cst_align16_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: li r3, 4080
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_cst_align16_vector:
;
; CHECK-P8-LE-LABEL: st_cst_unalign32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: lis r3, 1
; CHECK-P8-LE-NEXT: ori r3, r3, 34463
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_cst_unalign32_vector:
;
; CHECK-P8-LE-LABEL: st_cst_align32_vector:
; CHECK-P8-LE: # %bb.0: # %entry
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: lis r3, 152
; CHECK-P8-LE-NEXT: ori r3, r3, 38428
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_cst_align32_vector:
; CHECK-P8-LE-LABEL: st_cst_unalign64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: li r3, 29
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: rldic r3, r3, 35, 24
; CHECK-P8-LE-NEXT: oris r3, r3, 54437
; CHECK-P8-LE-NEXT: ori r3, r3, 4097
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_cst_unalign64_vector:
; CHECK-P8-LE-LABEL: st_cst_align64_vector:
; CHECK-P8-LE: # %bb.0: # %entry
; CHECK-P8-LE-NEXT: lis r3, 3725
+; CHECK-P8-LE-NEXT: xxswapd vs0, v2
; CHECK-P8-LE-NEXT: ori r3, r3, 19025
; CHECK-P8-LE-NEXT: rldic r3, r3, 12, 24
-; CHECK-P8-LE-NEXT: stvx v2, 0, r3
+; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-LE-NEXT: blr
;
; CHECK-P8-BE-LABEL: st_cst_align64_vector:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd vs35, vs0
; CHECK-P8-NEXT: vrlb v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd vs35, vs0
; CHECK-P8-NEXT: vrlh v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
+; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT: xxswapd vs35, vs0
; CHECK-P8-NEXT: vrlw v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-NEXT: addis 4, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: addi 4, 4, .LCPI0_1@toc@l
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 1, 0, 4
; CHECK-NEXT: vsubuwm 3, 4, 3
-; CHECK-NEXT: lvx 4, 0, 4
; CHECK-NEXT: vslw 2, 2, 3
+; CHECK-NEXT: xxswapd 36, 1
; CHECK-NEXT: vsraw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: xxsel 34, 36, 35, 34
; CHECK-NEXT: blr
%add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_C1_or_C2_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-NEXT: addis 4, 2, .LCPI1_1@toc@ha
+; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l
; CHECK-NEXT: addi 4, 4, .LCPI1_1@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: lvx 4, 0, 4
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 1, 0, 4
+; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: xxswapd 36, 1
; CHECK-NEXT: xxsel 34, 36, 35, 34
; CHECK-NEXT: blr
%cond = icmp eq <4 x i32> %x, %y
define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_Cplus1_or_C_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vspltisw 3, 1
; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-NEXT: vspltisw 3, 1
; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxland 34, 34, 35
-; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: vadduwm 2, 2, 3
+; CHECK-NEXT: xxswapd 36, 0
+; CHECK-NEXT: vadduwm 2, 2, 4
; CHECK-NEXT: blr
%add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
ret <4 x i32> %add
define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha
+; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addi 3, 3, .LCPI3_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vsubuwm 2, 3, 2
; CHECK-NEXT: blr
%cond = icmp eq <4 x i32> %x, %y
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l
+; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vslw 2, 2, 3
; CHECK-NEXT: vsraw 2, 2, 3
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addis 3, 2, .LCPI5_0@toc@ha
+; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: addi 3, 3, .LCPI5_0@toc@l
-; CHECK-NEXT: lvx 3, 0, 3
+; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%cond = icmp eq <4 x i32> %x, %y
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
-; RUN: grep lxvd2x < %t | count 3
-; RUN: grep lvx < %t | count 3
-; RUN: grep stxvd2x < %t | count 3
-; RUN: grep stvx < %t | count 3
+; RUN: grep lxvd2x < %t | count 6
+; RUN: grep stxvd2x < %t | count 6
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
;
; CHECK-LE-LABEL: test32:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: lvx v2, 0, r3
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-NEXT: xxswapd v2, vs0
; CHECK-LE-NEXT: blr
%v = load <4 x float>, <4 x float>* %a, align 16
ret <4 x float> %v
;
; CHECK-LE-LABEL: test33:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: stvx v2, 0, r3
+; CHECK-LE-NEXT: xxswapd vs0, v2
+; CHECK-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-NEXT: blr
store <4 x float> %b, <4 x float>* %a, align 16
ret void
;
; CHECK-LE-LABEL: test34:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: lvx v2, 0, r3
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-NEXT: xxswapd v2, vs0
; CHECK-LE-NEXT: blr
%v = load <4 x i32>, <4 x i32>* %a, align 16
ret <4 x i32> %v
;
; CHECK-LE-LABEL: test35:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: stvx v2, 0, r3
+; CHECK-LE-NEXT: xxswapd vs0, v2
+; CHECK-LE-NEXT: stxvd2x vs0, 0, r3
; CHECK-LE-NEXT: blr
store <4 x i32> %b, <4 x i32>* %a, align 16
ret void
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: addis r3, r2, .LCPI63_0@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI63_0@toc@l
-; CHECK-LE-NEXT: lvx v3, 0, r3
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: addis r3, r2, .LCPI63_1@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI63_1@toc@l
+; CHECK-LE-NEXT: xxswapd v3, vs0
; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: xxswapd v3, vs0
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: addis r3, r2, .LCPI64_0@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI64_0@toc@l
-; CHECK-LE-NEXT: lvx v3, 0, r3
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: addis r3, r2, .LCPI64_1@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI64_1@toc@l
+; CHECK-LE-NEXT: xxswapd v3, vs0
; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: xxswapd v3, vs0
;
; CHECK-LE-LABEL: test80:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: mtfprwz f0, r3
; CHECK-LE-NEXT: addis r4, r2, .LCPI65_0@toc@ha
-; CHECK-LE-NEXT: addi r3, r4, .LCPI65_0@toc@l
-; CHECK-LE-NEXT: xxspltw v2, vs0, 1
-; CHECK-LE-NEXT: lvx v3, 0, r3
+; CHECK-LE-NEXT: mtfprwz f1, r3
+; CHECK-LE-NEXT: addi r4, r4, .LCPI65_0@toc@l
+; CHECK-LE-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-NEXT: xxspltw v2, vs1, 1
+; CHECK-LE-NEXT: xxswapd v3, vs0
; CHECK-LE-NEXT: vadduwm v2, v2, v3
; CHECK-LE-NEXT: blr
%b1 = insertelement <2 x i32> undef, i32 %v, i32 0
define double @teste0(<2 x double>* %p1) {
; CHECK-LABEL: teste0:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxvd2x vs1, 0, r3
-; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-P8-BE-LABEL: teste0:
;
; CHECK-P9-VECTOR-LABEL: teste0:
; CHECK-P9-VECTOR: # %bb.0:
-; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3
-; CHECK-P9-VECTOR-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P9-VECTOR-NEXT: lfd f1, 0(r3)
; CHECK-P9-VECTOR-NEXT: blr
;
; CHECK-P9-LABEL: teste0:
define double @teste1(<2 x double>* %p1) {
; CHECK-LABEL: teste1:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxvd2x vs0, 0, r3
-; CHECK-NEXT: xxswapd vs1, vs0
-; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT: lfd f1, 8(r3)
; CHECK-NEXT: blr
;
; CHECK-P8-BE-LABEL: teste1:
;
; CHECK-P9-VECTOR-LABEL: teste1:
; CHECK-P9-VECTOR: # %bb.0:
-; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
-; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0
-; CHECK-P9-VECTOR-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P9-VECTOR-NEXT: lfd f1, 8(r3)
; CHECK-P9-VECTOR-NEXT: blr
;
; CHECK-P9-LABEL: teste1:
define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: test00:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxspltd 34, 0, 0
+; CHECK-NEXT: lxvdsx 34, 0, 3
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: test00:
define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxspltd 34, 0, 1
+; CHECK-NEXT: addi 3, 3, 8
+; CHECK-NEXT: lxvdsx 34, 0, 3
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: test11:
define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: test22:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxvd2x 0, 0, 4
-; CHECK-NEXT: xxspltd 34, 0, 0
+; CHECK-NEXT: lxvdsx 34, 0, 4
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: test22:
define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: test33:
; CHECK: # %bb.0:
-; CHECK-NEXT: lxvd2x 0, 0, 4
-; CHECK-NEXT: xxspltd 34, 0, 1
+; CHECK-NEXT: addi 3, 4, 8
+; CHECK-NEXT: lxvdsx 34, 0, 3
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: test33: