// Type profile with 1 result and 3 operands. Index 0 (the result) must be a
// vector, indices 1 and 2 are constrained to the same type as index 0, and
// index 3 is constrained to i32.
def SDTVabsd : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
]>;
-
+// Type profile for the big-endian-order vector load node: 1 result and
+// 1 operand. Index 0 (the result) is a vector; index 1 is a pointer type.
+def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+// Type profile for the big-endian-order vector store node: no results and
+// 2 operands. Index 0 (the value being stored) is a vector; index 1 is a
+// pointer type.
+def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
// PPCISD::LXVD2X node: has a chain, may load, and carries a memory operand.
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// PPCISD::STXVD2X node: has a chain and may store.
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
+// PPCISD::LOAD_VEC_BE node, typed by SDT_PPCld_vec_be: has a chain, may load,
+// and carries a memory operand.
+def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+// PPCISD::STORE_VEC_BE node, typed by SDT_PPCst_vec_be: has a chain and may
+// store. NOTE(review): like PPCstxvd2x, and unlike the load node, this does
+// not list SDNPMemOperand -- confirm that is intended.
+def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
+ [SDNPHasChain, SDNPMayStore]>;
// PPCISD::XXSWAPD node: the only property is a chain.
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
// PPCISD::MFVSR and PPCISD::MTVSRA: unary-op nodes with no node properties.
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
(STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
+
+// Load/store vector in big endian element order.
+// Under the IsLittleEndian + HasVSX predicates, PPCld_vec_be / PPCst_vec_be
+// select directly to the indexed VSX memory instructions: LXVD2X / STXVD2X
+// for the two-element types (v2f64, v2i64) and LXVW4X / STXVW4X for the
+// four-element types (v4f32, v4i32).
+let Predicates = [IsLittleEndian, HasVSX] in {
+ def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+}
+
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+ // Big-endian-order load/store for halfword vectors (v8i16). The LXVH8X
+ // result is copied into the VRRC register class; for stores the value is
+ // copied into VSRC before being passed to STXVH8X.
+ // NOTE(review): the enclosing `let` header is outside this hunk; the
+ // closing-brace comment suggests IsLittleEndian, HasP9Vector -- confirm.
+ def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+ // Same shape for byte vectors (v16i8), using LXVB16X / STXVB16X with the
+ // same VRRC / VSRC register-class copies.
+ def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
;
; P9LE-LABEL: fromDiffMemConsDi:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 0(r3)
-; P9LE-NEXT: addis r3, r2, .LCPI8_0@toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI8_0@toc@l
-; P9LE-NEXT: lxvx v3, 0, r3
-; P9LE-NEXT: vperm v2, v2, v2, v3
+; P9LE-NEXT: lxvw4x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDi:
;
; P8LE-LABEL: fromDiffMemConsDi:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: addis r4, r2, .LCPI8_0@toc@ha
-; P8LE-NEXT: addi r3, r4, .LCPI8_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvw4x v2, 0, r3
; P8LE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
;
; P9LE-LABEL: fromDiffMemConsDui:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 0(r3)
-; P9LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI41_0@toc@l
-; P9LE-NEXT: lxvx v3, 0, r3
-; P9LE-NEXT: vperm v2, v2, v2, v3
+; P9LE-NEXT: lxvw4x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDui:
;
; P8LE-LABEL: fromDiffMemConsDui:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: addis r4, r2, .LCPI41_0@toc@ha
-; P8LE-NEXT: addi r3, r4, .LCPI41_0@toc@l
-; P8LE-NEXT: lvx v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
-; P8LE-NEXT: vperm v2, v3, v3, v2
+; P8LE-NEXT: lxvw4x v2, 0, r3
; P8LE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
;
; P9LE-LABEL: fromDiffMemConsDll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 16(r3)
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDll:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx v2, r3, r4
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemVarDll:
;
; P9LE-LABEL: fromDiffMemConsDConvdtoll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv vs0, 16(r3)
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpsxds v2, vs0
; P9LE-NEXT: blr
;
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx vs0, r3, r4
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpsxds v2, vs0
; P9LE-NEXT: blr
;
;
; P9LE-LABEL: fromDiffMemConsDull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv v2, 16(r3)
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemConsDull:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx v2, r3, r4
-; P9LE-NEXT: xxswapd v2, v2
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x v2, 0, r3
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromDiffMemVarDull:
;
; P9LE-LABEL: fromDiffMemConsDConvdtoull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: lxv vs0, 16(r3)
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, 16
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpuxds v2, vs0
; P9LE-NEXT: blr
;
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r4, 3
; P9LE-NEXT: add r3, r3, r4
-; P9LE-NEXT: li r4, -8
-; P9LE-NEXT: lxvx vs0, r3, r4
-; P9LE-NEXT: xxswapd vs0, vs0
+; P9LE-NEXT: addi r3, r3, -8
+; P9LE-NEXT: lxvd2x vs0, 0, r3
; P9LE-NEXT: xvcvdpuxds v2, vs0
; P9LE-NEXT: blr
;
;
; CHECK-P9-LABEL: load_swap00:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: xxswapd v2, v2
+; CHECK-P9-NEXT: lxvd2x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap00:
;
; CHECK-P9-LABEL: load_swap01:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: xxswapd v2, v2
+; CHECK-P9-NEXT: lxvd2x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap01:
define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) {
; CHECK-P8-LABEL: load_swap10:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r3
-; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap10:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap10:
define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) {
; CHECK-P8-LABEL: load_swap11:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvw4x v2, 0, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap11:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap11:
;
; CHECK-P9-LABEL: load_swap20:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvh8x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap20:
;
; CHECK-P9-LABEL: load_swap21:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvh8x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap21:
;
; CHECK-P9-LABEL: load_swap30:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-P9-NEXT: xxbrq v2, vs0
+; CHECK-P9-NEXT: lxvb16x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap30:
;
; CHECK-P9-LABEL: load_swap31:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxbrq v2, vs0
+; CHECK-P9-NEXT: lxvb16x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap31:
;
; CHECK-P9-LABEL: load_swap40:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv vs0, 0(r4)
-; CHECK-P9-NEXT: xxswapd v2, vs0
+; CHECK-P9-NEXT: lxvd2x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap40:
define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) {
; CHECK-P8-LABEL: load_swap50:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r4, r2, .LCPI9_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r3
-; CHECK-P8-NEXT: addi r4, r4, .LCPI9_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r4
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvw4x v2, 0, r3
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap50:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lxv v2, 0(r3)
-; CHECK-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r3
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap50:
define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) {
; CHECK-P8-LABEL: load_swap51:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
-; CHECK-P8-NEXT: lvx v3, 0, r4
-; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-P8-NEXT: lxvw4x v2, 0, r4
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: load_swap51:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l
-; CHECK-P9-NEXT: lxv v2, 0(r4)
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
+; CHECK-P9-NEXT: lxvw4x v2, 0, r4
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: load_swap51:
;
; CHECK-P9-LABEL: swap_store00:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store00:
;
; CHECK-P9-LABEL: swap_store01:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v3
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store01:
define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
; CHECK-P8-LABEL: swap_store10:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha
-; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
-; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store10:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI13_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI13_0@toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store10:
define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
; CHECK-P8-LABEL: swap_store11:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha
-; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: stxvw4x v3, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store11:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r3
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store11:
;
; CHECK-P9-LABEL: swap_store20:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvh8x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store20:
;
; CHECK-P9-LABEL: swap_store21:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r3
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvh8x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store21:
;
; CHECK-P9-LABEL: swap_store30:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxbrq vs0, v2
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvb16x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store30:
;
; CHECK-P9-LABEL: swap_store31:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxbrq vs0, v3
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvb16x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store31:
;
; CHECK-P9-LABEL: swap_store40:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v2
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store40:
;
; CHECK-P9-LABEL: swap_store41:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: xxswapd vs0, v3
-; CHECK-P9-NEXT: stxv vs0, 0(r7)
+; CHECK-P9-NEXT: stxvd2x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store41:
define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
; CHECK-P8-LABEL: swap_store50:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha
-; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l
-; CHECK-P8-NEXT: lvx v3, 0, r3
-; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: stxvw4x v2, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store50:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI21_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI21_0@toc@l
-; CHECK-P9-NEXT: lxvx v3, 0, r3
-; CHECK-P9-NEXT: vperm v2, v2, v2, v3
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v2, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store50:
define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
; CHECK-P8-LABEL: swap_store51:
; CHECK-P8: # %bb.0:
-; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha
-; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l
-; CHECK-P8-NEXT: lvx v2, 0, r3
-; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: stvx v2, 0, r7
+; CHECK-P8-NEXT: stxvw4x v3, 0, r7
; CHECK-P8-NEXT: blr
;
; CHECK-P9-LABEL: swap_store51:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: addis r3, r2, .LCPI22_0@toc@ha
-; CHECK-P9-NEXT: addi r3, r3, .LCPI22_0@toc@l
-; CHECK-P9-NEXT: lxvx v2, 0, r3
-; CHECK-P9-NEXT: vperm v2, v3, v3, v2
-; CHECK-P9-NEXT: stxv v2, 0(r7)
+; CHECK-P9-NEXT: stxvw4x v3, 0, r7
; CHECK-P9-NEXT: blr
;
; CHECK-P8-BE-LABEL: swap_store51: