; CHECK-LABEL: test8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
-; CHECK-NEXT: mfvsrd r4, v2
-; CHECK-NEXT: xxswapd vs0, v2
-; CHECK-NEXT: clrldi r5, r4, 48
-; CHECK-NEXT: mtvsrd f1, r5
-; CHECK-NEXT: rldicl r5, r4, 48, 48
-; CHECK-NEXT: mtvsrd f2, r5
-; CHECK-NEXT: rldicl r5, r4, 32, 48
-; CHECK-NEXT: rldicl r4, r4, 16, 48
-; CHECK-NEXT: mtvsrd f3, r5
-; CHECK-NEXT: xxswapd v2, vs1
-; CHECK-NEXT: mfvsrd r5, f0
-; CHECK-NEXT: xxswapd v3, vs2
-; CHECK-NEXT: mtvsrd f0, r4
-; CHECK-NEXT: clrldi r4, r5, 48
-; CHECK-NEXT: mtvsrd f1, r4
-; CHECK-NEXT: rldicl r4, r5, 48, 48
-; CHECK-NEXT: xxswapd v4, vs0
-; CHECK-NEXT: mtvsrd f2, r4
-; CHECK-NEXT: rldicl r4, r5, 32, 48
-; CHECK-NEXT: rldicl r5, r5, 16, 48
-; CHECK-NEXT: vmrglb v2, v3, v2
-; CHECK-NEXT: xxswapd v3, vs3
-; CHECK-NEXT: mtvsrd f3, r4
-; CHECK-NEXT: xxswapd v5, vs1
-; CHECK-NEXT: mtvsrd f0, r5
-; CHECK-NEXT: xxswapd v0, vs2
-; CHECK-NEXT: xxswapd v1, vs3
-; CHECK-NEXT: vmrglb v3, v4, v3
-; CHECK-NEXT: xxswapd v6, vs0
-; CHECK-NEXT: vmrglb v4, v0, v5
-; CHECK-NEXT: vmrglb v5, v6, v1
-; CHECK-NEXT: vmrglh v2, v3, v2
-; CHECK-NEXT: vmrglh v3, v5, v4
-; CHECK-NEXT: vmrglw v2, v2, v3
+; CHECK-NEXT: vpkuhum v2, v2, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stfdx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test8i8:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: lhz r4, -18(r1)
-; CHECK-BE-NEXT: stb r4, -48(r1)
-; CHECK-BE-NEXT: lhz r4, -20(r1)
-; CHECK-BE-NEXT: stb r4, -64(r1)
-; CHECK-BE-NEXT: lhz r4, -22(r1)
-; CHECK-BE-NEXT: stb r4, -80(r1)
-; CHECK-BE-NEXT: lhz r4, -24(r1)
-; CHECK-BE-NEXT: stb r4, -96(r1)
-; CHECK-BE-NEXT: lhz r4, -26(r1)
-; CHECK-BE-NEXT: stb r4, -112(r1)
-; CHECK-BE-NEXT: lhz r4, -28(r1)
-; CHECK-BE-NEXT: stb r4, -128(r1)
-; CHECK-BE-NEXT: lhz r4, -30(r1)
-; CHECK-BE-NEXT: stb r4, -144(r1)
-; CHECK-BE-NEXT: lhz r4, -32(r1)
-; CHECK-BE-NEXT: stb r4, -160(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
-; CHECK-BE-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -80
-; CHECK-BE-NEXT: lxvw4x v4, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -96
-; CHECK-BE-NEXT: lxvw4x v5, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -112
-; CHECK-BE-NEXT: lxvw4x v0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -128
-; CHECK-BE-NEXT: lxvw4x v1, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -144
-; CHECK-BE-NEXT: lxvw4x v6, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -160
-; CHECK-BE-NEXT: lxvw4x v7, 0, r4
-; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: vmrghb v3, v5, v4
-; CHECK-BE-NEXT: vmrghb v4, v1, v0
-; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: vmrghb v5, v7, v6
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: stxvd2x v2, 0, r4
+; CHECK-BE-NEXT: addi r5, r1, -16
+; CHECK-BE-NEXT: vpkuhum v2, v2, v2
+; CHECK-BE-NEXT: stxvd2x v2, 0, r5
; CHECK-BE-NEXT: ld r4, -16(r1)
; CHECK-BE-NEXT: std r4, 0(r3)
; CHECK-BE-NEXT: blr
; CHECK-LABEL: test4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
-; CHECK-NEXT: xxswapd vs0, v2
-; CHECK-NEXT: mfvsrd r4, f0
-; CHECK-NEXT: clrldi r5, r4, 48
-; CHECK-NEXT: mtvsrd f0, r5
-; CHECK-NEXT: rldicl r5, r4, 48, 48
-; CHECK-NEXT: mtvsrd f1, r5
-; CHECK-NEXT: rldicl r5, r4, 32, 48
-; CHECK-NEXT: rldicl r4, r4, 16, 48
-; CHECK-NEXT: mtvsrd f2, r5
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: mtvsrd f3, r4
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: xxswapd v4, vs2
-; CHECK-NEXT: xxswapd v5, vs3
-; CHECK-NEXT: vmrglb v2, v3, v2
-; CHECK-NEXT: vmrglb v3, v5, v4
-; CHECK-NEXT: vmrglh v2, v3, v2
+; CHECK-NEXT: vpkuhum v2, v2, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test4i8:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: lhz r4, -26(r1)
-; CHECK-BE-NEXT: stb r4, -48(r1)
-; CHECK-BE-NEXT: lhz r4, -28(r1)
-; CHECK-BE-NEXT: stb r4, -64(r1)
-; CHECK-BE-NEXT: lhz r4, -30(r1)
-; CHECK-BE-NEXT: stb r4, -80(r1)
-; CHECK-BE-NEXT: lhz r4, -32(r1)
-; CHECK-BE-NEXT: stb r4, -96(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
-; CHECK-BE-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -80
-; CHECK-BE-NEXT: lxvw4x v4, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -96
-; CHECK-BE-NEXT: lxvw4x v5, 0, r4
-; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghb v3, v5, v4
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: stxvw4x v2, 0, r4
+; CHECK-BE-NEXT: addi r5, r1, -16
+; CHECK-BE-NEXT: vpkuhum v2, v2, v2
+; CHECK-BE-NEXT: stxvw4x v2, 0, r5
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-BE-NEXT: blr
define void @test4i8w(<4 x i8>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) {
; CHECK-LABEL: test4i8w:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lvx v2, 0, r4
-; CHECK-NEXT: xxswapd vs0, v2
-; CHECK-NEXT: mfvsrwz r4, v2
-; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-NEXT: xxsldwi vs3, v2, v2, 3
-; CHECK-NEXT: mtvsrd f2, r4
-; CHECK-NEXT: mfvsrwz r4, f0
-; CHECK-NEXT: mfvsrwz r5, f1
-; CHECK-NEXT: xxswapd v4, vs2
-; CHECK-NEXT: mtvsrd f0, r4
-; CHECK-NEXT: mfvsrwz r4, f3
-; CHECK-NEXT: mtvsrd f1, r5
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: mtvsrd f3, r4
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: xxswapd v5, vs3
-; CHECK-NEXT: vmrglb v2, v3, v2
-; CHECK-NEXT: vmrglb v3, v5, v4
-; CHECK-NEXT: vmrglh v2, v3, v2
+; CHECK-NEXT: addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-NEXT: lvx v3, 0, r4
+; CHECK-NEXT: addi r5, r5, .LCPI2_0@toc@l
+; CHECK-NEXT: lvx v2, 0, r5
+; CHECK-NEXT: vperm v2, v3, v3, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test4i8w:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: lwz r4, -20(r1)
-; CHECK-BE-NEXT: stb r4, -48(r1)
-; CHECK-BE-NEXT: lwz r4, -24(r1)
-; CHECK-BE-NEXT: stb r4, -64(r1)
-; CHECK-BE-NEXT: lwz r4, -28(r1)
-; CHECK-BE-NEXT: stb r4, -80(r1)
-; CHECK-BE-NEXT: lwz r4, -32(r1)
-; CHECK-BE-NEXT: stb r4, -96(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
+; CHECK-BE-NEXT: addis r5, r2, .LCPI2_0@toc@ha
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
+; CHECK-BE-NEXT: addi r4, r5, .LCPI2_0@toc@l
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -80
-; CHECK-BE-NEXT: lxvw4x v4, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -96
-; CHECK-BE-NEXT: lxvw4x v5, 0, r4
-; CHECK-BE-NEXT: vmrghb v2, v3, v2
; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghb v3, v5, v4
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-LABEL: test2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
-; CHECK-NEXT: xxswapd vs0, v2
-; CHECK-NEXT: mfvsrd r4, f0
-; CHECK-NEXT: clrldi r5, r4, 48
-; CHECK-NEXT: rldicl r4, r4, 48, 48
-; CHECK-NEXT: mtvsrd f0, r5
-; CHECK-NEXT: mtvsrd f1, r4
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: vmrglb v2, v3, v2
+; CHECK-NEXT: vpkuhum v2, v2, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mfvsrd r4, f0
; CHECK-NEXT: clrldi r4, r4, 48
;
; CHECK-BE-LABEL: test2i8:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: lhz r4, -30(r1)
-; CHECK-BE-NEXT: stb r4, -48(r1)
-; CHECK-BE-NEXT: lhz r4, -32(r1)
-; CHECK-BE-NEXT: stb r4, -64(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
-; CHECK-BE-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghb v2, v3, v2
-; CHECK-BE-NEXT: stxvw4x v2, 0, r4
+; CHECK-BE-NEXT: addi r5, r1, -16
+; CHECK-BE-NEXT: vpkuhum v2, v2, v2
+; CHECK-BE-NEXT: stxvw4x v2, 0, r5
; CHECK-BE-NEXT: lhz r4, -16(r1)
; CHECK-BE-NEXT: sth r4, 0(r3)
; CHECK-BE-NEXT: blr
; CHECK-LABEL: test4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
-; CHECK-NEXT: xxswapd vs0, v2
-; CHECK-NEXT: mfvsrwz r4, v2
-; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-NEXT: xxsldwi vs3, v2, v2, 3
-; CHECK-NEXT: mtvsrd f2, r4
-; CHECK-NEXT: mfvsrwz r4, f0
-; CHECK-NEXT: mfvsrwz r5, f1
-; CHECK-NEXT: xxswapd v4, vs2
-; CHECK-NEXT: mtvsrd f0, r4
-; CHECK-NEXT: mfvsrwz r4, f3
-; CHECK-NEXT: mtvsrd f1, r5
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: mtvsrd f3, r4
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: xxswapd v5, vs3
-; CHECK-NEXT: vmrglh v2, v3, v2
-; CHECK-NEXT: vmrglh v3, v5, v4
-; CHECK-NEXT: vmrglw v2, v3, v2
+; CHECK-NEXT: vpkuwum v2, v2, v2
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: stfdx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test4i16:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: lwz r4, -20(r1)
-; CHECK-BE-NEXT: sth r4, -48(r1)
-; CHECK-BE-NEXT: lwz r4, -24(r1)
-; CHECK-BE-NEXT: sth r4, -64(r1)
-; CHECK-BE-NEXT: lwz r4, -28(r1)
-; CHECK-BE-NEXT: sth r4, -80(r1)
-; CHECK-BE-NEXT: lwz r4, -32(r1)
-; CHECK-BE-NEXT: sth r4, -96(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
-; CHECK-BE-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -80
-; CHECK-BE-NEXT: lxvw4x v4, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -96
-; CHECK-BE-NEXT: lxvw4x v5, 0, r4
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghh v3, v5, v4
-; CHECK-BE-NEXT: vmrghw v2, v3, v2
-; CHECK-BE-NEXT: stxvd2x v2, 0, r4
+; CHECK-BE-NEXT: addi r5, r1, -16
+; CHECK-BE-NEXT: vpkuwum v2, v2, v2
+; CHECK-BE-NEXT: stxvd2x v2, 0, r5
; CHECK-BE-NEXT: ld r4, -16(r1)
; CHECK-BE-NEXT: std r4, 0(r3)
; CHECK-BE-NEXT: blr
; CHECK-LABEL: test2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx v2, 0, r4
-; CHECK-NEXT: xxswapd vs0, v2
-; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
-; CHECK-NEXT: mfvsrwz r4, f0
-; CHECK-NEXT: mfvsrwz r5, f1
-; CHECK-NEXT: mtvsrd f0, r4
-; CHECK-NEXT: mtvsrd f1, r5
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: vmrglh v2, v3, v2
+; CHECK-NEXT: vpkuwum v2, v2, v2
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2i16:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
-; CHECK-BE-NEXT: lwz r4, -28(r1)
-; CHECK-BE-NEXT: sth r4, -48(r1)
-; CHECK-BE-NEXT: lwz r4, -32(r1)
-; CHECK-BE-NEXT: sth r4, -64(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
-; CHECK-BE-NEXT: lxvw4x v3, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-BE-NEXT: stxvw4x v2, 0, r4
+; CHECK-BE-NEXT: addi r5, r1, -16
+; CHECK-BE-NEXT: vpkuwum v2, v2, v2
+; CHECK-BE-NEXT: stxvw4x v2, 0, r5
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)
; CHECK-BE-NEXT: blr
; CHECK-LABEL: test2i16d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r4
-; CHECK-NEXT: xxswapd vs1, vs0
-; CHECK-NEXT: mfvsrwz r4, f0
-; CHECK-NEXT: mtvsrd f0, r4
-; CHECK-NEXT: mfvsrwz r5, f1
+; CHECK-NEXT: addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-NEXT: addi r4, r5, .LCPI6_0@toc@l
+; CHECK-NEXT: lvx v3, 0, r4
; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: mtvsrd f1, r5
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: vmrglh v2, v3, v2
+; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
; CHECK-NEXT: stfiwx f0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test2i16d:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxvd2x vs0, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -32
-; CHECK-BE-NEXT: stxvd2x vs0, 0, r4
-; CHECK-BE-NEXT: lwz r4, -20(r1)
-; CHECK-BE-NEXT: sth r4, -48(r1)
-; CHECK-BE-NEXT: lwz r4, -28(r1)
-; CHECK-BE-NEXT: sth r4, -64(r1)
-; CHECK-BE-NEXT: addi r4, r1, -48
+; CHECK-BE-NEXT: addis r5, r2, .LCPI6_0@toc@ha
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
-; CHECK-BE-NEXT: addi r4, r1, -64
+; CHECK-BE-NEXT: addi r4, r5, .LCPI6_0@toc@l
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
; CHECK-BE-NEXT: addi r4, r1, -16
-; CHECK-BE-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-NEXT: vperm v2, v2, v2, v3
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
; CHECK-BE-NEXT: lwz r4, -16(r1)
; CHECK-BE-NEXT: stw r4, 0(r3)