; CHECK-NEXT: #
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxpermdi v1, f0, f0, 2
+; CHECK-NEXT: lfdx f0, r3, r4
; CHECK-NEXT: vperm v6, v1, v3, v4
; CHECK-NEXT: vperm v1, v3, v1, v2
; CHECK-NEXT: xvnegsp v1, v1
+; CHECK-NEXT: add r7, r3, r4
; CHECK-NEXT: xvnegsp v6, v6
; CHECK-NEXT: vabsduw v1, v1, v5
; CHECK-NEXT: vabsduw v6, v6, v0
; CHECK-NEXT: vadduwm v1, v1, v6
; CHECK-NEXT: xxspltw v6, v1, 2
; CHECK-NEXT: vadduwm v1, v1, v6
-; CHECK-NEXT: vextuwrx r7, r5, v1
-; CHECK-NEXT: lfdx f0, r3, r4
-; CHECK-NEXT: add r6, r7, r6
-; CHECK-NEXT: add r7, r3, r4
-; CHECK-NEXT: xxpermdi v1, f0, f0, 2
+; CHECK-NEXT: xxpermdi v6, f0, f0, 2
+; CHECK-NEXT: vextuwrx r3, r5, v1
+; CHECK-NEXT: vperm v7, v6, v3, v4
+; CHECK-NEXT: vperm v6, v3, v6, v2
+; CHECK-NEXT: add r6, r3, r6
; CHECK-NEXT: add r3, r7, r4
-; CHECK-NEXT: vperm v6, v3, v1, v2
-; CHECK-NEXT: vperm v1, v1, v3, v4
; CHECK-NEXT: xvnegsp v6, v6
-; CHECK-NEXT: xvnegsp v1, v1
+; CHECK-NEXT: xvnegsp v1, v7
; CHECK-NEXT: vabsduw v6, v6, v5
; CHECK-NEXT: vabsduw v1, v1, v0
; CHECK-NEXT: vadduwm v1, v1, v6
; P9BE-NEXT: #
; P9BE-NEXT: lfd f0, 0(r3)
; P9BE-NEXT: xxlor v1, vs0, vs0
+; P9BE-NEXT: lfdx f0, r3, r4
; P9BE-NEXT: vperm v6, v3, v1, v4
; P9BE-NEXT: vperm v1, v3, v1, v2
; P9BE-NEXT: xvnegsp v1, v1
; P9BE-NEXT: vadduwm v1, v1, v6
; P9BE-NEXT: xxspltw v6, v1, 1
; P9BE-NEXT: vadduwm v1, v1, v6
-; P9BE-NEXT: vextuwlx r7, r5, v1
-; P9BE-NEXT: lfdx f0, r3, r4
-; P9BE-NEXT: add r6, r7, r6
+; P9BE-NEXT: xxlor v6, vs0, vs0
+; P9BE-NEXT: vperm v7, v3, v6, v4
+; P9BE-NEXT: vperm v6, v3, v6, v2
; P9BE-NEXT: add r7, r3, r4
-; P9BE-NEXT: xxlor v1, vs0, vs0
-; P9BE-NEXT: add r3, r7, r4
-; P9BE-NEXT: vperm v6, v3, v1, v2
-; P9BE-NEXT: vperm v1, v3, v1, v4
+; P9BE-NEXT: vextuwlx r3, r5, v1
; P9BE-NEXT: xvnegsp v6, v6
-; P9BE-NEXT: xvnegsp v1, v1
-; P9BE-NEXT: vabsduw v6, v6, v5
+; P9BE-NEXT: xvnegsp v1, v7
; P9BE-NEXT: vabsduw v1, v1, v0
+; P9BE-NEXT: vabsduw v6, v6, v5
; P9BE-NEXT: vadduwm v1, v1, v6
; P9BE-NEXT: xxswapd v6, v1
+; P9BE-NEXT: add r6, r3, r6
; P9BE-NEXT: vadduwm v1, v1, v6
; P9BE-NEXT: xxspltw v6, v1, 1
; P9BE-NEXT: vadduwm v1, v1, v6
; P9BE-NEXT: vextuwlx r8, r5, v1
+; P9BE-NEXT: add r3, r7, r4
; P9BE-NEXT: add r6, r8, r6
; P9BE-NEXT: bdnz .LBB0_1
; P9BE-NEXT: # %bb.2: # %for.cond.cleanup
; P9BE-NEXT: add r6, r3, r4
; P9BE-NEXT: li r7, 16
; P9BE-NEXT: lxsihzx v2, r6, r7
-; P9BE-NEXT: vsplth v2, v2, 3
; P9BE-NEXT: lxsihzx v4, r3, r4
; P9BE-NEXT: li r6, 0
; P9BE-NEXT: sldi r6, r6, 48
+; P9BE-NEXT: vsplth v4, v4, 3
; P9BE-NEXT: mtvsrd v3, r6
+; P9BE-NEXT: vsplth v2, v2, 3
; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; P9BE-NEXT: vmrghh v2, v3, v2
-; P9BE-NEXT: vsplth v4, v4, 3
; P9BE-NEXT: vmrghh v4, v3, v4
+; P9BE-NEXT: vmrghh v2, v3, v2
; P9BE-NEXT: vsplth v3, v3, 0
; P9BE-NEXT: vmrghw v3, v3, v4
; P9BE-NEXT: lxvx v4, 0, r3
define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxsibzx v2, r3, r4
; CHECK-NEXT: add r6, r3, r4
+; CHECK-NEXT: lxsibzx v2, r3, r4
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: mtvsrd f0, r3
; CHECK-NEXT: li r3, 8
+; CHECK-NEXT: lxsibzx v5, r6, r3
; CHECK-NEXT: xxswapd v3, vs0
+; CHECK-NEXT: vspltb v4, v3, 15
; CHECK-NEXT: vspltb v2, v2, 7
-; CHECK-NEXT: lxsibzx v5, r6, r3
-; CHECK-NEXT: vspltb v5, v5, 7
; CHECK-NEXT: vmrglb v2, v3, v2
-; CHECK-NEXT: vspltb v4, v3, 15
-; CHECK-NEXT: vmrglb v3, v3, v5
; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NEXT: vmrglh v2, v2, v4
; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-NEXT: vspltb v5, v5, 7
+; CHECK-NEXT: vmrglh v2, v2, v4
+; CHECK-NEXT: vmrglb v3, v3, v5
; CHECK-NEXT: vmrglw v2, v2, v4
; CHECK-NEXT: vmrglh v3, v3, v4
; CHECK-NEXT: vmrglw v3, v4, v3
; P9BE-NEXT: add r6, r3, r4
; P9BE-NEXT: li r7, 8
; P9BE-NEXT: lxsibzx v2, r6, r7
-; P9BE-NEXT: vspltb v2, v2, 7
; P9BE-NEXT: lxsibzx v4, r3, r4
; P9BE-NEXT: li r6, 0
; P9BE-NEXT: sldi r6, r6, 56
+; P9BE-NEXT: vspltb v4, v4, 7
; P9BE-NEXT: mtvsrd v3, r6
+; P9BE-NEXT: vspltb v2, v2, 7
+; P9BE-NEXT: vmrghb v4, v3, v4
; P9BE-NEXT: vmrghb v2, v3, v2
+; P9BE-NEXT: vspltb v3, v3, 0
; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
-; P9BE-NEXT: vspltb v4, v4, 7
-; P9BE-NEXT: vmrghb v4, v3, v4
-; P9BE-NEXT: vspltb v3, v3, 0
; P9BE-NEXT: vmrghh v4, v4, v3
; P9BE-NEXT: xxspltw v3, v3, 0
; P9BE-NEXT: vmrghw v2, v4, v2