From: David Green <david.green@arm.com>
Date: Sat, 4 Feb 2023 19:18:50 +0000 (+0000)
Subject: [AArch64] Add ABD combine tests. NFC
X-Git-Tag: upstream/17.0.6~18579
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d81d60876ac6586a79e567062f5d1cc3ac68772b;p=platform%2Fupstream%2Fllvm.git

[AArch64] Add ABD combine tests. NFC
---

diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
new file mode 100644
index 0000000..e2ed700
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -0,0 +1,461 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    usubw2 v2.4s, v1.4s, v0.8h
+; CHECK-NEXT:    usubw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_both() {
+; CHECK-LABEL: abdu_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #2
+; CHECK-NEXT:    ret
+  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_bothhigh() {
+; CHECK-LABEL: abdu_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
+  %zextsrc2 = zext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_undef(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+
+
+define <8 x i16> @abdu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_i_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_both() {
+; CHECK-LABEL: abdu_i_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_bothhigh() {
+; CHECK-LABEL: abdu_i_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT:    mvni v1.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_onehigh() {
+; CHECK-LABEL: abdu_i_const_onehigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_oneneg() {
+; CHECK-LABEL: abdu_i_const_oneneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    mvni v1.8h, #1
+; CHECK-NEXT:    dup v0.8h, w8
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_reassoc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    movi v2.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ret
+  %r1 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+
+
+
+
+define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ssubw2 v2.4s, v1.4s, v0.8h
+; CHECK-NEXT:    ssubw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_both() {
+; CHECK-LABEL: abds_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #2
+; CHECK-NEXT:    ret
+  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_bothhigh() {
+; CHECK-LABEL: abds_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
+  %zextsrc2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_undef(<8 x i16> %src1) {
+; CHECK-LABEL: abds_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+
+
+define <8 x i16> @abds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_i_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_both() {
+; CHECK-LABEL: abds_i_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_bothhigh() {
+; CHECK-LABEL: abds_i_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    mvni v1.8h, #128, lsl #8
+; CHECK-NEXT:    dup v0.8h, w8
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_onehigh() {
+; CHECK-LABEL: abds_i_const_onehigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_oneneg() {
+; CHECK-LABEL: abds_i_const_oneneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    mvni v1.8h, #1
+; CHECK-NEXT:    dup v0.8h, w8
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2>)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abds_i_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abds_i_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_reassoc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    movi v2.8h, #1
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ret
+  %r1 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  ret <8 x i16> %result
+}
+
+
+declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
index b832d52..87e0997 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
@@ -613,3 +613,53 @@ vector.body:                                      ; preds = %vector.body, %entry
 for.cond.cleanup:                                 ; preds = %vector.body
   ret void
 }
+
+define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32_commutative(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vabd_v4u32_commutative:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vabd.u32 q2, q1, q0
+; CHECK-NEXT:    vabd.u32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q2
+; CHECK-NEXT:    bx lr
+  %azextsrc1 = zext <4 x i32> %src1 to <4 x i64>
+  %azextsrc2 = zext <4 x i32> %src2 to <4 x i64>
+  %aadd1 = sub <4 x i64> %azextsrc1, %azextsrc2
+  %aadd2 = sub <4 x i64> zeroinitializer, %aadd1
+  %ac = icmp sge <4 x i64> %aadd1, zeroinitializer
+  %as = select <4 x i1> %ac, <4 x i64> %aadd1, <4 x i64> %aadd2
+  %aresult = trunc <4 x i64> %as to <4 x i32>
+  %bzextsrc1 = zext <4 x i32> %src2 to <4 x i64>
+  %bzextsrc2 = zext <4 x i32> %src1 to <4 x i64>
+  %badd1 = sub <4 x i64> %bzextsrc1, %bzextsrc2
+  %badd2 = sub <4 x i64> zeroinitializer, %badd1
+  %bc = icmp sge <4 x i64> %badd1, zeroinitializer
+  %bs = select <4 x i1> %bc, <4 x i64> %badd1, <4 x i64> %badd2
+  %bresult = trunc <4 x i64> %bs to <4 x i32>
+  %r = add <4 x i32> %aresult, %bresult
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32_shuffle(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vabd_v4u32_shuffle:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.f32 s8, s7
+; CHECK-NEXT:    vmov.f32 s9, s6
+; CHECK-NEXT:    vmov.f32 s10, s5
+; CHECK-NEXT:    vmov.f32 s11, s4
+; CHECK-NEXT:    vmov.f32 s4, s3
+; CHECK-NEXT:    vmov.f32 s5, s2
+; CHECK-NEXT:    vmov.f32 s6, s1
+; CHECK-NEXT:    vmov.f32 s7, s0
+; CHECK-NEXT:    vabd.u32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+  %s1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %s2 = shufflevector <4 x i32> %src2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %azextsrc1 = zext <4 x i32> %s1 to <4 x i64>
+  %azextsrc2 = zext <4 x i32> %s2 to <4 x i64>
+  %aadd1 = sub <4 x i64> %azextsrc1, %azextsrc2
+  %aadd2 = sub <4 x i64> zeroinitializer, %aadd1
+  %ac = icmp sge <4 x i64> %aadd1, zeroinitializer
+  %as = select <4 x i1> %ac, <4 x i64> %aadd1, <4 x i64> %aadd2
+  %aresult = trunc <4 x i64> %as to <4 x i32>
+  ret <4 x i32> %aresult
+}