From 6457f42bde82fd9a514434c946b9d3fbe92a8619 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 10 Dec 2021 18:05:38 +0000 Subject: [PATCH] [DAGCombiner] Extend ISD::ABDS/U combine to handle more cases. The current ABD combine doesn't quite work for SVE because only a single scalable vector per scalar integer type is legal (e.g. for i32, <vscale x 4 x i32> is the only legal scalable vector type). This patch extends the combine to also trigger for the cases when operand extension must be retained. Differential Revision: https://reviews.llvm.org/D115739 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 ++++--- llvm/test/CodeGen/AArch64/neon-abd.ll | 12 ++-- llvm/test/CodeGen/AArch64/sve-abd.ll | 96 +++++++++++++++++++++------ llvm/test/CodeGen/Thumb2/mve-vabdus.ll | 12 ++-- 4 files changed, 99 insertions(+), 46 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9b156b2..a070833 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9505,18 +9505,27 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) return SDValue(); + EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); - // Check if the operands are of same type and valid size. unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; - if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) - return SDValue(); - Op0 = Op0.getOperand(0); - Op1 = Op1.getOperand(0); - SDValue ABD = - DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); + // fold abs(sext(x) - sext(y)) -> zext(abds(x, y)) + // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y)) + // NOTE: Extensions must be equivalent. 
+ if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) { + Op0 = Op0.getOperand(0); + Op1 = Op1.getOperand(0); + SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD); + } + + // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y)) + // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y)) + if (TLI.isOperationLegalOrCustom(ABDOpcode, VT)) + return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1); + + return SDValue(); } SDValue DAGCombiner::visitABS(SDNode *N) { diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll index 94c25e9..0279c83 100644 --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -53,8 +53,7 @@ define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: shl v1.4h, v1.4h, #8 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v1.4h, v1.4h, #8 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: abs v0.4h, v0.4h +; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %a.sext = sext <4 x i8> %a to <4 x i16> %b.sext = sext <4 x i8> %b to <4 x i16> @@ -108,8 +107,7 @@ define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 { ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: abs v0.2s, v0.2s +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %a.sext = sext <2 x i16> %a to <2 x i32> %b.sext = sext <2 x i16> %b to <2 x i32> @@ -234,8 +232,7 @@ define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: bic v0.4h, #255, lsl #8 ; CHECK-NEXT: bic v1.4h, #255, lsl #8 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: abs v0.4h, v0.4h +; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %a.zext = zext <4 x i8> %a to <4 x i16> %b.zext = zext <4 x i8> %b to <4 x i16> @@ -288,8 +285,7 @@ define <2 x 
i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 { ; CHECK-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: abs v0.2s, v0.2s +; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %a.zext = zext <2 x i16> %a to <2 x i32> %b.zext = zext <2 x i16> %b to <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll index affd6d5..1bdff3a 100644 --- a/llvm/test/CodeGen/AArch64/sve-abd.ll +++ b/llvm/test/CodeGen/AArch64/sve-abd.ll @@ -24,11 +24,10 @@ define @sabd_b( %a, %b) define @sabd_b_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: sabd_b_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ptrue p2.b -; CHECK-NEXT: sub z0.b, z0.b, z1.b -; CHECK-NEXT: abs z0.b, p2/m, z0.b +; CHECK-NEXT: sabd z0.b, p2/m, z0.b, z1.b ; CHECK-NEXT: ret %a.sext = sext %a to %b.sext = sext %b to @@ -57,8 +56,7 @@ define @sabd_h_promoted_ops( %a, %a to %b.sext = sext %b to @@ -87,8 +85,7 @@ define @sabd_s_promoted_ops( %a, %a to %b.sext = sext %b to @@ -117,8 +114,7 @@ define @sabd_d_promoted_ops( %a, %a to %b.sext = sext %b to @@ -148,11 +144,10 @@ define @uabd_b( %a, %b) define @uabd_b_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_b_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p2.b -; CHECK-NEXT: add z0.b, z0.b, z1.b -; CHECK-NEXT: abs z0.b, p2/m, z0.b +; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1 +; CHECK-NEXT: uabd z0.b, p2/m, z0.b, z1.b ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -178,11 +173,10 @@ define @uabd_h( %a, %b) define @uabd_h_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_h_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: 
ptrue p0.h ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h -; CHECK-NEXT: abs z0.h, p0/m, z0.h +; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -208,11 +202,10 @@ define @uabd_s( %a, %b) define @uabd_s_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_s_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: and z1.s, z1.s, #0xffff -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s -; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -238,11 +231,10 @@ define @uabd_d( %a, %b) define @uabd_d_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_d_promoted_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d -; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -251,6 +243,66 @@ define @uabd_d_promoted_ops( %a, %abs } +; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and +; %b have differing types. 
+define @uabd_non_matching_extension( %a, %b) #0 { +; CHECK-LABEL: uabd_non_matching_extension: +; CHECK: // %bb.0: +; CHECK-NEXT: and z1.s, z1.s, #0xff +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: sub z1.d, z2.d, z3.d +; CHECK-NEXT: abs z1.d, p0/m, z1.d +; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i64( %sub, i1 true) + %trunc = trunc %abs to + ret %trunc +} + +; Test the situation where isLegal(ISD::ABD, typeof(%a.zext)) returns true but +; %a and %b have differing types. +define @uabd_non_matching_promoted_ops( %a, %b) #0 { +; CHECK-LABEL: uabd_non_matching_promoted_ops: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = zext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i32( %sub, i1 true) + ret %abs +} + +; Test the situation where isLegal(ISD::ABD, typeof(%a)) returns true but %a and +; %b are promoted differently. 
+define @uabd_non_matching_promotion( %a, %b) #0 { +; CHECK-LABEL: uabd_non_matching_promotion: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: sxtb z1.s, p0/m, z1.s +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %a.zext = zext %a to + %b.zext = sext %b to + %sub = sub %a.zext, %b.zext + %abs = call @llvm.abs.nxv4i32( %sub, i1 true) + ret %abs +} + declare @llvm.abs.nxv16i8(, i1) declare @llvm.abs.nxv8i16(, i1) diff --git a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll index aa37c70..5d93e5f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll @@ -21,8 +21,7 @@ define arm_aapcs_vfpcc <8 x i8> @vabd_v8s8(<8 x i8> %src1, <8 x i8> %src2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vsub.i16 q0, q0, q1 -; CHECK-NEXT: vabs.s16 q0, q0 +; CHECK-NEXT: vabd.s16 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <8 x i8> %src1 to <8 x i16> %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> @@ -74,8 +73,7 @@ define arm_aapcs_vfpcc <4 x i16> @vabd_v4s16(<4 x i16> %src1, <4 x i16> %src2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vabs.s32 q0, q0 +; CHECK-NEXT: vabd.s32 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <4 x i16> %src1 to <4 x i32> %sextsrc2 = sext <4 x i16> %src2 to <4 x i32> @@ -158,8 +156,7 @@ define arm_aapcs_vfpcc <8 x i8> @vabd_v8u8(<8 x i8> %src1, <8 x i8> %src2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.u8 q1, q1 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vsub.i16 q0, q0, q1 -; CHECK-NEXT: vabs.s16 q0, q0 +; CHECK-NEXT: vabd.u16 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <8 x i8> %src1 to <8 x i16> %zextsrc2 = zext <8 x i8> %src2 to <8 x i16> @@ -210,8 +207,7 @@ define arm_aapcs_vfpcc <4 x i16> @vabd_v4u16(<4 x i16> %src1, <4 x i16> %src2) { ; 
CHECK: @ %bb.0: ; CHECK-NEXT: vmovlb.u16 q1, q1 ; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vabs.s32 q0, q0 +; CHECK-NEXT: vabd.u32 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <4 x i16> %src1 to <4 x i32> %zextsrc2 = zext <4 x i16> %src2 to <4 x i32> -- 2.7.4