From 9c86c5e8add23a29fb19f7c0ac366a2063a5073a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 25 Nov 2020 18:05:41 +0000 Subject: [PATCH] [DAG] Legalize abs(x) -> umin(x,sub(0,x)) iff umin/sub are legal If umin() is legal, this is likely to result in smaller codegen expansion for abs(x) than the xor(add,ashr) method. Followup to D92095 Alive2: https://alive2.llvm.org/ce/z/8nuX6s https://alive2.llvm.org/ce/z/q2hB9w --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 8 ++++++ llvm/test/CodeGen/X86/abs.ll | 5 ++-- llvm/test/CodeGen/X86/combine-abs.ll | 10 +++---- llvm/test/CodeGen/X86/viabs.ll | 35 ++++++++++-------------- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0b1c257..3464fe8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6830,6 +6830,14 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, return true; } + // abs(x) -> umin(x,sub(0,x)) + if (isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::UMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + // Only expand vector types if we have the appropriate vector operations. if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) || !isOperationLegalOrCustom(ISD::ADD, VT) || diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 942f09f..0bbc785 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -474,9 +474,8 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind { ; SSE-LABEL: test_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpgtb %xmm0, %xmm1 -; SSE-NEXT: paddb %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: psubb %xmm0, %xmm1 +; SSE-NEXT: pminub %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8: diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index da59da4..4ff9fef 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -81,12 +81,10 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) { ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm3 -; SSE2-NEXT: paddb %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm3, %xmm0 -; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 -; SSE2-NEXT: paddb %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubb %xmm0, %xmm3 +; SSE2-NEXT: pminub %xmm3, %xmm0 +; SSE2-NEXT: psubb %xmm1, %xmm2 +; SSE2-NEXT: pminub %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSE42-LABEL: combine_v32i8_abs_abs: diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll index 873dea2..405d9ea 100644 --- a/llvm/test/CodeGen/X86/viabs.ll +++ b/llvm/test/CodeGen/X86/viabs.ll @@ -132,9 +132,8 @@ define <16 x i8> @test_abs_lt_v16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: test_abs_lt_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubb %xmm0, %xmm1 +; SSE2-NEXT: pminub %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_lt_v16i8: @@ -363,12 +362,10 @@ define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm3 -; SSE2-NEXT: paddb %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm3, %xmm0 -; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 -; SSE2-NEXT: paddb %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubb %xmm0, %xmm3 +; SSE2-NEXT: pminub %xmm3, %xmm0 +; SSE2-NEXT: psubb %xmm1, %xmm2 +; SSE2-NEXT: pminub %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_lt_v32i8: @@ -874,20 +871,16 @@ define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pxor %xmm5, %xmm5 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm5 -; SSE2-NEXT: paddb %xmm5, %xmm0 -; SSE2-NEXT: pxor %xmm5, %xmm0 +; SSE2-NEXT: psubb %xmm0, %xmm5 +; SSE2-NEXT: pminub %xmm5, %xmm0 ; SSE2-NEXT: pxor %xmm5, %xmm5 -; SSE2-NEXT: pcmpgtb %xmm1, %xmm5 -; SSE2-NEXT: paddb %xmm5, %xmm1 -; SSE2-NEXT: pxor %xmm5, %xmm1 +; SSE2-NEXT: psubb %xmm1, %xmm5 +; SSE2-NEXT: pminub %xmm5, %xmm1 ; SSE2-NEXT: pxor %xmm5, %xmm5 -; SSE2-NEXT: pcmpgtb %xmm2, %xmm5 -; SSE2-NEXT: paddb %xmm5, %xmm2 -; SSE2-NEXT: pxor %xmm5, %xmm2 -; SSE2-NEXT: pcmpgtb %xmm3, %xmm4 -; SSE2-NEXT: paddb %xmm4, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: psubb %xmm2, %xmm5 +; SSE2-NEXT: pminub %xmm5, %xmm2 +; SSE2-NEXT: psubb %xmm3, %xmm4 +; SSE2-NEXT: pminub %xmm4, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_lt_v64i8: -- 2.7.4