From c11051a4001c7f89e8655f1776a75110a562a45e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 22 May 2022 14:13:30 -0700 Subject: [PATCH] [SelectionDAG] Add a freeze to ISD::ABS expansion. I had initially assumed this was the problem with https://github.com/llvm/llvm-project/issues/55271#issuecomment-1133426243 But it turns out that was a simpler issue. This patch is still more correct than what we were doing before so figured I'd submit it anyway. No test case because I'm not sure how to get an undef around until expansion. Looking at the test deltas I wonder if it be valid to combine (sext_inreg (freeze (aextload X))) -> (freeze (sextload X)). Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D126175 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 ++ llvm/test/CodeGen/WebAssembly/PR41149.ll | 32 +++++++++++++--------- llvm/test/CodeGen/X86/iabs.ll | 4 +-- llvm/test/CodeGen/X86/neg-abs.ll | 10 +++---- .../Inputs/basic.ll.expected | 4 +-- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 35d289d..5af1cd8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7855,6 +7855,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::UMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -7862,6 +7863,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { + Op = DAG.getFreeze(Op); SDValue Zero = DAG.getConstant(0, dl, VT); return DAG.getNode(ISD::SMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); @@ -7875,6 +7877,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + Op = DAG.getFreeze(Op); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll index 7ee99e1..428f849 100644 --- a/llvm/test/CodeGen/WebAssembly/PR41149.ll +++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll @@ -1,22 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=wasm32-unknown-unknown | FileCheck %s ; Regression test for PR41149. define void @mod() { ; CHECK-LABEL: mod: -; CHECK-NEXT: .functype mod () -> () -; CHECK: local.get 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.load8_s 0 -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 7 -; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: i32.xor -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: i32.store8 0 +; CHECK: .functype mod () -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.load8_u 0 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.shl +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.shr_s +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: i32.xor +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: i32.store8 0 +; CHECK-NEXT: # fallthrough-return %tmp = load <4 x i8>, <4 x i8>* undef %tmp2 = icmp slt <4 x i8> %tmp, zeroinitializer %tmp3 = sub <4 x i8> zeroinitializer, %tmp diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 2df88db..c3cb3b3 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -37,8 +37,8 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-LABEL: test_i16: ; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NO-CMOV-NEXT: movl %eax, %ecx +; X86-NO-CMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: movswl %ax, %ecx ; X86-NO-CMOV-NEXT: sarl $15, %ecx ; X86-NO-CMOV-NEXT: xorl %ecx, %eax ; X86-NO-CMOV-NEXT: subl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll index 429d0a3..84f82b7 100644 --- a/llvm/test/CodeGen/X86/neg-abs.ll +++ b/llvm/test/CodeGen/X86/neg-abs.ll @@ -34,8 +34,8 @@ define i8 @neg_abs_i8(i8 %x) nounwind { define i16 @neg_abs_i16(i16 %x) nounwind { ; X86-LABEL: neg_abs_i16: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movswl %cx, %eax ; X86-NEXT: sarl $15, %eax ; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax @@ -108,8 +108,8 @@ define i128 @neg_abs_i128(i128 %x) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: xorl %ecx, %edx @@ -178,8 +178,8 @@ define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind { define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind { ; X86-LABEL: sub_abs_i16: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movswl %cx, %eax ; X86-NEXT: sarl $15, %eax ; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected index cf85117..6e7d02b 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected @@ -31,8 +31,8 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-LABEL: test_i16: ; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NO-CMOV-NEXT: movl %eax, %ecx +; X86-NO-CMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: movswl %ax, %ecx ; X86-NO-CMOV-NEXT: sarl $15, %ecx ; X86-NO-CMOV-NEXT: xorl %ecx, %eax ; X86-NO-CMOV-NEXT: subl %ecx, %eax -- 2.7.4