From f744723f7538934e0beb5d8a2267afeb86345986 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 7 May 2021 16:39:11 +0100 Subject: [PATCH] [X86] combineXor - limit fold to non-opaque constants (PR50254) Ensure we don't try to fold when one might be an opaque constant - the constant fold will fail and then the reverse fold will happen in DAGCombine..... --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++------ llvm/test/CodeGen/X86/pr50254.ll | 49 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr50254.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 430df48..b9b2223 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46998,15 +46998,17 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, // Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2)) // TODO: Under what circumstances could this be performed in DAGCombine? 
if ((N0.getOpcode() == ISD::TRUNCATE || N0.getOpcode() == ISD::ZERO_EXTEND) && - N0.getOperand(0).getOpcode() == N->getOpcode() && - isa<ConstantSDNode>(N1) && - isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { - SDLoc DL(N); + N0.getOperand(0).getOpcode() == N->getOpcode()) { SDValue TruncExtSrc = N0.getOperand(0); - SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT); - SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT); - return DAG.getNode(ISD::XOR, DL, VT, LHS, - DAG.getNode(ISD::XOR, DL, VT, RHS, N1)); + auto *N1C = dyn_cast<ConstantSDNode>(N1); + auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.getOperand(1)); + if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) { + SDLoc DL(N); + SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT); + SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT); + return DAG.getNode(ISD::XOR, DL, VT, LHS, + DAG.getNode(ISD::XOR, DL, VT, RHS, N1)); + } } if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) diff --git a/llvm/test/CodeGen/X86/pr50254.ll b/llvm/test/CodeGen/X86/pr50254.ll new file mode 100644 index 0000000..1bb9bec --- /dev/null +++ b/llvm/test/CodeGen/X86/pr50254.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 + +@d.e = external dso_local unnamed_addr global i32, align 4 + +define void @PR50254() { +; X86-LABEL: PR50254: +; X86: # %bb.0: # %entry +; X86-NEXT: movswl d.e, %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %cl, %cl +; X86-NEXT: jne .LBB0_2 +; X86-NEXT: # %bb.1: # %for.end +; X86-NEXT: movw %ax, d.e +; X86-NEXT: .LBB0_2: # %for.body.1 +; X86-NEXT: retl +; +; X64-LABEL: PR50254: +; X64: # %bb.0: # %entry +; X64-NEXT: movswq {{.*}}(%rip), %rax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %cl, %cl +; X64-NEXT: jne .LBB0_2 +; X64-NEXT: # %bb.1: # %for.end +; X64-NEXT: movw %ax, 
{{.*}}(%rip) +; X64-NEXT: .LBB0_2: # %for.body.1 +; X64-NEXT: retq +entry: + %load = load i16, i16* bitcast (i32* @d.e to i16*), align 4 + %xor1 = xor i16 %load, 0 + %xor2 = xor i64 undef, 3821908120 + %xor3 = xor i16 %load, -1 + %xor4 = sext i16 %xor3 to i64 + %xor5 = and i64 %xor4, 4294967295 + %xor6 = xor i64 %xor5, 3821908120 + br label %for.body + +for.body: ; preds = %entry + br i1 undef, label %for.end, label %for.body.1 + +for.end: ; preds = %for.body + store i16 %xor1, i16* bitcast (i32* @d.e to i16*), align 4 + ret void + +for.body.1: ; preds = %for.body + %add.1 = add i64 %xor6, undef + ret void +} -- 2.7.4