From 0c2da88f82acd9f32557d30cd90b77153f87d7c4 Mon Sep 17 00:00:00 2001 From: Ayman Musa Date: Tue, 13 Sep 2016 09:12:45 +0000 Subject: [PATCH] Remove MVT:i1 xor instruction before SELECT. (Performance improvement). Differential Revision: https://reviews.llvm.org/D23764 llvm-svn: 281308 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +++++++++++ llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll | 24 +++++++++------- llvm/test/CodeGen/X86/xor-select-i1-combine.ll | 40 ++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/X86/xor-select-i1-combine.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 73faff2..d1727a1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5258,6 +5258,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // select (xor Cond, 1), X, Y -> select Cond, Y, X + // select (xor Cond, 0), X, Y -> select Cond, X, Y + if (VT0 == MVT::i1) { + if (N0->getOpcode() == ISD::XOR) { + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { + SDValue Cond0 = N0->getOperand(0); + if (C->isOne()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), + Cond0, N2, N1); + else + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), + Cond0, N1, N2); + } + } + } + // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y diff --git a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll index ef5b7dc..8bceb9f 100644 --- a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -800,10 +800,10 @@ entry: ; CHECK-LABEL: @testv4floateq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34 -; CHECK-DAG: creqv [[REG1:[0-9]+]], {{[0-9]+}}, 
{{[0-9]+}} +; CHECK-DAG: xxlor [[REG2:[0-9]+]], 35, 35 +; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]] -; CHECK: xxlor [[REG2]], 35, 35 +; CHECK: xxlor [[REG2]], 34, 34 ; CHECK: .LBB[[BB]]: ; CHECK: xxlor 34, [[REG2]], [[REG2]] ; CHECK: blr @@ -928,15 +928,15 @@ entry: ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 3 ; CHECK: crand [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: crand [[REG2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: creqv [[REG3:[0-9]+]], [[REG2]], [[REG1]] +; CHECK: crxor [[REG3:[0-9]+]], [[REG2]], [[REG1]] ; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]] -; CHECK: fmr 9, 11 +; CHECK: fmr 11, 9 ; CHECK: .LBB[[BB1]]: ; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]] -; CHECK: fmr 10, 12 +; CHECK: fmr 12, 10 ; CHECK: .LBB[[BB2]]: -; CHECK-DAG: fmr 1, 9 -; CHECK-DAG: fmr 2, 10 +; CHECK-DAG: fmr 1, 11 +; CHECK-DAG: fmr 2, 12 ; CHECK: blr } @@ -1019,9 +1019,11 @@ entry: ; CHECK-LABEL: @testv2doubleeq ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 -; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK: bclr 12, [[REG1]], 0 -; CHECK: vor 2, 3, 3 +; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: bc 12, [[REG1]], .LBB[[BB55:[0-9_]+]] +; CHECK: vor 3, 2, 2 +; CHECK: .LBB[[BB55]] +; CHECK: xxlor 34, 35, 35 ; CHECK: blr } diff --git a/llvm/test/CodeGen/X86/xor-select-i1-combine.ll b/llvm/test/CodeGen/X86/xor-select-i1-combine.ll new file mode 100644 index 0000000..d270afc --- /dev/null +++ b/llvm/test/CodeGen/X86/xor-select-i1-combine.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;RUN: llc < %s -O2 -mattr=+avx512f -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK + +@n = common global i32 0, align 4 +@m = common global i32 0, align 4 + +define i32 @main(i8 %small) { +; CHECK-LABEL: main: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl $n, %eax +; CHECK-NEXT: movl $m, %ecx +; CHECK-NEXT: testb $1, %dil +; 
CHECK-NEXT: cmovneq %rax, %rcx +; CHECK-NEXT: movl (%rcx), %eax +; CHECK-NEXT: retq +entry: + %0 = and i8 %small, 1 + %cmp = icmp eq i8 %0, 0 + %m.n = select i1 %cmp, i32* @m, i32* @n + %retval = load volatile i32, i32* %m.n, align 4 + ret i32 %retval +} + + +define i32 @main2(i8 %small) { +; CHECK-LABEL: main2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl $m, %eax +; CHECK-NEXT: movl $n, %ecx +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovneq %rax, %rcx +; CHECK-NEXT: movl (%rcx), %eax +; CHECK-NEXT: retq +entry: + %0 = and i8 %small, 1 + %cmp = icmp eq i8 %0, 1 + %m.n = select i1 %cmp, i32* @m, i32* @n + %retval = load volatile i32, i32* %m.n, align 4 + ret i32 %retval +} -- 2.7.4