#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "aggressive-instcombine"
};
} // namespace
+/// This is a recursive helper for foldToMaskedCmp() that walks through a chain
+/// of 'or' instructions looking for shift ops of a common source value (the
+/// first member of the pair). The second member of the pair is a mask constant
+/// for all of the bits that are being compared. So this:
+///   or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
+/// returns {X, 0x129} (bits 0, 3, 5, and 8 set), and those are the operands of
+/// an 'and' that is compared to zero.
+static bool matchMaskedCmpOp(Value *V, std::pair<Value *, APInt> &Result) {
+  // Recurse through a chain of 'or' operands.
+  Value *Op0, *Op1;
+  if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
+    return matchMaskedCmpOp(Op0, Result) && matchMaskedCmpOp(Op1, Result);
+
+  // We need a shift-right or a bare value representing a compare of bit 0 of
+  // the original source operand.
+  Value *Candidate;
+  uint64_t BitIndex = 0;
+  if (!match(V, m_LShr(m_Value(Candidate), m_ConstantInt(BitIndex))))
+    Candidate = V;
+
+  // A shift constant that is out of range for the type means the code has not
+  // been simplified; reject it rather than set an out-of-range mask bit.
+  if (BitIndex >= Result.second.getBitWidth())
+    return false;
+
+  // Initialize the result source operand.
+  if (!Result.first)
+    Result.first = Candidate;
+
+  // Fill in the mask bit derived from the shift constant.
+  Result.second.setBit(BitIndex);
+  return Result.first == Candidate;
+}
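+
+// For illustration only (hypothetical values, not part of the pass): matching
+// the chain 'or (or (lshr X, 3), X), (lshr X, 5)' visits both shifts (bits 3
+// and 5) and the bare X (bit 0), leaving Result == {X, 0x29} (0b101001).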
+
+/// Match an 'and' of a chain of or-shifted bits from a common source value
+/// into a masked compare:
+///   and (or (lshr X, C), ...), 1 --> (X & C') != 0
+static bool foldToMaskedCmp(Instruction &I) {
+  // TODO: This is only looking for 'any-bits-set' and 'all-bits-clear'.
+  // We should also match 'all-bits-set' and 'any-bits-clear' by looking for
+  // a chain of 'and'.
+  if (!match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
+    return false;
+
+  std::pair<Value *, APInt> MaskOps(
+      nullptr, APInt::getNullValue(I.getType()->getScalarSizeInBits()));
+  if (!matchMaskedCmpOp(cast<BinaryOperator>(&I)->getOperand(0), MaskOps))
+    return false;
+
+  // Replace the pattern with a single 'and' of the source value, a compare
+  // against zero, and a zext back to the original result type.
+  IRBuilder<> Builder(&I);
+  Value *Mask = Builder.CreateAnd(MaskOps.first, MaskOps.second);
+  Value *CmpZero = Builder.CreateIsNotNull(Mask);
+  Value *Zext = Builder.CreateZExt(CmpZero, I.getType());
+  I.replaceAllUsesWith(Zext);
+  return true;
+}
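+
+// For illustration (this mirrors the first test case below): the fold above
+// rewrites
+//   %s = lshr i32 %x, 3
+//   %o = or i32 %s, %x
+//   %r = and i32 %o, 1
+// into
+//   %tmp1 = and i32 %x, 9
+//   %tmp2 = icmp ne i32 %tmp1, 0
+//   %r = zext i1 %tmp2 to i32
+// (the value names here are placeholders; IRBuilder assigns its own).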
+
+/// This is the entry point for folds that could be implemented in regular
+/// InstCombine, but they are separated here because they are not expected to
+/// occur frequently and/or may require more than a constant-length pattern
+/// match.
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+  bool MadeChange = false;
+  for (BasicBlock &BB : F) {
+    // Ignore unreachable basic blocks.
+    if (!DT.isReachableFromEntry(&BB))
+      continue;
+    // Do not delete instructions inside this loop; that would invalidate the
+    // iterator. Dead instructions are removed below instead.
+    for (Instruction &I : BB)
+      MadeChange |= foldToMaskedCmp(I);
+  }
+
+  // We're done with transforms, so remove dead instructions.
+  if (MadeChange)
+    for (BasicBlock &BB : F)
+      SimplifyInstructionsInBlock(&BB);
+
+  return MadeChange;
+}
+
+/// This is the entry point for all transforms. Pass manager differences are
+/// handled in the callers of this function.
+static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
+  bool MadeChange = false;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  TruncInstCombine TIC(TLI, DL, DT);
+  MadeChange |= TIC.run(F);
+  MadeChange |= foldUnusualPatterns(F, DT);
+  return MadeChange;
+}
+
void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesCFG();
}
bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
-  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  auto &DL = F.getParent()->getDataLayout();
-
-  bool MadeIRChange = false;
-
-  // Handle TruncInst patterns
-  TruncInstCombine TIC(TLI, DL, DT);
-  MadeIRChange |= TIC.run(F);
-
-  // TODO: add more patterns to handle...
-
-  return MadeIRChange;
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  return runImpl(F, TLI, DT);
}
PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
-  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-  auto &DL = F.getParent()->getDataLayout();
-  bool MadeIRChange = false;
-
-  // Handle TruncInst patterns
-  TruncInstCombine TIC(TLI, DL, DT);
-  MadeIRChange |= TIC.run(F);
-  if (!MadeIRChange)
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  if (!runImpl(F, TLI, DT)) {
    // No changes, all analyses are preserved.
    return PreservedAnalyses::all();
-
+  }
  // Mark all the analyses that instcombine updates as preserved.
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
define i32 @anyset_two_bit_mask(i32 %x) {
; CHECK-LABEL: @anyset_two_bit_mask(
-; CHECK-NEXT:    [[S:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[O:%.*]] = or i32 [[S]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %s = lshr i32 %x, 3
  %o = or i32 %s, %x
define i32 @anyset_four_bit_mask(i32 %x) {
; CHECK-LABEL: @anyset_four_bit_mask(
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X]], 8
-; CHECK-NEXT:    [[O1:%.*]] = or i32 [[T1]], [[X]]
-; CHECK-NEXT:    [[O2:%.*]] = or i32 [[T2]], [[T3]]
-; CHECK-NEXT:    [[O3:%.*]] = or i32 [[O1]], [[O2]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O3]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 297
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %t1 = lshr i32 %x, 3
  %t2 = lshr i32 %x, 5
define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits(
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[T3:%.*]] = lshr i32 [[X]], 8
-; CHECK-NEXT:    [[O2:%.*]] = or i32 [[T2]], [[T3]]
-; CHECK-NEXT:    [[O3:%.*]] = or i32 [[T1]], [[O2]]
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[O3]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %t1 = lshr i32 %x, 3
  %t2 = lshr i32 %x, 5
define i32 @allclear(i32 %a) {
; CHECK-LABEL: @allclear(
-; CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[BF_CLEAR1:%.*]] = or i32 [[BF_LSHR]], [[A]]
-; CHECK-NEXT:    [[BF_LSHR5:%.*]] = lshr i32 [[A]], 2
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[BF_CLEAR1]], [[BF_LSHR5]]
-; CHECK-NEXT:    [[BF_LSHR10:%.*]] = lshr i32 [[A]], 3
-; CHECK-NEXT:    [[OR83:%.*]] = or i32 [[OR2]], [[BF_LSHR10]]
-; CHECK-NEXT:    [[OR13:%.*]] = and i32 [[OR83]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[OR13]], 1
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
;
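+; Note: this test's source inverts the any-bits-set result with 'xor ..., 1'
+; (visible in the removed lines above), i.e. it computes all-bits-clear, so
+; the expected fold compares equal to zero rather than 'icmp ne'.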
  %a.sroa.0.0.trunc = trunc i32 %a to i8
  %a.sroa.5.0.shift = lshr i32 %a, 8
define i32 @anyset(i32 %a) {
; CHECK-LABEL: @anyset(
-; CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[BF_CLEAR1:%.*]] = or i32 [[BF_LSHR]], [[A]]
-; CHECK-NEXT:    [[BF_LSHR5:%.*]] = lshr i32 [[A]], 2
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[BF_CLEAR1]], [[BF_LSHR5]]
-; CHECK-NEXT:    [[BF_LSHR10:%.*]] = lshr i32 [[A]], 3
-; CHECK-NEXT:    [[OR83:%.*]] = or i32 [[OR2]], [[BF_LSHR10]]
-; CHECK-NEXT:    [[OR13:%.*]] = and i32 [[OR83]], 1
-; CHECK-NEXT:    ret i32 [[OR13]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %a.sroa.0.0.trunc = trunc i32 %a to i8
  %a.sroa.5.0.shift = lshr i32 %a, 8