--- /dev/null
+//===--- HexagonGenExtract.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U),
+ cl::Hidden, cl::desc("Cutoff for generating \"extract\""
+ " instructions"));
+
+// This prevents generating extract instructions that have the offset of 0.
+// One of the reasons for "extract" is to put a sequence of bits in a regis-
+// ter, starting at offset 0 (so that these bits can then be used by an
+// "insert"). If the bits are already at offset 0, it is better not to gene-
+// rate "extract", since logical bit operations can be merged into compound
+// instructions (as opposed to "extract").
+static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden,
+ cl::desc("No extract instruction with offset 0"));
+
+static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden,
+ cl::desc("Require & in extract patterns"));
+
+namespace llvm {
+ void initializeHexagonGenExtractPass(PassRegistry&);
+ FunctionPass *createHexagonGenExtract();
+}
+
+
+namespace {
+ class HexagonGenExtract : public FunctionPass {
+ public:
+ static char ID;
+ HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
+ initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const override {
+ return "Hexagon generate \"extract\" instructions";
+ }
+ virtual bool runOnFunction(Function &F) override;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ bool visitBlock(BasicBlock *B);
+ bool convert(Instruction *In);
+
+ unsigned ExtractCount;
+ DominatorTree *DT;
+ };
+
+ char HexagonGenExtract::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
+ "\"extract\" instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
+ "\"extract\" instructions", false, false)
+
+
+bool HexagonGenExtract::convert(Instruction *In) {
+ using namespace PatternMatch;
+ Value *BF = 0;
+ ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
+ BasicBlock *BB = In->getParent();
+ LLVMContext &Ctx = BB->getContext();
+ bool LogicalSR;
+
+ // (and (shl (lshr x, #sr), #sl), #m)
+ LogicalSR = true;
+ bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+
+ if (!Match) {
+ // (and (shl (ashr x, #sr), #sl), #m)
+ LogicalSR = false;
+ Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ // (and (shl x, #sl), #m)
+ LogicalSR = true;
+ CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+ if (Match && NoSR0)
+ return false;
+ }
+ if (!Match) {
+ // (and (lshr x, #sr), #m)
+ LogicalSR = true;
+ CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ // (and (ashr x, #sr), #m)
+ LogicalSR = false;
+ CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ CM = 0;
+ // (shl (lshr x, #sr), #sl)
+ LogicalSR = true;
+ Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)));
+ }
+ if (!Match) {
+ CM = 0;
+ // (shl (ashr x, #sr), #sl)
+ LogicalSR = false;
+ Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)));
+ }
+ if (!Match)
+ return false;
+
+ Type *Ty = BF->getType();
+ if (!Ty->isIntegerTy())
+ return false;
+ unsigned BW = Ty->getPrimitiveSizeInBits();
+ if (BW != 32 && BW != 64)
+ return false;
+
+ uint32_t SR = CSR->getZExtValue();
+ uint32_t SL = CSL->getZExtValue();
+
+ if (!CM) {
+ // If there was no and, and the shift left did not remove all potential
+ // sign bits created by the shift right, then extractu cannot reproduce
+ // this value.
+ if (!LogicalSR && (SR > SL))
+ return false;
+ APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
+ CM = ConstantInt::get(Ctx, A);
+ }
+
+ // CM is the shifted-left mask. Shift it back right to remove the zero
+ // bits on least-significant positions.
+ APInt M = CM->getValue().lshr(SL);
+ uint32_t T = M.countTrailingOnes();
+
+ // During the shifts some of the bits will be lost. Calculate how many
+ // of the original value will remain after shift right and then left.
+ uint32_t U = BW - std::max(SL, SR);
+ // The width of the extracted field is the minimum of the original bits
+ // that remain after the shifts and the number of contiguous 1s in the mask.
+ uint32_t W = std::min(U, T);
+ if (W == 0)
+ return false;
+
+ // Check if the extracted bits are contained within the mask that it is
+ // and-ed with. The extract operation will copy these bits, and so the
+ // mask cannot any holes in it that would clear any of the bits of the
+ // extracted field.
+ if (!LogicalSR) {
+ // If the shift right was arithmetic, it could have included some 1 bits.
+ // It is still ok to generate extract, but only if the mask eliminates
+ // those bits (i.e. M does not have any bits set beyond U).
+ APInt C = APInt::getHighBitsSet(BW, BW-U);
+ if (M.intersects(C) || !APIntOps::isMask(W, M))
+ return false;
+ } else {
+ // Check if M starts with a contiguous sequence of W times 1 bits. Get
+ // the low U bits of M (which eliminates the 0 bits shifted in on the
+ // left), and check if the result is APInt's "mask":
+ if (!APIntOps::isMask(W, M.getLoBits(U)))
+ return false;
+ }
+
+ IRBuilder<> IRB(BB, In);
+ Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
+ : Intrinsic::hexagon_S2_extractup;
+ Module *Mod = BB->getParent()->getParent();
+ Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
+ Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
+ if (SL != 0)
+ NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
+ In->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+
+bool HexagonGenExtract::visitBlock(BasicBlock *B) {
+ // Depth-first, bottom-up traversal.
+ DomTreeNode *DTN = DT->getNode(B);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
+ visitBlock((*I)->getBlock());
+
+ // Allow limiting the number of generated extracts for debugging purposes.
+ bool HasCutoff = ExtractCutoff.getPosition();
+ unsigned Cutoff = ExtractCutoff;
+
+ bool Changed = false;
+ BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
+ while (true) {
+ if (HasCutoff && (ExtractCount >= Cutoff))
+ return Changed;
+ bool Last = (I == Begin);
+ if (!Last)
+ NextI = std::prev(I);
+ Instruction *In = &*I;
+ bool Done = convert(In);
+ if (HasCutoff && Done)
+ ExtractCount++;
+ Changed |= Done;
+ if (Last)
+ break;
+ I = NextI;
+ }
+ return Changed;
+}
+
+
+bool HexagonGenExtract::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool Changed;
+
+ // Traverse the function bottom-up, to see super-expressions before their
+ // sub-expressions.
+ BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F);
+ Changed = visitBlock(Entry);
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonGenExtract() {
+ return new HexagonGenExtract();
+}
static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions"));
+static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
+ cl::Hidden, cl::desc("Generate \"extract\" instructions"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
createVLIWMachineSched);
namespace llvm {
+ FunctionPass *createHexagonCFGOptimizer();
FunctionPass *createHexagonCommonGEP();
+ FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonExpandCondsets();
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
- FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
- FunctionPass *createHexagonCFGOptimizer();
-
- FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonExpandPredSpillCode();
+ FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonHardwareLoops();
- FunctionPass *createHexagonPeephole();
- FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonNewValueJump();
- FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonPacketizer();
- FunctionPass *createHexagonNewValueJump();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonSplitConst32AndConst64();
} // end namespace llvm;
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
addPass(createAtomicExpandPass(TM));
- if (!NoOpt && EnableCommGEP)
- addPass(createHexagonCommonGEP());
+ if (!NoOpt) {
+ if (EnableCommGEP)
+ addPass(createHexagonCommonGEP());
+ // Replace certain combinations of shifts and ands with extracts.
+ if (EnableGenExtract)
+ addPass(createHexagonGenExtract());
+ }
}
bool HexagonPassConfig::addInstSelector() {
--- /dev/null
+; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
+
+; CHECK-DAG: extractu(r{{[0-9]*}}, #3, #4)
+; CHECK-DAG: extractu(r{{[0-9]*}}, #8, #7)
+; CHECK-DAG: extractu(r{{[0-9]*}}, #8, #16)
+
+; C source:
+; typedef struct {
+; unsigned x1:3;
+; unsigned x2:7;
+; unsigned x3:8;
+; unsigned x4:12;
+; unsigned x5:2;
+; } structx_t;
+;
+; typedef struct {
+; unsigned y1:4;
+; unsigned y2:3;
+; unsigned y3:9;
+; unsigned y4:8;
+; unsigned y5:8;
+; } structy_t;
+;
+; void foo(structx_t *px, structy_t *py) {
+; px->x1 = py->y1;
+; px->x2 = py->y2;
+; px->x3 = py->y3;
+; px->x4 = py->y4;
+; px->x5 = py->y5;
+; }
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+%struct.structx_t = type { i8, i8, i8, i8 }
+%struct.structy_t = type { i8, i8, i8, i8 }
+
+define void @foo(%struct.structx_t* nocapture %px, %struct.structy_t* nocapture %py) nounwind {
+entry:
+ %0 = bitcast %struct.structy_t* %py to i32*
+ %1 = load i32, i32* %0, align 4
+ %bf.value = and i32 %1, 7
+ %2 = bitcast %struct.structx_t* %px to i32*
+ %3 = load i32, i32* %2, align 4
+ %4 = and i32 %3, -8
+ %5 = or i32 %4, %bf.value
+ store i32 %5, i32* %2, align 4
+ %6 = load i32, i32* %0, align 4
+ %7 = lshr i32 %6, 4
+ %bf.clear1 = shl nuw nsw i32 %7, 3
+ %8 = and i32 %bf.clear1, 56
+ %9 = and i32 %5, -1017
+ %10 = or i32 %8, %9
+ store i32 %10, i32* %2, align 4
+ %11 = load i32, i32* %0, align 4
+ %12 = lshr i32 %11, 7
+ %bf.value4 = shl i32 %12, 10
+ %13 = and i32 %bf.value4, 261120
+ %14 = and i32 %10, -262081
+ %15 = or i32 %14, %13
+ store i32 %15, i32* %2, align 4
+ %16 = load i32, i32* %0, align 4
+ %17 = lshr i32 %16, 16
+ %bf.clear5 = shl i32 %17, 18
+ %18 = and i32 %bf.clear5, 66846720
+ %19 = and i32 %15, -1073480641
+ %20 = or i32 %19, %18
+ store i32 %20, i32* %2, align 4
+ %21 = load i32, i32* %0, align 4
+ %22 = lshr i32 %21, 24
+ %23 = shl i32 %22, 30
+ %24 = and i32 %20, 67107903
+ %25 = or i32 %24, %23
+ store i32 %25, i32* %2, align 4
+ ret void
+}