cl::desc("Use DSP instructions for scalar operations\
with immediate operands"));
-namespace {
+// The goal of this pass is to enable more efficient code generation for
+// operations on narrow types (i.e. types with < 32-bits) and this is a
+// motivating IR code example:
+//
+// define hidden i32 @cmp(i8 zeroext) {
+// %2 = add i8 %0, -49
+// %3 = icmp ult i8 %2, 3
+// ..
+// }
+//
+// The issue here is that i8 is type-legalized to i32 because i8 is not a
+// legal type. Thus, arithmetic is done in integer-precision, but then the
+// byte value is masked out as follows:
+//
+// t19: i32 = add t4, Constant:i32<-49>
+// t24: i32 = and t19, Constant:i32<255>
+//
+// Consequently, we generate code like this:
+//
+// subs r0, #49
+// uxtb r1, r0
+// cmp r1, #3
+//
+// This shows that masking out the byte value results in generation of
+// the UXTB instruction. This is not optimal as r0 already contains the byte
+// value we need, and so instead we can just generate:
+//
+// sub.w r1, r0, #49
+// cmp r1, #3
+//
+// We achieve this by type promoting the IR to i32 like so for this example:
+//
+// define i32 @cmp(i8 zeroext %c) {
+// %0 = zext i8 %c to i32
+// %c.off = add i32 %0, -49
+// %1 = icmp ult i32 %c.off, 3
+// ..
+// }
+//
+// For this to be valid and legal, we need to prove that the i32 add is
+// producing the same value as the i8 addition, and that e.g. no overflow
+// happens.
+//
+// A brief sketch of the algorithm and some terminology.
+// We pattern match interesting IR patterns:
+// - which have "sources": instructions producing narrow values (i8, i16), and
+// - they have "sinks": instructions consuming these narrow values.
+//
+// We collect all instruction connecting sources and sinks in a worklist, so
+// that we can mutate these instruction and perform type promotion when it is
+// legal to do so.
+namespace {
class IRPromoter {
SmallPtrSet<Value*, 8> NewInsts;
SmallVector<Instruction*, 4> InstsToRemove;
void Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots);
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks);
};
class ARMCodeGenPrepare : public FunctionPass {
}
-/// Can the given value generate sign bits.
-static bool isSigned(Value *V) {
+static bool generateSignBits(Value *V) {
if (!isa<Instruction>(V))
return false;
return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
}
-/// Return true if the given value is a leaf in the use-def chain, producing
+/// Return true if the given value is a source in the use-def chain, producing
/// a narrow (i8, i16) value. These values will be zext to start the promotion
/// of the tree to i32. We guarantee that these won't populate the upper bits
/// of the register. ZExt on the loads will be free, and the same for call
if (!isa<Instruction>(V))
return true;
- if (isSigned(V))
+ if (generateSignBits(V))
return false;
// If I is only being used by something that will require its value to be
void IRPromoter::Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots) {
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks) {
IRBuilder<> Builder{Ctx};
Type *ExtTy = Type::getInt32Ty(M->getContext());
SmallPtrSet<Value*, 8> Promoted;
TruncTysMap[ZExt] = TruncTysMap[V];
};
- // First, insert extending instructions between the leaves and their users.
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
- for (auto V : Leaves) {
+ // First, insert extending instructions between the sources and their users.
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
+ for (auto V : Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else {
- llvm_unreachable("unhandled leaf that needs extending");
+ llvm_unreachable("unhandled source that needs extending");
}
Promoted.insert(V);
}
// Then mutate the types of the instructions within the tree. Here we handle
// constant operands.
for (auto *V : Visited) {
- if (Leaves.count(V))
+ if (Sources.count(V))
continue;
auto *I = cast<Instruction>(V);
- if (Roots.count(I))
+ if (Sinks.count(I))
continue;
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
// Now we need to remove any zexts that have become unnecessary, as well
// as insert any intrinsics.
for (auto *V : Visited) {
- if (Leaves.count(V))
+ if (Sources.count(V))
continue;
if (!shouldPromote(V) || isPromotedResultSafe(V))
return nullptr;
if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V) ||
- Leaves.count(V))
+ Sources.count(V))
return nullptr;
Type *TruncTy = TruncTysMap[V];
return Trunc;
};
- LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
// Fix up any stores or returns that use the results of the promoted
// chain.
- for (auto I : Roots) {
+ for (auto I : Sinks) {
LLVM_DEBUG(dbgs() << " - " << *I << "\n");
// Handle calls separately as we need to iterate over arg operands.
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
- // still be roots.
+ // still be sinks.
if (auto *Call = dyn_cast<CallInst>(V))
return isSupportedType(Call) &&
Call->hasRetAttr(Attribute::AttrKind::ZExt);
if (!isSupportedType(V))
return false;
- bool res = !isSigned(V);
- if (!res)
- LLVM_DEBUG(dbgs() << "ARM CGP: No, it's a signed instruction.\n");
- return res;
+ if (generateSignBits(V)) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+ return false;
+ }
+ return true;
}
/// Check that the type of V would be promoted and that the original type is
<< TypeSize << "\n");
SetVector<Value*> WorkList;
- SmallPtrSet<Value*, 8> Leaves;
- SmallPtrSet<Instruction*, 4> Roots;
+ SmallPtrSet<Value*, 8> Sources;
+ SmallPtrSet<Instruction*, 4> Sinks;
WorkList.insert(V);
SmallPtrSet<Value*, 16> CurrentVisited;
CurrentVisited.clear();
- // Return true if the given value can, or has been, visited. Add V to the
- // worklist if needed.
+ // Return true if V was added to the worklist as a supported instruction,
+ // if it was already visited, or if we don't need to explore it (e.g.
+ // pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V))
return true;
// Calls can be both sources and sinks.
if (isSink(V))
- Roots.insert(cast<Instruction>(V));
+ Sinks.insert(cast<Instruction>(V));
if (isSource(V))
- Leaves.insert(V);
+ Sources.insert(V);
else if (auto *I = dyn_cast<Instruction>(V)) {
// Visit operands of any instruction visited.
for (auto &U : I->operands()) {
);
unsigned ToPromote = 0;
for (auto *V : CurrentVisited) {
- if (Leaves.count(V))
+ if (Sources.count(V))
continue;
- if (Roots.count(cast<Instruction>(V)))
+ if (Sinks.count(cast<Instruction>(V)))
continue;
++ToPromote;
}
if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
+ Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks);
return true;
}