}
}
+/// \brief Determine whether it is worth folding SHL into the addressing
+/// mode.
+static bool isWorthFoldingSHL(SDValue V) {
+  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
+  // It is worth folding a logical shift of up to three places.
+  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
+  if (!CSD)
+    return false;
+  unsigned ShiftVal = CSD->getZExtValue();
+  if (ShiftVal > 3)
+    return false;
+
+  // Check whether this node is reused in non-memory computation.  The shift
+  // may feed a non-memory node (typically the ADD that forms the address),
+  // as long as every use of that node is in turn a memory operation.  If
+  // the value escapes into other computation, do not fold it into the
+  // address: the shift will be kept anyway, so folding saves nothing.
+  const SDNode *Node = V.getNode();
+  for (SDNode *UI : Node->uses())
+    if (!isa<MemSDNode>(*UI))
+      for (SDNode *UII : UI->uses())
+        if (!isa<MemSDNode>(*UII))
+          return false;
+  return true;
+}
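+
+// For example (an illustrative sketch, not literal output of this patch):
+// with a fast LSL, a shifted index feeding two memory operations,
+//   lsl  x8, x1, #3
+//   ldr  x9, [x0, x8]
+//   str  x9, [x0, x8]
+// can instead keep the shift folded into each addressing mode,
+//   ldr  x9, [x0, x1, lsl #3]
+//   str  x9, [x0, x1, lsl #3]
+// even though the shifted value has more than one use.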
+
/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the value is used at least twice, unless we are optimizing
- // for code size.
- return ForCodeSize || V.hasOneUse();
+  // Trivially worth it if we are optimizing for code size or if there is
+  // only one use of the value.
+  if (ForCodeSize || V.hasOneUse())
+    return true;
+  // If the subtarget has a fast LSL (FeatureLSLFast), we can fold a logical
+  // shift into the addressing mode and save a cycle.
+  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
+      isWorthFoldingSHL(V))
+    return true;
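+  // Likewise look through an ADD for a foldable shift on either side, since
+  // that is how an address computation (base + index << scale) is formed.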
+  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
+    const SDValue LHS = V.getOperand(0);
+    const SDValue RHS = V.getOperand(1);
+    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
+      return true;
+    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
+      return true;
+  }
+
+  // It hurts otherwise, since the value will be reused.
+  return false;
}
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
--- /dev/null
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+lsl-fast | FileCheck %s
+
+%struct.a = type [256 x i16]
+%struct.b = type [256 x i32]
+%struct.c = type [256 x i64]
+
+declare void @foo()
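+
+; Check that the shift of the masked index is folded into the addressing
+; mode of both the halfword load and the halfword store (lsl #1), even
+; though the address computation has two uses.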
+define i16 @halfword(%struct.a* %ctx, i32 %xor72) nounwind {
+; CHECK-LABEL: halfword:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: ldrh [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #1]
+; CHECK: strh [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #1]
+  %shr81 = lshr i32 %xor72, 9
+  %conv82 = zext i32 %shr81 to i64
+  %idxprom83 = and i64 %conv82, 255
+  %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
+  %result = load i16, i16* %arrayidx86, align 2
+  call void @foo()
+  store i16 %result, i16* %arrayidx86, align 2
+  ret i16 %result
+}
+
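+; Same pattern for a word-sized element: the index shift (lsl #2) should
+; be folded into both the load and the store.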
+define i32 @word(%struct.b* %ctx, i32 %xor72) nounwind {
+; CHECK-LABEL: word:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: ldr [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #2]
+; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #2]
+  %shr81 = lshr i32 %xor72, 9
+  %conv82 = zext i32 %shr81 to i64
+  %idxprom83 = and i64 %conv82, 255
+  %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
+  %result = load i32, i32* %arrayidx86, align 4
+  call void @foo()
+  store i32 %result, i32* %arrayidx86, align 4
+  ret i32 %result
+}
+
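+; Same pattern for a doubleword-sized element (lsl #3).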
+define i64 @doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
+; CHECK-LABEL: doubleword:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: ldr [[REG1:x[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #3]
+; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #3]
+  %shr81 = lshr i32 %xor72, 9
+  %conv82 = zext i32 %shr81 to i64
+  %idxprom83 = and i64 %conv82, 255
+  %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
+  %result = load i64, i64* %arrayidx86, align 8
+  call void @foo()
+  store i64 %result, i64* %arrayidx86, align 8
+  ret i64 %result
+}
+
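+; Negative test: %mul1 and %mul2 also have non-memory uses (the compares
+; and the returns), so the shifts must not be folded into the cmp as
+; shifted-register operands; they are materialized as separate lsl
+; instructions instead.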
+define i64 @multi_use_non_memory(i64 %a, i64 %b) {
+; CHECK-LABEL: multi_use_non_memory:
+; CHECK: lsl [[REG1:x[0-9]+]], x0, #3
+; CHECK-NOT: cmp [[REG1]], x1, lsl #3
+; CHECK-NEXT: lsl [[REG2:x[0-9]+]], x1, #3
+; CHECK-NEXT: cmp [[REG1]], [[REG2]]
+entry:
+  %mul1 = shl i64 %a, 3
+  %mul2 = shl i64 %b, 3
+  %cmp = icmp slt i64 %mul1, %mul2
+  br i1 %cmp, label %truebb, label %falsebb
+truebb:
+  tail call void @foo()
+  unreachable
+falsebb:
+  %cmp2 = icmp sgt i64 %mul1, %mul2
+  br i1 %cmp2, label %exitbb, label %endbb
+exitbb:
+  ret i64 %mul1
+endbb:
+  ret i64 %mul2
+}