SDNode *SelectConcatVector(SDNode *N);
+ SDNode *SelectSMLAWSMULW(SDNode *N);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return nullptr;
}
+/// Try to recognise \p SignExt as the 16-bit multiplicand of a signed 32x16
+/// "wide" multiply (SMULW[B|T] / SMLAW[B|T]).
+///
+/// Two shapes are accepted:
+///  - an i32 value known to be sign-extended from 16 bits, either through an
+///    explicit extension node or through (sra (shl X, 16), 16); this picks
+///    the "bottom half" opcode.
+///  - (sra X, 16); this picks the "top half" opcode.
+///
+/// \param SignExt    Candidate operand of the multiply.
+/// \param Opc        Out: machine opcode to use. Written only on success.
+/// \param Src1       Out: the value whose bottom/top half holds the 16-bit
+///                   multiplicand. Written only on success.
+/// \param Accumulate Select the accumulating opcode (SMLAW*) instead of the
+///                   plain one (SMULW*).
+/// \returns true if \p SignExt matched one of the shapes above.
+static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
+                                 bool Accumulate) {
+  // For SM*WB we need some form of sign extension *from 16 bits*. The
+  // instruction reads the raw bottom half of the source register, so an
+  // extension from any other width must not be folded blindly.
+  if (SignExt.getValueType() == MVT::i32) {
+    bool IsSExtFrom16 = false;
+    switch (SignExt.getOpcode()) {
+    default:
+      break;
+    case ISD::SIGN_EXTEND:
+      IsSExtFrom16 = SignExt.getOperand(0).getValueType() == MVT::i16;
+      break;
+    case ISD::SIGN_EXTEND_INREG:
+      // Bits of operand 0 above the extended-from width are arbitrary, so
+      // only an extension from exactly i16 leaves the right bottom half.
+      IsSExtFrom16 =
+          cast<VTSDNode>(SignExt.getOperand(1))->getVT() == MVT::i16;
+      break;
+    case ISD::AssertSext:
+      // Operand 0 is already sign-extended from the asserted width; any
+      // width up to 16 bits means its bottom half reproduces the value.
+      IsSExtFrom16 =
+          cast<VTSDNode>(SignExt.getOperand(1))->getVT().getSizeInBits() <= 16;
+      break;
+    }
+
+    if (IsSExtFrom16) {
+      *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
+      Src1 = SignExt.getOperand(0);
+      return true;
+    }
+  }
+
+  // For SM*WT we need to find (sra X, 16); Src1 then gets set to X.
+  if (SignExt.getOpcode() != ISD::SRA)
+    return false;
+
+  ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
+  if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
+    return false;
+
+  SDValue Op0 = SignExt.getOperand(0);
+
+  // The sign extend operand for SM*WB could be generated by a shl and ashr:
+  // (sra (shl X, 16), 16) is the bottom half of X sign-extended.
+  if (Op0.getOpcode() == ISD::SHL) {
+    ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+    if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
+      return false;
+
+    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
+    Src1 = Op0.getOperand(0);
+    return true;
+  }
+
+  // Plain (sra X, 16): the multiplicand is the top half of X.
+  *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
+  Src1 = Op0;
+  return true;
+}
+
+/// Match \p OR against
+///   (or (srl (smul_lohi a, b):0, 16), (shl (smul_lohi a, b):1, 16))
+/// where one of a/b is accepted by SearchSignedMulShort, and report the
+/// operands and opcode to use for the wide multiply.
+///
+/// \param OR         Candidate root of the pattern.
+/// \param Opc        Out: opcode chosen by SearchSignedMulShort.
+/// \param Src0       Out: the other (full 32-bit) smul_lohi operand.
+/// \param Src1       Out: the source of the 16-bit operand, as reported by
+///                   SearchSignedMulShort.
+/// \param Accumulate Forwarded to SearchSignedMulShort to pick between the
+///                   accumulating and non-accumulating opcode.
+/// \returns true when the whole pattern matched.
+static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
+                                SDValue &Src1, bool Accumulate) {
+  if (OR.getOpcode() != ISD::OR)
+    return false;
+
+  // OR is commutative: accept the srl/shl pair in either operand order.
+  SDValue LoShift = OR.getOperand(0);
+  SDValue HiShift = OR.getOperand(1);
+  if (LoShift.getOpcode() != ISD::SRL || HiShift.getOpcode() != ISD::SHL) {
+    LoShift = OR.getOperand(1);
+    HiShift = OR.getOperand(0);
+    if (LoShift.getOpcode() != ISD::SRL || HiShift.getOpcode() != ISD::SHL)
+      return false;
+  }
+
+  // Both shifts must be by the constant 16.
+  ConstantSDNode *LoAmt = dyn_cast<ConstantSDNode>(LoShift.getOperand(1));
+  ConstantSDNode *HiAmt = dyn_cast<ConstantSDNode>(HiShift.getOperand(1));
+  if (!LoAmt || !HiAmt)
+    return false;
+  if (LoAmt->getZExtValue() != 16 || HiAmt->getZExtValue() != 16)
+    return false;
+
+  // The shifted values have to be the two results of one smul_lohi node:
+  // result 0 feeds the srl and result 1 feeds the shl.
+  SDValue LoVal = LoShift.getOperand(0);
+  SDValue HiVal = HiShift.getOperand(0);
+  if (LoVal.getNode() != HiVal.getNode())
+    return false;
+  if (LoVal.getOpcode() != ISD::SMUL_LOHI)
+    return false;
+
+  SDNode *Mul = LoVal.getNode();
+  if (LoVal != SDValue(Mul, 0) || HiVal != SDValue(Mul, 1))
+    return false;
+
+  // Now we have:
+  //   (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
+  // Either smul_lohi operand may be the 16-bit one; try operand 0 first,
+  // then operand 1. The remaining operand becomes Src0.
+  for (unsigned ShortIdx = 0; ShortIdx != 2; ++ShortIdx) {
+    if (SearchSignedMulShort(Mul->getOperand(ShortIdx), Opc, Src1,
+                             Accumulate)) {
+      Src0 = Mul->getOperand(1 - ShortIdx);
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Try to select \p N, an ADD or OR node, as a signed 32x16 wide multiply.
+///
+///  - OR that SearchSignedMulLong accepts         -> SMULW[B|T]
+///  - ADD with one operand accepted by it         -> SMLAW[B|T], the other
+///    ADD operand becoming the accumulator.
+///
+/// \returns the node produced by SelectNodeTo, or nullptr if \p N does not
+/// match.
+///
+/// NOTE(review): only ARM:: opcodes are produced and no subtarget check is
+/// visible in this function - confirm this is safe for Thumb2 targets and
+/// for cores without these instructions.
+SDNode *ARMDAGToDAGISel::SelectSMLAWSMULW(SDNode *N) {
+  SDLoc dl(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue A, B;
+  unsigned Opc = 0;
+  const unsigned NodeOpc = N->getOpcode();
+
+  // Non-accumulating form: the OR node itself is the multiply result.
+  if (NodeOpc == ISD::OR) {
+    if (!SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false))
+      return nullptr;
+    if (Opc == 0)
+      return nullptr;
+
+    SDValue Ops[] = { A, B, getAL(CurDAG, dl),
+                      CurDAG->getRegister(0, MVT::i32)};
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+  }
+
+  if (NodeOpc != ISD::ADD)
+    return nullptr;
+
+  // Accumulating form: one ADD operand must be the OR-rooted multiply.
+  // Cheap early out before walking either operand.
+  if (LHS.getOpcode() != ISD::OR && RHS.getOpcode() != ISD::OR)
+    return nullptr;
+
+  SDValue Acc;
+  if (SearchSignedMulLong(LHS, &Opc, A, B, true))
+    Acc = RHS;
+  else if (SearchSignedMulLong(RHS, &Opc, A, B, true))
+    Acc = LHS;
+  else
+    return nullptr;
+
+  if (Opc == 0)
+    return nullptr;
+
+  SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
+                    CurDAG->getRegister(0, MVT::i32) };
+  return CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
+}
+
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD:
+ case ISD::OR: {
+ SDNode *ResNode = SelectSMLAWSMULW(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case ISD::WRITE_REGISTER: {
SDNode *ResNode = SelectWriteRegister(N);
if (ResNode)
; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
@x = weak global i16 0 ; <i16*> [#uses=1]
@y = weak global i16 0 ; <i16*> [#uses=0]
ret i32 %tmp5
}
+; Both multiplicands are top halves (ashr by 16) and the product is
+; accumulated into %a.
+define i32 @f4(i32 %a, i32 %x, i32 %y) {
+; CHECK-LABEL: f4:
+; CHECK: smlatt
+  %tmp1 = ashr i32 %x, 16
+  %tmp3 = ashr i32 %y, 16
+  %tmp4 = mul i32 %tmp3, %tmp1
+  %tmp5 = add i32 %tmp4, %a
+  ret i32 %tmp5
+}
+
+; Both multiplicands are sign-extended i16 values (bottom halves) and the
+; product is accumulated into %a.
+define i32 @f5(i32 %a, i16 %x, i16 %y) {
+; CHECK-LABEL: f5:
+; CHECK: smlabb
+  %tmp1 = sext i16 %x to i32
+  %tmp3 = sext i16 %y to i32
+  %tmp4 = mul i32 %tmp3, %tmp1
+  %tmp5 = add i32 %tmp4, %a
+  ret i32 %tmp5
+}
+
+; One multiplicand is a sign-extended i16 (bottom half), the other a top
+; half (ashr by 16); the product is accumulated into %a.
+define i32 @f6(i32 %a, i16 %x, i32 %y) {
+; CHECK-LABEL: f6:
+; CHECK: smlabt
+  %tmp1 = sext i16 %x to i32
+  %tmp3 = ashr i32 %y, 16
+  %tmp4 = mul i32 %tmp3, %tmp1
+  %tmp5 = add i32 %tmp4, %a
+  ret i32 %tmp5
+}
+
+; 32x16 wide multiply-accumulate: the 16-bit operand is the bottom half of
+; %b, sign-extended with a shl/ashr pair; bits [47:16] of the 64-bit product
+; are added to %c.
+define i32 @f7(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f7:
+; CHECK: smlawb
+  %shl = shl i32 %b, 16
+  %shr = ashr exact i32 %shl, 16
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %shr to i64
+  %mul = mul nsw i64 %conv2, %conv
+  %shr49 = lshr i64 %mul, 16
+  %conv5 = trunc i64 %shr49 to i32
+  %add = add nsw i32 %conv5, %c
+  ret i32 %add
+}
+
+; 32x16 wide multiply-accumulate: the 16-bit operand is a signext i16
+; argument; bits [47:16] of the 64-bit product are added to %c.
+define i32 @f8(i32 %a, i16 signext %b, i32 %c) {
+; CHECK-LABEL: f8:
+; CHECK: smlawb
+  %conv = sext i32 %a to i64
+  %conv1 = sext i16 %b to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %shr5 = lshr i64 %mul, 16
+  %conv2 = trunc i64 %shr5 to i32
+  %add = add nsw i32 %conv2, %c
+  ret i32 %add
+}
+
+; 32x16 wide multiply-accumulate: the 16-bit operand is the top half of %b
+; (ashr by 16); bits [47:16] of the 64-bit product are added to %c.
+define i32 @f9(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f9:
+; CHECK: smlawt
+  %conv = sext i32 %a to i64
+  %shr = ashr i32 %b, 16
+  %conv1 = sext i32 %shr to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %shr26 = lshr i64 %mul, 16
+  %conv3 = trunc i64 %shr26 to i32
+  %add = add nsw i32 %conv3, %c
+  ret i32 %add
+}
+
+; 32x16 wide multiply without accumulate: the 16-bit operand is the bottom
+; half of %b, sign-extended with a shl/ashr pair.
+define i32 @f10(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f10:
+; CHECK: smulwb
+  %shl = shl i32 %b, 16
+  %shr = ashr exact i32 %shl, 16
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %shr to i64
+  %mul = mul nsw i64 %conv2, %conv
+  %shr37 = lshr i64 %mul, 16
+  %conv4 = trunc i64 %shr37 to i32
+  ret i32 %conv4
+}
+
+; 32x16 wide multiply without accumulate: the 16-bit operand is a signext
+; i16 argument.
+define i32 @f11(i32 %a, i16 signext %b, i32 %c) {
+; CHECK-LABEL: f11:
+; CHECK: smulwb
+  %conv = sext i32 %a to i64
+  %conv1 = sext i16 %b to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %shr4 = lshr i64 %mul, 16
+  %conv2 = trunc i64 %shr4 to i32
+  ret i32 %conv2
+}
+
+; 32x16 wide multiply without accumulate: the 16-bit operand is the top half
+; of %b (ashr by 16).
+define i32 @f12(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f12:
+; CHECK: smulwt
+  %conv = sext i32 %a to i64
+  %shr = ashr i32 %b, 16
+  %conv1 = sext i32 %shr to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %shr25 = lshr i64 %mul, 16
+  %conv3 = trunc i64 %shr25 to i32
+  ret i32 %conv3
+}