From cecb2c5c8021ecc0bf18f07c991881893948eea1 Mon Sep 17 00:00:00 2001
From: Mikhail Skvortcov <m.skvortcov@partner.samsung.com>
Date: Thu, 26 Jan 2017 19:49:05 +0300
Subject: [PATCH] RyuJIT/ARM32: fix consume order and clone emitInsTernary

Commit migrated from https://github.com/dotnet/coreclr/commit/076116be7b5e7890760778020604c698377a83e2
---
 src/coreclr/src/jit/codegenarm.cpp |  59 ++-----------
 src/coreclr/src/jit/emitarm.cpp    | 169 +++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+), 53 deletions(-)

diff --git a/src/coreclr/src/jit/codegenarm.cpp b/src/coreclr/src/jit/codegenarm.cpp
index ea9c1f5..472db63 100644
--- a/src/coreclr/src/jit/codegenarm.cpp
+++ b/src/coreclr/src/jit/codegenarm.cpp
@@ -350,6 +350,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
         case GT_SUB:
         case GT_MUL:
         {
+            genConsumeOperands(treeNode->AsOp());
+
             const genTreeOps oper = treeNode->OperGet();
             if ((oper == GT_ADD || oper == GT_SUB || oper == GT_MUL) && treeNode->gtOverflow())
             {
@@ -364,56 +366,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             // The arithmetic node must be sitting in a register (since it's not contained)
             noway_assert(targetReg != REG_NA);
 
-            regNumber op1reg = op1->gtRegNum;
-            regNumber op2reg = op2->gtRegNum;
-
-            GenTreePtr dst;
-            GenTreePtr src;
-
-            genConsumeIfReg(op1);
-            genConsumeIfReg(op2);
-
-            if (!varTypeIsFloating(targetType))
-            {
-                // This is the case of reg1 = reg1 op reg2
-                // We're ready to emit the instruction without any moves
-                if (op1reg == targetReg)
-                {
-                    dst = op1;
-                    src = op2;
-                }
-                // We have reg1 = reg2 op reg1
-                // In order for this operation to be correct
-                // we need that op is a commutative operation so
-                // we can convert it into reg1 = reg1 op reg2 and emit
-                // the same code as above
-                else if (op2reg == targetReg)
-                {
-                    assert(GenTree::OperIsCommutative(treeNode->OperGet()));
-                    dst = op2;
-                    src = op1;
-                }
-                // dest, op1 and op2 registers are different:
-                // reg3 = reg1 op reg2
-                // We can implement this by issuing a mov:
-                // reg3 = reg1
-                // reg3 = reg3 op reg2
-                else
-                {
-                    inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
-                    regTracker.rsTrackRegCopy(targetReg, op1reg);
-                    gcInfo.gcMarkRegPtrVal(targetReg, targetType);
-                    dst = treeNode;
-                    src = op2;
-                }
-
-                regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
-                assert(r == targetReg);
-            }
-            else
-            {
-                emit->emitIns_R_R_R(ins, emitTypeSize(treeNode), targetReg, op1reg, op2reg);
-            }
+            regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2);
+            assert(r == targetReg);
         }
             genProduceReg(treeNode);
             break;
@@ -1270,13 +1224,12 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
 
     assert(tree->gtRegNum != REG_NA);
 
-    GenTreePtr operand = tree->gtGetOp1();
-    genConsumeReg(operand);
+    genConsumeOperands(tree->AsOp());
 
+    GenTreePtr operand = tree->gtGetOp1();
     GenTreePtr shiftBy = tree->gtGetOp2();
     if (!shiftBy->IsCnsIntOrI())
     {
-        genConsumeReg(shiftBy);
         getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
     }
     else
diff --git a/src/coreclr/src/jit/emitarm.cpp b/src/coreclr/src/jit/emitarm.cpp
index 819ae3a..f7943f2 100644
--- a/src/coreclr/src/jit/emitarm.cpp
+++ b/src/coreclr/src/jit/emitarm.cpp
@@ -7656,5 +7656,174 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
     }
 }
 
+regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
+{
+    regNumber result = REG_NA;
+
+    // dst can only be a reg
+    assert(!dst->isContained());
+
+    // find immed (if any) - it cannot be a dst
+    // Only one src can be an int.
+    GenTreeIntConCommon* intConst  = nullptr;
+    GenTree*             nonIntReg = nullptr;
+
+    if (varTypeIsFloating(dst))
+    {
+        // src1 can only be a reg
+        assert(!src1->isContained());
+        // src2 can only be a reg
+        assert(!src2->isContained());
+    }
+    else // not floating point
+    {
+        // src2 can be immed or reg
+        assert(!src2->isContained() || src2->isContainedIntOrIImmed());
+
+        // Check src2 first as we can always allow it to be a contained immediate
+        if (src2->isContainedIntOrIImmed())
+        {
+            intConst  = src2->AsIntConCommon();
+            nonIntReg = src1;
+        }
+        // Only for commutative operations do we check src1 and allow it to be a contained immediate
+        else if (dst->OperIsCommutative())
+        {
+            // src1 can be immed or reg
+            assert(!src1->isContained() || src1->isContainedIntOrIImmed());
+
+            // Check src1 and allow it to be a contained immediate
+            if (src1->isContainedIntOrIImmed())
+            {
+                assert(!src2->isContainedIntOrIImmed());
+                intConst  = src1->AsIntConCommon();
+                nonIntReg = src2;
+            }
+        }
+        else
+        {
+            // src1 can only be a reg
+            assert(!src1->isContained());
+        }
+    }
+    bool      isMulOverflow = false;
+    bool      isUnsignedMul = false;
+    regNumber extraReg      = REG_NA;
+    if (dst->gtOverflowEx())
+    {
+        NYI_ARM("emitInsTernary overflow");
+#if 0
+        if (ins == INS_add)
+        {
+            ins = INS_adds;
+        }
+        else if (ins == INS_sub)
+        {
+            ins = INS_subs;
+        }
+        else if (ins == INS_mul)
+        {
+            isMulOverflow = true;
+            isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
+            assert(intConst == nullptr); // overflow format doesn't support an int constant operand
+        }
+        else
+        {
+            assert(!"Invalid ins for overflow check");
+        }
+#endif
+    }
+    if (intConst != nullptr)
+    {
+        emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue());
+    }
+    else
+    {
+        if (isMulOverflow)
+        {
+            NYI_ARM("emitInsTernary overflow");
+#if 0
+            // Make sure that we have an internal register
+            assert(genCountBits(dst->gtRsvdRegs) == 2);
+
+            // There will be two bits set in tmpRegsMask.
+            // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
+            regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
+            assert(tmpRegsMask != RBM_NONE);
+            regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+            extraReg             = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+
+            if (isUnsignedMul)
+            {
+                if (attr == EA_4BYTE)
+                {
+                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
+                    emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+                    // Get the high result by shifting dst.
+                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+                }
+                else
+                {
+                    assert(attr == EA_8BYTE);
+                    // Compute the high result.
+                    emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+                    // Now multiply without skewing the high result.
+                    emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+                }
+
+                // zero-sign bit comparision to detect overflow.
+                emitIns_R_I(INS_cmp, attr, extraReg, 0);
+            }
+            else
+            {
+                int bitShift = 0;
+                if (attr == EA_4BYTE)
+                {
+                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
+                    emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+                    // Get the high result by shifting dst.
+                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+
+                    bitShift = 31;
+                }
+                else
+                {
+                    assert(attr == EA_8BYTE);
+                    // Save the high result in a temporary register.
+                    emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+                    // Now multiply without skewing the high result.
+                    emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+                    bitShift = 63;
+                }
+
+                // Sign bit comparision to detect overflow.
+                emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
+            }
+#endif
+        }
+        else
+        {
+            // We can just multiply.
+            emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+        }
+    }
+
+    if (dst->gtOverflowEx())
+    {
+        NYI_ARM("emitInsTernary overflow");
+#if 0
+        assert(!varTypeIsFloating(dst));
+        codeGen->genCheckOverflow(dst);
+#endif
+    }
+
+    return dst->gtRegNum;
+}
+
 #endif // !LEGACY_BACKEND
 #endif // defined(_TARGET_ARM_)
-- 
2.7.4