Consider spilled lcl vars as contained memory operands for codegen purposes.
author sivarv <sivarv@microsoft.com>
Fri, 1 Jul 2016 18:41:02 +0000 (11:41 -0700)
committer sivarv <sivarv@microsoft.com>
Sat, 2 Jul 2016 00:01:32 +0000 (17:01 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/d92b629cdb742c42ef7b6a35f3ba55104539c9bd

src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/emitxarch.cpp
src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/gentree.h
src/coreclr/src/jit/lower.h
src/coreclr/src/jit/lowerxarch.cpp
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/lsra.h
src/coreclr/src/jit/nodeinfo.h

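At a high level, the change threads a single "reg optional" bit from Lower through LSRA to codegen: Lower marks a GT_LCL_VAR operand whose register is merely preferred, LSRA may decline to allocate a register for such a use when registers are scarce, and codegen then consumes the operand from its stack home as if it were a contained memory operand. Below is a minimal, self-contained sketch of that contract; the types and helpers are illustrative stand-ins rather than the JIT's real GenTree/LSRA types, and only the names regOptional, IsRegOptional and REG_STK come from the diff itself.

    // Toy model of the reg-optional contract introduced by this change.
    #include <cstdio>

    enum { REG_NA = -1, REG_STK = -2 };

    struct Operand
    {
        bool regOptional = false;  // set by lowering (gtLsraInfo.regOptional in the real JIT)
        int  reg         = REG_NA; // filled in by the register allocator
    };

    // Lowering: mark an operand whose register is optional for codegen.
    void Lower(Operand& op) { op.regOptional = true; }

    // LSRA: may leave a reg-optional operand on the stack when every busy
    // register holds a more important ref position.
    void Allocate(Operand& op, bool registersScarce)
    {
        op.reg = (op.regOptional && registersScarce) ? REG_STK : 0 /* some register */;
    }

    // Codegen: a reg-optional operand left on the stack is consumed exactly
    // like a contained memory operand.
    void Emit(const Operand& op)
    {
        if (op.reg == REG_STK)
            std::printf("add eax, dword ptr [rsp+0x20]   ; memory form\n");
        else
            std::printf("add eax, ecx                    ; register form\n");
    }

    int main()
    {
        Operand op;
        Lower(op);
        Allocate(op, /* registersScarce */ true);
        Emit(op); // prints the memory form
        return 0;
    }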
index 6c356e6..10a96bb 100755 (executable)
@@ -1238,8 +1238,13 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
     genConsumeOperands(treeNode->AsOp());
     if (varTypeIsFloating(targetType))
     {
-        // divisor is not contained or if contained is a memory op
-        assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl());
+        // divisor is either not contained, or if contained it is a memory op.
+        // Note that a reg optional operand is treated as a memory op
+        // if no register is allocated to it.
+        assert(!divisor->isContained() || 
+               divisor->isMemoryOp() || 
+               divisor->IsCnsFltOrDbl() ||
+               divisor->IsRegOptional());
 
         // Floating point div/rem operation
         assert(oper == GT_DIV || oper == GT_MOD);
@@ -1357,7 +1362,10 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
     if (op1->isContained())
     {
         assert(treeNode->OperIsCommutative());
-        assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32());
+        assert(op1->isMemoryOp() || 
+               op1->IsCnsNonZeroFltOrDbl() || 
+               op1->IsIntCnsFitsInI32() ||
+               op1->IsRegOptional());
 
         op1 = treeNode->gtGetOp2();
         op2 = treeNode->gtGetOp1();
@@ -2203,8 +2211,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             bool isUnsignedMultiply    = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
             bool requiresOverflowCheck = treeNode->gtOverflowEx();
     
-            GenTree *op1 = treeNode->gtOp.gtOp1;
-            GenTree *op2 = treeNode->gtOp.gtOp2;
+            GenTree* op1 = treeNode->gtGetOp1();
+            GenTree* op2 = treeNode->gtGetOp2();
 
             // there are 3 forms of x64 multiply:
             // 1-op form with 128 result:  RDX:RAX = RAX * rm
@@ -4958,6 +4966,18 @@ void CodeGen::genConsumeRegs(GenTree* tree)
             // Now we need to consume the operands of the GT_AND node.
             genConsumeOperands(tree->AsOp());
         }
+        else if (tree->OperGet() == GT_LCL_VAR)
+        {
+            // A contained lcl var must be living on the stack and marked as reg optional.
+            unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
+            LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+            noway_assert(varDsc->lvRegNum == REG_STK);
+            noway_assert(tree->IsRegOptional());
+
+            // Update the life of reg optional lcl var.
+            genUpdateLife(tree);
+        }
         else
         {
             assert(tree->OperIsLeaf());
@@ -5488,10 +5508,15 @@ void CodeGen::genStoreInd(GenTreePtr node)
                     rmwDst = data->gtGetOp2();
                     rmwSrc = data->gtGetOp1();
                 }
+
+                genConsumeRegs(rmwSrc);
             }
             else
             {
-                // For unary RMW ops, src and dst of RMW memory op is the same.
+                // *(p) = oper *(p): here addr = p and rmwSrc = rmwDst = *(p), i.e. GT_IND(p).
+                // For unary RMW ops, the src and dst of the RMW memory op are the same.  Lower
+                // clears the operand counts on rmwSrc, so we don't need to perform a
+                // genConsumeReg() on it.
                 assert(storeInd->IsRMWDstOp1());
                 rmwSrc = data->gtGetOp1();
                 rmwDst = data->gtGetOp1();
@@ -5500,9 +5525,7 @@ void CodeGen::genStoreInd(GenTreePtr node)
 
             assert(rmwSrc != nullptr);
             assert(rmwDst != nullptr);
-            assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
-
-            genConsumeRegs(rmwSrc);
+            assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));             
         }
         else
         {
index aa9c007..388a51d 100644 (file)
@@ -2845,17 +2845,22 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
 {
     // dst can only be a reg or modrm
     assert(!dst->isContained() ||
-           dst->isContainedIndir() ||
-           dst->isContainedLclField() ||
+           dst->isContainedMemoryOp() ||
            instrIs3opImul(ins)); // dst on these isn't really the dst
 
+#ifdef DEBUG
     // src can be anything but both src and dst cannot be addr modes
     // or at least cannot be contained addr modes
-    if (dst->isContainedIndir())
-        assert(!src->isContainedIndir());
+    if (dst->isContainedMemoryOp())
+    {
+        assert(!src->isContainedMemoryOp());
+    }
 
-    if (src->isContainedLclField())
-        assert(!dst->isContained());
+    if (src->isContainedMemoryOp())
+    {
+        assert(!dst->isContainedMemoryOp());
+    }
+#endif
 
     // find which operand is a memory op (if any)
     // and what its base is
@@ -2890,7 +2895,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
     }
     
     // find local field if any
-    GenTreeLclFld* lclField = nullptr;
+    GenTreeLclFld* lclField = nullptr;    
     if (src->isContainedLclField())
     {
         lclField = src->AsLclFld();
@@ -2900,9 +2905,22 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
         lclField = dst->AsLclFld();
     }
 
+    // find contained lcl var if any
+    GenTreeLclVar* lclVar = nullptr;
+    if (src->isContainedLclVar())
+    {
+        assert(src->IsRegOptional());
+        lclVar = src->AsLclVar();
+    }
+    else if (dst->isContainedLclVar())
+    {
+        assert(dst->IsRegOptional());
+        lclVar = dst->AsLclVar();
+    }
+
     // First handle the simple non-memory cases
     //
-    if ((mem == nullptr) && (lclField == nullptr))
+    if ((mem == nullptr) && (lclField == nullptr) && (lclVar == nullptr))
     {
         if (intConst != nullptr)
         {
@@ -2938,15 +2956,27 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
         return dst->gtRegNum;
     }
 
-    // Next handle the cases where we have a stack based local memory operand
+    // Next handle the cases where we have a stack based local memory operand.
     //
-    if (lclField)
+    unsigned varNum = BAD_VAR_NUM;
+    unsigned offset = (unsigned) -1;
+
+    if (lclField != nullptr)
     {
-        unsigned offset = lclField->gtLclFld.gtLclOffs;
-        unsigned varNum = lclField->gtLclVarCommon.gtLclNum;
+        varNum = lclField->AsLclVarCommon()->GetLclNum();
+        offset = lclField->gtLclFld.gtLclOffs;
+    }
+    else if (lclVar != nullptr)
+    {
+        varNum = lclVar->AsLclVarCommon()->GetLclNum();
+        offset = 0;
+    }
 
+    if (varNum != BAD_VAR_NUM)
+    {
         // Is the memory op in the source position?
-        if (src->isContainedLclField())
+        if (src->isContainedLclField() ||
+            src->isContainedLclVar())
         {
             if (instrHasImplicitRegPairDest(ins))
             {
@@ -2964,6 +2994,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
         else  // The memory op is in the dest position.
         {
             assert(dst->gtRegNum == REG_NA);
+
             // src could be int or reg
             if (src->isContainedIntOrIImmed())
             {
index 092e63d..fb66a24 100644 (file)
@@ -12869,6 +12869,19 @@ BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler * comp)
     }
 }
 
+// -------------------------------------------------------------------------
+// IsRegOptional: Returns true if this gentree node is marked by lowering to
+// indicate that codegen can still generate code even if it wasn't allocated
+// a register.
+bool GenTree::IsRegOptional() const
+{
+#ifdef LEGACY_BACKEND
+    return false;
+#else
+    return gtLsraInfo.regOptional;
+#endif
+}
+
 bool GenTree::IsPhiDefn()
 {
     bool res = 
index 30cb015..0c80d33 100644 (file)
@@ -495,11 +495,16 @@ public:
 
     bool isContainedLclField() const        { return isContained() && isLclField(); }
 
+    bool isContainedLclVar() const          {  return isContained() && (OperGet() == GT_LCL_VAR);  } 
+
     // Indicates whether it is a memory op.
     // Right now it includes Indir and LclField ops.
     bool isMemoryOp() const                 { return isIndir() || isLclField(); }
 
-    bool isContainedMemoryOp() const        { return isContained() && isMemoryOp(); }
+    bool isContainedMemoryOp() const        
+    { 
+        return (isContained() && isMemoryOp()) || isContainedLclVar(); 
+    }
 
     regNumber GetRegNum() const
     {
@@ -1621,6 +1626,11 @@ public:
     inline var_types            CastFromType();
     inline var_types&           CastToType();
 
+    // Returns true if this gentree node is marked by lowering to indicate
+    // that codegen can still generate code even if it wasn't allocated a 
+    // register.
+    bool IsRegOptional() const;   
+
     // Returns "true" iff "*this" is an assignment (GT_ASG) tree that defines an SSA name (lcl = phi(...));
     bool IsPhiDefn();
 
index a3e29b2..9419557 100644 (file)
@@ -145,6 +145,8 @@ private:
     void TreeNodeInfoInit(GenTreePtr* tree, GenTree* parent);
 #if defined(_TARGET_XARCH_)
     void TreeNodeInfoInitSimple(GenTree* tree);
+    void SetRegOptionalForBinOp(GenTree* tree);
+    void TryToSetRegOptional(GenTree* operand);
 #endif // defined(_TARGET_XARCH_)
     void TreeNodeInfoInitReturn(GenTree* tree);
     void TreeNodeInfoInitShiftRotate(GenTree* tree);
index d53f048..d864b05 100644 (file)
@@ -345,15 +345,16 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
                 // overflow operations aren't supported on float/double types.
                 assert(!tree->gtOverflow());
 
+                op1 = tree->gtGetOp1();
+                op2 = tree->gtGetOp2();
+
                 // No implicit conversions at this stage as the expectation is that
                 // everything is made explicit by adding casts.
-                assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+                assert(op1->TypeGet() == op2->TypeGet());
 
                 info->srcCount = 2;
                 info->dstCount = 1;              
-
-                op1 = tree->gtOp.gtOp1;
-                op2 = tree->gtOp.gtOp2; 
                 if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
                 {
                     MakeSrcContained(tree, op2);
@@ -371,6 +372,11 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
                     //      movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg  
                     MakeSrcContained(tree, op1);
                 }
+                else
+                {
+                    // If there are no containable operands, we can make an operand reg optional.
+                    SetRegOptionalForBinOp(tree);
+                }
                 break;
             }
 
@@ -559,9 +565,17 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
                     other = node->gtArrLen;
                 }
 
-                if (other->isMemoryOp() && node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+                if (other->isMemoryOp())
+                {
+                    if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+                    {
+                        MakeSrcContained(tree, other);
+                    }
+                }
+                else
                 {
-                    MakeSrcContained(tree, other);
+                    // Since the 'other' operand is not contained, we can mark it as reg optional.
+                    TryToSetRegOptional(other);
                 }
             }
             break;
@@ -772,10 +786,19 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
         // 
         // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
         //
+        // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen performs
+        // the comparison on ubyte operands, and if the result needs to be materialized into a reg
+        // it is simply zero extended to TYP_INT size.  Here is an example of generated code:
+        //         cmp dl, byte ptr[addr mode]
+        //         movzx edx, dl
+        //
         // Though this looks conservative in theory, in practice we could not think of a case where
         // the below logic leads to conservative register specification.  In future when or if we find
         // one such case, this logic needs to be fine tuned for that case(s).
-        if (varTypeIsByte(tree) || ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())))
+        if (varTypeIsByte(tree) || 
+            ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) ||
+            (tree->OperIsCompare() && varTypeIsByte(tree->gtGetOp1()) && varTypeIsByte(tree->gtGetOp2()))
+           )
         {
             regMaskTP regMask;
             if (info->dstCount > 0)
@@ -1950,14 +1973,16 @@ Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
     // for GT_ADD(Constant, SomeTree)
     info->srcCount = 2;
     info->dstCount = 1;
-    GenTree* op1 = tree->gtOp.gtOp1;
-    GenTree* op2 = tree->gtOp.gtOp2;            
+
+    GenTree* op1 = tree->gtGetOp1();
+    GenTree* op2 = tree->gtGetOp2();            
 
     // We can directly encode the second operand if it is either a containable constant or a memory-op.
     // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
     // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
     // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
     bool directlyEncodable = false;
+    bool binOpInRMW = false;
     GenTreePtr operand = nullptr;
 
     if (IsContainableImmed(tree, op2))
@@ -1965,21 +1990,25 @@ Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
         directlyEncodable = true;
         operand = op2;
     }
-    else if (!IsBinOpInRMWStoreInd(tree))
+    else
     {
-        if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
+        binOpInRMW = IsBinOpInRMWStoreInd(tree);
+        if (!binOpInRMW)
         {
-            directlyEncodable = true;
-            operand = op2;
-        }
-        else if (tree->OperIsCommutative())
-        {
-            if (IsContainableImmed(tree, op1) ||
-                (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+            if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
             {
-                // If it is safe, we can reverse the order of operands of commutative operations for efficient codegen
                 directlyEncodable = true;
-                operand = op1;
+                operand = op2;
+            }
+            else if (tree->OperIsCommutative())
+            {
+                if (IsContainableImmed(tree, op1) ||
+                    (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+                {
+                    // If it is safe, we can reverse the order of operands of commutative operations for efficient codegen
+                    directlyEncodable = true;
+                    operand = op1;
+                }
             }
         }
     }
@@ -1989,6 +2018,13 @@ Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
         assert(operand != nullptr);
         MakeSrcContained(tree, operand);
     }
+    else if (!binOpInRMW)
+    {
+        // If this binary op neither has contained operands, nor is a 
+        // Read-Modify-Write (RMW) operation, we can mark its operands
+        // as reg optional.
+        SetRegOptionalForBinOp(tree);
+    }
 }
 
 
@@ -2007,6 +2043,12 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
     TreeNodeInfo* info = &(tree->gtLsraInfo);
     LinearScan* l = m_lsra;
 
+    GenTree* op1 = tree->gtGetOp1();
+    GenTree* op2 = tree->gtGetOp2();
+
+    info->srcCount = 2;
+    info->dstCount = 1;
+
     switch (tree->OperGet())
     {
     case GT_MOD:
@@ -2015,16 +2057,18 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
         {   
             // No implicit conversions at this stage as the expectation is that
             // everything is made explicit by adding casts.
-            assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+            assert(op1->TypeGet() == op2->TypeGet());
 
-            info->srcCount = 2;
-            info->dstCount = 1;
-
-            GenTree* op2 = tree->gtOp.gtOp2;
             if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
             {
                 MakeSrcContained(tree, op2);
             }
+            else
+            {
+                // If there are no containable operands, we can make an operand reg optional.
+                SetRegOptionalForBinOp(tree);
+            }
+
             return;
         }
         break;
@@ -2033,12 +2077,6 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
         break;
     }
 
-    info->srcCount = 2;
-    info->dstCount = 1;
-
-    GenTree* op1 = tree->gtOp.gtOp1;
-    GenTree* op2 = tree->gtOp.gtOp2;
-
     // Amd64 Div/Idiv instruction: 
     //    Dividend in RAX:RDX  and computes
     //    Quotient in RAX, Remainder in RDX
@@ -2067,6 +2105,9 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
     else
     {
         op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+
+        // If there are no containable operands, we can make an operand reg optional.
+        SetRegOptionalForBinOp(tree);
     }
 }
 
@@ -2087,7 +2128,7 @@ Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
     LinearScan* l = m_lsra;
 
     // Both operand and its result must be of floating point type.
-    GenTree* op1 = tree->gtOp.gtOp1;
+    GenTree* op1 = tree->gtGetOp1();
     assert(varTypeIsFloating(op1));
     assert(op1->TypeGet() == tree->TypeGet());
 
@@ -2101,6 +2142,12 @@ Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
         {
             MakeSrcContained(tree, op1);
         }
+        else
+        {
+            // Mark the operand as reg optional since codegen can still 
+            // generate code if op1 is on stack.
+            TryToSetRegOptional(op1);
+        }
         break;
 
      case CORINFO_INTRINSIC_Abs:
@@ -2431,6 +2478,12 @@ Lowering::TreeNodeInfoInitCast(GenTree* tree)
             {
                 MakeSrcContained(tree, castOp);
             }
+            else
+            {
+                // Mark castOp as reg optional to indicate codegen
+                // can still generate code if it is on stack.
+                TryToSetRegOptional(castOp);
+            }
         }
     }
 
@@ -2794,6 +2847,12 @@ void Lowering::LowerCmp(GenTreePtr tree)
                 MakeSrcContained(tree, otherOp);
             }
         }
+        else
+        {
+            // Mark otherOp as reg optional to indicate codegen can still generate
+            // code even if otherOp is on the stack.
+            TryToSetRegOptional(otherOp);
+        }
 
         return;
     }
@@ -3096,6 +3155,18 @@ void Lowering::LowerCmp(GenTreePtr tree)
                {
                        MakeSrcContained(tree, op1);
                }
+        else
+        {
+            // One of op1 or op2 could be marked as reg optional
+            // to indicate that codegen can still generate code
+            // if one of them is on the stack.
+            TryToSetRegOptional(op2);
+
+            if (!op2->IsRegOptional())
+            {
+                TryToSetRegOptional(op1);
+            }
+        }
 
                if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
                {
@@ -3613,6 +3684,11 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
             // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
             MakeSrcContained(tree, op1);
         }
+        else
+        {
+            // If there are no containable operands, we can make an operand reg optional.
+            SetRegOptionalForBinOp(tree);
+        }
         return;
     }
     
@@ -3691,12 +3767,21 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
     // To generate an LEA we need to force memOp into a register
     // so don't allow memOp to be 'contained'
     //
-    if ((memOp != nullptr)                    &&
-        !useLeaEncoding                       &&
-        (memOp->TypeGet() == tree->TypeGet()) &&
-        IsSafeToContainMem(tree, memOp))
+    if (!useLeaEncoding)
     {
-        MakeSrcContained(tree, memOp);
+        if (memOp != nullptr)
+        {
+            if ((memOp->TypeGet() == tree->TypeGet()) &&
+                IsSafeToContainMem(tree, memOp))
+            {
+                MakeSrcContained(tree, memOp);
+            }
+        }
+        else
+        {
+            // If there are no containable operands, we can make an operand reg optional.
+            SetRegOptionalForBinOp(tree);
+        }
     }
 }
 
@@ -3763,6 +3848,69 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode)
     return true;
 }
 
+//----------------------------------------------------------------------
+// TryToSetRegOptional - sets a bit to indicate to LSRA that a register
+// for the given tree node is optional for codegen purposes.  If no
+// register is allocated to such a tree node, its parent node treats
+// it as a contained memory operand during codegen.
+//
+// Arguments:
+//    tree    -   GenTree node
+//
+// Returns
+//    None
+//
+// Note: Right now a tree node is marked as reg optional only
+// if it is a GT_LCL_VAR.  This routine needs to be modified if,
+// in future, lower/codegen needs to support other tree node
+// types.
+void Lowering::TryToSetRegOptional(GenTree* tree)
+{
+    if (tree->OperGet() == GT_LCL_VAR)
+    {
+        tree->gtLsraInfo.regOptional = true;
+    }
+}
+
+// ------------------------------------------------------------------
+// SetRegOptionalForBinOp - Indicates which operand of a bin-op has an
+// optional register requirement. The xarch instruction set allows
+// either op1 or op2 of a binary operation (e.g. add, mul etc) to be
+// a memory operand.  This routine tells the register allocator which
+// of the operands optionally requires a register.  LSRA might not
+// allocate a register to RefTypeUse positions of such operands if it
+// is beneficial. In such a case codegen will treat them as memory
+// operands.
+//
+// Arguments:
+//     tree  -  GenTree of a binary operation.
+//
+// Returns
+//     None.
+//
+// Note: On xarch at most one of the operands will be marked as
+// reg optional, even when both operands could be considered register
+// optional.
+void Lowering::SetRegOptionalForBinOp(GenTree* tree)
+{
+    assert(GenTree::OperIsBinary(tree->OperGet()));
+
+    GenTree* op1 = tree->gtGetOp1();
+    GenTree* op2 = tree->gtGetOp2();
+
+    if (tree->TypeGet() == op2->TypeGet())
+    {
+        TryToSetRegOptional(op2);
+    }
+
+    if (!op2->IsRegOptional() &&
+        tree->OperIsCommutative() &&
+        tree->TypeGet() == op1->TypeGet())
+    {
+        TryToSetRegOptional(op1);
+    }
+}
+
 #endif // _TARGET_XARCH_
 
 #endif // !LEGACY_BACKEND
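To make the operand-selection rule of SetRegOptionalForBinOp concrete, here is a standalone sketch of the same policy using toy stand-ins rather than real GenTree nodes: op2 is preferred when its type matches the operation, op1 is considered only for commutative ops, and (as in TryToSetRegOptional) only lcl-var-like operands can be marked at all, so at most one operand ends up reg optional.

    // Standalone illustration of the SetRegOptionalForBinOp policy; the
    // ToyOperand type and TryMark helper are stand-ins, not JIT code.
    #include <cassert>

    struct ToyOperand
    {
        int  type;
        bool isLclVar;     // only lcl vars may be marked reg optional
        bool regOptional;
    };

    // Mirrors TryToSetRegOptional: only a lcl var may be marked.
    void TryMark(ToyOperand& op)
    {
        if (op.isLclVar)
        {
            op.regOptional = true;
        }
    }

    void MarkRegOptionalForBinOp(int opType, bool isCommutative, ToyOperand& op1, ToyOperand& op2)
    {
        if (op2.type == opType)
        {
            TryMark(op2);
        }

        if (!op2.regOptional && isCommutative && (op1.type == opType))
        {
            TryMark(op1);
        }
    }

    int main()
    {
        ToyOperand a{/* type */ 4, /* isLclVar */ true,  false};
        ToyOperand b{/* type */ 4, /* isLclVar */ false, false}; // e.g. a call result, not a lcl var

        // ADD(a, b): b cannot be marked (not a lcl var), so the commutative
        // fallback marks a instead; at most one operand ends up marked.
        MarkRegOptionalForBinOp(4, /* isCommutative */ true, a, b);
        assert(a.regOptional && !b.regOptional);
        return 0;
    }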
index ca09dc2..3298eb6 100644 (file)
@@ -140,6 +140,50 @@ void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx)
     }
 }
 
+//-------------------------------------------------------------
+// getWeight: Returns the weight of the RefPosition.
+//
+// Arguments: 
+//    refPos   -   ref position
+//
+// Returns:
+//    Weight of ref position.
+unsigned LinearScan::getWeight(RefPosition* refPos)
+{
+    unsigned weight;
+    GenTreePtr treeNode = refPos->treeNode;
+
+    if (treeNode != nullptr)        
+    {
+        if (isCandidateLocalRef(treeNode))
+        {
+            // Tracked locals: use weighted ref cnt as the weight of the
+            // ref position.
+            GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
+            LclVarDsc* varDsc = &(compiler->lvaTable[lclCommon->gtLclNum]);
+            weight = varDsc->lvRefCntWtd;
+        }
+        else 
+        {
+            // Non-candidate local ref or non-lcl tree node.
+            // These are considered to have two references in the basic block:
+            // a def and a use and hence weighted ref count is 2 times
+            // the basic block weight in which they appear.
+            weight = 2 * this->blockInfo[refPos->bbNum].weight;
+        }
+    }
+    else
+    {
+        // Non-tree node ref positions.  These will have a single
+        // reference in the basic block and hence their weighted
+        // refcount is equal to the block weight in which they 
+        // appear.
+        weight = this->blockInfo[refPos->bbNum].weight;
+    }
+
+    return weight;
+}
+
 // allRegs represents a set of registers that can
 // be used to allocate the specified type in any point
 // in time (more of a 'bank' of registers).
@@ -4857,14 +4901,30 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi
 //                  and that can be spilled.
 //
 // Arguments:
-//    current:         The interval for the current allocation
-//    refPosition:     The RefPosition of the current Interval for which a register is being allocated
+//    current               The interval for the current allocation
+//    refPosition           The RefPosition of the current Interval for which a register is being allocated
+//    allocationOptional    If true, a reg may not be allocated if all other ref positions currently
+//                          occupying registers are more important than the 'refPosition'.
 //
 // Return Value:
 //    The regNumber allocated to the RefPositon.  Returns REG_NA if no free register is found.
-
+//
+// Note:  Currently this routine uses weight and farthest distance of next reference
+// to select a ref position for spilling.  
+// a) if allocationOptional = false
+//        The ref position chosen for spilling will be the one with the lowest
+//        weight of all, and if there is more than one ref position with the
+//        same lowest weight, among them it chooses the one with the farthest
+//        distance to its next reference.
+// 
+// b) if allocationOptional = true
+//        The ref position chosen for spilling will not only have the lowest weight
+//        of all but also a weight lower than 'refPosition'.  If there is
+//        no such ref position, no reg will be allocated.
 regNumber
-LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
+LinearScan::allocateBusyReg(Interval* current,
+                            RefPosition* refPosition,
+                            bool allocationOptional)
 {
     regNumber foundReg = REG_NA;
 
@@ -4887,9 +4947,27 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
     // TODO-CQ: Determine whether/how to take preferences into account in addition to
     // prefering the one with the furthest ref position when considering
     // a candidate to spill
-    RegRecord * farthestRefPhysRegRecord = nullptr;
+    RegRecord* farthestRefPhysRegRecord = nullptr;
     LsraLocation farthestLocation = MinLocation;
     LsraLocation refLocation = refPosition->nodeLocation;
+    unsigned farthestRefPosWeight;
+    if (allocationOptional)
+    {
+        // If allocating a reg is optional, we will consider those ref positions
+        // whose weight is less than 'refPosition' for spilling. 
+        farthestRefPosWeight = getWeight(refPosition);
+    }
+    else
+    {
+        // If allocating a reg is a must, we start off with max weight so
+        // that the first spill candidate will be selected based on 
+        // farthest distance alone.  Since we start off with farthestLocation
+        // initialized to MinLocation, the first available ref position 
+        // will be selected as spill candidate and its weight as the
+        // farthestRefPosWeight.
+        farthestRefPosWeight = BB_MAX_WEIGHT;
+    }
+
     for (regNumber regNum : Registers(regType))
     {
         regMaskTP candidateBit = genRegMask(regNum);
@@ -4900,7 +4978,7 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
         {
             continue;
         }
-        Interval * assignedInterval = physRegRecord->assignedInterval;
+        Interval* assignedInterval = physRegRecord->assignedInterval;
 
         // If there is a fixed reference at the same location (and it's not due to this reference),
         // don't use it.
@@ -4922,14 +5000,25 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
             // to avoid a spill - codegen can then insert the copy.
             assert(candidates == candidateBit);
             physRegNextLocation = MaxLocation;
+            farthestRefPosWeight = BB_MAX_WEIGHT;
         }
         else
         {
             physRegNextLocation = physRegRecord->getNextRefLocation();
+
+            // If refPosition requires a fixed register, we should reject all others.
+            // Otherwise, we will still evaluate all physRegs even though their next location is
+            // not better than the farthestLocation found so far.
+            //
+            // TODO: this method should be using an approach similar to tryAllocateFreeReg()
+            // where it uses a regOrder array to avoid iterating over any but the single
+            // fixed candidate.
+            if (refPosition->isFixedRegRef && physRegNextLocation < farthestLocation)
+            {
+                continue;
+            }
         }
-        if (physRegNextLocation < farthestLocation)
-            continue;
-                
+
         // If this register is not assigned to an interval, either
         // - it has a FixedReg reference at the current location that is not this reference, OR
         // - this is the special case of a fixed loReg, where this interval has a use at the same location
@@ -4947,7 +5036,7 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
             continue;
         }
 
-        RefPosition * recentAssignedRef = assignedInterval->recentRefPosition;
+        RefPosition* recentAssignedRef = assignedInterval->recentRefPosition;
 
         if (!assignedInterval->isActive)
         {
@@ -4967,27 +5056,39 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
                     RefPosition* nextAssignedRef = recentAssignedRef->nextRefPosition;
                     assert(nextAssignedRef != nullptr);
                     assert(nextAssignedRef->nodeLocation == refLocation ||
-                           (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
+                        (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
                 }
             }
             continue;
         }
-            
+
         // If we have a recentAssignedRef, check that it is going to be OK to spill it
+        //
+        // TODO-Review: Under what conditions would recentAssignedRef be null?
+        unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
         if (recentAssignedRef != nullptr)
         {
             if (recentAssignedRef->nodeLocation == refLocation)
             {
                 // We can't spill a register that's being used at the current location
-                RefPosition * physRegRef = physRegRecord->recentRefPosition;
+                RefPosition* physRegRef = physRegRecord->recentRefPosition;
                 continue;
             }
-        
+
             // If the current position has the candidate register marked to be delayed, 
             // check if the previous location is using this register, if that's the case we have to skip
             // since we can't spill this register.
-            if(recentAssignedRef->delayRegFree &&
-               (refLocation == recentAssignedRef->nodeLocation + 1))
+            if (recentAssignedRef->delayRegFree &&
+                (refLocation == recentAssignedRef->nodeLocation + 1))
+            {
+                continue;
+            }
+
+            // We prefer not to spill a register if the weight of recentAssignedRef > weight
+            // of the spill candidate found so far.  We would consider spilling a greater-weight
+            // ref position only if the refPosition being allocated must have a reg.
+            recentAssignedRefWeight = getWeight(recentAssignedRef);
+            if (recentAssignedRefWeight > farthestRefPosWeight)
             {
                 continue;
             }
@@ -5005,28 +5106,85 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
         }
 
         if (nextLocation > physRegNextLocation)
+        {
             nextLocation = physRegNextLocation;
+        }
+
+        bool isBetterLocation;
 
-        bool isBetterLocation = (nextLocation > farthestLocation);
 #ifdef DEBUG
         if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
         {
-            isBetterLocation = !isBetterLocation;
+            isBetterLocation = (nextLocation <= farthestLocation);
         }
-#endif // DEBUG
+        else
+        // the if-stmt below is associated with this else
+#endif
+        if (recentAssignedRefWeight < farthestRefPosWeight)
+        {
+            isBetterLocation = true;
+        }
+        else
+        {
+            // This would mean the weight of the spill ref position we found so far is equal
+            // to the weight of the ref position that is being evaluated.  In this case
+            // we prefer to spill the ref position whose distance to its next reference is
+            // the farthest.
+            assert(recentAssignedRefWeight == farthestRefPosWeight);
+
+            // If allocation is optional, the first spill candidate selected
+            // will be based on weight alone. After we have found a spill
+            // candidate whose weight is less than the 'refPosition', we will 
+            // consider farthest distance when there is a tie in weights.
+            // This is to ensure that we don't spill a ref position whose
+            // weight is equal to the weight of 'refPosition'.
+            if (allocationOptional && farthestRefPhysRegRecord == nullptr)
+            {
+                isBetterLocation = false;
+            }
+            else
+            {
+                isBetterLocation = (nextLocation > farthestLocation);
+            }
+        }
+
         if (isBetterLocation)
         {
             farthestLocation = nextLocation;
             farthestRefPhysRegRecord = physRegRecord;
+            farthestRefPosWeight = recentAssignedRefWeight;
         }
     }
-    assert(farthestRefPhysRegRecord != nullptr &&
-           (farthestLocation > refLocation || refPosition->isFixedRegRef));
-    foundReg = farthestRefPhysRegRecord->regNum;
-    unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
-    assignPhysReg(farthestRefPhysRegRecord, current);
 
-    refPosition->registerAssignment = genRegMask(foundReg);
+#if DEBUG
+    if (allocationOptional)
+    {
+        // There may not be a spill candidate; if one is found,
+        // its weight must be less than the weight of 'refPosition'.
+        assert((farthestRefPhysRegRecord == nullptr) ||
+               (farthestRefPosWeight < getWeight(refPosition)));
+    }
+    else 
+    {
+        // Must have found a spill candidate.
+        assert((farthestRefPhysRegRecord != nullptr) &&
+               (farthestLocation > refLocation || refPosition->isFixedRegRef));
+    }
+#endif
+
+    if (farthestRefPhysRegRecord != nullptr)
+    {
+        foundReg = farthestRefPhysRegRecord->regNum;
+        unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
+        assignPhysReg(farthestRefPhysRegRecord, current);
+        refPosition->registerAssignment = genRegMask(foundReg);
+    }
+    else
+    {
+        foundReg = REG_NA;
+        refPosition->registerAssignment = RBM_NONE;
+    }
+    
     return foundReg;
 }
 
@@ -5064,7 +5222,7 @@ LinearScan::assignCopyReg(RefPosition * refPosition)
     regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
     if (allocatedReg == REG_NA)
     {
-        allocatedReg = allocateBusyReg(currentInterval, refPosition);
+        allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
     }
 
     // Now restore the old info
@@ -5154,7 +5312,7 @@ LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefP
 
     if (!fromRefPosition->lastUse)
     {
-        if (!fromRefPosition->RequiresRegister())
+        if (!fromRefPosition->IsActualRef())
         {
             fromRefPosition->registerAssignment = RBM_NONE;
         }
@@ -6186,15 +6344,22 @@ LinearScan::allocateRegisters()
                     if (srcInterval->isActive && 
                         genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
                         currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
-                    {
-                        
+                    {                        
                         assert(physRegRecord->regNum == srcInterval->physReg);
-                        // Is the next use of this lclVar prior to the next kill of the physReg?
-                        if (srcInterval->getNextRefLocation() <= physRegRecord->getNextRefLocation())
-                        {
-                            physRegRecord->isBusyUntilNextKill = true;
-                            assert(physRegRecord->getNextRefLocation() == currentInterval->getNextRefLocation());
-                        }
+
+                        // A special putarg_reg acts as a pass-thru since both the source lcl var
+                        // and the putarg_reg have the same register allocated.  The physical reg
+                        // record of the reg continues to point to the source lcl var's interval
+                        // instead of to the putarg_reg's interval.  So if the reg allocated to the
+                        // source lcl var is spilled and reallocated to another tree node before
+                        // its use at the call node, it will lead to a spill of the lcl var instead
+                        // of the putarg_reg since the physical reg record is pointing to the lcl
+                        // var's interval. As a result, the arg reg would get trashed, leading
+                        // to bad codegen. The assumption here is that the source lcl var of a
+                        // special putarg_reg doesn't get spilled and re-allocated prior to
+                        // its use at the call node.  This is ensured by marking the physical reg
+                        // record as busy until the next kill.
+                        physRegRecord->isBusyUntilNextKill = true;
                     }
                     else
                     {
@@ -6208,6 +6373,7 @@ LinearScan::allocateRegisters()
                     continue;
                 }
             }
+            
             if (assignedRegister == REG_NA && RefTypeIsUse(refType))
             {
                 currentRefPosition->reload = true;
@@ -6311,6 +6477,7 @@ LinearScan::allocateRegisters()
                     // code here, but otherwise we may wind up in this register anyway.
                     keepAssignment = false;
                 }
+
                 if (keepAssignment == false)
                 {
                     currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
@@ -6435,10 +6602,36 @@ LinearScan::allocateRegisters()
                 }
             }
         } 
+
         if (assignedRegister == REG_NA)
         {
-            // Try to allocate a register
-            assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+            bool allocateReg = true;
+
+            if (currentRefPosition->IsRegOptional())
+            {
+                if (currentRefPosition->lastUse &&
+                    currentRefPosition->reload)
+                {
+                    // We can avoid allocating a register if it is the last use requiring a reload
+                    allocateReg = false;
+                }
+
+#ifdef DEBUG
+                // Under stress mode, don't attempt to allocate a reg to
+                // a reg optional ref position.
+                if (allocateReg && regOptionalNoAlloc())
+                {
+                    allocateReg = false;
+                }
+#endif
+            }
+
+            if (allocateReg)
+            {
+                // Try to allocate a register
+                assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+            }
+
             // If no register was found, and if the currentRefPosition must have a register,
             // then find a register to spill
             if (assignedRegister == REG_NA)
@@ -6460,10 +6653,32 @@ LinearScan::allocateRegisters()
                 }
                 else
 #endif // FEATURE_SIMD
-                if (currentRefPosition->RequiresRegister())
+                if (currentRefPosition->IsActualRef())
                 {
-                    assignedRegister = allocateBusyReg(currentInterval, currentRefPosition);
-                    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
+                    if (allocateReg)
+                    {
+                        // Though Lower/Codegen has indicated that it can generate code even if
+                        // no reg is allocated to this ref position, we will make an attempt
+                        // to get a busy reg if one is assigned to a less important ref position.
+                        // If all the ref positions currently occupying registers are more
+                        // important than currentRefPosition, no reg will be allocated.
+                        assignedRegister = allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->IsRegOptional());
+                    }
+
+                    if (assignedRegister != REG_NA)
+                    {
+                        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
+                    }
+                    else 
+                    {
+                        // This can happen only if the ref position requires a reg optionally.
+                        noway_assert(currentRefPosition->IsRegOptional());
+
+                        currentRefPosition->registerAssignment = RBM_NONE;
+                        currentRefPosition->reload = false;
+
+                        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+                    }
                 }
                 else
                 {
@@ -6490,6 +6705,7 @@ LinearScan::allocateRegisters()
                 }
             }
 #endif // DEBUG
+
             if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
             {
                 setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
@@ -6502,9 +6718,10 @@ LinearScan::allocateRegisters()
                 assert(currentRefPosition->reload);
             }
         }
+
         // If we allocated a register, record it
-        if (currentInterval 
-            && assignedRegister != REG_NA)
+        if (currentInterval != nullptr &&
+            assignedRegister != REG_NA)
         {
             assignedRegBit = genRegMask(assignedRegister);
             currentRefPosition->registerAssignment = assignedRegBit;
@@ -6515,12 +6732,12 @@ LinearScan::allocateRegisters()
             // The interval could be dead if this is a user variable, or if the
             // node is being evaluated for side effects, or a call whose result
             // is not used, etc.
-
             if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
             {
                 assert(currentRefPosition->isIntervalRef());
-                if (refType != RefTypeExpUse
-                    && currentRefPosition->nextRefPosition == nullptr)
+
+                if (refType != RefTypeExpUse && 
+                    currentRefPosition->nextRefPosition == nullptr)
                 {
                     if (currentRefPosition->delayRegFree)
                     {
@@ -6538,6 +6755,7 @@ LinearScan::allocateRegisters()
                     currentInterval->isActive = false;
                 }
             }
+
             lastAllocatedRefPosition = currentRefPosition;
         }
     }
@@ -6649,7 +6867,10 @@ LinearScan::resolveLocalRef(GenTreePtr treeNode, RefPosition * currentRefPositio
 
     if (currentRefPosition->registerAssignment == RBM_NONE)
     {
-        assert(!currentRefPosition->RequiresRegister());
+        // Either requires no register or reg is optional.
+        assert(!currentRefPosition->IsActualRef() ||
+               currentRefPosition->IsRegOptional());
+
         interval->isSpilled = true;
         varDsc->lvRegNum = REG_STK;
         if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
@@ -6658,6 +6879,7 @@ LinearScan::resolveLocalRef(GenTreePtr treeNode, RefPosition * currentRefPositio
         }
         interval->assignedReg = nullptr;
         interval->physReg = REG_NA;
+
         return;
     }
 
@@ -10538,7 +10760,16 @@ LinearScan::verifyFinalAllocation()
                 {
                     interval->physReg = REG_NA;
                     interval->assignedReg = nullptr;
-                    regRecord->assignedInterval = nullptr;
+
+                    // regRecord could be null if the RefPosition requires a reg optionally
+                    if (regRecord != nullptr)
+                    {
+                        regRecord->assignedInterval = nullptr;
+                    }
+                    else
+                    {
+                        assert(currentRefPosition->IsRegOptional());
+                    }
                 }
             }
         }
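The allocator-side policy added in this file can be summarized briefly: each busy register's most recently assigned ref position gets a weight from getWeight (lvRefCntWtd for tracked locals, twice the block weight for other tree nodes, the block weight otherwise), and the spill victim is the lightest such ref position, with ties broken by the farthest next reference. When the request itself is reg optional, only a victim strictly lighter than the request qualifies; if none exists, the request is simply left in memory. The sketch below is a toy model of that selection; the register names and weights are invented, and only the policy mirrors the diff.

    // Toy model of the weight-driven spill choice in allocateBusyReg.
    #include <cstdio>
    #include <vector>

    struct BusyReg
    {
        const char* name;
        unsigned    weight;   // weight of its most recently assigned ref position
        unsigned    nextUse;  // distance to the interval's next reference
    };

    const BusyReg* PickSpillCandidate(const std::vector<BusyReg>& regs,
                                      unsigned requestWeight, bool allocationOptional)
    {
        const BusyReg* best       = nullptr;
        unsigned       bestWeight = allocationOptional ? requestWeight : ~0u; // ~0u stands in for BB_MAX_WEIGHT

        for (const BusyReg& r : regs)
        {
            if (r.weight > bestWeight)
            {
                continue; // heavier than what we are willing to spill
            }
            if ((r.weight < bestWeight) || ((best != nullptr) && (r.nextUse > best->nextUse)))
            {
                best       = &r;
                bestWeight = r.weight;
            }
        }
        return best; // nullptr => a reg-optional request is left in memory
    }

    int main()
    {
        std::vector<BusyReg> regs = {{"rbx", 400, 10}, {"rsi", 100, 25}, {"rdi", 100, 40}};

        // A reg-optional use of weight 100 finds no strictly lighter victim: no spill.
        const BusyReg* v1 = PickSpillCandidate(regs, 100, /* allocationOptional */ true);
        std::printf("%s\n", (v1 != nullptr) ? v1->name : "none - stay in memory");

        // A mandatory use spills the lightest victim, breaking the tie by farthest next use: rdi.
        const BusyReg* v2 = PickSpillCandidate(regs, 100, /* allocationOptional */ false);
        std::printf("%s\n", v2->name);
        return 0;
    }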
index 37e6ac5..dc6d689 100644 (file)
@@ -538,6 +538,22 @@ private:
     LsraSpill                   getLsraSpill()                  { return (LsraSpill) (lsraStressMask & LSRA_SPILL_MASK); }
     bool                        spillAlways()                   { return getLsraSpill() == LSRA_SPILL_ALWAYS; }
 
+    // This controls whether RefPositions that require a register optionally should
+    // be allocated a reg at all.
+    enum LsraRegOptionalControl     { LSRA_REG_OPTIONAL_DEFAULT          = 0,
+                                      LSRA_REG_OPTIONAL_NO_ALLOC         = 0x1000,
+                                      LSRA_REG_OPTIONAL_MASK             = 0x1000 };
+
+    LsraRegOptionalControl      getLsraRegOptionalControl()            
+    { 
+        return (LsraRegOptionalControl) (lsraStressMask & LSRA_REG_OPTIONAL_MASK); 
+    }
+
+    bool                        regOptionalNoAlloc()     
+    { 
+        return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
+    }
+
     // Dump support
     void            lsraDumpIntervals(const char* msg);
     void            dumpRefPositions(const char *msg);
@@ -743,6 +759,8 @@ private:
 
     void associateRefPosWithRegister(RefPosition *rp);
 
+    unsigned getWeight(RefPosition* refPos);
+
     /*****************************************************************************
      * Register management
      ****************************************************************************/
@@ -750,7 +768,7 @@ private:
     regNumber tryAllocateFreeReg(Interval *current, RefPosition *refPosition);
     RegRecord* findBestPhysicalReg(RegisterType regType, LsraLocation endLocation,
                                   regMaskTP candidates, regMaskTP preferences);
-    regNumber allocateBusyReg(Interval *current, RefPosition *refPosition);
+    regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocationOptional);
     regNumber assignCopyReg(RefPosition * refPosition);
 
     void checkAndAssignInterval(RegRecord * regRec, Interval * interval);
@@ -1326,15 +1344,24 @@ public:
     LsraLocation    nodeLocation;
     regMaskTP       registerAssignment;
 
-    regNumber       assignedReg() { return genRegNumFromMask(registerAssignment); }
+    regNumber       assignedReg() { 
+        if (registerAssignment == RBM_NONE)
+        {
+            return REG_NA;
+        }
+
+        return genRegNumFromMask(registerAssignment); 
+    }
 
     RefType         refType;
 
-    bool            RequiresRegister()
+    bool            IsActualRef()
     {
-        return (refType == RefTypeDef || refType == RefTypeUse
+        return (refType == RefTypeDef || 
+                refType == RefTypeUse
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-               || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
+                || refType == RefTypeUpperVectorSaveDef 
+                || refType == RefTypeUpperVectorSaveUse
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                );
     }
@@ -1354,6 +1381,23 @@ public:
 
     unsigned        getMultiRegIdx() { return multiRegIdx;  }
 
+    // Returns true if codegen has indicated that the tree node
+    // referred to by RefPosition can be treated as a contained
+    // memory operand if no register was allocated.
+    bool           IsRegOptional()
+    {
+        // TODO-CQ: Right now if a ref position is marked as
+        // copyreg or movereg, then it is always allocated a
+        // register, though it is marked as reg optional.
+        // This is an implementation limitation that needs to 
+        // be addressed.
+        return (refType == RefTypeUse) &&
+               !copyReg &&
+               !moveReg &&
+               (treeNode != nullptr) &&
+               treeNode->IsRegOptional();
+    }  
+
     // Last Use - this may be true for multiple RefPositions in the same Interval
     bool            lastUse      : 1;
 
index 5b6fab8..8fc48f9 100644 (file)
@@ -31,6 +31,7 @@ public:
         isDelayFree           = false;
         hasDelayFreeSrc       = false;
         isTgtPref             = false;
+        regOptional           = false;
     }
 
     // dst
@@ -119,7 +120,8 @@ public:
     // isTgtPref is set to true when we have a rmw op, where we would like the result to be allocated
     // in the same register as op1.
     unsigned char isTgtPref:1;
-
+    // Whether a spilled operand can be treated as a contained memory operand
+    unsigned char regOptional:1;
 
 public: