Use a smaller expansion of `GT_INDEX` in MinOpts.
Author:     Pat Gavlin <pagavlin@microsoft.com>
AuthorDate: Sat, 5 Aug 2017 18:43:40 +0000 (11:43 -0700)
Commit:     Pat Gavlin <pagavlin@microsoft.com>
CommitDate: Mon, 21 Aug 2017 16:57:44 +0000 (09:57 -0700)
We currently expand `GT_INDEX` nodes during morph into an explicit
bounds check followed by a load. For example, this tree:

```
[000059] ------------       /--*  LCL_VAR   int    V09 loc6
[000060] R--XG-------    /--*  INDEX     ref
[000058] ------------    |  \--*  LCL_VAR   ref    V00 arg0
[000062] -A-XG-------    *  ASG       ref
[000061] D------N----    \--*  LCL_VAR   ref    V10 loc7
```

is expanded into this tree:

```
[000060] R--XG+------       /--*  IND       ref
[000491] -----+------       |  |  /--*  CNS_INT   long   16 Fseq[#FirstElem]
[000492] -----+------       |  \--*  ADD       byref
[000488] -----+-N----       |     |     /--*  CNS_INT   long   3
[000489] -----+------       |     |  /--*  LSH       long
[000487] -----+------       |     |  |  \--*  CAST      long <- int
[000484] i----+------       |     |  |     \--*  LCL_VAR   int    V09 loc6
[000490] -----+------       |     \--*  ADD       byref
[000483] -----+------       |        \--*  LCL_VAR   ref    V00 arg0
[000493] ---XG+------    /--*  COMMA     ref
[000486] ---X-+------    |  \--*  ARR_BOUNDS_CHECK_Rng void
[000059] -----+------    |     +--*  LCL_VAR   int    V09 loc6
[000485] ---X-+------    |     \--*  ARR_LENGTH int
[000058] -----+------    |        \--*  LCL_VAR   ref    V00 arg0
[000062] -A-XG+------    *  ASG       ref
[000061] D----+-N----    \--*  LCL_VAR   ref    V10 loc7
```
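
For reference, the arithmetic encoded by the expanded tree is equivalent to the C++ sketch below. This is an illustration only, not JIT code: the `ArrayOfRef` layout, the 16-byte first-element offset, and the shift by 3 are assumptions chosen to match the constants in the dump above (an array of object references on a 64-bit target).

```cpp
#include <cstdint>
#include <stdexcept>

// Hypothetical stand-in for the object-array layout implied by the dump:
// method table at +0, length at +8, first element at +16 (Fseq[#FirstElem]).
struct ArrayOfRef
{
    void*    methodTable;
    uint32_t length;
    uint32_t padding;
};

void* LoadElement(ArrayOfRef* arr, int32_t index)
{
    // ARR_BOUNDS_CHECK_Rng: a single unsigned compare rejects both negative
    // indices and indices >= length.
    if (static_cast<uint32_t>(index) >= arr->length)
    {
        throw std::out_of_range("array index");
    }

    // CAST long <- int, LSH by 3 (8-byte elements), ADD the 16-byte
    // first-element offset, ADD the array base ...
    uint8_t* addr = reinterpret_cast<uint8_t*>(arr) + 16 + (static_cast<int64_t>(index) << 3);

    // ... and IND: load the element through the computed address.
    return *reinterpret_cast<void**>(addr);
}
```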

Even in this simple case, where both the array object and the index are
lclVars, the expansion represents a rather large increase in the size of
the IR. In the worst case, the JIT introduces an additional lclVar for
both the array object and the index, adding several more nodes to the
tree. When optimizing, exposing the structure of the array access may be
helpful, as it may allow the compiler to better analyze the program.
When we are not optimizing, however, the expansion serves little purpose
besides constraining the IR shapes that the backend must handle. Because
it may require lclVars in the worst case, the expansion can even bloat
the generated code, as every lclVar reference is generated as a load or
store from/to the stack when we are not optimizing.
In the case above, the expanded tree generates the following x64
assembly:

```
IN0018: 000092 mov      rdi, gword ptr [V00 rbp-10H]
IN0019: 000096 mov      edi, dword ptr [rdi+8]
IN001a: 000099 cmp      dword ptr [V09 rbp-48H], edi
IN001b: 00009C jae      G_M5106_IG38
IN001c: 0000A2 mov      rdi, gword ptr [V00 rbp-10H]
IN001d: 0000A6 mov      esi, dword ptr [V09 rbp-48H]
IN001e: 0000A9 movsxd   rsi, esi
IN001f: 0000AC mov      rdi, gword ptr [rdi+8*rsi+16]
IN0020: 0000B1 mov      gword ptr [V10 rbp-50H], rdi
```

Inspired by other recent experiments (e.g. #13188), this change
introduces a new node that replaces the above expansion in MinOpts. This
node, `GT_INDEX_ADDR`, represents the bounds check and address
computation involved in an array access, and returns the address of the
element that is to be loaded or stored. Using this node, the example
tree given above expands to the following:

```
[000489] a--XG+------    /--*  IND       ref
[000059] -----+------    |  |  /--*  LCL_VAR   int    V09 loc6
[000060] R--XG+--R---    |  \--*  INDEX_ADDR byref
[000058] -----+------    |     \--*  LCL_VAR   ref    V00 arg0
[000062] -A-XG+------    *  ASG       ref
[000061] D----+-N----    \--*  LCL_VAR   ref    V10 loc7
```
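
Here `INDEX_ADDR` performs the bounds check and yields the element address, and the `IND` is just a load through that address. In terms of the hypothetical sketch above, the split looks roughly like this; the backend emits the compare, the `jae` to the range-check failure block, and the scaled `lea` directly from the `INDEX_ADDR` node (see `genCodeForIndexAddr` in the codegen hunks below).

```cpp
// INDEX_ADDR: bounds check plus address computation; produces a byref to the
// element. Reuses the hypothetical ArrayOfRef layout from the earlier sketch.
void** IndexAddr(ArrayOfRef* arr, int32_t index)
{
    if (static_cast<uint32_t>(index) >= arr->length)
    {
        throw std::out_of_range("array index");
    }
    return reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(arr) + 16 +
                                    (static_cast<int64_t>(index) << 3));
}

// IND: the only node the MinOpts expansion adds on top of INDEX_ADDR.
void* LoadElementViaIndexAddr(ArrayOfRef* arr, int32_t index)
{
    return *IndexAddr(arr, index);
}
```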

This expansion requires only the addition of the `GT_IND` node that
represents the memory access itself. The resulting reduction in IR size
translates to roughly a 2% decrease in instructions retired during
non-optimizing compilation. Furthermore, the new expansion tends to
generate smaller code; for example, the tree given above compiles to 29
bytes rather than 35:

```
IN0018: 000092 mov      edi, dword ptr [V09 rbp-48H]
IN0019: 000095 mov      rsi, gword ptr [V00 rbp-10H]
IN001a: 000099 cmp      rdi, qword ptr [rsi+8]
IN001b: 00009D jae      G_M5106_IG38
IN001c: 0000A3 lea      rsi, bword ptr [rsi+8*rdi+16]
IN001d: 0000A8 mov      rdi, gword ptr [rsi]
IN001e: 0000AB mov      gword ptr [V10 rbp-50H], rdi
```

13 files changed:
src/jit/codegenarmarch.cpp
src/jit/codegenlinear.h
src/jit/codegenxarch.cpp
src/jit/compiler.h
src/jit/flowgraph.cpp
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/gtlist.h
src/jit/gtstructs.h
src/jit/lsraarm.cpp
src/jit/lsraarm64.cpp
src/jit/lsraxarch.cpp
src/jit/morph.cpp

diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index b4b407d..a5a1ac0 100644
@@ -185,6 +185,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             genLeaInstruction(treeNode->AsAddrMode());
             break;
 
+        case GT_INDEX_ADDR:
+            genCodeForIndexAddr(treeNode->AsIndexAddr());
+            break;
+
         case GT_IND:
             genCodeForIndir(treeNode->AsIndir());
             break;
@@ -1540,6 +1544,91 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
 }
 
 //------------------------------------------------------------------------
+// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
+//
+// Arguments:
+//    tree - the GT_INDEX_ADDR node
+//
+void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
+{
+    GenTree* const base  = node->Arr();
+    GenTree* const index = node->Index();
+
+    genConsumeReg(base);
+    genConsumeReg(index);
+
+    const regNumber tmpReg = node->GetSingleTempReg();
+
+    // Generate the bounds check if necessary.
+    if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
+    {
+        // Create a GT_IND(GT_LEA) tree for the array length access and load the length into a register.
+        GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, static_cast<unsigned>(node->gtLenOffset));
+        arrLenAddr.gtRegNum = REG_NA;
+        arrLenAddr.SetContained();
+        arrLenAddr.gtNext = (GenTree*)(-1);
+
+        GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);
+        arrLen.gtRegNum     = tmpReg;
+        arrLen.ClearContained();
+
+        getEmitter()->emitInsLoadStoreOp(ins_Load(TYP_INT), emitTypeSize(TYP_INT), arrLen.gtRegNum, &arrLen);
+
+#ifdef _TARGET_64BIT_
+        // The CLI Spec allows an array to be indexed by either an int32 or a native int.  In the case that the index
+        // is a native int on a 64-bit platform, we will need to widen the array length and the compare.
+        if (index->TypeGet() == TYP_I_IMPL)
+        {
+            // Extend the array length as needed.
+            getEmitter()->emitIns_R_R(ins_Move_Extend(TYP_INT, true), EA_8BYTE, arrLen.gtRegNum, arrLen.gtRegNum);
+        }
+#endif
+
+        // Generate the range check.
+        getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_I_IMPL), index, &arrLen);
+        genJumpToThrowHlpBlk(genJumpKindForOper(GT_GE, CK_UNSIGNED), SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
+    }
+
+    // Compute the address of the array element.
+    switch (node->gtElemSize)
+    {
+        case 1:
+            // dest = base + index
+            getEmitter()->emitIns_R_R_R(INS_add, emitTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum);
+            break;
+
+        case 2:
+        case 4:
+        case 8:
+        case 16:
+        {
+            DWORD lsl;
+            BitScanForward(&lsl, node->gtElemSize);
+
+            // dest = base + index * scale
+            genScaledAdd(emitTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum, lsl);
+            break;
+        }
+
+        default:
+        {
+            // tmp = scale
+            CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtElemSize, TYP_INT);
+
+            // dest = base + index * tmp
+            getEmitter()->emitIns_R_R_R_R(INS_MULADD, emitTypeSize(node), node->gtRegNum, node->gtRegNum,
+                                          index->gtRegNum, tmpReg);
+            break;
+        }
+    }
+
+    // dest = dest + elemOffs
+    getEmitter()->emitIns_R_R_I(INS_add, emitTypeSize(node), node->gtRegNum, node->gtRegNum, node->gtElemOffset);
+
+    genProduceReg(node);
+}
+
+//------------------------------------------------------------------------
 // genCodeForIndir: Produce code for a GT_IND node.
 //
 // Arguments:
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 45927db..40f61bc 100644
@@ -167,6 +167,7 @@ void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
 
 void genCodeForCast(GenTreeOp* tree);
 void genCodeForLclAddr(GenTree* tree);
+void genCodeForIndexAddr(GenTreeIndexAddr* tree);
 void genCodeForIndir(GenTreeIndir* tree);
 void genCodeForNegNot(GenTree* tree);
 void genCodeForLclVar(GenTreeLclVar* tree);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index d1363d4..7993395 100644
@@ -1787,6 +1787,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             genLeaInstruction(treeNode->AsAddrMode());
             break;
 
+        case GT_INDEX_ADDR:
+            genCodeForIndexAddr(treeNode->AsIndexAddr());
+            break;
+
         case GT_IND:
             genCodeForIndir(treeNode->AsIndir());
             break;
@@ -4495,6 +4499,87 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
 }
 
 //------------------------------------------------------------------------
+// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
+//
+// Arguments:
+//    tree - the GT_INDEX_ADDR node
+//
+void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
+{
+    GenTree* const base  = node->Arr();
+    GenTree* const index = node->Index();
+
+    genConsumeReg(base);
+    genConsumeReg(index);
+
+    regNumber tmpReg = REG_NA;
+
+    // Generate the bounds check if necessary.
+    if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
+    {
+        // Create a GT_IND(GT_LEA) tree for the array length access.
+        GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, node->gtLenOffset);
+        arrLenAddr.gtRegNum = REG_NA;
+        arrLenAddr.SetContained();
+        arrLenAddr.gtNext = (GenTree*)(-1);
+
+        GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);
+
+#ifdef _TARGET_64BIT_
+        // The CLI Spec allows an array to be indexed by either an int32 or a native int.  In the case that the index
+        // is a native int on a 64-bit platform, we will need to widen the array length and the compare.
+        if (index->TypeGet() == TYP_I_IMPL)
+        {
+            // Load the array length into a register.
+            tmpReg          = node->GetSingleTempReg();
+            arrLen.gtRegNum = tmpReg;
+            arrLen.ClearContained();
+            getEmitter()->emitInsLoadInd(ins_Load(TYP_INT), EA_4BYTE, arrLen.gtRegNum, &arrLen);
+        }
+        else
+#endif
+        {
+            assert(varTypeIsIntegral(index->TypeGet()));
+
+            arrLen.gtRegNum = REG_NA;
+            arrLen.SetContained();
+            arrLen.gtNext = (GenTree*)(-1);
+        }
+
+        // Generate the range check.
+        getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_I_IMPL), index, &arrLen);
+        genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
+    }
+
+    // Compute the address of the array element.
+    switch (node->gtElemSize)
+    {
+        case 1:
+        case 2:
+        case 4:
+        case 8:
+            getEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum,
+                                        node->gtElemSize, static_cast<int>(node->gtElemOffset));
+            break;
+
+        default:
+        {
+            // Multiply the index by the element size.
+            //
+            // TODO-CQ: this should really just use `imul index, index, #gtElemSize`
+            tmpReg = (tmpReg == REG_NA) ? node->GetSingleTempReg() : tmpReg;
+            CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtElemSize, TYP_INT);
+            inst_RV_RV(INS_imul, tmpReg, index->gtRegNum);
+            getEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node), node->gtRegNum, base->gtRegNum, tmpReg, 1,
+                                        static_cast<int>(node->gtElemOffset));
+            break;
+        }
+    }
+
+    genProduceReg(node);
+}
+
+//------------------------------------------------------------------------
 // genCodeForIndir: Produce code for a GT_IND node.
 //
 // Arguments:
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 08f0e2e..79f8bec 100644
@@ -4668,6 +4668,8 @@ private:
 
     void fgSetRngChkTarget(GenTreePtr tree, bool delay = true);
 
+    BasicBlock* fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay, unsigned* stkDepth);
+
 #if REARRANGE_ADDS
     void fgMoveOpsLeft(GenTreePtr tree);
 #endif
@@ -9320,6 +9322,26 @@ public:
         return compRoot->m_arrayInfoMap;
     }
 
+    inline bool TryGetArrayInfo(GenTreeIndir* indir, ArrayInfo* arrayInfo)
+    {
+        if ((indir->gtFlags & GTF_IND_ARR_INDEX) == 0)
+        {
+            return false;
+        }
+
+        if (indir->gtOp1->OperIs(GT_INDEX_ADDR))
+        {
+            GenTreeIndexAddr* const indexAddr = indir->gtOp1->AsIndexAddr();
+            *arrayInfo = ArrayInfo(indexAddr->gtElemType, indexAddr->gtElemSize, indexAddr->gtElemOffset,
+                                   indexAddr->gtStructElemClass);
+            return true;
+        }
+
+        bool found = GetArrayInfoMap()->Lookup(indir, arrayInfo);
+        assert(found);
+        return true;
+    }
+
     NodeToUnsignedMap* m_memorySsaMap[MemoryKindCount];
 
     // In some cases, we want to assign intermediate SSA #'s to memory states, and know what nodes create those memory
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 7790363..421c23b 100644
@@ -18356,6 +18356,13 @@ void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
             noway_assert(!"DYN_BLK nodes should be sequenced as a special case");
             break;
 
+        case GT_INDEX_ADDR:
+            // Evaluate the index first, then the array address
+            assert((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+            fgSetTreeSeqHelper(tree->AsIndexAddr()->Index(), isLIR);
+            fgSetTreeSeqHelper(tree->AsIndexAddr()->Arr(), isLIR);
+            break;
+
         default:
 #ifdef DEBUG
             gtDispTree(tree);
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 66155db..5e7ec8e 100644
@@ -299,6 +299,7 @@ void GenTree::InitNodeSize()
     GenTree::s_gtNodeSizes[GT_FTN_ADDR]         = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_BOX]              = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_INDEX]            = TREE_NODE_SZ_LARGE;
+    GenTree::s_gtNodeSizes[GT_INDEX_ADDR]        = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_ARR_BOUNDS_CHECK] = TREE_NODE_SZ_LARGE;
 #ifdef FEATURE_SIMD
     GenTree::s_gtNodeSizes[GT_SIMD_CHK] = TREE_NODE_SZ_LARGE;
@@ -1301,6 +1302,12 @@ AGAIN:
                         return false;
                     }
                     break;
+                case GT_INDEX_ADDR:
+                    if (op1->AsIndexAddr()->gtElemSize != op2->AsIndexAddr()->gtElemSize)
+                    {
+                        return false;
+                    }
+                    break;
 
                 // For the ones below no extra argument matters for comparison.
                 case GT_QMARK:
@@ -1876,6 +1883,9 @@ AGAIN:
                 case GT_INDEX:
                     hash += tree->gtIndex.gtIndElemSize;
                     break;
+                case GT_INDEX_ADDR:
+                    hash += tree->AsIndexAddr()->gtElemSize;
+                    break;
                 case GT_ALLOCOBJ:
                     hash = genTreeHashAdd(hash, static_cast<unsigned>(
                                                     reinterpret_cast<uintptr_t>(tree->gtAllocObj.gtAllocObjClsHnd)));
@@ -1938,6 +1948,7 @@ AGAIN:
                 case GT_ARR_INDEX:
                 case GT_QMARK:
                 case GT_INDEX:
+                case GT_INDEX_ADDR:
                     break;
 
 #ifdef FEATURE_SIMD
@@ -4776,6 +4787,23 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
         }
         break;
 
+        case GT_INDEX_ADDR:
+            costEx = 6; // cmp reg,reg; jae throw; mov reg, [addrmode]  (not taken)
+            costSz = 9; // jump to cold section
+
+            level = gtSetEvalOrder(tree->AsIndexAddr()->Index());
+            costEx += tree->AsIndexAddr()->Index()->gtCostEx;
+            costSz += tree->AsIndexAddr()->Index()->gtCostSz;
+
+            lvl2 = gtSetEvalOrder(tree->AsIndexAddr()->Arr());
+            if (level < lvl2)
+            {
+                level = lvl2;
+            }
+            costEx += tree->AsIndexAddr()->Arr()->gtCostEx;
+            costSz += tree->AsIndexAddr()->Arr()->gtCostSz;
+            break;
+
         default:
 #ifdef DEBUG
             if (verbose)
@@ -5808,6 +5836,7 @@ bool GenTree::OperMayThrow()
 #ifdef FEATURE_SIMD
         case GT_SIMD_CHK:
 #endif // FEATURE_SIMD
+        case GT_INDEX_ADDR:
             return true;
         default:
             break;
@@ -7416,6 +7445,19 @@ GenTreePtr Compiler::gtCloneExpr(
             }
             break;
 
+            case GT_INDEX_ADDR:
+            {
+                GenTreeIndexAddr* asIndAddr = tree->AsIndexAddr();
+
+                copy = new (this, GT_INDEX_ADDR)
+                    GenTreeIndexAddr(asIndAddr->Arr(), asIndAddr->Index(), asIndAddr->gtElemType,
+                                     asIndAddr->gtStructElemClass, asIndAddr->gtElemSize, asIndAddr->gtLenOffset,
+                                     asIndAddr->gtElemOffset);
+                copy->AsIndexAddr()->gtIndRngFailBB = asIndAddr->gtIndRngFailBB;
+                copy->AsIndexAddr()->gtStkDepth     = asIndAddr->gtStkDepth;
+            }
+            break;
+
             case GT_ALLOCOBJ:
             {
                 GenTreeAllocObj* asAllocObj = tree->AsAllocObj();
@@ -9584,6 +9626,7 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
                 __fallthrough;
 
             case GT_INDEX:
+            case GT_INDEX_ADDR:
 
                 if ((tree->gtFlags & (GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0) // We prefer printing V or U over R
                 {
@@ -15682,6 +15725,9 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
             case GT_INDEX:
                 structHnd = tree->gtIndex.gtStructElemClass;
                 break;
+            case GT_INDEX_ADDR:
+                structHnd = tree->AsIndexAddr()->gtStructElemClass;
+                break;
             case GT_FIELD:
                 info.compCompHnd->getFieldType(tree->gtField.gtFldHnd, &structHnd);
                 break;
@@ -15710,12 +15756,12 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
                 }
                 else
 #endif
-                    if (tree->gtFlags & GTF_IND_ARR_INDEX)
                 {
                     ArrayInfo arrInfo;
-                    bool      b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
-                    assert(b);
-                    structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+                    if (TryGetArrayInfo(tree->AsIndir(), &arrInfo))
+                    {
+                        structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+                    }
                 }
                 break;
 #ifdef FEATURE_SIMD
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index ab5c954..834a079 100644
@@ -4238,6 +4238,68 @@ struct GenTreeIndex : public GenTreeOp
 #endif
 };
 
+// gtIndexAddr: given an array object and an index, checks that the index is within the bounds of the array if
+//              necessary and produces the address of the value at that index of the array.
+struct GenTreeIndexAddr : public GenTreeOp
+{
+    GenTree*& Arr()
+    {
+        return gtOp1;
+    }
+    GenTree*& Index()
+    {
+        return gtOp2;
+    }
+
+    CORINFO_CLASS_HANDLE gtStructElemClass; // If the element type is a struct, this is the struct type.
+
+    GenTree* gtIndRngFailBB; // Label to jump to for array-index-out-of-range
+    unsigned gtStkDepth;     // Stack depth at which the jump occurs (required for fgSetRngChkTarget)
+
+    var_types gtElemType;   // The element type of the array.
+    unsigned  gtElemSize;   // size of elements in the array
+    unsigned  gtLenOffset;  // The offset from the array's base address to its length.
+    unsigned  gtElemOffset; // The offset from the array's base address to its first element.
+
+    GenTreeIndexAddr(GenTree*             arr,
+                     GenTree*             ind,
+                     var_types            elemType,
+                     CORINFO_CLASS_HANDLE structElemClass,
+                     unsigned             elemSize,
+                     unsigned             lenOffset,
+                     unsigned             elemOffset)
+        : GenTreeOp(GT_INDEX_ADDR, TYP_BYREF, arr, ind)
+        , gtStructElemClass(structElemClass)
+        , gtIndRngFailBB(nullptr)
+        , gtStkDepth(0)
+        , gtElemType(elemType)
+        , gtElemSize(elemSize)
+        , gtLenOffset(lenOffset)
+        , gtElemOffset(elemOffset)
+    {
+#ifdef DEBUG
+        if (JitConfig.JitSkipArrayBoundCheck() == 1)
+        {
+            // Skip bounds check
+        }
+        else
+#endif
+        {
+            // Do bounds check
+            gtFlags |= GTF_INX_RNGCHK;
+        }
+
+        // REVERSE_OPS is set because we must evaluate the index before the array address.
+        gtFlags |= GTF_EXCEPT | GTF_GLOB_REF | GTF_REVERSE_OPS;
+    }
+
+#if DEBUGGABLE_GENTREE
+    GenTreeIndexAddr() : GenTreeOp()
+    {
+    }
+#endif
+};
+
 /* gtArrLen -- array length (GT_ARR_LENGTH)
    GT_ARR_LENGTH is used for "arr.length" */
 
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 7e78b27..5440233 100644
@@ -163,6 +163,8 @@ GTNODE(QMARK            , GenTreeQmark       ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)
 GTNODE(COLON            , GenTreeColon       ,0,GTK_BINOP|GTK_NOTLIR)
 
 GTNODE(INDEX            , GenTreeIndex       ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)   // SZ-array-element
+GTNODE(INDEX_ADDR       , GenTreeIndex       ,0,GTK_BINOP|GTK_EXOP)              // addr of SZ-array-element; used when
+                                                                                 // aiming to minimize compile times.
 
 GTNODE(MKREFANY         , GenTreeOp          ,0,GTK_BINOP)
 
diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h
index bb3f82b..2004f40 100644
@@ -71,6 +71,7 @@ GTSTRUCT_1(Colon       , GT_COLON)
 GTSTRUCT_1(FptrVal     , GT_FTN_ADDR)
 GTSTRUCT_1(Intrinsic   , GT_INTRINSIC) 
 GTSTRUCT_1(Index       , GT_INDEX)
+GTSTRUCT_1(IndexAddr   , GT_INDEX_ADDR)
 #ifdef FEATURE_SIMD
 GTSTRUCT_2(BoundsChk   , GT_ARR_BOUNDS_CHECK, GT_SIMD_CHK)
 #else // !FEATURE_SIMD
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index 53da45b..7ec0973 100644
@@ -782,6 +782,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
                 unreached();
             }
             break;
+
+        case GT_INDEX_ADDR:
+            info->srcCount         = 2;
+            info->dstCount         = 1;
+            info->internalIntCount = 1;
+            break;
     } // end switch (tree->OperGet())
 
     // We need to be sure that we've set info->srcCount and info->dstCount appropriately
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 0e0c2c6..5774a16 100644
@@ -668,6 +668,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
             assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
             info->internalIntCount = 1;
             break;
+
+        case GT_INDEX_ADDR:
+            info->srcCount         = 2;
+            info->dstCount         = 1;
+            info->internalIntCount = 1;
+            break;
     } // end switch (tree->OperGet())
 
     // We need to be sure that we've set info->srcCount and info->dstCount appropriately
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 28d345d..ef9eebf 100644
@@ -617,6 +617,31 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
             JITDUMP("Unexpected node %s in Lower.\n", GenTree::OpName(tree->OperGet()));
             unreached();
             break;
+
+        case GT_INDEX_ADDR:
+            info->srcCount = 2;
+            info->dstCount = 1;
+
+            if (tree->AsIndexAddr()->Index()->TypeGet() == TYP_I_IMPL)
+            {
+                info->internalIntCount = 1;
+            }
+            else
+            {
+                switch (tree->AsIndexAddr()->gtElemSize)
+                {
+                    case 1:
+                    case 2:
+                    case 4:
+                    case 8:
+                        break;
+
+                    default:
+                        info->internalIntCount = 1;
+                        break;
+                }
+            }
+            break;
     } // end switch (tree->OperGet())
 
     // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 2dd18a2..997ae02 100644
@@ -5714,76 +5714,78 @@ void Compiler::fgMoveOpsLeft(GenTreePtr tree)
 
 /*****************************************************************************/
 
-void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
+void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay)
 {
-    GenTreeBoundsChk* bndsChk = nullptr;
-    SpecialCodeKind   kind    = SCK_RNGCHK_FAIL;
-
-#ifdef FEATURE_SIMD
-    if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
-#else  // FEATURE_SIMD
-    if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
-#endif // FEATURE_SIMD
+    if (tree->OperIsBoundsCheck())
+    {
+        GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk();
+        BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay, &boundsChk->gtStkDepth);
+        if (failBlock != nullptr)
+        {
+            boundsChk->gtIndRngFailBB = gtNewCodeRef(failBlock);
+        }
+    }
+    else if (tree->OperIs(GT_INDEX_ADDR))
     {
-        bndsChk = tree->AsBoundsChk();
-        kind    = tree->gtBoundsChk.gtThrowKind;
+        GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
+        BasicBlock* const       failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay, &indexAddr->gtStkDepth);
+        if (failBlock != nullptr)
+        {
+            indexAddr->gtIndRngFailBB = gtNewCodeRef(failBlock);
+        }
     }
     else
     {
-        noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
+        noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX));
+        fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay, nullptr);
     }
+}
 
-#ifdef _TARGET_X86_
-    unsigned callStkDepth = fgPtrArgCntCur;
-#else
-    // only x86 pushes args
-    const unsigned callStkDepth          = 0;
-#endif
-
+BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay, unsigned* stkDepth)
+{
     if (opts.MinOpts())
     {
         delay = false;
 
+#ifdef _TARGET_X86_
         // we need to initialize this field
-        if (fgGlobalMorph && bndsChk != nullptr)
+        if (fgGlobalMorph && (stkDepth != nullptr))
         {
-            bndsChk->gtStkDepth = callStkDepth;
+            *stkDepth = fgPtrArgCntCur;
         }
+#endif // _TARGET_X86_
     }
 
     if (!opts.compDbgCode)
     {
         if (delay || compIsForInlining())
         {
-            /*  We delay this until after loop-oriented range check
-                analysis. For now we merely store the current stack
-                level in the tree node.
-             */
-            if (bndsChk != nullptr)
+#ifdef _TARGET_X86_
+            // We delay this until after loop-oriented range check analysis. For now we merely store the current stack
+            // level in the tree node.
+            if (stkDepth != nullptr)
             {
-                noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
-                bndsChk->gtStkDepth = callStkDepth;
+                *stkDepth = fgPtrArgCntCur;
             }
+#endif // _TARGET_X86_
         }
         else
         {
-            /* Create/find the appropriate "range-fail" label */
-
+#ifdef _TARGET_X86_
             // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
-            noway_assert((bndsChk != nullptr) || fgGlobalMorph);
-
-            unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
-
-            BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
-
-            /* Add the label to the indirection node */
+            noway_assert(fgGlobalMorph || (stkDepth != nullptr));
+            const unsigned theStkDepth = fgGlobalMorph ? fgPtrArgCntCur : *stkDepth;
+#else
+            // only x86 pushes args
+            const unsigned theStkDepth   = 0;
+#endif
 
-            if (bndsChk != nullptr)
-            {
-                bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
-            }
+            // Create/find the appropriate "range-fail" label
+            return fgRngChkTarget(compCurBB, theStkDepth, kind);
         }
     }
+
+    return nullptr;
 }
 
 /*****************************************************************************
@@ -5838,9 +5840,6 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
     }
 #endif // FEATURE_SIMD
 
-    GenTreePtr arrRef = asIndex->Arr();
-    GenTreePtr index  = asIndex->Index();
-
     // Set up the the array length's offset into lenOffs
     // And    the the first element's offset into elemOffs
     ssize_t lenOffs;
@@ -5862,6 +5861,45 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
         elemOffs = offsetof(CORINFO_Array, u1Elems);
     }
 
+#ifndef LEGACY_BACKEND
+    // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts
+    // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
+    // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
+    // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
+    // minopts).
+    if (opts.MinOpts())
+    {
+        GenTree* const array = fgMorphTree(asIndex->Arr());
+        GenTree* const index = fgMorphTree(asIndex->Index());
+
+        GenTreeIndexAddr* const indexAddr =
+            new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize,
+                                                       static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs));
+        indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT;
+
+        // Mark the indirection node as needing a range check if necessary.
+        if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0)
+        {
+            fgSetRngChkTarget(indexAddr);
+        }
+
+        // Change `tree` into an indirection and return.
+        tree->ChangeOper(GT_IND);
+        GenTreeIndir* const indir = tree->AsIndir();
+        indir->Addr()             = indexAddr;
+        indir->gtFlags            = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT);
+
+#ifdef DEBUG
+        indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
+
+        return indir;
+    }
+#endif // LEGACY_BACKEND
+
+    GenTreePtr arrRef = asIndex->Arr();
+    GenTreePtr index  = asIndex->Index();
+
     bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
     bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
 
@@ -9559,6 +9597,9 @@ GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDL
             case GT_ARR_ELEM:
                 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
                 break;
+            case GT_INDEX_ADDR:
+                addr = tree;
+                break;
             default:
             {
                 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
@@ -9586,10 +9627,12 @@ GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDL
 
 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
 {
-    if (tree->gtOper == GT_COMMA)
+    GenTree* handleTree = nullptr;
+    GenTree* addr       = nullptr;
+    if (tree->OperIs(GT_COMMA))
     {
         GenTree* effectiveVal = tree->gtEffectiveVal();
-        GenTree* addr         = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+        addr                  = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
 #ifdef DEBUG
         addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
 #endif
@@ -9612,13 +9655,24 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
             lastComma->gtOp.gtOp2 = addr;
             addr                  = tree;
         }
-        var_types structType = effectiveVal->TypeGet();
+
+        handleTree = effectiveVal;
+    }
+    else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR))
+    {
+        handleTree = tree;
+        addr       = tree->AsIndir()->Addr();
+    }
+
+    if (addr != nullptr)
+    {
+        var_types structType = handleTree->TypeGet();
         if (structType == TYP_STRUCT)
         {
-            CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
+            CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree);
             if (structHnd == NO_CLASS_HANDLE)
             {
-                tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
+                tree = gtNewOperNode(GT_IND, structType, addr);
             }
             else
             {
@@ -15266,6 +15320,11 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
             tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
             break;
 
+        case GT_INDEX_ADDR:
+            tree->AsIndexAddr()->Index() = fgMorphTree(tree->AsIndexAddr()->Index());
+            tree->AsIndexAddr()->Arr()   = fgMorphTree(tree->AsIndexAddr()->Arr());
+            break;
+
         default:
 #ifdef DEBUG
             gtDispTree(tree);