[RyuJIT/ARM32] Return HFA struct (#12453)
author Hyeongseok Oh <hseok82.oh@samsung.com>
Fri, 30 Jun 2017 23:51:02 +0000 (08:51 +0900)
committer Bruce Forstall <brucefo@microsoft.com>
Fri, 30 Jun 2017 23:51:02 +0000 (16:51 -0700)
* [RyuJIT/ARM32] Return HFA struct

- Enable returning HFA structs
- Merge ARM32/ARM64 CodeGen::isStructReturn(GenTreePtr treeNode)
- Merge ARM32/ARM64 CodeGen::genStructReturn(GenTreePtr treeNode)

* Add comment

Add TODO comment to optimize two-float loading

src/jit/codegenarm.cpp
src/jit/codegenarm64.cpp
src/jit/codegenarmarch.cpp

index 5b87311..4a37681 100644 (file)
@@ -289,9 +289,9 @@ void CodeGen::genReturn(GenTreePtr treeNode)
     }
     else
     {
-        if (varTypeIsStruct(treeNode))
+        if (isStructReturn(treeNode))
         {
-            NYI_ARM("struct return");
+            genStructReturn(treeNode);
         }
         else if (targetType != TYP_VOID)
         {
index 4943430..2e98e21 100644 (file)
@@ -1740,221 +1740,6 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
 }
 
 //------------------------------------------------------------------------
-// isStructReturn: Returns whether the 'treeNode' is returning a struct.
-//
-// Arguments:
-//    treeNode - The tree node to evaluate whether is a struct return.
-//
-// Return Value:
-//    Returns true if the 'treeNode" is a GT_RETURN node of type struct.
-//    Otherwise returns false.
-//
-bool CodeGen::isStructReturn(GenTreePtr treeNode)
-{
-    // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
-    // For the GT_RET_FILT, the return is always
-    // a bool or a void, for the end of a finally block.
-    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
-
-    return varTypeIsStruct(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genStructReturn: Generates code for returning a struct.
-//
-// Arguments:
-//    treeNode - The GT_RETURN tree node.
-//
-// Return Value:
-//    None
-//
-// Assumption:
-//    op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
-void CodeGen::genStructReturn(GenTreePtr treeNode)
-{
-    assert(treeNode->OperGet() == GT_RETURN);
-    assert(isStructReturn(treeNode));
-    GenTreePtr op1 = treeNode->gtGetOp1();
-
-    if (op1->OperGet() == GT_LCL_VAR)
-    {
-        GenTreeLclVarCommon* lclVar  = op1->AsLclVarCommon();
-        LclVarDsc*           varDsc  = &(compiler->lvaTable[lclVar->gtLclNum]);
-        var_types            lclType = genActualType(varDsc->TypeGet());
-
-        // Currently only multireg TYP_STRUCT types such as HFA's and 16-byte structs are supported
-        // In the future we could have FEATURE_SIMD types like TYP_SIMD16
-        assert(lclType == TYP_STRUCT);
-        assert(varDsc->lvIsMultiRegRet);
-
-        ReturnTypeDesc retTypeDesc;
-        unsigned       regCount;
-
-        retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
-        regCount = retTypeDesc.GetReturnRegCount();
-
-        assert(regCount >= 2);
-        assert(op1->isContained());
-
-        // Copy var on stack into ABI return registers
-        int offset = 0;
-        for (unsigned i = 0; i < regCount; ++i)
-        {
-            var_types type = retTypeDesc.GetReturnRegType(i);
-            regNumber reg  = retTypeDesc.GetABIReturnReg(i);
-            getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
-            offset += genTypeSize(type);
-        }
-    }
-    else // op1 must be multi-reg GT_CALL
-    {
-        assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
-
-        genConsumeRegs(op1);
-
-        GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
-        GenTreeCall* call      = actualOp1->AsCall();
-
-        ReturnTypeDesc* pRetTypeDesc;
-        unsigned        regCount;
-        unsigned        matchingCount = 0;
-
-        pRetTypeDesc = call->GetReturnTypeDesc();
-        regCount     = pRetTypeDesc->GetReturnRegCount();
-
-        var_types regType[MAX_RET_REG_COUNT];
-        regNumber returnReg[MAX_RET_REG_COUNT];
-        regNumber allocatedReg[MAX_RET_REG_COUNT];
-        regMaskTP srcRegsMask       = 0;
-        regMaskTP dstRegsMask       = 0;
-        bool      needToShuffleRegs = false; // Set to true if we have to move any registers
-
-        for (unsigned i = 0; i < regCount; ++i)
-        {
-            regType[i]   = pRetTypeDesc->GetReturnRegType(i);
-            returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);
-
-            regNumber reloadReg = REG_NA;
-            if (op1->IsCopyOrReload())
-            {
-                // GT_COPY/GT_RELOAD will have valid reg for those positions
-                // that need to be copied or reloaded.
-                reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
-            }
-
-            if (reloadReg != REG_NA)
-            {
-                allocatedReg[i] = reloadReg;
-            }
-            else
-            {
-                allocatedReg[i] = call->GetRegNumByIdx(i);
-            }
-
-            if (returnReg[i] == allocatedReg[i])
-            {
-                matchingCount++;
-            }
-            else // We need to move this value
-            {
-                // We want to move the value from allocatedReg[i] into returnReg[i]
-                // so record these two registers in the src and dst masks
-                //
-                srcRegsMask |= genRegMask(allocatedReg[i]);
-                dstRegsMask |= genRegMask(returnReg[i]);
-
-                needToShuffleRegs = true;
-            }
-        }
-
-        if (needToShuffleRegs)
-        {
-            assert(matchingCount < regCount);
-
-            unsigned  remainingRegCount = regCount - matchingCount;
-            regMaskTP extraRegMask      = treeNode->gtRsvdRegs;
-
-            while (remainingRegCount > 0)
-            {
-                // set 'available' to the 'dst' registers that are not currently holding 'src' registers
-                //
-                regMaskTP availableMask = dstRegsMask & ~srcRegsMask;
-
-                regMaskTP dstMask;
-                regNumber srcReg;
-                regNumber dstReg;
-                var_types curType   = TYP_UNKNOWN;
-                regNumber freeUpReg = REG_NA;
-
-                if (availableMask == 0)
-                {
-                    // Circular register dependencies
-                    // So just free up the lowest register in dstRegsMask by moving it to the 'extra' register
-
-                    assert(dstRegsMask == srcRegsMask);         // this has to be true for us to reach here
-                    assert(extraRegMask != 0);                  // we require an 'extra' register
-                    assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask
-
-                    availableMask = extraRegMask & ~dstRegsMask;
-
-                    regMaskTP srcMask = genFindLowestBit(srcRegsMask);
-                    freeUpReg         = genRegNumFromMask(srcMask);
-                }
-
-                dstMask = genFindLowestBit(availableMask);
-                dstReg  = genRegNumFromMask(dstMask);
-                srcReg  = REG_NA;
-
-                if (freeUpReg != REG_NA)
-                {
-                    // We will free up the srcReg by moving it to dstReg which is an extra register
-                    //
-                    srcReg = freeUpReg;
-
-                    // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
-                    // and add the new register mask bit to srcRegsMask
-                    //
-                    for (unsigned i = 0; i < regCount; ++i)
-                    {
-                        if (allocatedReg[i] == srcReg)
-                        {
-                            curType         = regType[i];
-                            allocatedReg[i] = dstReg;
-                            srcRegsMask |= genRegMask(dstReg);
-                        }
-                    }
-                }
-                else // The normal case
-                {
-                    // Find the 'srcReg' and set 'curType'
-                    //
-                    for (unsigned i = 0; i < regCount; ++i)
-                    {
-                        if (returnReg[i] == dstReg)
-                        {
-                            srcReg  = allocatedReg[i];
-                            curType = regType[i];
-                        }
-                    }
-                    // After we perform this move we will have one less registers to setup
-                    remainingRegCount--;
-                }
-                assert(curType != TYP_UNKNOWN);
-
-                inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);
-
-                // Clear the appropriate bits in srcRegsMask and dstRegsMask
-                srcRegsMask &= ~genRegMask(srcReg);
-                dstRegsMask &= ~genRegMask(dstReg);
-
-            } // while (remainingRegCount > 0)
-
-        } // (needToShuffleRegs)
-
-    } // op1 must be multi-reg GT_CALL
-}
-
-//------------------------------------------------------------------------
 // genReturn: Generates code for return statement.
 //            In case of struct return, delegates to the genStructReturn method.
 //
index 5338a04..a15d77f 100644 (file)
@@ -3066,6 +3066,224 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
     genProduceReg(lea);
 }
 
+//------------------------------------------------------------------------
+// isStructReturn: Returns whether the 'treeNode' is returning a struct.
+//
+// Arguments:
+//    treeNode - The tree node to test for being a struct return.
+//
+// Return Value:
+//    Returns true if the 'treeNode' is a GT_RETURN node of type struct.
+//    Otherwise returns false.
+//
+bool CodeGen::isStructReturn(GenTreePtr treeNode)
+{
+    // This method could be called for 'treeNode' of GT_RETFILT or GT_RETURN.
+    // For the GT_RETFILT, the return is always
+    // a bool or a void, for the end of a finally block.
+    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+
+    return varTypeIsStruct(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genStructReturn: Generates code for returning a struct.
+//
+// Arguments:
+//    treeNode - The GT_RETURN tree node.
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//    op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
+void CodeGen::genStructReturn(GenTreePtr treeNode)
+{
+    assert(treeNode->OperGet() == GT_RETURN);
+    assert(isStructReturn(treeNode));
+    GenTreePtr op1 = treeNode->gtGetOp1();
+
+    if (op1->OperGet() == GT_LCL_VAR)
+    {
+        GenTreeLclVarCommon* lclVar  = op1->AsLclVarCommon();
+        LclVarDsc*           varDsc  = &(compiler->lvaTable[lclVar->gtLclNum]);
+        var_types            lclType = genActualType(varDsc->TypeGet());
+
+        // Currently only multireg TYP_STRUCT types such as HFAs (ARM32, ARM64) and 16-byte structs (ARM64) are
+        // supported. In the future we could have FEATURE_SIMD types like TYP_SIMD16.
+        assert(lclType == TYP_STRUCT);
+        assert(varDsc->lvIsMultiRegRet);
+
+        ReturnTypeDesc retTypeDesc;
+        unsigned       regCount;
+
+        retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
+        regCount = retTypeDesc.GetReturnRegCount();
+
+        assert(regCount >= 2);
+        assert(op1->isContained());
+
+        // Copy var on stack into ABI return registers
+        // TODO: This could be optimized by combining two float loads into one double load
+        int offset = 0;
+        for (unsigned i = 0; i < regCount; ++i)
+        {
+            var_types type = retTypeDesc.GetReturnRegType(i);
+            regNumber reg  = retTypeDesc.GetABIReturnReg(i);
+            getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+            offset += genTypeSize(type);
+        }
+    }
+    else // op1 must be multi-reg GT_CALL
+    {
+#ifdef _TARGET_ARM_
+        NYI_ARM("struct return from multi-reg GT_CALL");
+#endif
+        assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
+
+        genConsumeRegs(op1);
+
+        GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
+        GenTreeCall* call      = actualOp1->AsCall();
+
+        ReturnTypeDesc* pRetTypeDesc;
+        unsigned        regCount;
+        unsigned        matchingCount = 0;
+
+        pRetTypeDesc = call->GetReturnTypeDesc();
+        regCount     = pRetTypeDesc->GetReturnRegCount();
+
+        var_types regType[MAX_RET_REG_COUNT];
+        regNumber returnReg[MAX_RET_REG_COUNT];
+        regNumber allocatedReg[MAX_RET_REG_COUNT];
+        regMaskTP srcRegsMask       = 0;
+        regMaskTP dstRegsMask       = 0;
+        bool      needToShuffleRegs = false; // Set to true if we have to move any registers
+
+        for (unsigned i = 0; i < regCount; ++i)
+        {
+            regType[i]   = pRetTypeDesc->GetReturnRegType(i);
+            returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);
+
+            regNumber reloadReg = REG_NA;
+            if (op1->IsCopyOrReload())
+            {
+                // GT_COPY/GT_RELOAD will have valid reg for those positions
+                // that need to be copied or reloaded.
+                reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+            }
+
+            if (reloadReg != REG_NA)
+            {
+                allocatedReg[i] = reloadReg;
+            }
+            else
+            {
+                allocatedReg[i] = call->GetRegNumByIdx(i);
+            }
+
+            if (returnReg[i] == allocatedReg[i])
+            {
+                matchingCount++;
+            }
+            else // We need to move this value
+            {
+                // We want to move the value from allocatedReg[i] into returnReg[i]
+                // so record these two registers in the src and dst masks
+                //
+                srcRegsMask |= genRegMask(allocatedReg[i]);
+                dstRegsMask |= genRegMask(returnReg[i]);
+
+                needToShuffleRegs = true;
+            }
+        }
+
+        if (needToShuffleRegs)
+        {
+            assert(matchingCount < regCount);
+
+            unsigned  remainingRegCount = regCount - matchingCount;
+            regMaskTP extraRegMask      = treeNode->gtRsvdRegs;
+
+            while (remainingRegCount > 0)
+            {
+                // set 'available' to the 'dst' registers that are not currently holding 'src' registers
+                //
+                regMaskTP availableMask = dstRegsMask & ~srcRegsMask;
+
+                regMaskTP dstMask;
+                regNumber srcReg;
+                regNumber dstReg;
+                var_types curType   = TYP_UNKNOWN;
+                regNumber freeUpReg = REG_NA;
+
+                if (availableMask == 0)
+                {
+                    // Circular register dependencies
+                    // So just free up the lowest register in dstRegsMask by moving it to the 'extra' register
+
+                    assert(dstRegsMask == srcRegsMask);         // this has to be true for us to reach here
+                    assert(extraRegMask != 0);                  // we require an 'extra' register
+                    assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask
+
+                    availableMask = extraRegMask & ~dstRegsMask;
+
+                    regMaskTP srcMask = genFindLowestBit(srcRegsMask);
+                    freeUpReg         = genRegNumFromMask(srcMask);
+                }
+
+                dstMask = genFindLowestBit(availableMask);
+                dstReg  = genRegNumFromMask(dstMask);
+                srcReg  = REG_NA;
+
+                if (freeUpReg != REG_NA)
+                {
+                    // We will free up the srcReg by moving it to dstReg which is an extra register
+                    //
+                    srcReg = freeUpReg;
+
+                    // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
+                    // and add the new register mask bit to srcRegsMask
+                    //
+                    for (unsigned i = 0; i < regCount; ++i)
+                    {
+                        if (allocatedReg[i] == srcReg)
+                        {
+                            curType         = regType[i];
+                            allocatedReg[i] = dstReg;
+                            srcRegsMask |= genRegMask(dstReg);
+                        }
+                    }
+                }
+                else // The normal case
+                {
+                    // Find the 'srcReg' and set 'curType'
+                    //
+                    for (unsigned i = 0; i < regCount; ++i)
+                    {
+                        if (returnReg[i] == dstReg)
+                        {
+                            srcReg  = allocatedReg[i];
+                            curType = regType[i];
+                        }
+                    }
+                    // After we perform this move we will have one fewer register to set up
+                    remainingRegCount--;
+                }
+                assert(curType != TYP_UNKNOWN);
+
+                inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);
+
+                // Clear the appropriate bits in srcRegsMask and dstRegsMask
+                srcRegsMask &= ~genRegMask(srcReg);
+                dstRegsMask &= ~genRegMask(dstReg);
+
+            } // while (remainingRegCount > 0)
+
+        } // (needToShuffleRegs)
+
+    } // op1 must be multi-reg GT_CALL
+}
 #endif // _TARGET_ARMARCH_
 
 #endif // !LEGACY_BACKEND