Enable RyuJIT/x86 PInvoke lowering
author Bruce Forstall <brucefo@microsoft.com>
Thu, 12 May 2016 14:31:13 +0000 (07:31 -0700)
committer Bruce Forstall <brucefo@microsoft.com>
Thu, 23 Jun 2016 16:53:48 +0000 (09:53 -0700)
Fixes dotnet/coreclr#4181 "NYI_X86: Implement PInvoke frame init inlining for x86"

The main work here is to handle the custom calling convention for the
x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper call: it takes EDI as an argument,
trashes only EAX, and returns the TCB in ESI.

The code changes are as follows:
1. Lowering::InsertPInvokeMethodProlog(): don't pass the "secret stub param" for x86.
Also, don't store the InlinedCallFrame.m_pCallSiteSP in the prolog: for x86 this is done
at the call site, due to the floating stack pointer.
2. LinearScan::getKillSetForNode(): for helper calls, call compHelperCallKillSet() to get the killMask,
to account for non-standard kill sets.
3. morph.cpp::fgMorphArgs(): set non-standard arguments for CORINFO_HELP_INIT_PINVOKE_FRAME.
4. compHelperCallKillSet(): set the correct kill set for CORINFO_HELP_INIT_PINVOKE_FRAME.
5. codegenxarch.cpp::genCallInstruction(): set the ABI return register for CORINFO_HELP_INIT_PINVOKE_FRAME.
6. lowerxarch.cpp::TreeNodeInfoInit(): set the GT_CALL dstCandidates for CORINFO_HELP_INIT_PINVOKE_FRAME.

5 & 6 are both needed to avoid a copy.

With this change, the #1 NYI, with 18415 hits over the tests, is gone.
The total number of NYI is now 29516.

Commit migrated from https://github.com/dotnet/coreclr/commit/3c7ecfeea45ba11eb59ae5615ddd5d89ea980ea5

src/coreclr/src/jit/codegencommon.cpp
src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/lower.cpp
src/coreclr/src/jit/lowerxarch.cpp
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/morph.cpp
src/coreclr/src/jit/target.h

index aa2e8de..bdf5243 100755 (executable)
@@ -686,6 +686,13 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
     case CORINFO_HELP_STOP_FOR_GC:
         return RBM_STOP_FOR_GC_TRASH;
 
+#ifdef _TARGET_X86_
+    case CORINFO_HELP_INIT_PINVOKE_FRAME:
+        // On x86, this helper has a custom calling convention that takes EDI as argument
+        // (but doesn't trash it), trashes EAX, and returns ESI.
+        return RBM_PINVOKE_SCRATCH | RBM_PINVOKE_TCB;
+#endif // _TARGET_X86_
+
     default:
         return RBM_CALLEE_TRASH;
     }
index 621d606..bb53202 100755 (executable)
@@ -6063,6 +6063,16 @@ void CodeGen::genCallInstruction(GenTreePtr node)
             }
             else
             {                
+#ifdef _TARGET_X86_
+                if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == compiler->eeFindHelper(CORINFO_HELP_INIT_PINVOKE_FRAME)))
+                {
+                    // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+                    // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+                    // correct argument registers.
+                    returnReg = REG_PINVOKE_TCB;
+                }
+                else
+#endif // _TARGET_X86_
                 if (varTypeIsFloating(returnType))
                 {
                     returnReg = REG_FLOATRET;
index bbec801..3684efa 100755 (executable)
@@ -3071,7 +3071,6 @@ GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
 //
 void Lowering::InsertPInvokeMethodProlog()
 {
-    NYI_X86("Implement PInvoke frame init inlining for x86");
     noway_assert(comp->info.compCallUnmanaged);
     noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
 
@@ -3092,7 +3091,15 @@ void Lowering::InsertPInvokeMethodProlog()
 
     // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
     //     TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
-    GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM)));
+    // for x86, don't pass the secretArg.
+
+#ifdef _TARGET_X86_
+    GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
+#else // !_TARGET_X86_
+    GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
+#endif // !_TARGET_X86_
+
+    GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
 
     // some sanity checks on the frame list root vardsc
     LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
@@ -3109,6 +3116,8 @@ void Lowering::InsertPInvokeMethodProlog()
     GenTree* lastStmt = stmt;
     DISPTREE(lastStmt);
 
+#ifndef _TARGET_X86_ // For x86, this step is done at the call site.
+
     // --------------------------------------------------------
     // InlinedCallFrame.m_pCallSiteSP = @RSP;
 
@@ -3122,6 +3131,7 @@ void Lowering::InsertPInvokeMethodProlog()
     lastStmt = storeSPStmt;
     DISPTREE(lastStmt);
 
+#endif // !_TARGET_X86_
 
     // --------------------------------------------------------
     // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
index dc52eaa..47a0d39 100644 (file)
@@ -903,6 +903,16 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
             }
 
             // Set destination candidates for return value of the call.
+#ifdef _TARGET_X86_
+            if ((tree->gtCall.gtCallType == CT_HELPER) && (tree->gtCall.gtCallMethHnd == compiler->eeFindHelper(CORINFO_HELP_INIT_PINVOKE_FRAME)))
+            {
+                // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+                // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+                // correct argument registers.
+                info->setDstCandidates(l, RBM_PINVOKE_TCB);
+            }
+            else
+#endif // _TARGET_X86_
             if (hasMultiRegRetVal)
             {
                 assert(retTypeDesc != nullptr);
index 07ef30d..ca09dc2 100644 (file)
@@ -2551,10 +2551,9 @@ LinearScan::getKillSetForNode(GenTree* tree)
         killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
         break;
     case GT_CALL:
-        // if there is no FP used, we can ignore the FP kills
+#ifdef _TARGET_X86_
         if (compiler->compFloatingPointUsed)
         {
-#ifdef _TARGET_X86_
             if (tree->TypeGet() == TYP_DOUBLE)
             {
                 needDoubleTmpForFPCall = true;
@@ -2563,12 +2562,25 @@ LinearScan::getKillSetForNode(GenTree* tree)
             {
                 needFloatTmpForFPCall = true;
             }
-#endif // _TARGET_X86_
-            killMask = RBM_CALLEE_TRASH;
+        }
+        if (tree->IsHelperCall())
+        {
+            GenTreeCall* call = tree->AsCall();
+            CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+            killMask = compiler->compHelperCallKillSet(helpFunc);
         }
         else
+#endif // _TARGET_X86_
         {
-            killMask = RBM_INT_CALLEE_TRASH;
+            // if there is no FP used, we can ignore the FP kills
+            if (compiler->compFloatingPointUsed)
+            {
+                killMask = RBM_CALLEE_TRASH;
+            }
+            else
+            {
+                killMask = RBM_INT_CALLEE_TRASH;
+            }
         }
         break;
     case GT_STOREIND:
index 7fd7fb2..3984a51 100644 (file)
@@ -2652,6 +2652,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
 
         // insert nonstandard args (outside the calling convention)
 
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+        // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
+        // correctly here.
+        if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_INIT_PINVOKE_FRAME)))
+        {
+            GenTreeArgList* args = call->gtCallArgs;
+            GenTree* arg1 = args->Current();
+            assert(arg1 != nullptr);
+            NonStandardArg nsa = { REG_PINVOKE_FRAME, arg1 };
+            nonStandardArgs.Push(nsa);
+        }
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+
 #if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
         // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
         // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
@@ -7544,8 +7557,6 @@ NO_TAIL_CALL:
 
     /* Process the "normal" argument list */
     call = fgMorphArgs(call);
-
-    // Optimize get_ManagedThreadId(get_CurrentThread)
     noway_assert(call->gtOper == GT_CALL);
 
     // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
@@ -7580,6 +7591,7 @@ NO_TAIL_CALL:
         }
     }
 
+    // Optimize get_ManagedThreadId(get_CurrentThread)
     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
            info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
     {
index 49ebc4b..0043f57 100644 (file)
@@ -584,11 +584,11 @@ typedef unsigned short          regPairNoSmall; // arm: need 12 bits
   #define PREDICT_REG_VIRTUAL_STUB_PARAM  PREDICT_REG_EAX
 
   // Registers used by PInvoke frame setup
-  #define REG_PINVOKE_FRAME        REG_EDI
+  #define REG_PINVOKE_FRAME        REG_EDI      // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper
   #define RBM_PINVOKE_FRAME        RBM_EDI
-  #define REG_PINVOKE_TCB          REG_ESI
+  #define REG_PINVOKE_TCB          REG_ESI      // ESI is set to Thread Control Block (TCB) on return from CORINFO_HELP_INIT_PINVOKE_FRAME helper
   #define RBM_PINVOKE_TCB          RBM_ESI
-  #define REG_PINVOKE_SCRATCH      REG_EAX
+  #define REG_PINVOKE_SCRATCH      REG_EAX      // EAX is trashed by CORINFO_HELP_INIT_PINVOKE_FRAME helper
   #define RBM_PINVOKE_SCRATCH      RBM_EAX
 
 #ifdef LEGACY_BACKEND