Generate relative jump/branch for tailcalls on x64/ARM (dotnet/coreclr#25848)
author: Jakob Botsch Nielsen <t-janie@microsoft.com>
Wed, 7 Aug 2019 17:09:44 +0000 (10:09 -0700)
committer: Jarret Shook <jashoo@microsoft.com>
Wed, 7 Aug 2019 17:09:44 +0000 (10:09 -0700)
* Generate RIP-relative jmp for fast direct tailcalls on x64

* Also use relative branch on ARM

* Fix compiler errors

Commit migrated from https://github.com/dotnet/coreclr/commit/a5a13ba179f88018a89578646ffae69d4c39698d

src/coreclr/src/jit/codegenarmarch.cpp
src/coreclr/src/jit/codegencommon.cpp
src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/lower.cpp

index 9476e65..21617ae 100644 (file)
@@ -2410,15 +2410,16 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
         // Don't support fast tail calling JIT helpers
         assert(callType != CT_HELPER);
 
-        // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
-        assert(target != nullptr);
-
-        genConsumeReg(target);
-
-        // Use IP0 on ARM64 and R12 on ARM32 as the call target register.
-        if (target->gtRegNum != REG_FASTTAILCALL_TARGET)
+        if (target != nullptr)
         {
-            inst_RV_RV(INS_mov, REG_FASTTAILCALL_TARGET, target->gtRegNum);
+            // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+            genConsumeReg(target);
+
+            // Use IP0 on ARM64 and R12 on ARM32 as the call target register.
+            if (target->gtRegNum != REG_FASTTAILCALL_TARGET)
+            {
+                inst_RV_RV(INS_mov, REG_FASTTAILCALL_TARGET, target->gtRegNum);
+            }
         }
 
         return;
index 8725619..4e1b871 100644 (file)
@@ -8626,10 +8626,45 @@ void CodeGen::genFnEpilog(BasicBlock* block)
         else
         {
             // Fast tail call.
-            // Call target = REG_FASTTAILCALL_TARGET
-            // https://github.com/dotnet/coreclr/issues/4827
-            // Do we need a special encoding for stack walker like rex.w prefix for x64?
-            getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
+            GenTreeCall* call     = jmpNode->AsCall();
+            gtCallTypes  callType = (gtCallTypes)call->gtCallType;
+
+            // Fast tail calls cannot happen to helpers.
+            assert((callType == CT_INDIRECT) || (callType == CT_USER_FUNC));
+
+            // Try to dispatch this as a direct branch; this is possible when the call is
+            // truly direct. In this case, the control expression will be null and the direct
+            // target address will be in gtDirectCallAddress. It is still possible that calls
+            // to user funcs require indirection, in which case the control expression will
+            // be non-null.
+            if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr))
+            {
+                assert(call->gtCallMethHnd != nullptr);
+                // clang-format off
+                getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN,
+                                           call->gtCallMethHnd,
+                                           INDEBUG_LDISASM_COMMA(nullptr)
+                                           call->gtDirectCallAddress,
+                                           0,          // argSize
+                                           EA_UNKNOWN  // retSize
+                                           ARM64_ARG(EA_UNKNOWN), // secondRetSize
+                                           gcInfo.gcVarPtrSetCur,
+                                           gcInfo.gcRegGCrefSetCur,
+                                           gcInfo.gcRegByrefSetCur,
+                                           BAD_IL_OFFSET, // IL offset
+                                           REG_NA,        // ireg
+                                           REG_NA,        // xreg
+                                           0,             // xmul
+                                           0,             // disp
+                                           true);         // isJump
+                // clang-format on
+            }
+            else
+            {
+                // Target requires indirection to obtain. genCallInstruction will have materialized
+                // it into REG_FASTTAILCALL_TARGET already, so just branch to it.
+                getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
+            }
         }
 #endif // FEATURE_FASTTAILCALL
     }
@@ -8958,9 +8993,45 @@ void CodeGen::genFnEpilog(BasicBlock* block)
         {
 #ifdef _TARGET_AMD64_
             // Fast tail call.
-            // Call target = RAX.
-            // Stack walker requires that a register indirect tail call be rex.w prefixed.
-            getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
+            GenTreeCall* call     = jmpNode->AsCall();
+            gtCallTypes  callType = (gtCallTypes)call->gtCallType;
+
+            // Fast tail calls cannot happen to helpers.
+            assert((callType == CT_INDIRECT) || (callType == CT_USER_FUNC));
+
+            // Calls to a user func can be dispatched as an RIP-relative jump when they are
+            // truly direct; in this case, the control expression will be null and the direct
+            // target address will be in gtDirectCallAddress. It is still possible that calls
+            // to user funcs require indirection, in which case the control expression will
+            // be non-null.
+            if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr))
+            {
+                assert(call->gtCallMethHnd != nullptr);
+                // clang-format off
+                getEmitter()->emitIns_Call(
+                        emitter::EC_FUNC_TOKEN,
+                        call->gtCallMethHnd,
+                        INDEBUG_LDISASM_COMMA(nullptr)
+                        call->gtDirectCallAddress,
+                        0,                                              // argSize
+                        EA_UNKNOWN                                      // retSize
+                        MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),// secondRetSize
+                        gcInfo.gcVarPtrSetCur,
+                        gcInfo.gcRegGCrefSetCur,
+                        gcInfo.gcRegByrefSetCur,
+                        BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0,  /* iloffset, ireg, xreg, xmul, disp */
+                        true /* isJump */
+                );
+                // clang-format on
+            }
+            else
+            {
+                // Target requires indirection to obtain. genCallInstruction will have materialized
+                // it into RAX already, so just jump to it. The stack walker requires that a register
+                // indirect tail call be rex.w prefixed.
+                getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
+            }
+
 #else
             assert(!"Fast tail call as epilog+jmp");
             unreached();
index 521aaca..0f0c4b3 100644 (file)
@@ -5497,11 +5497,15 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
         // Don't support fast tail calling JIT helpers
         assert(callType != CT_HELPER);
 
-        // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
-        assert(target != nullptr);
+        // If this is indirect then we go through RAX with epilog sequence
+        // generating "jmp rax". Otherwise epilog will try to generate a
+        // rip-relative jump.
+        if (target != nullptr)
+        {
+            genConsumeReg(target);
+            genCopyRegIfNeeded(target, REG_RAX);
+        }
 
-        genConsumeReg(target);
-        genCopyRegIfNeeded(target, REG_RAX);
         return;
     }
 
index b065910..344d592 100644 (file)
@@ -3049,7 +3049,7 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
     {
         case IAT_VALUE:
             // Non-virtual direct call to known address
-            if (!IsCallTargetInRange(addr) || call->IsTailCall())
+            if (!IsCallTargetInRange(addr) || call->IsTailCallViaHelper())
             {
                 result = AddrGen(addr);
             }