Fix early stack offset and size computations for ARM32 with FEATURE_FASTTAILCALL...

author Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>

Wed, 9 Mar 2022 12:59:09 +0000 (13:59 +0100)

committer GitHub <noreply@github.com>

Wed, 9 Mar 2022 12:59:09 +0000 (13:59 +0100)
author Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
Wed, 9 Mar 2022 12:59:09 +0000 (13:59 +0100)
committer GitHub <noreply@github.com>
Wed, 9 Mar 2022 12:59:09 +0000 (13:59 +0100)
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h

index dc8bb8f..1d5cb42 100644 (file)
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8215,7 +8215,7 @@ public:
      CORINFO_CLASS_HANDLE eeGetArgClass(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE list);
      CORINFO_CLASS_HANDLE eeGetClassFromContext(CORINFO_CONTEXT_HANDLE context);
      unsigned eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
-    static unsigned eeGetArgAlignment(var_types type, bool isFloatHfa);
+    static unsigned eeGetArgSizeAlignment(var_types type, bool isFloatHfa);
  
      // VOM info, method sigs
  
diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp

index 2ec2c71..3c184c9 100644 (file)
--- a/src/coreclr/jit/ee_il_dll.cpp
+++ b/src/coreclr/jit/ee_il_dll.cpp
@@ -456,30 +456,36 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO*
      {
          argSize = genTypeSize(argType);
      }
-    const unsigned argAlignment       = eeGetArgAlignment(argType, (hfaType == TYP_FLOAT));
-    const unsigned argSizeWithPadding = roundUp(argSize, argAlignment);
-    return argSizeWithPadding;
+
+    const unsigned argSizeAlignment = eeGetArgSizeAlignment(argType, (hfaType == TYP_FLOAT));
+    const unsigned alignedArgSize   = roundUp(argSize, argSizeAlignment);
+    return alignedArgSize;
  
  #endif
  }
  
  //------------------------------------------------------------------------
-// eeGetArgAlignment: Return arg passing alignment for the given type.
+// eeGetArgSizeAlignment: Return alignment for an argument size.
  //
  // Arguments:
  //   type - the argument type
  //   isFloatHfa - is it an HFA<float> type
  //
  // Return value:
-//   the required alignment in bytes.
+//   the required argument size alignment in bytes.
  //
  // Notes:
-//   It currently doesn't return smaller than required alignment for arm32 (4 bytes for double and int64)
-//   but it does not lead to issues because its alignment requirements are satisfied in other code parts.
-//   TODO: fix this function and delete the other code that is handling this.
+//   Usually values passed on the stack are aligned to stack slot (i.e. pointer size), except for
+//   on macOS ARM ABI that allows packing multiple args into a single stack slot.
+//
+//   The arg size alignment can be different from the normal alignment. One
+//   example is on arm32 where a struct containing a double and float can
+//   explicitly have size 12 but with alignment 8, in which case the size is
+//   aligned to 4 (the stack slot size) while frame layout must still handle
+//   aligning the argument to 8.
  //
  // static
-unsigned Compiler::eeGetArgAlignment(var_types type, bool isFloatHfa)
+unsigned Compiler::eeGetArgSizeAlignment(var_types type, bool isFloatHfa)
  {
      if (compMacOsArm64Abi())
      {
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp

index df32fcf..f0ff0e4 100644 (file)
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -955,6 +955,23 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
              {
                  varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT));
              }
+
+#if FEATURE_FASTTAILCALL
+            // Check if arg was split between registers and stack.
+            if (!varTypeUsesFloatReg(argType))
+            {
+                unsigned firstRegArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg());
+                unsigned lastRegArgNum  = firstRegArgNum + cSlots - 1;
+                if (lastRegArgNum >= varDscInfo->maxIntRegArgNum)
+                {
+                    assert(varDscInfo->stackArgSize == 0);
+                    unsigned numEnregistered = varDscInfo->maxIntRegArgNum - firstRegArgNum;
+                    varDsc->SetStackOffset(-(int)numEnregistered * REGSIZE_BYTES);
+                    varDscInfo->stackArgSize += (cSlots - numEnregistered) * REGSIZE_BYTES;
+                    JITDUMP("set user arg V%02u offset to %d\n", varDscInfo->varNum, varDsc->GetStackOffset());
+                }
+            }
+#endif
  #endif // TARGET_ARM
  
  #ifdef DEBUG
@@ -1057,14 +1074,17 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
  #endif // TARGET_XXX
  
  #if FEATURE_FASTTAILCALL
-            const unsigned argAlignment = eeGetArgAlignment(origArgType, (hfaType == TYP_FLOAT));
-            if (compMacOsArm64Abi())
-            {
-                varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, argAlignment);
-            }
-
-            assert((argSize % argAlignment) == 0);
-            assert((varDscInfo->stackArgSize % argAlignment) == 0);
+#ifdef TARGET_ARM
+            unsigned argAlignment = cAlign * TARGET_POINTER_SIZE;
+#else
+            unsigned argAlignment = eeGetArgSizeAlignment(origArgType, (hfaType == TYP_FLOAT));
+            // We expect the following rounding operation to be a noop on all
+            // ABIs except ARM (where we have 8-byte aligned args) and macOS
+            // ARM64 (that allows to pack multiple smaller parameters in a
+            // single stack slot).
+            assert(compMacOsArm64Abi() || ((varDscInfo->stackArgSize % argAlignment) == 0));
+#endif
+            varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, argAlignment);
  
              JITDUMP("set user arg V%02u offset to %u\n", varDscInfo->varNum, varDscInfo->stackArgSize);
              varDsc->SetStackOffset(varDscInfo->stackArgSize);
@@ -3659,9 +3679,9 @@ unsigned LclVarDsc::lvSize() const // Size needed for storage representation. On
      if (lvIsParam)
      {
          assert(varTypeIsStruct(lvType));
-        const bool     isFloatHfa   = (lvIsHfa() && (GetHfaType() == TYP_FLOAT));
-        const unsigned argAlignment = Compiler::eeGetArgAlignment(lvType, isFloatHfa);
-        return roundUp(lvExactSize, argAlignment);
+        const bool     isFloatHfa       = (lvIsHfa() && (GetHfaType() == TYP_FLOAT));
+        const unsigned argSizeAlignment = Compiler::eeGetArgSizeAlignment(lvType, isFloatHfa);
+        return roundUp(lvExactSize, argSizeAlignment);
      }
  
  #if defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
@@ -5949,7 +5969,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
          }
  #endif // TARGET_ARM
          const bool     isFloatHfa   = (varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_FLOAT));
-        const unsigned argAlignment = eeGetArgAlignment(varDsc->lvType, isFloatHfa);
+        const unsigned argAlignment = eeGetArgSizeAlignment(varDsc->lvType, isFloatHfa);
          if (compMacOsArm64Abi())
          {
              argOffs = roundUp(argOffs, argAlignment);
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp

index a6e09b9..1aa1320 100644 (file)
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3076,7 +3076,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
              // Arm64 Apple has a special ABI for passing small size arguments on stack,
              // bytes are aligned to 1-byte, shorts to 2-byte, int/float to 4-byte, etc.
              // It means passing 8 1-byte arguments on stack can take as small as 8 bytes.
-            argAlignBytes = eeGetArgAlignment(argType, isFloatHfa);
+            argAlignBytes = eeGetArgSizeAlignment(argType, isFloatHfa);
          }
  
          //
diff --git a/src/coreclr/jit/register_arg_convention.h b/src/coreclr/jit/register_arg_convention.h

index a1816ba..07552f8 100644 (file)
--- a/src/coreclr/jit/register_arg_convention.h
+++ b/src/coreclr/jit/register_arg_convention.h
@@ -28,7 +28,6 @@ struct InitVarDscInfo
  #if FEATURE_FASTTAILCALL
      // It is used to calculate argument stack size information in byte
      unsigned stackArgSize;
-    bool     hasMultiSlotStruct;
  #endif // FEATURE_FASTTAILCALL
  
  public:
@@ -49,8 +48,7 @@ public:
  #endif // TARGET_ARM
  
  #if FEATURE_FASTTAILCALL
-        stackArgSize       = 0;
-        hasMultiSlotStruct = false;
+        stackArgSize = 0;
  #endif // FEATURE_FASTTAILCALL
      }
author	Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
	Wed, 9 Mar 2022 12:59:09 +0000 (13:59 +0100)
committer	GitHub <noreply@github.com>
	Wed, 9 Mar 2022 12:59:09 +0000 (13:59 +0100)
src/coreclr/jit/compiler.h		patch \| blob \| history
src/coreclr/jit/ee_il_dll.cpp		patch \| blob \| history
src/coreclr/jit/lclvars.cpp		patch \| blob \| history
src/coreclr/jit/morph.cpp		patch \| blob \| history
src/coreclr/jit/register_arg_convention.h		patch \| blob \| history