Arm64 vector ABI (dotnet/coreclr#23675)

author Carol Eidt <carol.eidt@microsoft.com>

Tue, 16 Apr 2019 16:35:49 +0000 (09:35 -0700)

committer GitHub <noreply@github.com>

Tue, 16 Apr 2019 16:35:49 +0000 (09:35 -0700)
author Carol Eidt <carol.eidt@microsoft.com>
Tue, 16 Apr 2019 16:35:49 +0000 (09:35 -0700)
committer GitHub <noreply@github.com>
Tue, 16 Apr 2019 16:35:49 +0000 (09:35 -0700)
diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp

index 1b48491..cd1b90f 100644 (file)
--- a/src/coreclr/src/jit/assertionprop.cpp
+++ b/src/coreclr/src/jit/assertionprop.cpp
@@ -75,7 +75,7 @@ void Compiler::optAddCopies()
          // We only add copies for non temp local variables
          // that have a single def and that can possibly be enregistered
  
-        if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeCanReg(typ))
+        if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeIsEnregisterable(typ))
          {
              continue;
          }
diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp

index 91bb221..efbb590 100644 (file)
--- a/src/coreclr/src/jit/codegenarm64.cpp
+++ b/src/coreclr/src/jit/codegenarm64.cpp
@@ -2023,10 +2023,10 @@ void CodeGen::genSimpleReturn(GenTree* treeNode)
      GenTree*  op1        = treeNode->gtGetOp1();
      var_types targetType = treeNode->TypeGet();
  
-    assert(!isStructReturn(treeNode));
+    assert(targetType != TYP_STRUCT);
      assert(targetType != TYP_VOID);
  
-    regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+    regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? REG_FLOATRET : REG_INTRET;
  
      bool movRequired = (op1->gtRegNum != retReg);
  
diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp

index 66d5b22..ae841a6 100644 (file)
--- a/src/coreclr/src/jit/codegenarmarch.cpp
+++ b/src/coreclr/src/jit/codegenarmarch.cpp
@@ -2383,7 +2383,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
      }
      else
      {
-        assert(!varTypeIsStruct(call));
+        assert(call->gtType != TYP_STRUCT);
  
          if (call->gtType == TYP_REF)
          {
@@ -2537,9 +2537,13 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
                  // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
                  returnReg = REG_PINVOKE_TCB;
              }
+            else if (compiler->opts.compUseSoftFP)
+            {
+                returnReg = REG_INTRET;
+            }
              else
  #endif // _TARGET_ARM_
-                if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP)
+                if (varTypeUsesFloatArgReg(returnType))
              {
                  returnReg = REG_FLOATRET;
              }
@@ -3529,8 +3533,13 @@ bool CodeGen::isStructReturn(GenTree* treeNode)
      // For the GT_RET_FILT, the return is always
      // a bool or a void, for the end of a finally block.
      noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+    var_types returnType = treeNode->TypeGet();
  
-    return varTypeIsStruct(treeNode);
+#ifdef _TARGET_ARM64_
+    return varTypeIsStruct(returnType) && (compiler->info.compRetNativeType == TYP_STRUCT);
+#else
+    return varTypeIsStruct(returnType);
+#endif
  }
  
  //------------------------------------------------------------------------
diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp

index 0ec2ba7..c7a6e83 100644 (file)
--- a/src/coreclr/src/jit/codegencommon.cpp
+++ b/src/coreclr/src/jit/codegencommon.cpp
@@ -3305,7 +3305,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
          {
              // A struct might be passed  partially in XMM register for System V calls.
              // So a single arg might use both register files.
-            if (isFloatRegType(regType) != doingFloat)
+            if (emitter::isFloatReg(varDsc->lvArgReg) != doingFloat)
              {
                  continue;
              }
@@ -10158,7 +10158,11 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
      structPassingKind howToReturnStruct;
      var_types         returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
  
+#ifdef _TARGET_ARM64_
+    return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType));
+#else
      return (varTypeIsStruct(returnType));
+#endif
  }
  
  //----------------------------------------------
@@ -10167,11 +10171,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
  
  bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
  {
-#ifdef FEATURE_HFA
-    return varTypeIsFloating(GetHfaType(hClass));
-#else
-    return false;
-#endif
+    return varTypeIsValidHfaType(GetHfaType(hClass));
  }
  
  bool Compiler::IsHfa(GenTree* tree)
@@ -10204,7 +10204,19 @@ var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
      {
  #ifdef FEATURE_HFA
          CorInfoType corType = info.compCompHnd->getHFAType(hClass);
-        if (corType != CORINFO_TYPE_UNDEF)
+#ifdef _TARGET_ARM64_
+        if (corType == CORINFO_TYPE_VALUECLASS)
+        {
+            // This is a vector type.
+            // HVAs are only supported on ARM64, and only for homogeneous aggregates of 8 or 16 byte vectors.
+            // For 8-byte vectors corType will be returned as CORINFO_TYPE_DOUBLE.
+            result = TYP_SIMD16;
+            // This type may not appear elsewhere, but it will occupy a floating point register.
+            compFloatingPointUsed = true;
+        }
+        else
+#endif // _TARGET_ARM64_
+            if (corType != CORINFO_TYPE_UNDEF)
          {
              result = JITtype2varType(corType);
          }
diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp

index 97d04e7..a7c01a7 100644 (file)
--- a/src/coreclr/src/jit/codegenxarch.cpp
+++ b/src/coreclr/src/jit/codegenxarch.cpp
@@ -1133,9 +1133,9 @@ void CodeGen::genStructReturn(GenTree* treeNode)
          unsigned regCount = retTypeDesc.GetReturnRegCount();
          assert(regCount == MAX_RET_REG_COUNT);
  
-        if (varTypeIsEnregisterableStruct(op1))
+        if (varTypeIsEnregisterable(op1))
          {
-            // Right now the only enregistrable structs supported are SIMD vector types.
+            // Right now the only enregisterable structs supported are SIMD vector types.
              assert(varTypeIsSIMD(op1));
              assert(op1->isUsedFromReg());
  
diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp

index e005a77..15ed19c 100644 (file)
--- a/src/coreclr/src/jit/compiler.cpp
+++ b/src/coreclr/src/jit/compiler.cpp
@@ -573,8 +573,8 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
  //     of size 'structSize'.
  //     We examine 'clsHnd' to check the GC layout of the struct and
  //     return TYP_REF for structs that simply wrap an object.
-//     If the struct is a one element HFA, we will return the
-//     proper floating point type.
+//     If the struct is a one element HFA/HVA, we will return the
+//     proper floating point or vector type.
  //
  // Arguments:
  //    structSize - the size of the struct type, cannot be zero
@@ -592,13 +592,64 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
  //    same way as any other 8-byte struct
  //    For ARM32 if we have an HFA struct that wraps a 64-bit double
  //    we will return TYP_DOUBLE.
+//    For vector calling conventions, a vector is considered a "primitive"
+//    type, as it is passed in a single register.
  //
  var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg)
  {
      assert(structSize != 0);
  
-    var_types useType;
+    var_types useType = TYP_UNKNOWN;
  
+// Start by determining if we have an HFA/HVA with a single element.
+#ifdef FEATURE_HFA
+#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
+    // On Arm64 Windows, the arguments of VarArg methods are not classified as HFA types; they need to be
+    // treated as if they are not HFA types.
+    if (!isVarArg)
+#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
+    {
+        switch (structSize)
+        {
+            case 4:
+            case 8:
+#ifdef _TARGET_ARM64_
+            case 16:
+#endif // _TARGET_ARM64_
+            {
+                var_types hfaType;
+#ifdef ARM_SOFTFP
+                // For ARM_SOFTFP, HFA is unsupported so we need to check in another way.
+                // This matters only for size-4 structs, because bigger structs would be processed with RetBuf.
+                if (isSingleFloat32Struct(clsHnd))
+                {
+                    hfaType = TYP_FLOAT;
+                }
+#else  // !ARM_SOFTFP
+                hfaType = GetHfaType(clsHnd);
+#endif // ARM_SOFTFP
+                // We're only interested in the case where the struct size is equal to the size of the hfaType.
+                if (varTypeIsValidHfaType(hfaType))
+                {
+                    if (genTypeSize(hfaType) == structSize)
+                    {
+                        useType = hfaType;
+                    }
+                    else
+                    {
+                        return TYP_UNKNOWN;
+                    }
+                }
+            }
+        }
+        if (useType != TYP_UNKNOWN)
+        {
+            return useType;
+        }
+    }
+#endif // FEATURE_HFA
+
+    // Now deal with non-HFA/HVA structs.
      switch (structSize)
      {
          case 1:
@@ -618,15 +669,8 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
  
  #ifdef _TARGET_64BIT_
          case 4:
-            if (IsHfa(clsHnd))
-            {
-                // A structSize of 4 with IsHfa, it must be an HFA of one float
-                useType = TYP_FLOAT;
-            }
-            else
-            {
-                useType = TYP_INT;
-            }
+            // We dealt with the one-float HFA above. All other 4-byte structs are handled as INT.
+            useType = TYP_INT;
              break;
  
  #if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI)
@@ -640,86 +684,13 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
  #endif // _TARGET_64BIT_
  
          case TARGET_POINTER_SIZE:
-#ifdef ARM_SOFTFP
-            // For ARM_SOFTFP, HFA is unsupported so we need to check in another way
-            // This matters only for size-4 struct cause bigger structs would be processed with RetBuf
-            if (isSingleFloat32Struct(clsHnd))
-#else // !ARM_SOFTFP
-            if (IsHfa(clsHnd)
-#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                // Arm64 Windows VarArg methods arguments will not
-                // classify HFA types, they will need to be treated
-                // as if they are not HFA types.
-                && !isVarArg
-#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                )
-#endif // ARM_SOFTFP
-            {
-#ifdef _TARGET_64BIT_
-                var_types hfaType = GetHfaType(clsHnd);
-
-                // A structSize of 8 with IsHfa, we have two possiblities:
-                // An HFA of one double or an HFA of two floats
-                //
-                // Check and exclude the case of an HFA of two floats
-                if (hfaType == TYP_DOUBLE)
-                {
-                    // We have an HFA of one double
-                    useType = TYP_DOUBLE;
-                }
-                else
-                {
-                    assert(hfaType == TYP_FLOAT);
-
-                    // We have an HFA of two floats
-                    // This should be passed or returned in two FP registers
-                    useType = TYP_UNKNOWN;
-                }
-#else  // a 32BIT target
-                // A structSize of 4 with IsHfa, it must be an HFA of one float
-                useType = TYP_FLOAT;
-#endif // _TARGET_64BIT_
-            }
-            else
-            {
-                BYTE gcPtr = 0;
-                // Check if this pointer-sized struct is wrapping a GC object
-                info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
-                useType = getJitGCType(gcPtr);
-            }
-            break;
-
-#ifdef _TARGET_ARM_
-        case 8:
-            if (IsHfa(clsHnd))
-            {
-                var_types hfaType = GetHfaType(clsHnd);
-
-                // A structSize of 8 with IsHfa, we have two possiblities:
-                // An HFA of one double or an HFA of two floats
-                //
-                // Check and exclude the case of an HFA of two floats
-                if (hfaType == TYP_DOUBLE)
-                {
-                    // We have an HFA of one double
-                    useType = TYP_DOUBLE;
-                }
-                else
-                {
-                    assert(hfaType == TYP_FLOAT);
-
-                    // We have an HFA of two floats
-                    // This should be passed or returned in two FP registers
-                    useType = TYP_UNKNOWN;
-                }
-            }
-            else
-            {
-                // We don't have an HFA
-                useType = TYP_UNKNOWN;
-            }
-            break;
-#endif // _TARGET_ARM_
+        {
+            BYTE gcPtr = 0;
+            // Check if this pointer-sized struct is wrapping a GC object
+            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+            useType = getJitGCType(gcPtr);
+        }
+        break;
  
          default:
              useType = TYP_UNKNOWN;
@@ -802,11 +773,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
      else
  #endif // UNIX_AMD64_ABI
  
-        // The largest primitive type is 8 bytes (TYP_DOUBLE)
+        // The largest arg passed in a single register is MAX_PASS_SINGLEREG_BYTES,
          // so we can skip calling getPrimitiveTypeForStruct when we
          // have a struct that is larger than that.
          //
-        if (structSize <= sizeof(double))
+        if (structSize <= MAX_PASS_SINGLEREG_BYTES)
      {
          // We set the "primitive" useType based upon the structSize
          // and also examine the clsHnd to see if it is an HFA of count one
@@ -829,14 +800,21 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
          //
          if (structSize <= MAX_PASS_MULTIREG_BYTES)
          {
-            // Structs that are HFA's are passed by value in multiple registers
-            if (IsHfa(clsHnd)
+            // Structs that are HFAs/HVAs are passed by value in multiple registers.
+            // On Arm64 Windows, the arguments of VarArg methods are not classified as HFA/HVA types; they
+            // need to be treated as if they are not HFA/HVA types.
+            var_types hfaType;
  #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                && !isVarArg // Arm64 Windows VarArg methods arguments will not
-                             // classify HFA types, they will need to be treated
-                             // as if they are not HFA types.
-#endif                       // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                )
+            if (isVarArg)
+            {
+                hfaType = TYP_UNDEF;
+            }
+            else
+#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
+            {
+                hfaType = GetHfaType(clsHnd);
+            }
+            if (varTypeIsValidHfaType(hfaType))
              {
                  // HFA's of count one should have been handled by getPrimitiveTypeForStruct
                  assert(GetHfaCount(clsHnd) >= 2);
@@ -851,7 +829,6 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
              {
  
  #ifdef UNIX_AMD64_ABI
-
                  // The case of (structDesc.eightByteCount == 1) should have already been handled
                  if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters)
                  {
@@ -1035,10 +1012,10 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
      // Check for cases where a small struct is returned in a register
      // via a primitive type.
      //
-    // The largest primitive type is 8 bytes (TYP_DOUBLE)
+    // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES in size,
      // so we can skip calling getPrimitiveTypeForStruct when we
      // have a struct that is larger than that.
-    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= sizeof(double)))
+    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
      {
          // We set the "primitive" useType based upon the structSize
          // and also examine the clsHnd to see if it is an HFA of count one
@@ -1070,7 +1047,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
      // because when HFA are enabled, normally we would use two FP registers to pass or return it
      //
      // But if we don't have support for multiple register return types, we have to change this.
-    // Since we what we have an 8-byte struct (float + float)  we change useType to TYP_I_IMPL
+    // Since what we have is an 8-byte struct (float + float)  we change useType to TYP_I_IMPL
      // so that the struct is returned instead using an 8-byte integer register.
      //
      if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd))
diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h

index c734b5a..dc1f844 100644 (file)
--- a/src/coreclr/src/jit/compiler.h
+++ b/src/coreclr/src/jit/compiler.h
@@ -134,6 +134,61 @@ const unsigned FLG_CCTOR = (CORINFO_FLG_CONSTRUCTOR | CORINFO_FLG_STATIC);
  const int BAD_STK_OFFS = 0xBAADF00D; // for LclVarDsc::lvStkOffs
  #endif
  
+//------------------------------------------------------------------------
+// HFA info shared by LclVarDsc and fgArgTabEntry
+//------------------------------------------------------------------------
+#ifdef FEATURE_HFA
+enum HfaElemKind : unsigned int
+{
+    HFA_ELEM_NONE,
+    HFA_ELEM_FLOAT,
+    HFA_ELEM_DOUBLE,
+    HFA_ELEM_SIMD16
+};
+inline bool IsHfa(HfaElemKind kind)
+{
+    return kind != HFA_ELEM_NONE;
+}
+inline var_types HfaTypeFromElemKind(HfaElemKind kind)
+{
+    switch (kind)
+    {
+        case HFA_ELEM_FLOAT:
+            return TYP_FLOAT;
+        case HFA_ELEM_DOUBLE:
+            return TYP_DOUBLE;
+#ifdef FEATURE_SIMD
+        case HFA_ELEM_SIMD16:
+            return TYP_SIMD16;
+#endif
+        case HFA_ELEM_NONE:
+            return TYP_UNDEF;
+        default:
+            assert(!"Invalid HfaElemKind");
+            return TYP_UNDEF;
+    }
+}
+inline HfaElemKind HfaElemKindFromType(var_types type)
+{
+    switch (type)
+    {
+        case TYP_FLOAT:
+            return HFA_ELEM_FLOAT;
+        case TYP_DOUBLE:
+            return HFA_ELEM_DOUBLE;
+#ifdef FEATURE_SIMD
+        case TYP_SIMD16:
+            return HFA_ELEM_SIMD16;
+#endif
+        case TYP_UNDEF:
+            return HFA_ELEM_NONE;
+        default:
+            assert(!"Invalid HFA Type");
+            return HFA_ELEM_NONE;
+    }
+}
+#endif // FEATURE_HFA
+
  // The following holds the Local var info (scope information)
  typedef const char* VarName; // Actual ASCII string
  struct VarScopeDsc
@@ -595,11 +650,8 @@ public:
      unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call
  
  #ifdef FEATURE_HFA
-    unsigned char _lvIsHfa : 1;          // Is this a struct variable who's class handle is an HFA type
-    unsigned char _lvIsHfaRegArg : 1;    // Is this a HFA argument variable?    // TODO-CLEANUP: Remove this and replace
-                                         // with (lvIsRegArg && lvIsHfa())
-    unsigned char _lvHfaTypeIsFloat : 1; // Is the HFA type float or double?
-#endif                                   // FEATURE_HFA
+    HfaElemKind _lvHfaElemKind : 2; // What kind of an HFA this is (HFA_ELEM_NONE if it is not an HFA).
+#endif                              // FEATURE_HFA
  
  #ifdef DEBUG
      // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct
@@ -666,70 +718,60 @@ public:
      bool lvIsHfa() const
      {
  #ifdef FEATURE_HFA
-        return _lvIsHfa;
+        return IsHfa(_lvHfaElemKind);
  #else
          return false;
  #endif
      }
  
-    void lvSetIsHfa()
-    {
-#ifdef FEATURE_HFA
-        _lvIsHfa = true;
-#endif
-    }
-
      bool lvIsHfaRegArg() const
      {
  #ifdef FEATURE_HFA
-        return _lvIsHfaRegArg;
+        return lvIsRegArg && lvIsHfa();
  #else
          return false;
  #endif
      }
  
-    void lvSetIsHfaRegArg(bool value = true)
-    {
-#ifdef FEATURE_HFA
-        _lvIsHfaRegArg = value;
-#endif
-    }
-
-    bool lvHfaTypeIsFloat() const
-    {
-#ifdef FEATURE_HFA
-        return _lvHfaTypeIsFloat;
-#else
-        return false;
-#endif
-    }
-
-    void lvSetHfaTypeIsFloat(bool value)
-    {
-#ifdef FEATURE_HFA
-        _lvHfaTypeIsFloat = value;
-#endif
-    }
-
-    // on Arm64 - Returns 1-4 indicating the number of register slots used by the HFA
-    // on Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8
+    //------------------------------------------------------------------------------
+    // lvHfaSlots: Get the number of slots used by an HFA local
+    //
+    // Return Value:
+    //    On Arm64 - Returns 1-4 indicating the number of register slots used by the HFA
+    //    On Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8
      //
      unsigned lvHfaSlots() const
      {
          assert(lvIsHfa());
          assert(varTypeIsStruct(lvType));
+        unsigned slots = 0;
  #ifdef _TARGET_ARM_
-        return lvExactSize / sizeof(float);
-#else  //  _TARGET_ARM64_
-        if (lvHfaTypeIsFloat())
-        {
-            return lvExactSize / sizeof(float);
-        }
-        else
+        slots = lvExactSize / sizeof(float);
+        assert(slots <= 8);
+#elif defined(_TARGET_ARM64_)
+        switch (_lvHfaElemKind)
          {
-            return lvExactSize / sizeof(double);
+            case HFA_ELEM_NONE:
+                assert(!"lvHfaSlots called for non-HFA");
+                break;
+            case HFA_ELEM_FLOAT:
+                assert((lvExactSize % 4) == 0);
+                slots = lvExactSize >> 2;
+                break;
+            case HFA_ELEM_DOUBLE:
+                assert((lvExactSize % 8) == 0);
+                slots = lvExactSize >> 3;
+                break;
+            case HFA_ELEM_SIMD16:
+                assert((lvExactSize % 16) == 0);
+                slots = lvExactSize >> 4;
+                break;
+            default:
+                unreached();
          }
+        assert(slots <= 4);
  #endif //  _TARGET_ARM64_
+        return slots;
      }
  
      // lvIsMultiRegArgOrRet()
@@ -750,7 +792,7 @@ private:
      regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
  #endif                          // !defined(_TARGET_64BIT_)
  
-    regNumberSmall _lvArgReg; // The register in which this argument is passed.
+    regNumberSmall _lvArgReg; // The (first) register in which this argument is passed.
  
  #if FEATURE_MULTIREG_ARGS
      regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
@@ -1030,14 +1072,21 @@ public:
      {
          return isFloatRegType(lvType) || lvIsHfaRegArg();
      }
+
      var_types GetHfaType() const
      {
-        return lvIsHfa() ? (lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF;
+#ifdef FEATURE_HFA
+        assert(lvIsHfa());
+        return HfaTypeFromElemKind(_lvHfaElemKind);
+#endif // FEATURE_HFA
+        return TYP_UNDEF;
      }
+
      void SetHfaType(var_types type)
      {
-        assert(varTypeIsFloating(type));
-        lvSetHfaTypeIsFloat(type == TYP_FLOAT);
+#ifdef FEATURE_HFA
+        _lvHfaElemKind = HfaElemKindFromType(type);
+#endif // FEATURE_HFA
      }
  
      var_types lvaArgType();
@@ -1487,8 +1536,7 @@ public:
      bool _isSplit : 1; // True when this argument is split between the registers and OutArg area
  #endif                 // FEATURE_ARG_SPLIT
  #ifdef FEATURE_HFA
-    bool _isHfaArg : 1;    // True when the argument is an HFA type.
-    bool _isDoubleHfa : 1; // True when the argument is an HFA, with an element type of DOUBLE.
+    HfaElemKind _hfaElemKind : 2; // What kind of an HFA this is (HFA_ELEM_NONE if it is not an HFA).
  #endif
  
      bool isLateArg()
@@ -1569,7 +1617,7 @@ public:
      bool getIsHfaArg()
      {
  #ifdef FEATURE_HFA
-        return _isHfaArg;
+        return IsHfa(_hfaElemKind);
  #else
          return false;
  #endif
@@ -1579,23 +1627,22 @@ public:
      bool getIsHfaRegArg()
      {
  #ifdef FEATURE_HFA
-        return _isHfaArg && isPassedInRegisters();
+        return IsHfa(_hfaElemKind) && isPassedInRegisters();
  #else
          return false;
  #endif
      }
  
-    __declspec(property(get = getHfaType)) var_types hfaType;
-    var_types getHfaType()
+    __declspec(property(get = GetHfaType)) var_types hfaType;
+    var_types GetHfaType()
      {
  #ifdef FEATURE_HFA
-        return _isHfaArg ? (_isDoubleHfa ? TYP_DOUBLE : TYP_FLOAT) : TYP_UNDEF;
-#else
+        return HfaTypeFromElemKind(_hfaElemKind);
+#endif // FEATURE_HFA
          return TYP_UNDEF;
-#endif
      }
  
-    void setHfaType(var_types type, unsigned hfaSlots)
+    void SetHfaType(var_types type, unsigned hfaSlots)
      {
  #ifdef FEATURE_HFA
          if (type != TYP_UNDEF)
@@ -1607,29 +1654,33 @@ public:
              // Note that hfaSlots is the number of registers we will use. For ARM, that is twice
              // the number of "double registers".
              unsigned numHfaRegs = hfaSlots;
-            if (isPassedInRegisters())
-            {
  #ifdef _TARGET_ARM_
-                if (type == TYP_DOUBLE)
-                {
-                    // Must be an even number of registers.
-                    assert((numRegs & 1) == 0);
-                    numHfaRegs = hfaSlots / 2;
-                }
+            if (type == TYP_DOUBLE)
+            {
+                // Must be an even number of registers.
+                assert((numRegs & 1) == 0);
+                numHfaRegs = hfaSlots / 2;
+            }
  #endif // _TARGET_ARM_
-                if (_isHfaArg)
+
+            if (!isHfaArg)
+            {
+                // We haven't previously set this; do so now.
+                _hfaElemKind = HfaElemKindFromType(type);
+                if (isPassedInRegisters())
                  {
-                    // This should already be set correctly.
-                    assert(numRegs == numHfaRegs);
-                    assert(_isDoubleHfa == (type == TYP_DOUBLE));
+                    numRegs = numHfaRegs;
                  }
-                else
+            }
+            else
+            {
+                // We've already set this; ensure that it's consistent.
+                if (isPassedInRegisters())
                  {
-                    numRegs = numHfaRegs;
+                    assert(numRegs == numHfaRegs);
                  }
+                assert(type == HfaTypeFromElemKind(_hfaElemKind));
              }
-            _isDoubleHfa = (type == TYP_DOUBLE);
-            _isHfaArg    = true;
          }
  #endif // FEATURE_HFA
      }
@@ -1701,22 +1752,30 @@ public:
      {
          unsigned size = getSlotCount();
  #ifdef FEATURE_HFA
-#ifdef _TARGET_ARM_
-        // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size.
-        if (isHfaRegArg && (hfaType == TYP_DOUBLE))
+        if (isHfaRegArg)
          {
-            assert(!isSplit);
-            size <<= 1;
-        }
+#ifdef _TARGET_ARM_
+            // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size.
+            if (hfaType == TYP_DOUBLE)
+            {
+                assert(!isSplit);
+                size <<= 1;
+            }
  #elif defined(_TARGET_ARM64_)
-        // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size.
-        if (isHfaRegArg && (hfaType == TYP_FLOAT))
-        {
-            // Round up in case of odd HFA count.
-            size = (size + 1) >> 1;
-        }
+            // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size,
+            // or if they are SIMD16 vector hfa regs we have to double the size.
+            if (hfaType == TYP_FLOAT)
+            {
+                // Round up in case of odd HFA count.
+                size = (size + 1) >> 1;
+            }
+            else if (hfaType == TYP_SIMD16)
+            {
+                size <<= 1;
+            }
  #endif // _TARGET_ARM64_
-#endif
+        }
+#endif // FEATURE_HFA
          return size;
      }
  
@@ -7614,6 +7673,17 @@ private:
      // Should we support SIMD intrinsics?
      bool featureSIMD;
  
+    // Should we recognize SIMD types?
+    // We always do this on ARM64 to support HVA types.
+    bool supportSIMDTypes()
+    {
+#ifdef _TARGET_ARM64_
+        return true;
+#else
+        return featureSIMD;
+#endif
+    }
+
      // Have we identified any SIMD types?
      // This is currently used by struct promotion to avoid getting type information for a struct
      // field to see if it is a SIMD type, if we haven't seen any SIMD types or operations in
diff --git a/src/coreclr/src/jit/compiler.hpp b/src/coreclr/src/jit/compiler.hpp

index 901a58e..0b30114 100644 (file)
--- a/src/coreclr/src/jit/compiler.hpp
+++ b/src/coreclr/src/jit/compiler.hpp
@@ -2919,7 +2919,7 @@ inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum)
  
  __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type)
  {
-    if (varTypeIsFloating(type))
+    if (varTypeUsesFloatArgReg(type))
      {
          return genMapFloatRegArgNumToRegNum(argNum);
      }
@@ -2957,7 +2957,7 @@ inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum)
  __forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type)
  {
      regMaskTP result;
-    if (varTypeIsFloating(type))
+    if (varTypeUsesFloatArgReg(type))
      {
          result = genMapFloatRegArgNumToRegMask(argNum);
  #ifdef _TARGET_ARM_
@@ -3076,7 +3076,7 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum)
  
  inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type)
  {
-    if (varTypeIsFloating(type))
+    if (varTypeUsesFloatArgReg(type))
      {
          return genMapFloatRegNumToRegArgNum(regNum);
      }
diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp

index b24ea0e..236302c 100644 (file)
--- a/src/coreclr/src/jit/flowgraph.cpp
+++ b/src/coreclr/src/jit/flowgraph.cpp
@@ -23273,7 +23273,7 @@ GenTreeStmt* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
                      if (varTypeIsStruct(argType))
                      {
                          structHnd = gtGetStructHandleIfPresent(argNode);
-                        noway_assert(structHnd != NO_CLASS_HANDLE);
+                        noway_assert((structHnd != NO_CLASS_HANDLE) || (argType != TYP_STRUCT));
                      }
  
                      // Unsafe value cls check is not needed for
diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp

index be064c2..9e58db0 100644 (file)
--- a/src/coreclr/src/jit/gentree.cpp
+++ b/src/coreclr/src/jit/gentree.cpp
@@ -17682,7 +17682,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
      assert(op1 != nullptr);
      SetOpLclRelatedToSIMDIntrinsic(op1);
  
-    return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
+    GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
+    return simdNode;
  }
  
  GenTreeSIMD* Compiler::gtNewSIMDNode(
@@ -17692,7 +17693,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
      SetOpLclRelatedToSIMDIntrinsic(op1);
      SetOpLclRelatedToSIMDIntrinsic(op2);
  
-    return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+    GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+    return simdNode;
  }
  
  //-------------------------------------------------------------------
@@ -18064,7 +18066,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA
          case Compiler::SPK_PrimitiveType:
          {
              assert(returnType != TYP_UNKNOWN);
-            assert(!varTypeIsStruct(returnType));
+            assert(returnType != TYP_STRUCT);
              m_regType[0] = returnType;
              break;
          }
@@ -18075,7 +18077,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA
              var_types hfaType = comp->GetHfaType(retClsHnd);
  
              // We should have an hfa struct type
-            assert(varTypeIsFloating(hfaType));
+            assert(varTypeIsValidHfaType(hfaType));
  
              // Note that the retail build issues a warning about a potential divsion by zero without this Max function
              unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h

index 5d45427..b294748 100644 (file)
--- a/src/coreclr/src/jit/gentree.h
+++ b/src/coreclr/src/jit/gentree.h
@@ -3532,6 +3532,9 @@ struct GenTreeCall final : public GenTree
          return varTypeIsLong(gtType);
  #elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM_)
          return varTypeIsLong(gtType) || (varTypeIsStruct(gtType) && !HasRetBufArg());
+#elif defined(FEATURE_HFA) && defined(_TARGET_ARM64_)
+        // SIMD types are returned in vector regs on ARM64.
+        return (gtType == TYP_STRUCT) && !HasRetBufArg();
  #elif FEATURE_MULTIREG_RET
          return varTypeIsStruct(gtType) && !HasRetBufArg();
  #else
diff --git a/src/coreclr/src/jit/hwintrinsicArm64.cpp b/src/coreclr/src/jit/hwintrinsicArm64.cpp

index 98b4956..dec6038 100644 (file)
--- a/src/coreclr/src/jit/hwintrinsicArm64.cpp
+++ b/src/coreclr/src/jit/hwintrinsicArm64.cpp
@@ -214,8 +214,8 @@ GenTree* Compiler::addRangeCheckIfNeeded(GenTree* immOp, unsigned int max, bool
  {
      assert(immOp != nullptr);
  
-    // Need to range check only if we're must expand and don't have an appropriate constant
-    if (mustExpand && (!immOp->IsCnsIntOrI() || (immOp->AsIntConCommon()->IconValue() < max)))
+    // We need to range check only if we must expand.
+    if (mustExpand)
      {
          GenTree* upperBoundNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, max);
          GenTree* index          = nullptr;
@@ -463,20 +463,40 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
              return gtNewSimdHWIntrinsicNode(simdType, op1, intrinsic, simdBaseType, simdSizeBytes);
  
          case HWIntrinsicInfo::SimdExtractOp:
-            op2 =
-                addRangeCheckIfNeeded(impPopStack().val, getSIMDVectorLength(simdSizeBytes, simdBaseType), mustExpand);
+        {
+            int vectorLength = getSIMDVectorLength(simdSizeBytes, simdBaseType);
+            op2              = impStackTop().val;
+            if (!mustExpand && (!op2->IsCnsIntOrI() || op2->AsIntConCommon()->IconValue() >= vectorLength))
+            {
+                // This is either an out-of-range constant or a non-constant.
+                // We won't expand it; it will be handled recursively, at which point 'mustExpand'
+                // will be true.
+                return nullptr;
+            }
+            op2 = impPopStack().val;
+            op2 = addRangeCheckIfNeeded(op2, vectorLength, mustExpand);
              op1 = impSIMDPopStack(simdType);
  
              return gtNewScalarHWIntrinsicNode(JITtype2varType(sig->retType), op1, op2, intrinsic);
-
+        }
          case HWIntrinsicInfo::SimdInsertOp:
+        {
+            int vectorLength = getSIMDVectorLength(simdSizeBytes, simdBaseType);
+            op2              = impStackTop(1).val;
+            if (!mustExpand && (!op2->IsCnsIntOrI() || op2->AsIntConCommon()->IconValue() >= vectorLength))
+            {
+                // This is either an out-of-range constant or a non-constant.
+                // We won't expand it; it will be handled recursively, at which point 'mustExpand'
+                // will be true.
+                return nullptr;
+            }
              op3 = impPopStack().val;
-            op2 =
-                addRangeCheckIfNeeded(impPopStack().val, getSIMDVectorLength(simdSizeBytes, simdBaseType), mustExpand);
+            op2 = impPopStack().val;
+            op2 = addRangeCheckIfNeeded(op2, vectorLength, mustExpand);
              op1 = impSIMDPopStack(simdType);
  
              return gtNewSimdHWIntrinsicNode(simdType, op1, op2, op3, intrinsic, simdBaseType, simdSizeBytes);
-
+        }
          case HWIntrinsicInfo::Sha1HashOp:
              op3 = impSIMDPopStack(simdType);
              op2 = impPopStack().val;
diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp

index 8600bf3..8863809 100644 (file)
--- a/src/coreclr/src/jit/importer.cpp
+++ b/src/coreclr/src/jit/importer.cpp
@@ -1217,7 +1217,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
                  // If it is a multi-reg struct return, don't change the oper to GT_LCL_FLD.
                  // That is, the IR will be of the form lclVar = call for multi-reg return
                  //
-                GenTree* lcl = destAddr->gtOp.gtOp1;
+                GenTreeLclVar* lcl = destAddr->gtOp.gtOp1->AsLclVar();
                  if (src->AsCall()->HasMultiRegRetVal())
                  {
                      // Mark the struct LclVar as used in a MultiReg return context
@@ -1227,7 +1227,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
                      lcl->gtFlags |= GTF_DONT_CSE;
                      lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
                  }
-                else // The call result is not a multireg return
+                else if (lcl->gtType != src->gtType)
                  {
                      // We change this to a GT_LCL_FLD (from a GT_ADDR of a GT_LCL_VAR)
                      lcl->ChangeOper(GT_LCL_FLD);
@@ -1532,7 +1532,7 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
  
  #ifdef FEATURE_SIMD
      // Check to see if this is a SIMD type.
-    if (featureSIMD && !mayContainGCPtrs)
+    if (supportSIMDTypes() && !mayContainGCPtrs)
      {
          unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
  
@@ -9057,7 +9057,7 @@ REDO_RETURN_NODE:
      {
          // It is possible that we now have a lclVar of scalar type.
          // If so, don't transform it to GT_LCL_FLD.
-        if (varTypeIsStruct(lvaTable[op->AsLclVar()->gtLclNum].lvType))
+        if (lvaTable[op->AsLclVar()->gtLclNum].lvType != info.compRetNativeType)
          {
              op->ChangeOper(GT_LCL_FLD);
          }
@@ -18983,7 +18983,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
          if ((!foundSIMDType || (type == TYP_STRUCT)) && isSIMDorHWSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo)))
          {
              foundSIMDType = true;
-            if (featureSIMD && type == TYP_STRUCT)
+            if (supportSIMDTypes() && type == TYP_STRUCT)
              {
                  var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle());
                  lclVarInfo[i + argCnt].lclTypeInfo = structType;
diff --git a/src/coreclr/src/jit/lclvars.cpp b/src/coreclr/src/jit/lclvars.cpp

index 6f34e24..d7d0f11 100644 (file)
--- a/src/coreclr/src/jit/lclvars.cpp
+++ b/src/coreclr/src/jit/lclvars.cpp
@@ -124,7 +124,7 @@ void Compiler::lvaInitTypeRef()
      info.compILargsCount = info.compArgsCount;
  
  #ifdef FEATURE_SIMD
-    if (featureSIMD && (info.compRetNativeType == TYP_STRUCT))
+    if (supportSIMDTypes() && (info.compRetNativeType == TYP_STRUCT))
      {
          var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass);
          info.compRetType     = structType;
@@ -149,7 +149,7 @@ void Compiler::lvaInitTypeRef()
          if ((howToReturnStruct == SPK_PrimitiveType) || (howToReturnStruct == SPK_EnclosingType))
          {
              assert(returnType != TYP_UNKNOWN);
-            assert(!varTypeIsStruct(returnType));
+            assert(returnType != TYP_STRUCT);
  
              info.compRetNativeType = returnType;
  
@@ -397,7 +397,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
          {
              varDsc->lvType = TYP_BYREF;
  #ifdef FEATURE_SIMD
-            if (featureSIMD)
+            if (supportSIMDTypes())
              {
                  var_types simdBaseType = TYP_UNKNOWN;
                  var_types type         = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType);
@@ -505,7 +505,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
              }
          }
  #ifdef FEATURE_SIMD
-        else if (featureSIMD && varTypeIsSIMD(info.compRetType))
+        else if (supportSIMDTypes() && varTypeIsSIMD(info.compRetType))
          {
              varDsc->lvSIMDType = true;
              varDsc->lvBaseType =
@@ -598,8 +598,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
              // If the argType is a struct, then check if it is an HFA
              if (varTypeIsStruct(argType))
              {
-                hfaType  = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
-                isHfaArg = varTypeIsFloating(hfaType);
+                // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF.
+                hfaType  = GetHfaType(typeHnd);
+                isHfaArg = varTypeIsValidHfaType(hfaType);
              }
          }
          else if (info.compIsVarArgs)
@@ -616,11 +617,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
  
          if (isHfaArg)
          {
-            // We have an HFA argument, so from here on out treat the type as a float or double.
+            // We have an HFA argument, so from here on out treat the type as a float, double or vector.
              // The orginal struct type is available by using origArgType
              // We also update the cSlots to be the number of float/double fields in the HFA
              argType = hfaType;
-            cSlots  = varDsc->lvHfaSlots();
+            varDsc->SetHfaType(hfaType);
+            cSlots = varDsc->lvHfaSlots();
          }
          // The number of slots that must be enregistered if we are to consider this argument enregistered.
          // This is normally the same as cSlots, since we normally either enregister the entire object,
@@ -818,18 +820,31 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
              if (isHfaArg)
              {
                  // We need to save the fact that this HFA is enregistered
-                varDsc->lvSetIsHfa();
-                varDsc->lvSetIsHfaRegArg();
-                varDsc->SetHfaType(hfaType);
-                varDsc->lvIsMultiRegArg = (varDsc->lvHfaSlots() > 1);
+                // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics.
+                // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector
+                // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference.
+                if (varDsc->lvHfaSlots() != 1)
+                {
+                    varDsc->lvIsMultiRegArg = true;
+                }
              }
  
              varDsc->lvIsRegArg = 1;
  
  #if FEATURE_MULTIREG_ARGS
+#ifdef _TARGET_ARM64_
+            if (argType == TYP_STRUCT)
+            {
+                varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL);
+                if (cSlots == 2)
+                {
+                    varDsc->lvOtherArgReg          = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL);
+                    varDscInfo->hasMultiSlotStruct = true;
+                }
+            }
+#elif defined(UNIX_AMD64_ABI)
              if (varTypeIsStruct(argType))
              {
-#if defined(UNIX_AMD64_ABI)
                  varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
  
                  // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
@@ -844,17 +859,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
                  {
                      varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType);
                  }
-#else // ARM32 or ARM64
+            }
+#else  // ARM32
+            if (varTypeIsStruct(argType))
+            {
                  varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL);
-#ifdef _TARGET_ARM64_
-                if (cSlots == 2)
-                {
-                    varDsc->lvOtherArgReg          = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL);
-                    varDscInfo->hasMultiSlotStruct = true;
-                }
-#endif //  _TARGET_ARM64_
-#endif // defined(UNIX_AMD64_ABI)
              }
+#endif // ARM32
              else
  #endif // FEATURE_MULTIREG_ARGS
              {
@@ -879,14 +890,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
                      isFloat = varTypeIsFloating(firstEightByteType);
                  }
                  else
-#else
+#endif // !UNIX_AMD64_ABI
                  {
                      isFloat = varTypeIsFloating(argType);
                  }
-#endif // !UNIX_AMD64_ABI
  
  #if defined(UNIX_AMD64_ABI)
-                    if (varTypeIsStruct(argType))
+                if (varTypeIsStruct(argType))
                  {
                      // Print both registers, just to be clear
                      if (firstEightByteType == TYP_UNDEF)
@@ -1270,7 +1280,11 @@ void Compiler::lvaInitVarDsc(LclVarDsc*              varDsc,
          varDsc->lvStructGcCount = 1;
      }
  
-    // Set the lvType (before this point it is TYP_UNDEF).
+// Set the lvType (before this point it is TYP_UNDEF).
+
+#ifdef FEATURE_HFA
+    varDsc->SetHfaType(TYP_UNDEF);
+#endif
      if ((varTypeIsStruct(type)))
      {
          lvaSetStruct(varNum, typeHnd, typeHnd != nullptr, !tiVerificationNeeded);
@@ -2513,10 +2527,9 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
              if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES)
              {
                  var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
-                if (varTypeIsFloating(hfaType))
+                if (varTypeIsValidHfaType(hfaType))
                  {
-                    varDsc->_lvIsHfa = true;
-                    varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT);
+                    varDsc->SetHfaType(hfaType);
  
                      // hfa variables can never contain GC pointers
                      assert(varDsc->lvStructGcCount == 0);
@@ -2588,8 +2601,7 @@ void Compiler::lvaSetStructUsedAsVarArg(unsigned varNum)
      LclVarDsc* varDsc = &lvaTable[varNum];
      // For varargs methods incoming and outgoing arguments should not be treated
      // as HFA.
-    varDsc->_lvIsHfa          = false;
-    varDsc->_lvHfaTypeIsFloat = false;
+    varDsc->SetHfaType(TYP_UNDEF);
  #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
  #endif // FEATURE_HFA
  }
@@ -6913,16 +6925,9 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
          }
      }
  
-    if (varDsc->lvIsHfaRegArg())
+    if (varDsc->lvIsHfa())
      {
-        if (varDsc->lvHfaTypeIsFloat())
-        {
-            printf(" (enregistered HFA: float) ");
-        }
-        else
-        {
-            printf(" (enregistered HFA: double)");
-        }
+        printf(" HFA(%s) ", varTypeName(varDsc->GetHfaType()));
      }
  
      if (varDsc->lvDoNotEnregister)
diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp

index a2f7080..75c354d 100644 (file)
--- a/src/coreclr/src/jit/lower.cpp
+++ b/src/coreclr/src/jit/lower.cpp
@@ -5691,10 +5691,11 @@ void Lowering::ContainCheckRet(GenTreeOp* ret)
          {
              GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
              LclVarDsc*           varDsc       = &(comp->lvaTable[lclVarCommon->gtLclNum]);
-            assert(varDsc->lvIsMultiRegRet);
+            // This must be a multi-reg return or an HFA of a single element.
+            assert(varDsc->lvIsMultiRegRet || (varDsc->lvIsHfa() && varTypeIsValidHfaType(varDsc->lvType)));
  
              // Mark var as contained if not enregistrable.
-            if (!varTypeIsEnregisterableStruct(op1))
+            if (!varTypeIsEnregisterable(op1))
              {
                  MakeSrcContained(ret, op1);
              }
diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp

index 3e0d636..adfc34a 100644 (file)
--- a/src/coreclr/src/jit/lowerarmarch.cpp
+++ b/src/coreclr/src/jit/lowerarmarch.cpp
@@ -892,6 +892,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
              {
                  MakeSrcContained(node, op2);
  
+#if 0
+                // This is currently not supported downstream. The following (at least) need to be modified:
+                //   GenTree::isContainableHWIntrinsic() needs to handle this.
+                //   CodeGen::genConsumRegs()
+                // 
                  GenTree* op3 = argList->Rest()->Rest()->Current();
  
                  // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov
@@ -909,6 +914,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                          MakeSrcContained(node, op3);
                      }
                  }
+#endif
              }
              break;
  
diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp

index e672582..2e431aa 100644 (file)
--- a/src/coreclr/src/jit/lsra.cpp
+++ b/src/coreclr/src/jit/lsra.cpp
@@ -1425,7 +1425,7 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
      // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
      // references when using the general GC encoding.
      unsigned lclNum = (unsigned)(varDsc - compiler->lvaTable);
-    if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc))
+    if (varDsc->lvAddrExposed || !varTypeIsEnregisterable(varDsc))
      {
  #ifdef DEBUG
          Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
diff --git a/src/coreclr/src/jit/lsraarmarch.cpp b/src/coreclr/src/jit/lsraarmarch.cpp

index 251bf53..9a54c32 100644 (file)
--- a/src/coreclr/src/jit/lsraarmarch.cpp
+++ b/src/coreclr/src/jit/lsraarmarch.cpp
@@ -208,7 +208,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
          assert(retTypeDesc != nullptr);
          dstCandidates = retTypeDesc->GetABIReturnRegs();
      }
-    else if (varTypeIsFloating(registerType))
+    else if (varTypeUsesFloatArgReg(registerType))
      {
          dstCandidates = RBM_FLOATRET;
      }
diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp

index da1fa8f..1b0f61d 100644 (file)
--- a/src/coreclr/src/jit/lsrabuild.cpp
+++ b/src/coreclr/src/jit/lsrabuild.cpp
@@ -1841,15 +1841,7 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
      {
          RegState* intRegState   = &compiler->codeGen->intRegState;
          RegState* floatRegState = &compiler->codeGen->floatRegState;
-        // In the case of AMD64 we'll still use the floating point registers
-        // to model the register usage for argument on vararg calls, so
-        // we will ignore the varargs condition to determine whether we use
-        // XMM registers or not for setting up the call.
-        bool isFloat = (isFloatRegType(argDsc->lvType)
-#ifndef _TARGET_AMD64_
-                        && !compiler->info.compIsVarArgs
-#endif
-                        && !compiler->opts.compUseSoftFP);
+        bool      isFloat       = emitter::isFloatReg(argDsc->lvArgReg);
  
          if (argDsc->lvIsHfaRegArg())
          {
@@ -3070,6 +3062,15 @@ int LinearScan::BuildReturn(GenTree* tree)
          regMaskTP useCandidates = RBM_NONE;
  
  #if FEATURE_MULTIREG_RET
+#ifdef _TARGET_ARM64_
+        if (varTypeIsSIMD(tree))
+        {
+            useCandidates = allSIMDRegs();
+            BuildUse(op1, useCandidates);
+            return 1;
+        }
+#endif // _TARGET_ARM64_
+
          if (varTypeIsStruct(tree))
          {
              // op1 has to be either an lclvar or a multi-reg returning call
@@ -3209,7 +3210,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
          GenTreeObj* obj  = op1->AsObj();
          GenTree*    addr = obj->Addr();
          unsigned    size = obj->gtBlkSize;
-        assert(size <= TARGET_POINTER_SIZE);
+        assert(size <= MAX_PASS_SINGLEREG_BYTES);
          if (addr->OperIsLocalAddr())
          {
              // We don't need a source register.
diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp

index 50143d3..fbfa173 100644 (file)
--- a/src/coreclr/src/jit/morph.cpp
+++ b/src/coreclr/src/jit/morph.cpp
@@ -828,6 +828,7 @@ void fgArgTabEntry::Dump()
  {
      printf("fgArgTabEntry[arg %u", argNum);
      printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
+    printf(" %s", varTypeName(argType));
      if (regNum != REG_STK)
      {
          printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
@@ -867,7 +868,7 @@ void fgArgTabEntry::Dump()
      }
      if (isHfaRegArg)
      {
-        printf(", isHfa");
+        printf(", isHfa(%s)", varTypeName(GetHfaType()));
      }
      if (isBackFilled)
      {
@@ -1140,6 +1141,7 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned  argNum,
  
      curArgTabEntry->argNum     = argNum;
      curArgTabEntry->node       = node;
+    curArgTabEntry->argType    = node->TypeGet();
      curArgTabEntry->parent     = parent;
      curArgTabEntry->slotNum    = 0;
      curArgTabEntry->numRegs    = numRegs;
@@ -1153,7 +1155,7 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned  argNum,
      curArgTabEntry->needPlace  = false;
      curArgTabEntry->processed  = false;
  #ifdef FEATURE_HFA
-    curArgTabEntry->_isHfaArg = false;
+    curArgTabEntry->_hfaElemKind = HFA_ELEM_NONE;
  #endif
      curArgTabEntry->isBackFilled  = false;
      curArgTabEntry->isNonStandard = false;
@@ -1213,6 +1215,7 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
      curArgTabEntry->setRegNum(0, REG_STK);
      curArgTabEntry->argNum     = argNum;
      curArgTabEntry->node       = node;
+    curArgTabEntry->argType    = node->TypeGet();
      curArgTabEntry->parent     = parent;
      curArgTabEntry->slotNum    = nextSlotNum;
      curArgTabEntry->numRegs    = 0;
@@ -1226,7 +1229,7 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
      curArgTabEntry->needPlace  = false;
      curArgTabEntry->processed  = false;
  #ifdef FEATURE_HFA
-    curArgTabEntry->_isHfaArg = false;
+    curArgTabEntry->_hfaElemKind = HFA_ELEM_NONE;
  #endif
      curArgTabEntry->isBackFilled  = false;
      curArgTabEntry->isNonStandard = false;
@@ -2300,12 +2303,16 @@ void fgArgInfo::EvalArgsToTemps()
                      {
                          setupArg = compiler->fgMorphCopyBlock(setupArg);
  #if defined(_TARGET_ARMARCH_) || defined(UNIX_AMD64_ABI)
-                        // This scalar LclVar widening step is only performed for ARM and AMD64 unix.
-                        //
-                        CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
-                        unsigned             structSize = varDsc->lvExactSize;
+                        if (lclVarType == TYP_STRUCT)
+                        {
+                            // This scalar LclVar widening step is only performed for ARM architectures and AMD64 Unix.
+                            //
+                            CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
+                            unsigned             structSize = varDsc->lvExactSize;
  
-                        scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
+                            scalarType =
+                                compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
+                        }
  #endif // _TARGET_ARMARCH_ || defined (UNIX_AMD64_ABI)
                      }
  
@@ -2391,7 +2398,7 @@ void fgArgInfo::EvalArgsToTemps()
  
  #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
  
-            if (varTypeIsStruct(defArg))
+            if (defArg->TypeGet() == TYP_STRUCT)
              {
                  clsHnd = compiler->gtGetStructHandleIfPresent(defArg);
                  noway_assert(clsHnd != NO_CLASS_HANDLE);
@@ -3079,7 +3086,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
  
  #ifdef FEATURE_HFA
          hfaType  = GetHfaType(argx);
-        isHfaArg = varTypeIsFloating(hfaType);
+        isHfaArg = varTypeIsValidHfaType(hfaType);
  
  #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
          // Make sure for vararg methods isHfaArg is not true.
@@ -3628,7 +3635,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
  #ifdef FEATURE_HFA
          if (isHfaArg)
          {
-            newArgEntry->setHfaType(hfaType, hfaSlots);
+            newArgEntry->SetHfaType(hfaType, hfaSlots);
          }
  #endif // FEATURE_HFA
          newArgEntry->SetMultiRegNums();
@@ -3872,7 +3879,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                  {
                      if (isPow2(passingSize))
                      {
-                        canTransform = true;
+                        canTransform = (!argEntry->isHfaArg || (passingSize == genTypeSize(argEntry->GetHfaType())));
                      }
  
  #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
@@ -3957,15 +3964,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                  }
                  else
                  {
-                    // We have a struct argument that's less than pointer size, and it is either a power of 2,
+                    // We have a struct argument that fits into a register, and it is either a power of 2,
                      // or a local.
-                    // Change our GT_OBJ into a GT_IND of the correct type.
+                    // Change our argument, as needed, into a value of the appropriate type.
                      CLANG_FORMAT_COMMENT_ANCHOR;
  
  #ifdef _TARGET_ARM_
                      assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2)));
  #else
-                    assert(size == 1);
+                    assert((size == 1) ||
+                           (varTypeIsSIMD(structBaseType) && size == (genTypeSize(structBaseType) / REGSIZE_BYTES)));
  #endif
  
                      assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
@@ -4012,7 +4020,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                                      // we will use the first and only promoted field
                                      argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
  
-                                    if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
+                                    if (varTypeIsEnregisterable(fieldVarDsc->TypeGet()) &&
                                          (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
                                      {
                                          // Just use the existing field's type
@@ -4025,7 +4033,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                                          argObj->ChangeOper(GT_LCL_FLD);
                                          argObj->gtType = structBaseType;
                                      }
-                                    assert(varTypeCanReg(argObj->TypeGet()));
+                                    assert(varTypeIsEnregisterable(argObj->TypeGet()));
                                      assert(copyBlkClass == NO_CLASS_HANDLE);
                                  }
                                  else
@@ -4043,7 +4051,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                                  copyBlkClass = objClass;
                              }
                          }
-                        else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
+                        else if (genActualType(varDsc->TypeGet()) != structBaseType)
                          {
                              // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
                              argObj->ChangeOper(GT_LCL_FLD);
@@ -4055,44 +4063,41 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                          // Not a GT_LCL_VAR, so we can just change the type on the node
                          argObj->gtType = structBaseType;
                      }
-                    assert(varTypeCanReg(argObj->TypeGet()) ||
-                           ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType)));
-
-                    size = 1;
+                    assert(varTypeIsEnregisterable(argObj->TypeGet()) ||
+                           ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType)));
                  }
  #endif // !_TARGET_X86_
  
  #ifndef UNIX_AMD64_ABI
                  // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
-                if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef)
+                if (isHfaArg && passUsingFloatRegs)
                  {
-                    if (isHfaArg && passUsingFloatRegs)
-                    {
-                        size = argEntry->numRegs;
-                    }
-                    else
-                    {
-                        // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
-                        // we must copyblk to a temp before doing the obj to avoid
-                        // the obj reading memory past the end of the valuetype
-                        CLANG_FORMAT_COMMENT_ANCHOR;
+                    size = argEntry->numRegs;
+                }
+                else if (structBaseType == TYP_STRUCT)
+                {
+                    // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
+                    // we must copyblk to a temp before doing the obj to avoid
+                    // the obj reading memory past the end of the valuetype
+                    CLANG_FORMAT_COMMENT_ANCHOR;
  
-                        if (roundupSize > originalSize)
-                        {
-                            copyBlkClass = objClass;
+                    if (roundupSize > originalSize)
+                    {
+                        copyBlkClass = objClass;
  
-                            // There are a few special cases where we can omit using a CopyBlk
-                            // where we normally would need to use one.
+                        // There are a few special cases where we can omit using a CopyBlk
+                        // where we normally would need to use one.
  
-                            if (argObj->gtObj.gtOp1->IsLocalAddrExpr() != nullptr) // Is the source a LclVar?
-                            {
-                                copyBlkClass = NO_CLASS_HANDLE;
-                            }
+                        if (argObj->OperIs(GT_OBJ) &&
+                            argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar?
+                        {
+                            copyBlkClass = NO_CLASS_HANDLE;
                          }
-
-                        size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                      }
+
+                    size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                  }
+
  #endif // !UNIX_AMD64_ABI
              }
          }
@@ -4159,7 +4164,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
  #if FEATURE_MULTIREG_ARGS
          if (isStructArg)
          {
-            if (size > 1 || isHfaArg)
+            if (((argEntry->numRegs + argEntry->numSlots) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT))
              {
                  hasMultiregStructArgs = true;
              }
@@ -4376,11 +4381,37 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
          }
  
          unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots);
-        if ((size > 1) || fgEntryPtr->isHfaArg)
+        if ((size > 1) || (fgEntryPtr->isHfaArg && argx->TypeGet() == TYP_STRUCT))
          {
              foundStructArg = true;
              if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST))
              {
+                if (fgEntryPtr->isHfaArg)
+                {
+                    var_types hfaType = fgEntryPtr->hfaType;
+                    unsigned  structSize;
+                    if (argx->OperIs(GT_OBJ))
+                    {
+                        structSize = argx->AsObj()->gtBlkSize;
+                    }
+                    else
+                    {
+                        assert(argx->OperIs(GT_LCL_VAR));
+                        structSize = lvaGetDesc(argx->AsLclVar()->gtLclNum)->lvExactSize;
+                    }
+                    assert(structSize > 0);
+                    if (structSize == genTypeSize(hfaType))
+                    {
+                        if (argx->OperIs(GT_OBJ))
+                        {
+                            fgMorphBlkToInd(argx->AsObj(), hfaType);
+                        }
+                        else
+                        {
+                            argx->gtType = hfaType;
+                        }
+                    }
+                }
                  arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
  
                  // Did we replace 'argx' with a new tree?
@@ -4490,14 +4521,19 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
  #if FEATURE_MULTIREG_ARGS
      // Examine 'arg' and setup argValue objClass and structSize
      //
-    CORINFO_CLASS_HANDLE objClass   = gtGetStructHandleIfPresent(arg);
-    GenTree*             argValue   = arg; // normally argValue will be arg, but see right below
-    unsigned             structSize = 0;
+    CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg);
+    noway_assert(objClass != NO_CLASS_HANDLE);
+    GenTree* argValue   = arg; // normally argValue will be arg, but see right below
+    unsigned structSize = 0;
  
-    if (arg->OperGet() == GT_OBJ)
+    if (arg->TypeGet() != TYP_STRUCT)
+    {
+        structSize = genTypeSize(arg->TypeGet());
+        assert(structSize == info.compCompHnd->getClassSize(objClass));
+    }
+    else if (arg->OperGet() == GT_OBJ)
      {
          GenTreeObj* argObj = arg->AsObj();
-        objClass           = argObj->gtClass;
          structSize         = argObj->Size();
          assert(structSize == info.compCompHnd->getClassSize(objClass));
  
@@ -4527,7 +4563,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
      }
      else
      {
-        objClass   = gtGetStructHandleIfPresent(arg);
          structSize = info.compCompHnd->getClassSize(objClass);
      }
      noway_assert(objClass != NO_CLASS_HANDLE);
@@ -4538,8 +4573,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
      unsigned  elemSize                = 0;
      var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
  
-    hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
-    if (varTypeIsFloating(hfaType)
+    hfaType = fgEntryPtr->hfaType;
+    if (varTypeIsValidHfaType(hfaType)
  #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
          && !fgEntryPtr->isVararg
  #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
@@ -4657,8 +4692,13 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
  #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
              )
          {
-            // We have a HFA struct
-            noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
+            // We have a HFA struct.
+            // Note that GetHfaType may not be the same as elemType, since TYP_SIMD8 is handled the same as TYP_DOUBLE.
+            var_types useElemType = elemType;
+#ifdef _TARGET_ARM64_
+            useElemType = (elemType == TYP_SIMD8) ? TYP_DOUBLE : useElemType;
+#endif // _TARGET_ARM64_
+            noway_assert(useElemType == varDsc->GetHfaType());
              noway_assert(elemSize == genTypeSize(elemType));
              noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
              noway_assert(elemSize * elemCount == varDsc->lvExactSize);
@@ -5291,7 +5331,7 @@ void Compiler::fgFixupStructReturn(GenTree* callNode)
  
  #if FEATURE_MULTIREG_RET
      // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
-    assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
+    assert((call->gtType != TYP_STRUCT) || call->HasMultiRegRetVal() || callHasRetBuffArg);
  #else // !FEATURE_MULTIREG_RET
      // No more struct returns
      assert(call->TypeGet() != TYP_STRUCT);
@@ -7104,7 +7144,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
  
  #elif defined(_TARGET_ARM64_) // ARM64
                  var_types hfaType  = GetHfaType(argx);
-                bool      isHfaArg = varTypeIsFloating(hfaType);
+                bool      isHfaArg = varTypeIsValidHfaType(hfaType);
                  size_t    size     = 1;
  
                  if (isHfaArg)
@@ -17414,12 +17454,10 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent)
              // The field must be an enregisterable type; otherwise it would not be a promoted field.
              // The tree type may not match, e.g. for return types that have been morphed, but both
              // must be enregisterable types.
-            // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but
-            // there may be places where that would violate existing assumptions.
              var_types treeType  = tree->TypeGet();
              var_types fieldType = fldVarDsc->TypeGet();
-            assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) &&
-                   (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType)));
+            assert((varTypeIsEnregisterable(treeType) || varTypeIsSIMD(treeType)) &&
+                   (varTypeIsEnregisterable(fieldType) || varTypeIsSIMD(fieldType)));
  
              tree->ChangeOper(GT_LCL_VAR);
              assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
@@ -17474,23 +17512,28 @@ void Compiler::fgMarkImplicitByRefArgs()
  
          if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
          {
-            size_t size;
+            size_t size = varDsc->lvExactSize;
+            assert(size == info.compCompHnd->getClassSize(varDsc->lvVerTypeInfo.GetClassHandle()));
  
-            if (varDsc->lvSize() > REGSIZE_BYTES)
+            bool isPassedByReference;
+#if defined(_TARGET_AMD64_)
+            isPassedByReference = (size > REGSIZE_BYTES || (size & (size - 1)) != 0);
+#elif defined(_TARGET_ARM64_)
+            if (size > TARGET_POINTER_SIZE)
              {
-                size = varDsc->lvSize();
+                CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandleForValueClass();
+                structPassingKind    howToPassStruct;
+                var_types            type =
+                    getArgTypeForStruct(clsHnd, &howToPassStruct, this->info.compIsVarArgs, varDsc->lvExactSize);
+                isPassedByReference = (howToPassStruct == SPK_ByReference);
              }
              else
              {
-                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
-                size                         = info.compCompHnd->getClassSize(typeHnd);
+                isPassedByReference = false;
              }
-
-#if defined(_TARGET_AMD64_)
-            if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
-#elif defined(_TARGET_ARM64_)
-            if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs))
  #endif
+
+            if (isPassedByReference)
              {
                  // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
                  // So I am now using it to indicate that this is one of the weird implicit
@@ -17636,8 +17679,7 @@ void Compiler::fgRetypeImplicitByRefArgs()
                      // the parameter which is really a pointer to the struct.
                      fieldVarDsc->lvIsRegArg      = false;
                      fieldVarDsc->lvIsMultiRegArg = false;
-                    fieldVarDsc->lvSetIsHfaRegArg(false);
-                    fieldVarDsc->lvArgReg = REG_NA;
+                    fieldVarDsc->lvArgReg        = REG_NA;
  #if FEATURE_MULTIREG_ARGS
                      fieldVarDsc->lvOtherArgReg = REG_NA;
  #endif
diff --git a/src/coreclr/src/jit/register_arg_convention.h b/src/coreclr/src/jit/register_arg_convention.h

index 28f29b7..ad20b4a 100644 (file)
--- a/src/coreclr/src/jit/register_arg_convention.h
+++ b/src/coreclr/src/jit/register_arg_convention.h
@@ -58,7 +58,7 @@ public:
      // return ref to current register arg for this type
      unsigned& regArgNum(var_types type)
      {
-        return varTypeIsFloating(type) ? floatRegArgNum : intRegArgNum;
+        return varTypeUsesFloatArgReg(type) ? floatRegArgNum : intRegArgNum;
      }
  
      // Allocate a set of contiguous argument registers. "type" is either an integer
@@ -110,7 +110,7 @@ private:
      // return max register arg for this type
      unsigned maxRegArgNum(var_types type)
      {
-        return varTypeIsFloating(type) ? maxFloatRegArgNum : maxIntRegArgNum;
+        return varTypeUsesFloatArgReg(type) ? maxFloatRegArgNum : maxIntRegArgNum;
      }
  
      bool enoughAvailRegs(var_types type, unsigned numRegs = 1);
diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp

index b4cecb3..81494b0 100644 (file)
--- a/src/coreclr/src/jit/simd.cpp
+++ b/src/coreclr/src/jit/simd.cpp
@@ -121,7 +121,7 @@ int Compiler::getSIMDTypeAlignment(var_types simdType)
  //
  var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */)
  {
-    assert(featureSIMD);
+    assert(supportSIMDTypes());
  
      if (m_simdHandleCache == nullptr)
      {
diff --git a/src/coreclr/src/jit/target.h b/src/coreclr/src/jit/target.h

index 97df447..a225d1a 100644 (file)
--- a/src/coreclr/src/jit/target.h
+++ b/src/coreclr/src/jit/target.h
@@ -246,6 +246,7 @@ typedef unsigned char   regNumberSmall;
    #define FEATURE_MULTIREG_ARGS_OR_RET  1  // Support for passing and/or returning single values in more than one register
    #define FEATURE_MULTIREG_ARGS         0  // Support for passing a single argument in more than one register  
    #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register
+  #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
    #define MAX_PASS_MULTIREG_BYTES       0  // No multireg arguments (note this seems wrong as MAX_ARG_REG_COUNT is 2)
    #define MAX_RET_MULTIREG_BYTES        8  // Maximum size of a struct that could be returned in more than one register
  
@@ -540,6 +541,7 @@ typedef unsigned char   regNumberSmall;
    #define FEATURE_FASTTAILCALL     1       // Tail calls made as epilog+jmp
    #define FEATURE_TAILCALL_OPT     1       // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
    #define FEATURE_SET_FLAGS        0       // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+  #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
  #ifdef    UNIX_AMD64_ABI
    #define FEATURE_MULTIREG_ARGS_OR_RET  1  // Support for passing and/or returning single values in more than one register
    #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register  
@@ -924,6 +926,7 @@ typedef unsigned char   regNumberSmall;
    #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register (including passing HFAs)
    #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register (including HFA returns)
    #define FEATURE_STRUCT_CLASSIFIER     0  // Uses a classifier function to determine is structs are passed/returned in more than one register
+  #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
    #define MAX_PASS_MULTIREG_BYTES      32  // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles)
    #define MAX_RET_MULTIREG_BYTES       32  // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
    #define MAX_ARG_REG_COUNT             4  // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
@@ -1231,9 +1234,10 @@ typedef unsigned char   regNumberSmall;
    #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register  
    #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register  
    #define FEATURE_STRUCT_CLASSIFIER     0  // Uses a classifier function to determine is structs are passed/returned in more than one register
-  #define MAX_PASS_MULTIREG_BYTES      32  // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA)
-  #define MAX_RET_MULTIREG_BYTES       32  // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
-  #define MAX_ARG_REG_COUNT             4  // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
+  #define MAX_PASS_SINGLEREG_BYTES     16  // Maximum size of a struct passed in a single register (16-byte vector).
+  #define MAX_PASS_MULTIREG_BYTES      64  // Maximum size of a struct that could be passed in more than one register (max is 4 16-byte vectors using an HVA)
+  #define MAX_RET_MULTIREG_BYTES       64  // Maximum size of a struct that could be returned in more than one register (Max is an HVA of 4 16-byte vectors)
+  #define MAX_ARG_REG_COUNT             4  // Maximum registers used to pass a single argument in multiple registers. (max is 4 128-bit vectors using an HVA)
    #define MAX_RET_REG_COUNT             4  // Maximum registers used to return a value.
  
    #define NOGC_WRITE_BARRIERS      1       // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers
@@ -1955,10 +1959,10 @@ inline regNumber regNextOfType(regNumber reg, var_types type)
   *  Type checks
   */
  
-inline bool isFloatRegType(int /* s/b "var_types" */ type)
+inline bool isFloatRegType(var_types type)
  {
  #if CPU_HAS_FP_SUPPORT
-    return type == TYP_DOUBLE || type == TYP_FLOAT;
+    return varTypeUsesFloatReg(type);
  #else
      return false;
  #endif
diff --git a/src/coreclr/src/jit/vartype.h b/src/coreclr/src/jit/vartype.h

index 04793ea..83824ac 100644 (file)
--- a/src/coreclr/src/jit/vartype.h
+++ b/src/coreclr/src/jit/vartype.h
@@ -174,9 +174,9 @@ inline bool varTypeIsI(T vt)
  }
  
  template <class T>
-inline bool varTypeCanReg(T vt)
+inline bool varTypeIsEnregisterable(T vt)
  {
-    return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I | VTF_FLT)) != 0);
+    return (TypeGet(vt) != TYP_STRUCT);
  }
  
  template <class T>
@@ -271,9 +271,56 @@ inline bool varTypeIsStruct(T vt)
  }
  
  template <class T>
-inline bool varTypeIsEnregisterableStruct(T vt)
+inline bool varTypeUsesFloatReg(T vt)
  {
-    return (TypeGet(vt) != TYP_STRUCT);
+    // Note that not all targets support SIMD, but if they don't, varTypeIsSIMD will
+    // always return false.
+    return varTypeIsFloating(vt) || varTypeIsSIMD(vt);
+}
+
+template <class T>
+inline bool varTypeUsesFloatArgReg(T vt)
+{
+#ifdef _TARGET_ARM64_
+    // Arm64 passes SIMD types in floating point registers.
+    return varTypeUsesFloatReg(vt);
+#else
+    // Other targets pass them as regular structs - by reference or by value.
+    return varTypeIsFloating(vt);
+#endif
+}
+
+//------------------------------------------------------------------------
+// varTypeIsValidHfaType: Determine if the type is a valid HFA type
+//
+// Arguments:
+//    vt - the type of interest
+//
+// Return Value:
+//    Returns true iff the type is a valid HFA type.
+//
+// Notes:
+//    This should only be called with the return value from GetHfaType().
+//    The only valid values are TYP_UNDEF, for which this returns false,
+//    TYP_FLOAT, TYP_DOUBLE, or (ARM64-only) TYP_SIMD*.
+//
+template <class T>
+inline bool varTypeIsValidHfaType(T vt)
+{
+#ifdef FEATURE_HFA
+    bool isValid = (TypeGet(vt) != TYP_UNDEF);
+    if (isValid)
+    {
+#ifdef _TARGET_ARM64_
+        assert(varTypeUsesFloatReg(vt));
+#else  // !_TARGET_ARM64_
+        assert(varTypeIsFloating(vt));
+#endif // !_TARGET_ARM64_
+    }
+    return isValid;
+#else  // !FEATURE_HFA
+    return false;
+#endif // !FEATURE_HFA
  }
  
  /*****************************************************************************/
diff --git a/src/coreclr/src/vm/argdestination.h b/src/coreclr/src/vm/argdestination.h

index 386ba57..8ddd7b2 100644 (file)
--- a/src/coreclr/src/vm/argdestination.h
+++ b/src/coreclr/src/vm/argdestination.h
@@ -60,22 +60,24 @@ public:
      //  fieldBytes - size of the structure
      void CopyHFAStructToRegister(void *src, int fieldBytes)
      {
-        // We are either copying either a float or double HFA and need to
+        // We are copying a float, double or vector HFA/HVA and need to
          // enregister each field.
  
          int floatRegCount = m_argLocDescForStructInRegs->m_cFloatReg;
-        bool typeFloat = m_argLocDescForStructInRegs->m_isSinglePrecision;
+        int hfaFieldSize = m_argLocDescForStructInRegs->m_hfaFieldSize;
          UINT64* dest = (UINT64*) this->GetDestinationAddress();
  
          for (int i = 0; i < floatRegCount; ++i) 
          {
              // Copy 4 or 8 bytes from src.
-            UINT64 val = typeFloat ? *((UINT32*)src + i) : *((UINT64*)src + i);
+            UINT64 val = (hfaFieldSize == 4) ? *((UINT32*)src) : *((UINT64*)src);
              // Always store 8 bytes
              *(dest++) = val;
-            // For now, always zero the next 8 bytes.
-            // (When HVAs are supported we will get the next 8 bytes from src.)
-            *(dest++) = 0;
+            // Either zero the next 8 bytes or get the next 8 bytes from src for 16-byte vector.
+            *(dest++) = (hfaFieldSize == 16) ? *((UINT64*)src + 1) : 0;
+
+            // Increment src by the appropriate amount.
+            src = (void*)((char*)src + hfaFieldSize);
          }
      }
  
diff --git a/src/coreclr/src/vm/arm64/CallDescrWorkerARM64.asm b/src/coreclr/src/vm/arm64/CallDescrWorkerARM64.asm

index fe277ce..9f2ec24 100644 (file)
--- a/src/coreclr/src/vm/arm64/CallDescrWorkerARM64.asm
+++ b/src/coreclr/src/vm/arm64/CallDescrWorkerARM64.asm
@@ -93,7 +93,7 @@ LNoFloatingPoint
          bne     LNoDoubleReturn
  
  LFloatReturn
-        str     d0, [x19, #(CallDescrData__returnValue + 0)]
+        str     q0, [x19, #(CallDescrData__returnValue + 0)]
          b       LReturnDone
  
  LNoDoubleReturn
@@ -117,6 +117,16 @@ LNoFloatHFAReturn
  
  LNoDoubleHFAReturn
  
+        ;;VectorHFAReturn  return case
+        cmp     w3, #64
+        bne     LNoVectorHFAReturn
+
+        stp     q0, q1, [x19, #(CallDescrData__returnValue + 0)]
+        stp     q2, q3, [x19, #(CallDescrData__returnValue + 0x20)]
+        b       LReturnDone
+
+LNoVectorHFAReturn
+
          EMIT_BREAKPOINT ; Unreachable
  
  LIntReturn
diff --git a/src/coreclr/src/vm/arm64/asmconstants.h b/src/coreclr/src/vm/arm64/asmconstants.h

index caffa80..8c99ed8 100644 (file)
--- a/src/coreclr/src/vm/arm64/asmconstants.h
+++ b/src/coreclr/src/vm/arm64/asmconstants.h
@@ -61,7 +61,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegi
  #define CallDescrData__fpReturnSize             0x20
  #define CallDescrData__pTarget                  0x28
  #define CallDescrData__pRetBuffArg              0x30
-#define CallDescrData__returnValue              0x38
+#define CallDescrData__returnValue              0x40
  
  ASMCONSTANTS_C_ASSERT(CallDescrData__pSrc                 == offsetof(CallDescrData, pSrc))
  ASMCONSTANTS_C_ASSERT(CallDescrData__numStackSlots        == offsetof(CallDescrData, numStackSlots))
diff --git a/src/coreclr/src/vm/arm64/asmhelpers.asm b/src/coreclr/src/vm/arm64/asmhelpers.asm

index 5883597..71e53d3 100644 (file)
--- a/src/coreclr/src/vm/arm64/asmhelpers.asm
+++ b/src/coreclr/src/vm/arm64/asmhelpers.asm
@@ -705,8 +705,9 @@ NoFloatingPointRetVal
  
          ; x0 = fpRetSize
  
-        ; return value is stored before float argument registers
-        add         x1, sp, #(__PWTB_FloatArgumentRegisters - 0x20)
+        ; The return value is stored before float argument registers
+        ; The maximum size of a return value is 0x40 (HVA of 4x16)
+        add         x1, sp, #(__PWTB_FloatArgumentRegisters - 0x40)
          bl          setStubReturnValue
  
          EPILOG_WITH_TRANSITION_BLOCK_RETURN
diff --git a/src/coreclr/src/vm/arm64/calldescrworkerarm64.S b/src/coreclr/src/vm/arm64/calldescrworkerarm64.S

index f987d40..8e8084b 100644 (file)
--- a/src/coreclr/src/vm/arm64/calldescrworkerarm64.S
+++ b/src/coreclr/src/vm/arm64/calldescrworkerarm64.S
@@ -85,7 +85,7 @@ LOCAL_LABEL(NoFloatingPoint):
      bne     LOCAL_LABEL(NoDoubleReturn)
  
  LOCAL_LABEL(FloatReturn):
-    str     d0, [x19, #(CallDescrData__returnValue + 0)]
+    str     q0, [x19, #(CallDescrData__returnValue + 0)]
      b       LOCAL_LABEL(ReturnDone)
  
  LOCAL_LABEL(NoDoubleReturn):
@@ -97,6 +97,7 @@ LOCAL_LABEL(NoDoubleReturn):
      stp     s0, s1, [x19, #(CallDescrData__returnValue + 0)]
      stp     s2, s3, [x19, #(CallDescrData__returnValue + 0x08)]
      b       LOCAL_LABEL(ReturnDone)
+
  LOCAL_LABEL(NoFloatHFAReturn):
  
      //DoubleHFAReturn  return case
@@ -109,6 +110,16 @@ LOCAL_LABEL(NoFloatHFAReturn):
  
  LOCAL_LABEL(NoDoubleHFAReturn):
  
+    //VectorHFAReturn  return case (w3 == 64: store q0-q3, 4 x 16 bytes, to returnValue)
+    cmp     w3, #64
+    bne     LOCAL_LABEL(NoVectorHFAReturn)
+
+    stp     q0, q1, [x19, #(CallDescrData__returnValue + 0)]
+    stp     q2, q3, [x19, #(CallDescrData__returnValue + 0x20)]
+    b       LOCAL_LABEL(ReturnDone)
+
+LOCAL_LABEL(NoVectorHFAReturn):
+
      EMIT_BREAKPOINT // Unreachable
  
  LOCAL_LABEL(IntReturn):
diff --git a/src/coreclr/src/vm/arm64/cgencpu.h b/src/coreclr/src/vm/arm64/cgencpu.h

index fd1fbaf..a2cac4e 100644 (file)
--- a/src/coreclr/src/vm/arm64/cgencpu.h
+++ b/src/coreclr/src/vm/arm64/cgencpu.h
@@ -51,7 +51,7 @@ extern PCODE GetPreStubEntryPoint();
  #define CACHE_LINE_SIZE                         64
  #define LOG2SLOT                                LOG2_PTRSIZE
  
-#define ENREGISTERED_RETURNTYPE_MAXSIZE         32  // bytes (four FP registers: d0,d1,d2 and d3)
+#define ENREGISTERED_RETURNTYPE_MAXSIZE         64  // bytes (four vector registers: q0,q1,q2 and q3)
  #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 16  // bytes (two int registers: x0 and x1)
  #define ENREGISTERED_PARAMTYPE_MAXSIZE          16  // bytes (max value type size that can be passed by value)
  
diff --git a/src/coreclr/src/vm/callhelpers.h b/src/coreclr/src/vm/callhelpers.h

index db9cfad..f0d718c 100644 (file)
--- a/src/coreclr/src/vm/callhelpers.h
+++ b/src/coreclr/src/vm/callhelpers.h
@@ -39,8 +39,13 @@ struct CallDescrData
      // Return value
      //
  #ifdef ENREGISTERED_RETURNTYPE_MAXSIZE
+#ifdef _TARGET_ARM64_
+    // Use NEON128 to ensure proper alignment for vectors.
+    DECLSPEC_ALIGN(16) NEON128 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(NEON128)];
+#else
      // Use UINT64 to ensure proper alignment
      UINT64 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(UINT64)];
+#endif
  #else
      UINT64 returnValue;
  #endif
diff --git a/src/coreclr/src/vm/callingconvention.h b/src/coreclr/src/vm/callingconvention.h

index 7368fec..cbc6aad 100644 (file)
--- a/src/coreclr/src/vm/callingconvention.h
+++ b/src/coreclr/src/vm/callingconvention.h
@@ -49,9 +49,25 @@ struct ArgLocDesc
  
  #endif // UNIX_AMD64_ABI
  
+#ifdef FEATURE_HFA
+    static unsigned getHFAFieldSize(CorElementType  hfaType)
+    {
+        switch (hfaType)
+        {
+        case ELEMENT_TYPE_R4: return 4;
+        case ELEMENT_TYPE_R8: return 8;
+        // ELEMENT_TYPE_VALUETYPE is overloaded here to stand for a 16-byte vector field.
+        case ELEMENT_TYPE_VALUETYPE: return 16;
+        default: _ASSERTE(!"Invalid HFA Type"); return 0;
+        }
+    }
+#endif
  #if defined(_TARGET_ARM64_)
-    bool    m_isSinglePrecision;  // For determining if HFA is single or double
-                                  // precision
+    unsigned m_hfaFieldSize;      // Size of HFA field in bytes.
+    void setHFAFieldSize(CorElementType  hfaType)
+    {
+        m_hfaFieldSize = getHFAFieldSize(hfaType);
+    }
  #endif // defined(_TARGET_ARM64_)
  
  #if defined(_TARGET_ARM_)
@@ -76,7 +92,7 @@ struct ArgLocDesc
          m_fRequires64BitAlignment = FALSE;
  #endif
  #if defined(_TARGET_ARM64_)
-        m_isSinglePrecision = FALSE;
+        m_hfaFieldSize = 0;
  #endif // defined(_TARGET_ARM64_)
  #if defined(UNIX_AMD64_ABI)
          m_eeClass = NULL;
@@ -589,10 +605,9 @@ public:
              if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA())
              {
                  CorElementType type = m_argTypeHandle.GetHFAType();
-                bool isFloatType = (type == ELEMENT_TYPE_R4);
+                pLoc->setHFAFieldSize(type);
+                pLoc->m_cFloatReg = GetArgSize()/pLoc->m_hfaFieldSize;
  
-                pLoc->m_cFloatReg = isFloatType ? GetArgSize()/sizeof(float): GetArgSize()/sizeof(double);
-                pLoc->m_isSinglePrecision = isFloatType;
              }
              else
              {
@@ -1297,16 +1312,14 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
          if (thValueType.IsHFA())
          {
              CorElementType type = thValueType.GetHFAType();
-            bool isFloatType = (type == ELEMENT_TYPE_R4);
-
-            cFPRegs = (type == ELEMENT_TYPE_R4)? (argSize/sizeof(float)): (argSize/sizeof(double));
  
              m_argLocDescForStructInRegs.Init();
-            m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs;
              m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg;
  
-            m_argLocDescForStructInRegs.m_isSinglePrecision = isFloatType;
-                
+            m_argLocDescForStructInRegs.setHFAFieldSize(type);
+            cFPRegs = argSize/m_argLocDescForStructInRegs.m_hfaFieldSize;
+            m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs;
+
              m_hasArgLocDescForStructInRegs = true;
          }
          else 
@@ -1474,10 +1487,8 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags()
              {
                  CorElementType hfaType = thValueType.GetHFAType();
  
-                flags |= (hfaType == ELEMENT_TYPE_R4) ? 
-                    ((4 * sizeof(float)) << RETURN_FP_SIZE_SHIFT) : 
-                    ((4 * sizeof(double)) << RETURN_FP_SIZE_SHIFT);
-
+                int hfaFieldSize = ArgLocDesc::getHFAFieldSize(hfaType);
+                flags |= ((4 * hfaFieldSize) << RETURN_FP_SIZE_SHIFT);
                  break;
              }
  #endif
diff --git a/src/coreclr/src/vm/class.cpp b/src/coreclr/src/vm/class.cpp

index af1073f..14eb059 100644 (file)
--- a/src/coreclr/src/vm/class.cpp
+++ b/src/coreclr/src/vm/class.cpp
@@ -1173,6 +1173,58 @@ bool MethodTable::IsHFA()
  #endif // !FEATURE_HFA
  
  //*******************************************************************************
+int MethodTable::GetVectorSize()
+{
+    // This is supported for finding HVA types for Arm64. In order to support the altjit,
+    // we support this on 64-bit platforms (i.e. Arm64 and X64).
+#ifdef _TARGET_64BIT_
+    if (IsIntrinsicType())
+    {
+        LPCUTF8 namespaceName;
+        LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName);
+        int vectorSize = 0;
+
+        if (strcmp(className, "Vector`1") == 0)
+        {
+            vectorSize = GetNumInstanceFieldBytes();
+            _ASSERTE(strcmp(namespaceName, "System.Numerics") == 0);
+            return vectorSize;
+        }
+        if (strcmp(className, "Vector128`1") == 0)
+        {
+            vectorSize = 16;
+        }
+        else if (strcmp(className, "Vector256`1") == 0)
+        {
+            vectorSize = 32;
+        }
+        else if (strcmp(className, "Vector64`1") == 0)
+        {
+            vectorSize = 8;
+        }
+        if (vectorSize != 0)
+        {
+            // We need to verify that T (the element or "base" type) is a primitive type.
+            TypeHandle typeArg = GetInstantiation()[0];
+            CorElementType corType = typeArg.GetSignatureCorElementType();
+            bool isSupportedElementType = (corType >= ELEMENT_TYPE_I1 && corType <= ELEMENT_TYPE_R8);
+            // These element types are not supported for Vector64<T>.
+            if ((vectorSize == 8) && (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8))
+            {
+                isSupportedElementType = false;
+            }
+            if (isSupportedElementType)
+            {
+                _ASSERTE(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0);
+                return vectorSize;
+            }
+        }
+    }
+#endif // _TARGET_64BIT_
+    return 0;
+}
+
+//*******************************************************************************
  CorElementType MethodTable::GetHFAType()
  {
      CONTRACTL
@@ -1191,17 +1243,28 @@ CorElementType MethodTable::GetHFAType()
          _ASSERTE(pMT->IsValueType());
          _ASSERTE(pMT->GetNumInstanceFields() > 0);
  
+        int vectorSize = pMT->GetVectorSize();
+        if (vectorSize != 0)
+        {
+            return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE;
+        }
+
          PTR_FieldDesc pFirstField = pMT->GetApproxFieldDescListRaw();
  
          CorElementType fieldType = pFirstField->GetFieldType();
-        
+
          // All HFA fields have to be of the same type, so we can just return the type of the first field
          switch (fieldType)
          {
          case ELEMENT_TYPE_VALUETYPE:
              pMT = pFirstField->LookupApproxFieldTypeHandle().GetMethodTable();
+            vectorSize = pMT->GetVectorSize();
+            if (vectorSize != 0)
+            {
+                return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE;
+            }
              break;
-            
+
          case ELEMENT_TYPE_R4:
          case ELEMENT_TYPE_R8:
              return fieldType;
@@ -1212,7 +1275,7 @@ CorElementType MethodTable::GetHFAType()
              _ASSERTE(false);
              return ELEMENT_TYPE_END;
          }
-    }    
+    }
  }
  
  bool MethodTable::IsNativeHFA()
@@ -1231,6 +1294,7 @@ CorElementType MethodTable::GetNativeHFAType()
  //
  // When FEATURE_HFA is defined, we cache the value; otherwise we recompute it with each
  // call. The latter is only for the armaltjit and the arm64altjit.
+//
  bool
  #if defined(FEATURE_HFA)
  EEClass::CheckForHFA(MethodTable ** pByValueClassCache)
@@ -1243,25 +1307,18 @@ EEClass::CheckForHFA()
      // This method should be called for valuetypes only
      _ASSERTE(GetMethodTable()->IsValueType());
  
-    // The SIMD Intrinsic types are meant to be handled specially and should not be treated as HFA
-    if (GetMethodTable()->IsIntrinsicType())
-    {
-        LPCUTF8 namespaceName;
-        LPCUTF8 className = GetMethodTable()->GetFullyQualifiedNameInfo(&namespaceName);
  
-        if ((strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector128`1") == 0) ||
-            (strcmp(className, "Vector64`1") == 0))
-        {
-            assert(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0);
-            return false;
-        }
-       
-        if ((strcmp(className, "Vector`1") == 0) && (strcmp(namespaceName, "System.Numerics") == 0))
-        {
-            return false;
-        }
+    // The opaque Vector types appear to have multiple fields, but need to be treated
+    // as an opaque type of a single vector.
+    if (GetMethodTable()->GetVectorSize() != 0)
+    {
+#if defined(FEATURE_HFA)
+        GetMethodTable()->SetIsHFA();
+#endif
+        return true;
      }
  
+    int elemSize = 0;
      CorElementType hfaType = ELEMENT_TYPE_END;
  
      FieldDesc *pFieldDescList = GetFieldDescList();
@@ -1278,11 +1335,41 @@ EEClass::CheckForHFA()
          switch (fieldType)
          {
          case ELEMENT_TYPE_VALUETYPE:
+            {
+#ifdef _TARGET_ARM64_
+                // HFA/HVA types are unique by size, except for Vector64 which we can conveniently
+                // treat as if it were a double for ABI purposes. However, it only qualifies as
+                // an HVA if all fields are the same type. This will ensure that we only
+                // consider it an HVA if all the fields are ELEMENT_TYPE_VALUETYPE (which have been
+                // determined above to be vectors) of the same size.
+                MethodTable* pMT;
+#if defined(FEATURE_HFA)
+                pMT = pByValueClassCache[i];
+#else
+                pMT = pFD->LookupApproxFieldTypeHandle().AsMethodTable();
+#endif
+                int thisElemSize = pMT->GetVectorSize();
+                if (thisElemSize != 0)
+                {
+                    if (elemSize == 0)
+                    {
+                        elemSize = thisElemSize;
+                    }
+                    else if ((thisElemSize != elemSize) || (hfaType != ELEMENT_TYPE_VALUETYPE))
+                    {
+                        return false;
+                    }
+                }
+                else
+#endif // _TARGET_ARM64_
+                {
  #if defined(FEATURE_HFA)
-            fieldType = pByValueClassCache[i]->GetHFAType();
+                    fieldType = pByValueClassCache[i]->GetHFAType();
  #else
-            fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType();
+                    fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType();
  #endif
+                }
+            }
              break;
  
          case ELEMENT_TYPE_R4:
@@ -1326,14 +1413,31 @@ EEClass::CheckForHFA()
          }
      }
  
-    if (hfaType == ELEMENT_TYPE_END)
+    switch (hfaType)
+    {
+    case ELEMENT_TYPE_R4:
+        elemSize = 4;
+        break;
+    case ELEMENT_TYPE_R8:
+        elemSize = 8;
+        break;
+#ifdef _TARGET_ARM64_
+    case ELEMENT_TYPE_VALUETYPE:
+        // Should already have set elemSize, but be conservative
+        if (elemSize == 0)
+        {
+            return false;
+        }
+        break;
+#endif
+    default:
+        // ELEMENT_TYPE_END
          return false;
+    }
          
      if (!hasZeroOffsetField) // If the struct doesn't have a zero-offset field, it's not an HFA.
          return false;
  
-    int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float);
-
      // Note that we check the total size, but do not perform any checks on number of fields:
      // - Type of fields can be HFA valuetype itself
      // - Managed C++ HFA valuetypes have just one <alignment member> of type float to signal that 
@@ -1348,7 +1452,7 @@ EEClass::CheckForHFA()
      if (totalSize / elemSize > 4)
          return false;
  
-    // All the above tests passed. It's HFA!
+    // All the above tests passed. It's HFA(/HVA)!
  #if defined(FEATURE_HFA)
      GetMethodTable()->SetIsHFA();
  #endif
@@ -1421,7 +1525,16 @@ CorElementType EEClassLayoutInfo::GetNativeHFATypeRaw()
      if (hfaType == ELEMENT_TYPE_END)
          return ELEMENT_TYPE_END;
  
-    int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float);
+    int elemSize = 1;
+    switch (hfaType)
+    {
+    case ELEMENT_TYPE_R4: elemSize = sizeof(float); break;
+    case ELEMENT_TYPE_R8: elemSize = sizeof(double); break;
+#ifdef _TARGET_ARM64_
+    case ELEMENT_TYPE_VALUETYPE: elemSize = 16; break;
+#endif
+    default: _ASSERTE(!"Invalid HFA Type");
+    }
  
      // Note that we check the total size, but do not perform any checks on number of fields:
      // - Type of fields can be HFA valuetype itself
diff --git a/src/coreclr/src/vm/class.h b/src/coreclr/src/vm/class.h

index 2853aee..a1e7aeb 100644 (file)
--- a/src/coreclr/src/vm/class.h
+++ b/src/coreclr/src/vm/class.h
@@ -414,8 +414,11 @@ class EEClassLayoutInfo
  #endif // UNIX_AMD64_ABI
  #ifdef FEATURE_HFA
              // HFA type of the unmanaged layout
+            // Note that these are not flags, they are discrete values.
              e_R4_HFA                    = 0x10,
              e_R8_HFA                    = 0x20,
+            e_16_HFA                    = 0x30,
+            e_HFATypeFlags              = 0x30,
  #endif
          };
  
@@ -526,15 +529,19 @@ class EEClassLayoutInfo
          bool IsNativeHFA()
          {
              LIMITED_METHOD_CONTRACT;
-            return (m_bFlags & (e_R4_HFA | e_R8_HFA)) != 0;
+            return (m_bFlags & e_HFATypeFlags) != 0;
          }
  
          CorElementType GetNativeHFAType()
          {
              LIMITED_METHOD_CONTRACT;
-            if (IsNativeHFA())                      
-                return (m_bFlags & e_R4_HFA) ? ELEMENT_TYPE_R4 : ELEMENT_TYPE_R8;
-            return ELEMENT_TYPE_END;
+            switch (m_bFlags & e_HFATypeFlags)
+            {
+            case e_R4_HFA: return ELEMENT_TYPE_R4;
+            case e_R8_HFA: return ELEMENT_TYPE_R8;
+            case e_16_HFA: return ELEMENT_TYPE_VALUETYPE;
+            default:       return ELEMENT_TYPE_END;
+            }
          }
  #else // !FEATURE_HFA
          bool IsNativeHFA()
@@ -580,7 +587,15 @@ class EEClassLayoutInfo
          void SetNativeHFAType(CorElementType hfaType)
          {
              LIMITED_METHOD_CONTRACT;
-            m_bFlags |= (hfaType == ELEMENT_TYPE_R4) ? e_R4_HFA : e_R8_HFA;
+            // We should call this at most once.
+            _ASSERTE((m_bFlags & e_HFATypeFlags) == 0);
+            switch (hfaType)
+            {
+            case ELEMENT_TYPE_R4: m_bFlags |= e_R4_HFA; break;
+            case ELEMENT_TYPE_R8: m_bFlags |= e_R8_HFA; break;
+            case ELEMENT_TYPE_VALUETYPE: m_bFlags |= e_16_HFA; break;
+            default: _ASSERTE(!"Invalid HFA Type");
+            }
          }
  #endif
  #ifdef UNIX_AMD64_ABI
diff --git a/src/coreclr/src/vm/methodtable.h b/src/coreclr/src/vm/methodtable.h

index 74febeb..154efa2 100644 (file)
--- a/src/coreclr/src/vm/methodtable.h
+++ b/src/coreclr/src/vm/methodtable.h
@@ -1929,6 +1929,9 @@ public:
      bool IsHFA();
  #endif // FEATURE_HFA
  
+    // Returns the size in bytes of this type if it is a HW vector type; 0 otherwise.
+    int GetVectorSize();
+
      // Get the HFA type. This is supported both with FEATURE_HFA, in which case it
      // depends on the cached bit on the class, or without, in which case it is recomputed
      // for each invocation.
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs

index 97c0a42..ad76022 100644 (file)
--- a/src/coreclr/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs
+++ b/src/coreclr/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs
@@ -1355,7 +1355,6 @@ namespace Arm64intrisicsTest
                  testExtractOp<int,    Vector64< int   >>(name, (x) => Simd.Extract(x, 1), (x) => x[ 1]);
                  testExtractOp<uint,   Vector64< uint  >>(name, (x) => Simd.Extract(x, 0), (x) => x[ 0]);
                  testExtractOp<uint,   Vector64< uint  >>(name, (x) => Simd.Extract(x, 1), (x) => x[ 1]);
-#if Broken
  
                  // Test non-constant call
                  testExtractOp<float,  Vector128<float >>(name, (x) => simdExtract(x, 0), (x) => x[ 0]);
@@ -1472,7 +1471,6 @@ namespace Arm64intrisicsTest
                  testThrowsArgumentOutOfRangeException<ushort, Vector64< ushort>>(name, (x, y) => Simd.Extract(x, 4));
                  testThrowsArgumentOutOfRangeException<int,    Vector64< int   >>(name, (x, y) => Simd.Extract(x, 2));
                  testThrowsArgumentOutOfRangeException<uint,   Vector64< uint  >>(name, (x, y) => Simd.Extract(x, 2));
-#endif
  
                  testThrowsTypeNotSupported<Vector64< long >>(name, (x, y) => { return Simd.Extract(x, 1) > 1 ? x : y; });
                  testThrowsTypeNotSupported<Vector64< ulong>>(name, (x, y) => { return Simd.Extract(x, 1) > 1 ? x : y; });
@@ -1528,7 +1526,6 @@ namespace Arm64intrisicsTest
                  testPermuteOp<ushort, Vector64< ushort>>(name, (x, y) => Simd.Insert(x, 1, (ushort)2), (i, x, y) => (ushort)(i != 1 ? x[i] : 2));
                  testPermuteOp<int,    Vector64< int   >>(name, (x, y) => Simd.Insert(x, 1, (int   )2), (i, x, y) => (int   )(i != 1 ? x[i] : 2));
                  testPermuteOp<uint,   Vector64< uint  >>(name, (x, y) => Simd.Insert(x, 1, (uint  )2), (i, x, y) => (uint  )(i != 1 ? x[i] : 2));
-#if Broken
  
                  testPermuteOp<float,  Vector128<float >>(name, (x, y) => Simd.Insert(x, 3, Simd.Extract(y, 1)), (i, x, y) => (float )(i != 3 ? x[i] : y[1]));
                  testPermuteOp<double, Vector128<double>>(name, (x, y) => Simd.Insert(x, 0, Simd.Extract(y, 1)), (i, x, y) => (double)(i != 0 ? x[i] : y[1]));
@@ -1565,7 +1562,6 @@ namespace Arm64intrisicsTest
                  testThrowsArgumentOutOfRangeException<ushort, Vector64< ushort>, Vector64< ushort>>(name, (x, y) => Simd.Insert(x, 4, (ushort)1));
                  testThrowsArgumentOutOfRangeException<int,    Vector64< int   >, Vector64< int   >>(name, (x, y) => Simd.Insert(x, 2, (int   )1));
                  testThrowsArgumentOutOfRangeException<uint,   Vector64< uint  >, Vector64< uint  >>(name, (x, y) => Simd.Insert(x, 2, (uint  )1));
-#endif
  
                  testThrowsTypeNotSupported<Vector128<bool >>(name, (x, y) => Simd.Insert(x, 1,      true));
                  testThrowsTypeNotSupported<Vector64< long >>(name, (x, y) => Simd.Insert(x, 1, ( long )5));
author	Carol Eidt <carol.eidt@microsoft.com>
	Tue, 16 Apr 2019 16:35:49 +0000 (09:35 -0700)
committer	GitHub <noreply@github.com>
	Tue, 16 Apr 2019 16:35:49 +0000 (09:35 -0700)
src/coreclr/src/jit/assertionprop.cpp		patch \| blob \| history
src/coreclr/src/jit/codegenarm64.cpp		patch \| blob \| history
src/coreclr/src/jit/codegenarmarch.cpp		patch \| blob \| history
src/coreclr/src/jit/codegencommon.cpp		patch \| blob \| history
src/coreclr/src/jit/codegenxarch.cpp		patch \| blob \| history
src/coreclr/src/jit/compiler.cpp		patch \| blob \| history
src/coreclr/src/jit/compiler.h		patch \| blob \| history
src/coreclr/src/jit/compiler.hpp		patch \| blob \| history
src/coreclr/src/jit/flowgraph.cpp		patch \| blob \| history
src/coreclr/src/jit/gentree.cpp		patch \| blob \| history
src/coreclr/src/jit/gentree.h		patch \| blob \| history
src/coreclr/src/jit/hwintrinsicArm64.cpp		patch \| blob \| history
src/coreclr/src/jit/importer.cpp		patch \| blob \| history
src/coreclr/src/jit/lclvars.cpp		patch \| blob \| history
src/coreclr/src/jit/lower.cpp		patch \| blob \| history
src/coreclr/src/jit/lowerarmarch.cpp		patch \| blob \| history
src/coreclr/src/jit/lsra.cpp		patch \| blob \| history
src/coreclr/src/jit/lsraarmarch.cpp		patch \| blob \| history
src/coreclr/src/jit/lsrabuild.cpp		patch \| blob \| history
src/coreclr/src/jit/morph.cpp		patch \| blob \| history
src/coreclr/src/jit/register_arg_convention.h		patch \| blob \| history
src/coreclr/src/jit/simd.cpp		patch \| blob \| history
src/coreclr/src/jit/target.h		patch \| blob \| history
src/coreclr/src/jit/vartype.h		patch \| blob \| history
src/coreclr/src/vm/argdestination.h		patch \| blob \| history
src/coreclr/src/vm/arm64/CallDescrWorkerARM64.asm		patch \| blob \| history
src/coreclr/src/vm/arm64/asmconstants.h		patch \| blob \| history
src/coreclr/src/vm/arm64/asmhelpers.asm		patch \| blob \| history
src/coreclr/src/vm/arm64/calldescrworkerarm64.S		patch \| blob \| history
src/coreclr/src/vm/arm64/cgencpu.h		patch \| blob \| history
src/coreclr/src/vm/callhelpers.h		patch \| blob \| history
src/coreclr/src/vm/callingconvention.h		patch \| blob \| history
src/coreclr/src/vm/class.cpp		patch \| blob \| history
src/coreclr/src/vm/class.h		patch \| blob \| history
src/coreclr/src/vm/methodtable.h		patch \| blob \| history
src/coreclr/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs		patch \| blob \| history