From 540b9d8f443f0a69f28278a93b173a2652536933 Mon Sep 17 00:00:00 2001
From: Mike Danes <onemihaid@hotmail.com>
Date: Sun, 5 Aug 2018 12:16:00 +0300
Subject: [PATCH] Refactor genIntToIntCast on XARCH

Commit migrated from https://github.com/dotnet/coreclr/commit/a2ba07ab7ebbd47fc673a1f4a7521a400badb298
---
 src/coreclr/src/jit/codegen.h        |   1 +
 src/coreclr/src/jit/codegenxarch.cpp | 380 ++++++++++++++---------------------
 2 files changed, 157 insertions(+), 224 deletions(-)

diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h
index 9eb1ba3..82e89ab 100644
--- a/src/coreclr/src/jit/codegen.h
+++ b/src/coreclr/src/jit/codegen.h
@@ -789,6 +789,7 @@ protected:
     void genLongToIntCast(GenTree* treeNode);
 #endif
 
+    void genIntCastOverflowCheck(GenTreeCast* cast, regNumber reg);
     void genIntToIntCast(GenTree* treeNode);
     void genFloatToFloatCast(GenTree* treeNode);
     void genFloatToIntCast(GenTree* treeNode);
diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp
index caccf47..b1723eb 100644
--- a/src/coreclr/src/jit/codegenxarch.cpp
+++ b/src/coreclr/src/jit/codegenxarch.cpp
@@ -6345,269 +6345,201 @@ void CodeGen::genLongToIntCast(GenTree* cast)
 #endif
 
 //------------------------------------------------------------------------
-// genIntToIntCast: Generate code for an integer cast
-//    This method handles integer overflow checking casts
-//    as well as ordinary integer casts.
+// genIntCastOverflowCheck: Generate overflow checking code for an integer cast.
 //
 // Arguments:
-//    treeNode - The GT_CAST node
+//    cast - The GT_CAST node
+//    reg  - The register containing the value to check
 //
-// Return Value:
-//    None.
+void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, regNumber reg)
+{
+    const var_types srcType      = genActualType(cast->gtGetOp1()->TypeGet());
+    const bool      srcUnsigned  = cast->IsUnsigned();
+    const unsigned  srcSize      = genTypeSize(srcType);
+    const var_types castType     = cast->gtCastType;
+    const bool      castUnsigned = varTypeIsUnsigned(castType);
+    const unsigned  castSize     = genTypeSize(castType);
+
+    if (castSize >= srcSize)
+    {
+        // It's either a widening cast from a signed type to an unsigned
+        // type or a sign changing cast.
+        assert((castSize > srcSize) ? (!srcUnsigned && castUnsigned) : (srcUnsigned != castUnsigned));
+
+        // In both cases we just need to check if the value is positive.
+        getEmitter()->emitIns_R_R(INS_test, EA_SIZE(srcSize), reg, reg);
+        genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+    }
+#ifdef _TARGET_64BIT_
+    // We handled widening and sign changing casts so we're left with narrowing casts.
+    // Handle (U)LONG to (U)INT casts first as they're 64 bit target specific and there
+    // are some complications related to encoding of the large immediate values they need.
+    else if (castSize == 4)
+    {
+        assert(srcSize == 8);
+
+        if (castUnsigned) // (U)LONG to UINT cast
+        {
+            // We need to check if the value is not greater than 0xFFFFFFFF but this value
+            // cannot be encoded in an immediate operand. Use a right shift to test if the
+            // upper 32 bits are zero. This requires a temporary register.
+            const regNumber tempReg = cast->GetSingleTempReg();
+            assert(tempReg != reg);
+            getEmitter()->emitIns_R_R(INS_mov, EA_8BYTE, tempReg, reg);
+            getEmitter()->emitIns_R_I(INS_shr_N, EA_8BYTE, tempReg, 32);
+            genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+        }
+        else if (srcUnsigned) // ULONG to INT cast
+        {
+            getEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MAX);
+            genJumpToThrowHlpBlk(EJ_ja, SCK_OVERFLOW);
+        }
+        else // LONG to INT cast
+        {
+            // We could combine this case with the unsigned case above but let's keep
+            // theses two separated to maintain the code structure similar to the ARM
+            // implementation that here requires a temporary register.
+            getEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MAX);
+            genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
+
+            getEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MIN);
+            genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+        }
+    }
+#endif
+    else // if (castSize < srcSize)
+    {
+        // This is a narrowing cast. We already handled (U)INT so what's left are small int types.
+        assert(castSize < 4);
+
+        // This happens to allow for easy compututation of the min and max
+        // values of the castType without risk of integer overflow.
+        const int castNumBits  = (castSize * 8) - (castUnsigned ? 0 : 1);
+        const int castMaxValue = (1 << castNumBits) - 1;
+        const int castMinValue = (castUnsigned | srcUnsigned) ? 0 : (-castMaxValue - 1);
+
+        getEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(srcSize), reg, castMaxValue);
+        genJumpToThrowHlpBlk((castMinValue == 0) ? EJ_ja : EJ_jg, SCK_OVERFLOW);
+
+        if (castMinValue != 0)
+        {
+            getEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(srcSize), reg, castMinValue);
+            genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast, with or without overflow check.
+//
+// Arguments:
+//    treeNode - The GT_CAST node
 //
 // Assumptions:
 //    The treeNode is not a contained node and must have an assigned register.
-//    For a signed convert from byte, the source must be in a byte-addressable register.
 //    Neither the source nor target type can be a floating point type.
+//    On x86 casts to (U)BYTE require that the source be in a byte register.
 //
 // TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
 // TODO: refactor to use getCastDescription
 //
 void CodeGen::genIntToIntCast(GenTree* treeNode)
 {
-    assert(treeNode->OperGet() == GT_CAST);
+    GenTreeCast* cast = treeNode->AsCast();
+    genConsumeOperands(cast);
 
-    GenTree*  castOp  = treeNode->gtCast.CastOp();
-    var_types srcType = genActualType(castOp->TypeGet());
-    noway_assert(genTypeSize(srcType) >= 4);
-    assert(genTypeSize(srcType) <= genTypeSize(TYP_I_IMPL));
+    GenTree* src = cast->gtGetOp1();
 
-    regNumber targetReg     = treeNode->gtRegNum;
-    regNumber sourceReg     = castOp->gtRegNum;
-    var_types dstType       = treeNode->CastToType();
-    bool      isUnsignedDst = varTypeIsUnsigned(dstType);
-    bool      isUnsignedSrc = varTypeIsUnsigned(srcType);
+    const var_types srcType  = genActualType(src->TypeGet());
+    const unsigned  srcSize  = genTypeSize(srcType);
+    const var_types castType = cast->gtCastType;
+    const unsigned  castSize = genTypeSize(castType);
+    const var_types dstType  = genActualType(cast->TypeGet());
+    const unsigned  dstSize  = genTypeSize(dstType);
 
-    // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
-    if (!isUnsignedSrc && treeNode->IsUnsigned())
-    {
-        srcType       = genUnsignedType(srcType);
-        isUnsignedSrc = true;
-    }
+    assert((srcSize == 4) || (srcSize == genTypeSize(TYP_I_IMPL)));
+    assert((dstSize == 4) || (dstSize == genTypeSize(TYP_I_IMPL)));
+    assert(castSize <= dstSize);
 
-    bool requiresOverflowCheck = false;
+    const regNumber srcReg = src->gtRegNum;
+    const regNumber dstReg = cast->gtRegNum;
 
-    assert(genIsValidIntReg(targetReg));
-    assert(genIsValidIntReg(sourceReg));
+    assert(genIsValidIntReg(srcReg));
+    assert(genIsValidIntReg(dstReg));
 
-    instruction ins     = INS_invalid;
-    emitAttr    srcSize = EA_ATTR(genTypeSize(srcType));
-    emitAttr    dstSize = EA_ATTR(genTypeSize(dstType));
+    instruction ins = INS_none;
+    emitAttr    insSize;
 
-    if (srcSize < dstSize)
+    Lowering::CastInfo castInfo;
+    Lowering::getCastDescription(cast, &castInfo);
+    if (castInfo.requiresOverflowCheck)
     {
-#ifdef _TARGET_X86_
-        // dstType cannot be a long type on x86, such casts should have been decomposed.
-        // srcType cannot be a small type since it's the "actual type" of the cast operand.
-        // This means that widening casts do not actually occur on x86.
-        unreached();
-#else
-        // This is a widening cast from TYP_(U)INT to TYP_(U)LONG.
-        assert(dstSize == EA_8BYTE);
-        assert(srcSize == EA_4BYTE);
+        genIntCastOverflowCheck(cast, srcReg);
 
-        // When widening, overflows can only happen if the source type is signed and the
-        // destination type is unsigned. Since the overflow check ensures that the value
-        // is positive a cheaper mov instruction can be used instead of movsxd.
-        if (treeNode->gtOverflow() && !isUnsignedSrc && isUnsignedDst)
-        {
-            requiresOverflowCheck = true;
-            ins                   = INS_mov;
-        }
-        else
+#ifdef _TARGET_64BIT_
+        if ((srcType == TYP_INT) && !cast->IsUnsigned() && (castType == TYP_ULONG))
         {
-            ins = isUnsignedSrc ? INS_mov : INS_movsxd;
+            // This is the only overflow checking cast that requires changing the
+            // source value (by zero extending), all others will copy the value as is.
+            ins     = INS_mov;
+            insSize = EA_4BYTE;
         }
 #endif
     }
-    else
+    else // Non-overflow checking cast.
     {
-        // Narrowing cast, or sign-changing cast
-        noway_assert(srcSize >= dstSize);
-
-        // Is this an Overflow checking cast?
-        if (treeNode->gtOverflow())
+        if (castSize < 4)
         {
-            requiresOverflowCheck = true;
-            ins                   = INS_mov;
-        }
-        else
-        {
-            ins = ins_Move_Extend(dstType, false);
-        }
-    }
-
-    noway_assert(ins != INS_invalid);
-
-    genConsumeReg(castOp);
-
-    if (requiresOverflowCheck)
-    {
-        ssize_t typeMin        = 0;
-        ssize_t typeMax        = 0;
-        ssize_t typeMask       = 0;
-        bool    needScratchReg = false;
-        bool    signCheckOnly  = false;
-
-        /* Do we need to compare the value, or just check masks */
-
-        switch (dstType)
-        {
-            case TYP_BYTE:
-                typeMask = ssize_t((int)0xFFFFFF80);
-                typeMin  = SCHAR_MIN;
-                typeMax  = SCHAR_MAX;
-                break;
-
-            case TYP_UBYTE:
-                typeMask = ssize_t((int)0xFFFFFF00L);
-                break;
-
-            case TYP_SHORT:
-                typeMask = ssize_t((int)0xFFFF8000);
-                typeMin  = SHRT_MIN;
-                typeMax  = SHRT_MAX;
-                break;
+            assert((castSize == 1) || (castSize == 2));
 
-            case TYP_USHORT:
-                typeMask = ssize_t((int)0xFFFF0000L);
-                break;
-
-            case TYP_INT:
-                if (srcType == TYP_UINT)
-                {
-                    signCheckOnly = true;
-                }
-                else
-                {
-                    typeMask = ssize_t((int)0x80000000);
-                    typeMin  = INT_MIN;
-                    typeMax  = INT_MAX;
-                }
-                break;
-
-            case TYP_UINT:
-                if (srcType == TYP_INT)
-                {
-                    signCheckOnly = true;
-                }
-                else
-                {
-                    needScratchReg = true;
-                }
-                break;
-
-            case TYP_LONG:
-                noway_assert(srcType == TYP_ULONG);
-                signCheckOnly = true;
-                break;
-
-            case TYP_ULONG:
-                noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
-                signCheckOnly = true;
-                break;
-
-            default:
-                NO_WAY("Unknown type");
-                return;
-        }
-
-        if (signCheckOnly)
-        {
-            // We only need to check for a negative value in sourceReg
-            inst_RV_RV(INS_test, sourceReg, sourceReg, srcType, srcSize);
-            genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+            // Casting to a small type really means widening from that small type to INT/LONG.
+            ins     = varTypeIsUnsigned(castType) ? INS_movzx : INS_movsx;
+            insSize = EA_ATTR(castSize);
         }
-        else
+#ifdef _TARGET_64BIT_
+        // castType cannot be a long type on 32 bit targets, such casts should have been decomposed.
+        // srcType cannot be a small type since it's the "actual type" of the cast operand.
+        // This means that widening casts do not occur on 32 bit targets.
+        else if (castSize > srcSize)
         {
-            // When we are converting from unsigned or to unsigned, we
-            // will only have to check for any bits set using 'typeMask'
-            if (isUnsignedSrc || isUnsignedDst)
-            {
-                if (needScratchReg)
-                {
-                    regNumber tmpReg = treeNode->GetSingleTempReg();
-                    inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writable temp reg
-                    inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, srcSize, tmpReg, 32); // Shift right by 32 bits
-                    genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);               // Throw if result shift is non-zero
-                }
-                else
-                {
-                    noway_assert(typeMask != 0);
-                    inst_RV_IV(INS_TEST, sourceReg, typeMask, srcSize);
-                    genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
-                }
-            }
-            else
-            {
-                // For a narrowing signed cast
-                //
-                // We must check the value is in a signed range.
-
-                // Compare with the MAX
+            // (U)INT to (U)LONG widening cast
+            assert((srcSize == 4) && (castSize == 8));
 
-                noway_assert((typeMin != 0) && (typeMax != 0));
-
-                inst_RV_IV(INS_cmp, sourceReg, typeMax, srcSize);
-                genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
-
-                // Compare with the MIN
-
-                inst_RV_IV(INS_cmp, sourceReg, typeMin, srcSize);
-                genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
-            }
+            ins = cast->IsUnsigned() ? INS_mov : INS_movsxd;
+            // We need a 32 bit MOV to zero out the upper 32 bit. MOVSXD ignores the size.
+            insSize = EA_4BYTE;
         }
-
-        if (targetReg != sourceReg
-#ifdef _TARGET_AMD64_
-            // On amd64, we can hit this path for a same-register
-            // 4-byte to 8-byte widening conversion, and need to
-            // emit the instruction to set the high bits correctly.
-            || (dstSize == EA_8BYTE && srcSize == EA_4BYTE)
-#endif // _TARGET_AMD64_
-                )
-            inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize);
+#endif
     }
-    else // non-overflow checking cast
+
+    if (ins == INS_none)
     {
-        // We may have code transformations that result in casts where srcType is the same as dstType.
-        // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
-        // long constant to a long lclVar.
-        if (srcType == dstType)
-        {
-            ins = INS_mov;
-        }
+        // If the instruction has not been selected yet it means we're dealing with
+        // a narrowing/same type/sign changing cast.
+        assert(castSize <= srcSize);
+        // Make sure the destination size is correct. This prevents unsupported casts
+        // such as LONG->INT->LONG, these would be classified as narrowing but in fact
+        // they're widening casts. It may be useful to allow such casts but that
+        // requires more work here and throughout the JIT.
+        assert(dstSize <= srcSize);
 
-        if (ins == INS_mov)
-        {
-            if (targetReg != sourceReg
-#ifdef _TARGET_AMD64_
-                // On amd64, 'mov' is the opcode used to zero-extend from
-                // 4 bytes to 8 bytes.
-                || (dstSize == EA_8BYTE && srcSize == EA_4BYTE)
-#endif // _TARGET_AMD64_
-                    )
-            {
-                inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize);
-            }
-        }
-#ifdef _TARGET_AMD64_
-        else if (ins == INS_movsxd)
-        {
-            inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize);
-        }
-#endif // _TARGET_AMD64_
-        else
-        {
-            noway_assert(ins == INS_movsx || ins == INS_movzx);
-            noway_assert(srcSize >= dstSize);
+        // This cast basically does nothing, even when narrowing it is the job of the
+        // consumer of this node to use the appropiate register size (32 or 64 bit)
+        // and not rely on the cast to set the upper 32 bits in a certain manner.
+        // Still, we will need to generate a MOV instruction if the source and destination
+        // registers are different.
+        ins = (srcReg != dstReg) ? INS_mov : INS_none;
+        // We can use either the destination size or the source size. Use the destination
+        // size as on x64 it may avoid the need for a REX prefix when casting LONG to INT.
+        insSize = EA_SIZE(dstSize);
+    }
 
-            /* Generate "mov targetReg, castOp->gtReg */
-            inst_RV_RV(ins, targetReg, sourceReg, srcType, dstSize);
-        }
+    if (ins != INS_none)
+    {
+        getEmitter()->emitIns_R_R(ins, insSize, dstReg, srcReg);
     }
 
-    genProduceReg(treeNode);
+    genProduceReg(cast);
 }
 
 //------------------------------------------------------------------------
-- 
2.7.4