Updating the HWIntrinsic codegen to ensure ival always fits in a byte.
authorTanner Gooding <tagoo@outlook.com>
Fri, 1 Jun 2018 14:08:02 +0000 (07:08 -0700)
committerTanner Gooding <tagoo@outlook.com>
Fri, 1 Jun 2018 23:06:43 +0000 (16:06 -0700)
src/jit/emitxarch.cpp
src/jit/hwintrinsiccodegenxarch.cpp

index da25cf7..748eead 100644 (file)
@@ -50,6 +50,48 @@ bool IsFMAInstruction(instruction ins)
     return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
 }
 
+regNumber getSseShiftRegNumber(instruction ins)
+{
+    switch (ins)
+    {
+        case INS_psrldq:
+        {
+            return (regNumber)3;
+        }
+
+        case INS_pslldq:
+        {
+            return (regNumber)7;
+        }
+
+        case INS_psrld:
+        case INS_psrlw:
+        case INS_psrlq:
+        {
+            return (regNumber)2;
+        }
+
+        case INS_pslld:
+        case INS_psllw:
+        case INS_psllq:
+        {
+            return (regNumber)6;
+        }
+
+        case INS_psrad:
+        case INS_psraw:
+        {
+            return (regNumber)4;
+        }
+
+        default:
+        {
+            assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
+            return REG_NA;
+        }
+    }
+}
+
 bool emitter::IsAVXInstruction(instruction ins)
 {
     return UseVEXEncoding() && IsSSEOrAVXInstruction(ins);
@@ -185,9 +227,11 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
         case INS_psubusw:
         case INS_psubw:
         case INS_pslld:
+        case INS_pslldq:
         case INS_psllq:
         case INS_psllw:
         case INS_psrld:
+        case INS_psrldq:
         case INS_psrlq:
         case INS_psrlw:
         case INS_psrad:
@@ -5383,26 +5427,6 @@ void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg,
     }
 }
 
-static bool isSseShift(instruction ins)
-{
-    switch (ins)
-    {
-        case INS_psrldq:
-        case INS_pslldq:
-        case INS_psrld:
-        case INS_psrlw:
-        case INS_psrlq:
-        case INS_pslld:
-        case INS_psllw:
-        case INS_psllq:
-        case INS_psrad:
-        case INS_psraw:
-            return true;
-        default:
-            return false;
-    }
-}
-
 //------------------------------------------------------------------------
 // IsDstSrcImmAvxInstruction: check if instruction has "R(M) R(M) I" format
 // for EVEX, VEX and legacy SSE encodings and has no (E)VEX.NDS
@@ -5433,9 +5457,7 @@ static bool IsDstSrcImmAvxInstruction(instruction ins)
 
 void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int ival)
 {
-    // TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I
-    bool isShift = isSseShift(ins);
-    if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift))
+    if (UseVEXEncoding() || IsDstSrcImmAvxInstruction(ins))
     {
         emitIns_R_R_I(ins, attr, reg, reg1, ival);
     }
@@ -5445,12 +5467,6 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg,
         {
             emitIns_R_R(INS_movaps, attr, reg, reg1);
         }
-        // TODO-XARCH-BUG emitOutputRI cannot work with SSE2 shift instruction on imm8 > 127, so we replace it by the
-        // semantic alternatives. https://github.com/dotnet/coreclr/issues/16543
-        if (isShift && ival > 127)
-        {
-            ival = 127;
-        }
         emitIns_R_I(ins, attr, reg, ival);
     }
 }
@@ -10723,35 +10739,9 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
 
         assert(id->idGCref() == GCT_NONE);
         assert(valInByte);
+
         // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
-        regNumber regOpcode;
-        switch (ins)
-        {
-            case INS_psrldq:
-                regOpcode = (regNumber)3;
-                break;
-            case INS_pslldq:
-                regOpcode = (regNumber)7;
-                break;
-            case INS_psrld:
-            case INS_psrlw:
-            case INS_psrlq:
-                regOpcode = (regNumber)2;
-                break;
-            case INS_pslld:
-            case INS_psllw:
-            case INS_psllq:
-                regOpcode = (regNumber)6;
-                break;
-            case INS_psrad:
-            case INS_psraw:
-                regOpcode = (regNumber)4;
-                break;
-            default:
-                assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
-                regOpcode = REG_NA;
-                break;
-        }
+        regNumber regOpcode = getSseShiftRegNumber(ins);
 
         // Get the 'base' opcode.
         code = insCodeMI(ins);
@@ -11892,7 +11882,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             // Also, determine which operand goes where in the ModRM byte.
             regNumber mReg;
             regNumber rReg;
-            // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
             if (hasCodeMR(ins))
             {
                 code = insCodeMR(ins);
@@ -11902,6 +11891,21 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                 mReg = id->idReg1();
                 rReg = id->idReg2();
             }
+            else if (hasCodeMI(ins))
+            {
+                code = insCodeMI(ins);
+
+                // Emit the VEX prefix if it exists
+                code = AddVexPrefixIfNeeded(ins, code, size);
+
+                assert((code & 0xC000) == 0);
+                code |= 0xC000;
+
+                mReg = id->idReg2();
+
+                // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
+                rReg = getSseShiftRegNumber(ins);
+            }
             else
             {
                 code = insCodeRM(ins);
index 42f0c71..80272e4 100644 (file)
@@ -94,7 +94,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if ((ival != -1) && varTypeIsFloating(baseType))
                 {
-                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
+                    assert((ival >= 0) && (ival <= 127));
+                    emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, (int8_t)ival);
                 }
                 else
                 {
@@ -130,6 +131,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if ((ival != -1) && varTypeIsFloating(baseType))
                 {
+                    assert((ival >= 0) && (ival <= 127));
                     genHWIntrinsic_R_R_RM_I(node, ins);
                 }
                 else if (category == HW_Category_MemoryLoad)
@@ -145,19 +147,21 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
                 {
+                    assert(ival == -1);
+
                     if (intrinsicID == NI_SSE2_Extract)
                     {
                         // extract instructions return to GP-registers, so it needs int size as the emitsize
                         simdSize = emitTypeSize(TYP_INT);
                     }
-                    auto emitSwCase = [&](unsigned i) {
-                        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
-                    };
+
+                    auto emitSwCase = [&](int8_t i) { emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, i); };
 
                     if (op2->IsCnsIntOrI())
                     {
                         ssize_t ival = op2->AsIntCon()->IconValue();
-                        emitSwCase((unsigned)ival);
+                        assert((ival >= 0) && (ival <= 255));
+                        emitSwCase((int8_t)ival);
                     }
                     else
                     {
@@ -199,13 +203,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
                 if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
                 {
-                    auto emitSwCase = [&](unsigned i) {
-                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
+                    assert(ival == -1);
+
+                    auto emitSwCase = [&](int8_t i) {
+                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, i);
                     };
+
                     if (op3->IsCnsIntOrI())
                     {
                         ssize_t ival = op3->AsIntCon()->IconValue();
-                        emitSwCase((unsigned)ival);
+                        assert((ival >= 0) && (ival <= 255));
+                        emitSwCase((int8_t)ival);
                     }
                     else
                     {
@@ -443,9 +451,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
     GenTree*  op1        = node->gtGetOp1();
     GenTree*  op2        = node->gtGetOp2();
     emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
-    int       ival       = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
     emitter*  emit       = getEmitter();
 
+    int ival = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
+    assert((ival >= 0) && (ival <= 127));
+
     // TODO-XArch-CQ: Commutative operations can have op1 be contained
     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
 
@@ -748,7 +758,7 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic            intrinsi
     for (unsigned i = 0; i < maxByte; i++)
     {
         genDefineTempLabel(jmpTable[i]);
-        emitSwCase(i);
+        emitSwCase((int8_t)i);
         emit->emitIns_J(INS_jmp, switchTableEnd);
     }
 
@@ -975,7 +985,6 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
     regNumber      op1Reg      = REG_NA;
     regNumber      op2Reg      = REG_NA;
     emitter*       emit        = getEmitter();
-    int            ival        = -1;
 
     if ((op1 != nullptr) && !op1->OperIsList())
     {
@@ -990,11 +999,14 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
         {
             assert(op1 != nullptr);
             assert(op2 != nullptr);
+
             assert(baseType == TYP_DOUBLE);
+
+            int ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
+            assert((ival >= 0) && (ival <= 127));
+
             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
             op2Reg          = op2->gtRegNum;
-            ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
-            assert(ival != -1);
             emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
 
             break;
@@ -1305,23 +1317,25 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
             {
                 tmpTargetReg = node->ExtractTempReg();
             }
-            auto emitSwCase = [&](unsigned i) {
+
+            auto emitSwCase = [&](int8_t i) {
                 if (baseType == TYP_FLOAT)
                 {
                     // extract instructions return to GP-registers, so it needs int size as the emitsize
-                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), tmpTargetReg, op1Reg, (int)i);
+                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), tmpTargetReg, op1Reg, i);
                     emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
                 }
                 else
                 {
-                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, (int)i);
+                    emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, i);
                 }
             };
 
             if (op2->IsCnsIntOrI())
             {
                 ssize_t ival = op2->AsIntCon()->IconValue();
-                emitSwCase((unsigned)ival);
+                assert((ival >= 0) && (ival <= 255));
+                emitSwCase((int8_t)ival);
             }
             else
             {
@@ -1584,38 +1598,36 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
 
             regNumber op3Reg = lastOp->gtRegNum;
 
-            auto emitSwCase = [&](unsigned i) {
-                // TODO-XARCH-Bug the emitter cannot work with imm8 >= 128,
-                // so clear the 8th bit that is not used by the instructions
-                i &= 0x7FU;
+            auto emitSwCase = [&](int8_t i) {
                 if (numArgs == 3)
                 {
                     if (intrinsicID == NI_AVX_ExtractVector128 || intrinsicID == NI_AVX2_ExtractVector128)
                     {
-                        emit->emitIns_AR_R_I(ins, attr, op1Reg, 0, op2Reg, (int)i);
+                        emit->emitIns_AR_R_I(ins, attr, op1Reg, 0, op2Reg, i);
                     }
                     else if (op2->TypeGet() == TYP_I_IMPL)
                     {
-                        emit->emitIns_SIMD_R_R_AR_I(ins, attr, targetReg, op1Reg, op2Reg, (int)i);
+                        emit->emitIns_SIMD_R_R_AR_I(ins, attr, targetReg, op1Reg, op2Reg, i);
                     }
                     else
                     {
                         assert(op2->TypeGet() == TYP_SIMD16);
-                        emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, (int)i);
+                        emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, i);
                     }
                 }
                 else
                 {
                     assert(numArgs == 2);
                     assert(intrinsicID == NI_AVX_ExtractVector128 || intrinsicID == NI_AVX2_ExtractVector128);
-                    emit->emitIns_SIMD_R_R_I(ins, attr, targetReg, op1Reg, (int)i);
+                    emit->emitIns_SIMD_R_R_I(ins, attr, targetReg, op1Reg, i);
                 }
             };
 
             if (lastOp->IsCnsIntOrI())
             {
                 ssize_t ival = lastOp->AsIntCon()->IconValue();
-                emitSwCase((unsigned)ival);
+                assert((ival >= 0) && (ival <= 255));
+                emitSwCase((int8_t)ival);
             }
             else
             {