[Arm64] Implement Simd.Extract
authorSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Fri, 26 Jan 2018 22:24:54 +0000 (17:24 -0500)
committerTanner Gooding <tagoo@outlook.com>
Thu, 1 Feb 2018 15:14:04 +0000 (07:14 -0800)
src/jit/codegenarm64.cpp
src/jit/codegenlinear.h
src/jit/compiler.h
src/jit/hwintrinsicArm64.cpp
src/jit/lowerarmarch.cpp
src/jit/lsraarm64.cpp

index e418d16..112e77d 100644 (file)
@@ -5059,9 +5059,191 @@ void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node)
     genProduceReg(node);
 }
 
+//------------------------------------------------------------------------
+// genHWIntrinsicSwitchTable:
+//
+// Generate code for an immediate switch table
+//
+// In cases where an instruction only supports const immediate operands, we
+// need to generate functionally correct code when the operand is not constant
+//
+// This is required by the HW Intrinsic design to handle:
+//   debugger calls
+//   reflection
+//   call backs
+//
+// Generated code implements a switch of this form
+//
+// switch (swReg)
+// {
+// case 0:
+//    ins0; // emitSwCase(0)
+//    break;
+// case 1:
+//    ins1; // emitSwCase(1)
+//    break;
+// ...
+// ...
+// ...
+// case swMax - 1:
+//    insLast; // emitSwCase(swMax - 1)
+//    break;
+// default:
+//    throw ArgumentOutOfRangeException
+// }
+//
+// Generated code looks like:
+//
+//     cmp swReg, #swMax
+//     b.hs ThrowArgumentOutOfRangeExceptionHelper
+//     adr tmpReg, labelFirst
+//     add tmpReg, tmpReg, swReg, LSL #3
+//     b   [tmpReg]
+// labelFirst:
+//     ins0
+//     b labelBreakTarget
+//     ins1
+//     b labelBreakTarget
+//     ...
+//     ...
+//     ...
+//     insLast
+//     b labelBreakTarget
+// labelBreakTarget:
+//
+//
+// Arguments:
+//    swReg      - register containing the switch case to execute
+//    tmpReg     - temporary integer register for calculating the switch indirect branch target
+//    swMax      - the number of switch cases.  If swReg >= swMax throw SCK_ARG_RNG_EXCPN
+//    emitSwCase - function like argument taking an immediate value and emitting one instruction
+//
+// Return Value:
+//    None.
+//
+template <typename HWIntrinsicSwitchCaseBody>
+void CodeGen::genHWIntrinsicSwitchTable(regNumber                 swReg,
+                                        regNumber                 tmpReg,
+                                        int                       swMax,
+                                        HWIntrinsicSwitchCaseBody emitSwCase)
+{
+    assert(swMax > 0);
+    assert(swMax <= 256);
+
+    assert(genIsValidIntReg(tmpReg));
+    assert(genIsValidIntReg(swReg));
+
+    BasicBlock* labelFirst       = genCreateTempLabel();
+    BasicBlock* labelBreakTarget = genCreateTempLabel();
+
+    // Detect and throw out of range exception
+    getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, swReg, swMax);
+
+    emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+    genJumpToThrowHlpBlk(jmpGEU, SCK_ARG_RNG_EXCPN);
+
+    // Calculate switch target
+    labelFirst->bbFlags |= BBF_JMP_TARGET;
+
+    // tmpReg = labelFirst
+    getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, labelFirst, tmpReg);
+
+    // tmpReg = labelFirst + swReg * 8
+    getEmitter()->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, tmpReg, swReg, 3, INS_OPTS_LSL);
+
+    // br tmpReg
+    getEmitter()->emitIns_R(INS_br, EA_PTRSIZE, tmpReg);
+
+    genDefineTempLabel(labelFirst);
+    for (int i = 0; i < swMax; ++i)
+    {
+        unsigned prevInsCount = getEmitter()->emitInsCount;
+
+        emitSwCase(i);
+
+        assert(getEmitter()->emitInsCount == prevInsCount + 1);
+
+        inst_JMP(EJ_jmp, labelBreakTarget);
+
+        assert(getEmitter()->emitInsCount == prevInsCount + 2);
+    }
+    genDefineTempLabel(labelBreakTarget);
+}
+
+//------------------------------------------------------------------------
+// genHWIntrinsicSimdExtractOp:
+//
+// Produce code for a GT_HWIntrinsic node with form SimdExtractOp.
+//
+// Consumes one SIMD operand and one scalar
+//
+// The element index operand is typically a const immediate
+// When it is not, a switch table is generated
+//
+// See genHWIntrinsicSwitchTable comments
+//
+// Arguments:
+//    node - the GT_HWIntrinsic node
+//
+// Return Value:
+//    None.
+//
 void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node)
 {
-    NYI("HWIntrinsic form not implemented");
+    GenTree*  op1        = node->gtGetOp1();
+    GenTree*  op2        = node->gtGetOp2();
+    var_types simdType   = op1->TypeGet();
+    var_types targetType = node->TypeGet();
+    regNumber targetReg  = node->gtRegNum;
+
+    assert(targetReg != REG_NA);
+
+    genConsumeOperands(node);
+
+    regNumber op1Reg = op1->gtRegNum;
+
+    assert(genIsValidFloatReg(op1Reg));
+
+    emitAttr baseTypeSize = emitTypeSize(targetType);
+
+    int elements = emitTypeSize(simdType) / baseTypeSize;
+
+    auto emitSwCase = [&](int element) {
+        assert(element >= 0);
+        assert(element < elements);
+
+        if (varTypeIsFloating(targetType))
+        {
+            assert(genIsValidFloatReg(targetReg));
+            getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op1Reg, 0, element);
+        }
+        else if (varTypeIsUnsigned(targetType) || (baseTypeSize == EA_8BYTE))
+        {
+            assert(genIsValidIntReg(targetReg));
+            getEmitter()->emitIns_R_R_I(INS_umov, baseTypeSize, targetReg, op1Reg, element);
+        }
+        else
+        {
+            assert(genIsValidIntReg(targetReg));
+            getEmitter()->emitIns_R_R_I(INS_smov, baseTypeSize, targetReg, op1Reg, element);
+        }
+    };
+
+    if (op2->isContainedIntOrIImmed())
+    {
+        int element = (int)op2->AsIntConCommon()->IconValue();
+
+        emitSwCase(element);
+    }
+    else
+    {
+        regNumber elementReg = op2->gtRegNum;
+        regNumber tmpReg     = node->GetSingleTempReg();
+
+        genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
+    }
+
+    genProduceReg(node);
 }
 
 void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node)
index 4cc1f79..5376eb2 100644 (file)
@@ -144,6 +144,8 @@ void genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node);
 void genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node);
 void genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node);
 void genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node);
+template <typename HWIntrinsicSwitchCaseBody>
+void genHWIntrinsicSwitchTable(regNumber swReg, regNumber tmpReg, int swMax, HWIntrinsicSwitchCaseBody emitSwCase);
 #endif // defined(_TARGET_XARCH_)
 #endif // FEATURE_HW_INTRINSICS
 
index f95c89d..5133fb1 100644 (file)
@@ -3125,6 +3125,7 @@ protected:
 #ifdef _TARGET_ARM64_
     InstructionSet lookupHWIntrinsicISA(const char* className);
     NamedIntrinsic lookupHWIntrinsic(const char* className, const char* methodName);
+    bool impCheckImmediate(GenTree* immediateOp, unsigned int max);
     const HWIntrinsicInfo& getHWIntrinsicInfo(NamedIntrinsic);
 #endif // _TARGET_ARM64_
 #endif // FEATURE_HW_INTRINSICS
index 0ea77d3..08a59cb 100644 (file)
@@ -126,6 +126,14 @@ NamedIntrinsic Compiler::lookupHWIntrinsic(const char* className, const char* me
 }
 
 //------------------------------------------------------------------------
+// impCheckImmediate: check if immediate is const and in range for inlining
+//
+bool Compiler::impCheckImmediate(GenTree* immediateOp, unsigned int max)
+{
+    return immediateOp->IsCnsIntOrI() && (immediateOp->AsIntConCommon()->IconValue() < max);
+}
+
+//------------------------------------------------------------------------
 // impHWIntrinsic: dispatch hardware intrinsics to their own implementation
 // function
 //
@@ -142,30 +150,40 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
 {
-    GenTree*  retNode       = nullptr;
-    GenTree*  op1           = nullptr;
-    GenTree*  op2           = nullptr;
-    var_types simdType      = TYP_UNKNOWN;
-    var_types simdBaseType  = TYP_UNKNOWN;
-    unsigned  simdSizeBytes = 0;
-
-    // Instantiation type check
+    GenTree*             retNode       = nullptr;
+    GenTree*             op1           = nullptr;
+    GenTree*             op2           = nullptr;
+    GenTree*             op3           = nullptr;
+    CORINFO_CLASS_HANDLE simdClass     = nullptr;
+    var_types            simdType      = TYP_UNKNOWN;
+    var_types            simdBaseType  = TYP_UNKNOWN;
+    unsigned             simdSizeBytes = 0;
+
     switch (getHWIntrinsicInfo(intrinsic).form)
     {
         case HWIntrinsicInfo::SimdBinaryOp:
         case HWIntrinsicInfo::SimdUnaryOp:
-            simdBaseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSizeBytes);
-
-            if (simdBaseType == TYP_UNKNOWN)
-            {
-                return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
-            }
-            simdType = getSIMDTypeForSize(simdSizeBytes);
+            simdClass = sig->retTypeClass;
+            break;
+        case HWIntrinsicInfo::SimdExtractOp:
+            info.compCompHnd->getArgType(sig, sig->args, &simdClass);
             break;
         default:
             break;
     }
 
+    // Simd instantiation type check
+    if (simdClass != nullptr)
+    {
+        simdBaseType = getBaseTypeAndSizeOfSIMDType(simdClass, &simdSizeBytes);
+
+        if (simdBaseType == TYP_UNKNOWN)
+        {
+            return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
+        }
+        simdType = getSIMDTypeForSize(simdSizeBytes);
+    }
+
     switch (getHWIntrinsicInfo(intrinsic).form)
     {
         case HWIntrinsicInfo::IsSupported:
@@ -187,6 +205,17 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
 
             return gtNewSimdHWIntrinsicNode(simdType, op1, nullptr, intrinsic, simdBaseType, simdSizeBytes);
 
+        case HWIntrinsicInfo::SimdExtractOp:
+            if (!mustExpand && !impCheckImmediate(impStackTop(0).val, getSIMDVectorLength(simdSizeBytes, simdBaseType)))
+            {
+                // Immediate lane not constant or out of range
+                return nullptr;
+            }
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(simdType);
+
+            return gtNewScalarHWIntrinsicNode(JITtype2varType(sig->retType), op1, op2, intrinsic);
+
         default:
             JITDUMP("Not implemented hardware intrinsic form");
             assert(!"Unimplemented SIMD Intrinsic form");
index 642bbb2..4adb682 100644 (file)
@@ -828,6 +828,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
 #endif // FEATURE_SIMD
 
 #ifdef FEATURE_HW_INTRINSICS
+#include "hwintrinsicArm64.h"
 //----------------------------------------------------------------------------------------------
 // ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
 //
@@ -840,10 +841,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
     GenTree*       op1         = node->gtOp.gtOp1;
     GenTree*       op2         = node->gtOp.gtOp2;
 
-    switch (node->gtHWIntrinsicId)
+    switch (comp->getHWIntrinsicInfo(node->gtHWIntrinsicId).form)
     {
+        case HWIntrinsicInfo::SimdExtractOp:
+            if (op2->IsCnsIntOrI())
+            {
+                MakeSrcContained(node, op2);
+            }
+            break;
+
         default:
-            assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END));
             break;
     }
 }
index 2bfcfa2..ec1e7f0 100644 (file)
@@ -982,6 +982,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
 #endif // FEATURE_SIMD
 
 #ifdef FEATURE_HW_INTRINSICS
+#include "hwintrinsicArm64.h"
 //------------------------------------------------------------------------
 // TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
 //
@@ -999,6 +1000,20 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree,
     {
         info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp2);
     }
+
+    switch (compiler->getHWIntrinsicInfo(intrinsicID).form)
+    {
+        case HWIntrinsicInfo::SimdExtractOp:
+            if (!intrinsicTree->gtOp.gtOp2->isContained())
+            {
+                // We need a temp to create a switch table
+                info->internalIntCount = 1;
+                info->setInternalCandidates(this, allRegs(TYP_INT));
+            }
+            break;
+        default:
+            break;
+    }
 }
 #endif