[Arm64] Implement Simd.Insert
author Steve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Tue, 30 Jan 2018 21:11:39 +0000 (16:11 -0500)
committer Steve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Fri, 9 Feb 2018 22:51:37 +0000 (17:51 -0500)
src/jit/codegenarm64.cpp
src/jit/hwintrinsicArm64.cpp
src/jit/lowerarmarch.cpp
src/jit/lsraarm64.cpp

index cb881cb..610e572 100644 (file)
@@ -5301,9 +5301,107 @@ void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node)
     genProduceReg(node);
 }
 
+//------------------------------------------------------------------------
+// genHWIntrinsicSimdInsertOp:
+//
+// Produce code for a GT_HWIntrinsic node with form SimdInsertOp.
+//
+// Consumes one SIMD operand (op1), the element index (op2) and the value to insert (op3).
+// The target register receives a copy of op1 with the indexed element replaced by op3.
+//
+// The element index operand is typically a const immediate
+// When it is not, a switch table is generated
+// See genHWIntrinsicSwitchTable comments
+//
+// Arguments:
+//    node - the GT_HWIntrinsic node
+//
+// Return Value:
+//    None.
+//
 void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node)
 {
-    NYI("genHWIntrinsicSimdExtractOp not implemented");
+    GenTreeArgList* argList   = node->gtGetOp1()->AsArgList();
+    GenTree*        op1       = argList->Current();
+    GenTree*        op2       = argList->Rest()->Current();
+    GenTree*        op3       = argList->Rest()->Rest()->Current();
+    var_types       simdType  = op1->TypeGet();
+    var_types       baseType  = node->gtSIMDBaseType;
+    regNumber       targetReg = node->gtRegNum;
+
+    assert(targetReg != REG_NA);
+
+    genConsumeRegs(op1);
+    genConsumeRegs(op2);
+    genConsumeRegs(op3);
+
+    regNumber op1Reg = op1->gtRegNum;
+    // When op3 is a contained GetItem (asserted below), the source lane lives in its vector operand.
+    regNumber op3Reg = op3->isContained() ? op3->gtGetOp1()->gtRegNum : op3->gtRegNum;
+
+    assert(genIsValidFloatReg(targetReg));
+    assert(genIsValidFloatReg(op1Reg));
+
+    emitAttr baseTypeSize = emitTypeSize(baseType);
+    int      elements     = emitTypeSize(simdType) / baseTypeSize;
+
+    if (targetReg != op1Reg)
+    {
+        // Copy the whole source vector (not one element); only the selected lane is overwritten below.
+        emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
+        getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
+    }
+
+    if (op3->isContained())
+    {
+        // Handle vector element to vector element case
+        assert(genIsValidFloatReg(op3Reg));
+        assert(op2->isContainedIntOrIImmed());
+        assert(op3->OperIs(GT_HWIntrinsic));
+        assert(op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem);
+        assert(op3->gtGetOp2()->isContainedIntOrIImmed());
+
+        int element = (int)op2->AsIntConCommon()->IconValue();
+        int srcLane = (int)op3->gtGetOp2()->AsIntConCommon()->IconValue();
+
+        getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, srcLane);
+    }
+    else
+    {
+        // Handle scalar to vector element case
+        // TODO-ARM64-CQ handle containing scalar const where possible
+        auto emitSwCase = [&](int element) {
+            assert(element >= 0);
+            assert(element < elements);
+
+            if (varTypeIsFloating(baseType))
+            {
+                assert(genIsValidFloatReg(op3Reg));
+                getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, 0);
+            }
+            else
+            {
+                assert(genIsValidIntReg(op3Reg));
+                getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, op3Reg, element);
+            }
+        };
+
+        if (op2->isContainedIntOrIImmed())
+        {
+            int element = (int)op2->AsIntConCommon()->IconValue();
+
+            emitSwCase(element);
+        }
+        else
+        {
+            regNumber elementReg = op2->gtRegNum;
+            regNumber tmpReg     = node->GetSingleTempReg();
+
+            genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
+        }
+    }
+
+    genProduceReg(node);
 }
 
 //------------------------------------------------------------------------
index 2bd1c4f..394ab21 100644 (file)
@@ -162,6 +162,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
     switch (getHWIntrinsicInfo(intrinsic).form)
     {
         case HWIntrinsicInfo::SimdBinaryOp:
+        case HWIntrinsicInfo::SimdInsertOp:
         case HWIntrinsicInfo::SimdSelectOp:
         case HWIntrinsicInfo::SimdSetAllOp:
         case HWIntrinsicInfo::SimdUnaryOp:
@@ -233,6 +234,18 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
 
             return gtNewScalarHWIntrinsicNode(JITtype2varType(sig->retType), op1, op2, intrinsic);
 
+        case HWIntrinsicInfo::SimdInsertOp:
+            if (!mustExpand && !impCheckImmediate(impStackTop(1).val, getSIMDVectorLength(simdSizeBytes, simdBaseType)))
+            {
+                // Immediate lane not constant or out of range
+                return nullptr;
+            }
+            op3 = impPopStack().val;
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(simdType);
+
+            return gtNewSimdHWIntrinsicNode(simdType, op1, op2, op3, intrinsic, simdBaseType, simdSizeBytes);
+
         default:
             JITDUMP("Not implemented hardware intrinsic form");
             assert(!"Unimplemented SIMD Intrinsic form");
index a914c61..4d7632f 100644 (file)
@@ -888,9 +888,17 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
 //
 void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
 {
-    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
-    GenTree*       op1         = node->gtOp.gtOp1;
-    GenTree*       op2         = node->gtOp.gtOp2;
+    NamedIntrinsic  intrinsicID = node->gtHWIntrinsicId;
+    GenTreeArgList* argList     = nullptr;
+    GenTree*        op1         = node->gtOp.gtOp1;
+    GenTree*        op2         = node->gtOp.gtOp2;
+
+    if (op1->OperIs(GT_LIST))
+    {
+        argList = op1->AsArgList();
+        op1     = argList->Current();
+        op2     = argList->Rest()->Current();
+    }
 
     switch (comp->getHWIntrinsicInfo(node->gtHWIntrinsicId).form)
     {
@@ -901,6 +909,28 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
             }
             break;
 
+        case HWIntrinsicInfo::SimdInsertOp:
+            if (op2->IsCnsIntOrI())
+            {
+                MakeSrcContained(node, op2);
+
+                GenTree* op3 = argList->Rest()->Rest()->Current();
+
+#if NYI_ARM64_HW_INTRINSIC_CONTAINMENT
+                // TODO-ARM64-CQ Support containing NI_ARM64_SIMD_GetItem (vector element to element move)
+                if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem))
+                {
+                    ContainCheckHWIntrinsic(op3->AsHWIntrinsic());
+
+                    if (op3->gtOp.gtOp2->isContained())
+                    {
+                        MakeSrcContained(node, op3);
+                    }
+                }
+#endif // NYI_ARM64_HW_INTRINSIC_CONTAINMENT
+            }
+            break;
+
         default:
             break;
     }
index 82ca013..d74c080 100644 (file)
@@ -952,6 +952,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
     switch (compiler->getHWIntrinsicInfo(intrinsicID).form)
     {
         case HWIntrinsicInfo::SimdExtractOp:
+        case HWIntrinsicInfo::SimdInsertOp:
             if (!op2->isContained())
             {
                 // We need a temp to create a switch table