From cf1e2d8e4191250d461ea09d40f446fc5a71a050 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Tue, 30 Jan 2018 16:11:39 -0500 Subject: [PATCH] [Arm64] Implement Simd.Insert --- src/jit/codegenarm64.cpp | 100 ++++++++++++++++++++++++++++++++++++++++++- src/jit/hwintrinsicArm64.cpp | 13 ++++++ src/jit/lowerarmarch.cpp | 36 ++++++++++++++-- src/jit/lsraarm64.cpp | 1 + 4 files changed, 146 insertions(+), 4 deletions(-) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index cb881cb..610e572 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -5301,9 +5301,107 @@ void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node) genProduceReg(node); } +//------------------------------------------------------------------------ +// genHWIntrinsicSimdInsertOp: +// +// Produce code for a GT_HWIntrinsic node with form SimdInsertOp. +// +// Consumes one SIMD operand and two scalars +// +// The element index operand is typically a const immediate +// When it is not, a switch table is generated +// +// See genHWIntrinsicSwitchTable comments +// +// Arguments: +// node - the GT_HWIntrinsic node +// +// Return Value: +// None. +// void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) { - NYI("genHWIntrinsicSimdExtractOp not implemented"); + GenTreeArgList* argList = node->gtGetOp1()->AsArgList(); + GenTree* op1 = argList->Current(); + GenTree* op2 = argList->Rest()->Current(); + GenTree* op3 = argList->Rest()->Rest()->Current(); + var_types simdType = op1->TypeGet(); + var_types baseType = node->gtSIMDBaseType; + regNumber targetReg = node->gtRegNum; + + assert(targetReg != REG_NA); + + genConsumeRegs(op1); + genConsumeRegs(op2); + genConsumeRegs(op3); + + regNumber op1Reg = op1->gtRegNum; + regNumber op3Reg = op3->gtRegNum; + + assert(genIsValidFloatReg(targetReg)); + assert(genIsValidFloatReg(op1Reg)); + + emitAttr baseTypeSize = emitTypeSize(baseType); + + int elements = emitTypeSize(simdType) / baseTypeSize; + + if (targetReg != op1Reg) + { + bool is16Byte = (node->gtSIMDSize > 8); + emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE; + getEmitter()->emitIns_R_R(INS_mov, baseTypeSize, targetReg, op1Reg); + } + + if (op3->isContained()) + { + // Handle vector element to vector element case + assert(genIsValidFloatReg(op3Reg)); + assert(op2->isContainedIntOrIImmed()); + assert(op3->OperIs(GT_HWIntrinsic)); + assert(op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem); + assert(op3->gtGetOp2()->isContainedIntOrIImmed()); + + int element = (int)op2->AsIntConCommon()->IconValue(); + int srcLane = (int)op3->gtGetOp2()->AsIntConCommon()->IconValue(); + + getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, srcLane); + } + else + { + // Handle scalar to vector element case + // TODO-ARM64-CQ handle containing scalar const where possible + auto emitSwCase = [&](int element) { + assert(element >= 0); + assert(element < elements); + + if (varTypeIsFloating(baseType)) + { + assert(genIsValidFloatReg(op3Reg)); + getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, 0); + } + else + { + assert(genIsValidIntReg(op3Reg)); + getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, op3Reg, element); + } + }; + + if (op2->isContainedIntOrIImmed()) + { + int element = (int)op2->AsIntConCommon()->IconValue(); + + emitSwCase(element); + } + else + { + regNumber elementReg = op2->gtRegNum; + regNumber tmpReg = node->GetSingleTempReg(); + + genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase); + } + } + + genProduceReg(node); } //------------------------------------------------------------------------ diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp index 2bd1c4f..394ab21 100644 --- a/src/jit/hwintrinsicArm64.cpp +++ b/src/jit/hwintrinsicArm64.cpp @@ -162,6 +162,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, switch (getHWIntrinsicInfo(intrinsic).form) { case HWIntrinsicInfo::SimdBinaryOp: + case HWIntrinsicInfo::SimdInsertOp: case HWIntrinsicInfo::SimdSelectOp: case HWIntrinsicInfo::SimdSetAllOp: case HWIntrinsicInfo::SimdUnaryOp: @@ -233,6 +234,18 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, return gtNewScalarHWIntrinsicNode(JITtype2varType(sig->retType), op1, op2, intrinsic); + case HWIntrinsicInfo::SimdInsertOp: + if (!mustExpand && !impCheckImmediate(impStackTop(1).val, getSIMDVectorLength(simdSizeBytes, simdBaseType))) + { + // Immediate lane not constant or out of range + return nullptr; + } + op3 = impPopStack().val; + op2 = impPopStack().val; + op1 = impSIMDPopStack(simdType); + + return gtNewSimdHWIntrinsicNode(simdType, op1, op2, op3, intrinsic, simdBaseType, simdSizeBytes); + default: JITDUMP("Not implemented hardware intrinsic form"); assert(!"Unimplemented SIMD Intrinsic form"); diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index a914c61..4d7632f 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -888,9 +888,17 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; - GenTree* op1 = node->gtOp.gtOp1; - GenTree* op2 = node->gtOp.gtOp2; + NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; + GenTreeArgList* argList = nullptr; + GenTree* op1 = node->gtOp.gtOp1; + GenTree* op2 = node->gtOp.gtOp2; + + if (op1->OperIs(GT_LIST)) + { + argList = op1->AsArgList(); + op1 = argList->Current(); + op2 = argList->Rest()->Current(); + } switch (comp->getHWIntrinsicInfo(node->gtHWIntrinsicId).form) { @@ -901,6 +909,28 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; + case HWIntrinsicInfo::SimdInsertOp: + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(node, op2); + + GenTree* op3 = argList->Rest()->Rest()->Current(); + +#if NYI_ARM64_HW_INTRINSIC_CONTAINMENT + // TODO-ARM64-CQ Support containing NI_ARM64_SIMD_GetItem (vector element to element move) + if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem)) + { + ContainCheckHWIntrinsic(op3->AsHWIntrinsic()); + + if (op3->gtOp.gtOp2->isContained()) + { + MakeSrcContained(node, op3); + } + } +#endif // NYI_ARM64_HW_INTRINSIC_CONTAINMENT + } + break; + default: break; } diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 82ca013..d74c080 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -952,6 +952,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) switch (compiler->getHWIntrinsicInfo(intrinsicID).form) { case HWIntrinsicInfo::SimdExtractOp: + case HWIntrinsicInfo::SimdInsertOp: if (!op2->isContained()) { // We need a temp to create a switch table -- 2.7.4