From 8b8b8d62e31aa40c3626bdf8d25faca4fb6819bf Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Fri, 26 Jan 2018 17:24:54 -0500 Subject: [PATCH] [Arm64] Implement Simd.Extract --- src/jit/codegenarm64.cpp | 184 ++++++++++++++++++++++++++++++++++++++++++- src/jit/codegenlinear.h | 2 + src/jit/compiler.h | 1 + src/jit/hwintrinsicArm64.cpp | 59 ++++++++++---- src/jit/lowerarmarch.cpp | 11 ++- src/jit/lsraarm64.cpp | 15 ++++ 6 files changed, 254 insertions(+), 18 deletions(-) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index e418d16..112e77d 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -5059,9 +5059,191 @@ void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node) genProduceReg(node); } +//------------------------------------------------------------------------ +// genHWIntrinsicSwitchTable: +// +// Generate code for an immediate switch table +// +// In cases where an instruction only supports const immediate operands, we +// need to generate functionally correct code when the operand is not constant +// +// This is required by the HW Intrinsic design to handle: +// debugger calls +// reflection +// call backs +// +// Generated code implements a switch of this form +// +// switch (swReg) +// { +// case 0: +// ins0; // emitSwCase(0) +// break; +// case 1: +// ins1; // emitSwCase(1) +// break; +// ... +// ... +// ... +// case swMax - 1: +// insLast; // emitSwCase(swMax - 1) +// break; +// default: +// throw ArgumentOutOfRangeException +// } +// +// Generated code looks like: +// +// cmp swReg, #swMax +// b.hs ThrowArgumentOutOfRangeExceptionHelper +// adr tmpReg, labelFirst +// add tmpReg, tmpReg, swReg, LSL #3 +// b [tmpReg] +// labelFirst: +// ins0 +// b labelBreakTarget +// ins1 +// b labelBreakTarget +// ... +// ... +// ... +// insLast +// b labelBreakTarget +// labelBreakTarget: +// +// +// Arguments: +// swReg - register containing the switch case to execute +// tmpReg - temporary integer register for calculating the switch indirect branch target +// swMax - the number of switch cases. If swReg >= swMax throw SCK_ARG_RNG_EXCPN +// emitSwCase - function like argument taking an immediate value and emitting one instruction +// +// Return Value: +// None. +// +template +void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg, + regNumber tmpReg, + int swMax, + HWIntrinsicSwitchCaseBody emitSwCase) +{ + assert(swMax > 0); + assert(swMax <= 256); + + assert(genIsValidIntReg(tmpReg)); + assert(genIsValidIntReg(swReg)); + + BasicBlock* labelFirst = genCreateTempLabel(); + BasicBlock* labelBreakTarget = genCreateTempLabel(); + + // Detect and throw out of range exception + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, swReg, swMax); + + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + genJumpToThrowHlpBlk(jmpGEU, SCK_ARG_RNG_EXCPN); + + // Calculate switch target + labelFirst->bbFlags |= BBF_JMP_TARGET; + + // tmpReg = labelFirst + getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, labelFirst, tmpReg); + + // tmpReg = labelFirst + swReg * 8 + getEmitter()->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, tmpReg, swReg, 3, INS_OPTS_LSL); + + // br tmpReg + getEmitter()->emitIns_R(INS_br, EA_PTRSIZE, tmpReg); + + genDefineTempLabel(labelFirst); + for (int i = 0; i < swMax; ++i) + { + unsigned prevInsCount = getEmitter()->emitInsCount; + + emitSwCase(i); + + assert(getEmitter()->emitInsCount == prevInsCount + 1); + + inst_JMP(EJ_jmp, labelBreakTarget); + + assert(getEmitter()->emitInsCount == prevInsCount + 2); + } + genDefineTempLabel(labelBreakTarget); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdExtractOp: +// +// Produce code for a GT_HWIntrinsic node with form SimdExtractOp. +// +// Consumes one SIMD operand and one scalar +// +// The element index operand is typically a const immediate +// When it is not, a switch table is generated +// +// See genHWIntrinsicSwitchTable comments +// +// Arguments: +// node - the GT_HWIntrinsic node +// +// Return Value: +// None. +// void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node) { - NYI("HWIntrinsic form not implemented"); + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + var_types simdType = op1->TypeGet(); + var_types targetType = node->TypeGet(); + regNumber targetReg = node->gtRegNum; + + assert(targetReg != REG_NA); + + genConsumeOperands(node); + + regNumber op1Reg = op1->gtRegNum; + + assert(genIsValidFloatReg(op1Reg)); + + emitAttr baseTypeSize = emitTypeSize(targetType); + + int elements = emitTypeSize(simdType) / baseTypeSize; + + auto emitSwCase = [&](int element) { + assert(element >= 0); + assert(element < elements); + + if (varTypeIsFloating(targetType)) + { + assert(genIsValidFloatReg(targetReg)); + getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op1Reg, 0, element); + } + else if (varTypeIsUnsigned(targetType) || (baseTypeSize == EA_8BYTE)) + { + assert(genIsValidIntReg(targetReg)); + getEmitter()->emitIns_R_R_I(INS_umov, baseTypeSize, targetReg, op1Reg, element); + } + else + { + assert(genIsValidIntReg(targetReg)); + getEmitter()->emitIns_R_R_I(INS_smov, baseTypeSize, targetReg, op1Reg, element); + } + }; + + if (op2->isContainedIntOrIImmed()) + { + int element = (int)op2->AsIntConCommon()->IconValue(); + + emitSwCase(element); + } + else + { + regNumber elementReg = op2->gtRegNum; + regNumber tmpReg = node->GetSingleTempReg(); + + genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase); + } + + genProduceReg(node); } void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 4cc1f79..5376eb2 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -144,6 +144,8 @@ void genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node); void genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node); void genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node); void genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node); +template +void genHWIntrinsicSwitchTable(regNumber swReg, regNumber tmpReg, int swMax, HWIntrinsicSwitchCaseBody emitSwCase); #endif // defined(_TARGET_XARCH_) #endif // FEATURE_HW_INTRINSICS diff --git a/src/jit/compiler.h b/src/jit/compiler.h index f95c89d..5133fb1 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3125,6 +3125,7 @@ protected: #ifdef _TARGET_ARM64_ InstructionSet lookupHWIntrinsicISA(const char* className); NamedIntrinsic lookupHWIntrinsic(const char* className, const char* methodName); + bool impCheckImmediate(GenTree* immediateOp, unsigned int max); const HWIntrinsicInfo& getHWIntrinsicInfo(NamedIntrinsic); #endif // _TARGET_ARM64_ #endif // FEATURE_HW_INTRINSICS diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp index 0ea77d3..08a59cb 100644 --- a/src/jit/hwintrinsicArm64.cpp +++ b/src/jit/hwintrinsicArm64.cpp @@ -126,6 +126,14 @@ NamedIntrinsic Compiler::lookupHWIntrinsic(const char* className, const char* me } //------------------------------------------------------------------------ +// impCheckImmediate: check if immediate is const and in range for inlining +// +bool Compiler::impCheckImmediate(GenTree* immediateOp, unsigned int max) +{ + return immediateOp->IsCnsIntOrI() && (immediateOp->AsIntConCommon()->IconValue() < max); +} + +//------------------------------------------------------------------------ // impHWIntrinsic: dispatch hardware intrinsics to their own implementation // function // @@ -142,30 +150,40 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig, bool mustExpand) { - GenTree* retNode = nullptr; - GenTree* op1 = nullptr; - GenTree* op2 = nullptr; - var_types simdType = TYP_UNKNOWN; - var_types simdBaseType = TYP_UNKNOWN; - unsigned simdSizeBytes = 0; - - // Instantiation type check + GenTree* retNode = nullptr; + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + CORINFO_CLASS_HANDLE simdClass = nullptr; + var_types simdType = TYP_UNKNOWN; + var_types simdBaseType = TYP_UNKNOWN; + unsigned simdSizeBytes = 0; + switch (getHWIntrinsicInfo(intrinsic).form) { case HWIntrinsicInfo::SimdBinaryOp: case HWIntrinsicInfo::SimdUnaryOp: - simdBaseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSizeBytes); - - if (simdBaseType == TYP_UNKNOWN) - { - return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand); - } - simdType = getSIMDTypeForSize(simdSizeBytes); + simdClass = sig->retTypeClass; + break; + case HWIntrinsicInfo::SimdExtractOp: + info.compCompHnd->getArgType(sig, sig->args, &simdClass); break; default: break; } + // Simd instantiation type check + if (simdClass != nullptr) + { + simdBaseType = getBaseTypeAndSizeOfSIMDType(simdClass, &simdSizeBytes); + + if (simdBaseType == TYP_UNKNOWN) + { + return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand); + } + simdType = getSIMDTypeForSize(simdSizeBytes); + } + switch (getHWIntrinsicInfo(intrinsic).form) { case HWIntrinsicInfo::IsSupported: @@ -187,6 +205,17 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, return gtNewSimdHWIntrinsicNode(simdType, op1, nullptr, intrinsic, simdBaseType, simdSizeBytes); + case HWIntrinsicInfo::SimdExtractOp: + if (!mustExpand && !impCheckImmediate(impStackTop(0).val, getSIMDVectorLength(simdSizeBytes, simdBaseType))) + { + // Immediate lane not constant or out of range + return nullptr; + } + op2 = impPopStack().val; + op1 = impSIMDPopStack(simdType); + + return gtNewScalarHWIntrinsicNode(JITtype2varType(sig->retType), op1, op2, intrinsic); + default: JITDUMP("Not implemented hardware intrinsic form"); assert(!"Unimplemented SIMD Intrinsic form"); diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index 642bbb2..4adb682 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -828,6 +828,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsicArm64.h" //---------------------------------------------------------------------------------------------- // ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. // @@ -840,10 +841,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op1 = node->gtOp.gtOp1; GenTree* op2 = node->gtOp.gtOp2; - switch (node->gtHWIntrinsicId) + switch (comp->getHWIntrinsicInfo(node->gtHWIntrinsicId).form) { + case HWIntrinsicInfo::SimdExtractOp: + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(node, op2); + } + break; + default: - assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END)); break; } } diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 2bfcfa2..ec1e7f0 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -982,6 +982,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info) #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsicArm64.h" //------------------------------------------------------------------------ // TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. // @@ -999,6 +1000,20 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, { info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp2); } + + switch (compiler->getHWIntrinsicInfo(intrinsicID).form) + { + case HWIntrinsicInfo::SimdExtractOp: + if (!intrinsicTree->gtOp.gtOp2->isContained()) + { + // We need a temp to create a switch table + info->internalIntCount = 1; + info->setInternalCandidates(this, allRegs(TYP_INT)); + } + break; + default: + break; + } } #endif -- 2.7.4