From 1c3336ef7c0d832f68922572a00901dba1d15bbb Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Tue, 28 Nov 2017 19:46:43 -0500 Subject: [PATCH] [Arm64] SIMDIntrinsicGetItem contain mem op1 --- src/jit/codegenarm64.cpp | 106 +++++++++++++++++++++++++++++++++++++---------- src/jit/lowerarmarch.cpp | 12 +++++- src/jit/lsraarm64.cpp | 34 +++++++++++---- 3 files changed, 121 insertions(+), 31 deletions(-) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index da9c7bf..e63d961 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -4578,13 +4578,9 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // - the source of SIMD type (op1) // - the index of the value to be returned. genConsumeOperands(simdNode); - regNumber srcReg = op1->gtRegNum; - // TODO-ARM64-CQ Optimize SIMDIntrinsicGetItem - // Optimize the case of op1 is in memory and trying to access ith element. - assert(op1->isUsedFromReg()); - - emitAttr baseTypeSize = emitTypeSize(baseType); + emitAttr baseTypeSize = emitTypeSize(baseType); + unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize)); if (op2->IsCnsIntOrI()) { @@ -4592,34 +4588,102 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) ssize_t index = op2->gtIntCon.gtIconVal; + // We only need to generate code for the get if the index is valid + // If the index is invalid, previously generated for the range check will throw if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index)) { - // Only generate code for the get if the index is valid - // Otherwise generated code will throw - getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index); + if (op1->isContained()) + { + int offset = (int)index * genTypeSize(baseType); + instruction ins = ins_Load(baseType); + baseTypeSize = varTypeIsFloating(baseType) + ? baseTypeSize + : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize); + + assert(!op1->isUsedFromReg()); + + if (op1->OperIsLocal()) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + + getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset); + } + else + { + assert(op1->OperGet() == GT_IND); + + GenTree* addr = op1->AsIndir()->Addr(); + assert(!addr->isContained()); + regNumber baseReg = addr->gtRegNum; + + // ldr targetReg, [baseReg, #offset] + getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset); + } + } + else + { + assert(op1->isUsedFromReg()); + regNumber srcReg = op1->gtRegNum; + + // mov targetReg, srcReg[#index] + getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index); + } } } else { - unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; - noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM); + assert(!op2->isContained()); + regNumber baseReg = REG_NA; regNumber indexReg = op2->gtRegNum; - regNumber tmpReg = simdNode->ExtractTempReg(); - assert(genIsValidIntReg(tmpReg)); - assert(tmpReg != indexReg); + if (op1->isContained()) + { + // Optimize the case of op1 is in memory and trying to access ith element. + assert(!op1->isUsedFromReg()); + if (op1->OperIsLocal()) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + + baseReg = simdNode->ExtractTempReg(); + + // Load the address of varNum + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0); + } + else + { + // Require GT_IND addr to be not contained. + assert(op1->OperGet() == GT_IND); + + GenTree* addr = op1->AsIndir()->Addr(); + assert(!addr->isContained()); - unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize)); + baseReg = addr->gtRegNum; + } + } + else + { + assert(op1->isUsedFromReg()); + regNumber srcReg = op1->gtRegNum; - // Load the address of simdInitTempVarNum - getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, tmpReg, simdInitTempVarNum, 0); + unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; + noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM); + + baseReg = simdNode->ExtractTempReg(); + + // Load the address of simdInitTempVarNum + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0); + + // Store the vector to simdInitTempVarNum + getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg); + } - // Store the vector to simdInitTempVarNum - getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, tmpReg); + assert(genIsValidIntReg(indexReg)); + assert(genIsValidIntReg(baseReg)); + assert(baseReg != indexReg); - // Load item at simdInitTempVarNum[index] - getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, tmpReg, indexReg, INS_OPTS_LSL, + // Load item at baseReg[index] + getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL, baseTypeScale); } diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index 2316501..5aa3ff1 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -785,12 +785,11 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) case SIMDIntrinsicGetItem: { - // TODO-ARM64-CQ Support containing op1 memory ops - // This implements get_Item method. The sources are: // - the source SIMD struct // - index (which element to get) // The result is baseType of SIMD struct. + op1 = simdNode->gtOp.gtOp1; op2 = simdNode->gtOp.gtOp2; // If the index is a constant, mark it as contained. @@ -798,6 +797,15 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { MakeSrcContained(simdNode, op2); } + + if (IsContainableMemoryOp(op1)) + { + MakeSrcContained(simdNode, op1); + if (op1->OperGet() == GT_IND) + { + op1->AsIndir()->Addr()->ClearContained(); + } + } break; } diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 22fb56e..7c9b601 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -815,20 +815,38 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) break; case SIMDIntrinsicGetItem: + op1 = simdTree->gtGetOp1(); + op2 = simdTree->gtGetOp2(); + // We have an object and an item, which may be contained. - info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2; + info->srcCount = (op2->isContained() ? 1 : 2); - if (!simdTree->gtGetOp2()->IsCnsIntOrI()) + if (op1->isContained()) { - // If the index is not a constant, we will need a general purpose register - info->internalIntCount = 1; + // Although GT_IND of TYP_SIMD12 reserves an internal register for reading 4 and 8 bytes from memory + // and assembling them into target reg, it is not required in this case. + op1->gtLsraInfo.internalIntCount = 0; + op1->gtLsraInfo.internalFloatCount = 0; + info->srcCount -= 1; + info->srcCount += GetOperandSourceCount(op1); + } - // If the index is not a constant, we will use the SIMD temp location to store the vector. - compiler->getSIMDInitTempVarNum(); + if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) + { + // If the index is not a constant and not contained or is a local + // we will need a general purpose register to calculate the address + info->internalIntCount = 1; // internal register must not clobber input index - simdTree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true; - info->hasDelayFreeSrc = true; + op2->gtLsraInfo.isDelayFree = true; + info->hasDelayFreeSrc = true; + } + + if (!op2->IsCnsIntOrI() && (!op1->isContained())) + { + // If vector is not already in memory (contained) and the index is not a constant, + // we will use the SIMD temp location to store the vector. + compiler->getSIMDInitTempVarNum(); } break; -- 2.7.4