From: Bruce Forstall Date: Thu, 13 Apr 2017 00:16:46 +0000 (-0700) Subject: Introduce API for codegen getting temp registers from gtRsvdRegs X-Git-Tag: submit/tizen/20210909.063632~11030^2~7256^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a07bddd49d411523e2b496bcf26d0dbeb7213183;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Introduce API for codegen getting temp registers from gtRsvdRegs LSRA puts a set of temporary registers needed by a node in the gtRsvdRegs register mask. Currently, every codegen function that needs to use a temporary register manually manipulates this mask. Introduce a few functions to make this simpler and more regular: 1. GetSingleTempReg() // Gets a temp; asserts there is exactly one temp reg. 2. ExtractTempReg() // Gets the lowest temp, removes it from gtRsvdRegs so subsequent calls don't see it. 3. AvailableTempRegCount() // Returns a count of available temp registers. All take an optional register mask, so you can extract a register from just the set specified by the mask (typically RBM_ALLINT or RBM_ALLFLOAT). 
Commit migrated from https://github.com/dotnet/coreclr/commit/950b0904511058e3d0254009573535e576fe4574 --- diff --git a/src/coreclr/src/jit/codegenarm.cpp b/src/coreclr/src/jit/codegenarm.cpp index 41bd804..84737ab 100644 --- a/src/coreclr/src/jit/codegenarm.cpp +++ b/src/coreclr/src/jit/codegenarm.cpp @@ -166,9 +166,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre if (targetType == TYP_FLOAT) { // Get a temp integer register - regMaskTP tmpRegMask = tree->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - assert(tmpReg != REG_NA); + regNumber tmpReg = tree->GetSingleTempReg(); float f = forceCastToFloat(constValue); genSetRegToIcon(tmpReg, *((int*)(&f))); @@ -181,15 +179,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre unsigned* cv = (unsigned*)&constValue; // Get two temp integer registers - regMaskTP tmpRegsMask = tree->gtRsvdRegs; - regMaskTP tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg1 = genRegNumFromMask(tmpRegMask); - assert(tmpReg1 != REG_NA); - - tmpRegsMask &= ~genRegMask(tmpReg1); // remove the bit for 'tmpReg1' - tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg2 = genRegNumFromMask(tmpRegMask); - assert(tmpReg2 != REG_NA); + regNumber tmpReg1 = tree->ExtractTempReg(); + regNumber tmpReg2 = tree->GetSingleTempReg(); genSetRegToIcon(tmpReg1, cv[0]); genSetRegToIcon(tmpReg2, cv[1]); @@ -1129,7 +1120,6 @@ void CodeGen::genLclHeap(GenTreePtr tree) // Also it used as temporary register in code generation // for storing allocation size regNumber regCnt = tree->gtRegNum; - regMaskTP tmpRegsMask = tree->gtRsvdRegs; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); @@ -1198,10 +1188,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym - 
assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~pspSymRegMask; - pspSymReg = genRegNumFromMask(pspSymRegMask); + pspSymReg = tree->ExtractTempReg(); getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif @@ -1266,12 +1253,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // Since we have to zero out the allocated memory AND ensure that RSP is always valid // by tickling the pages, we will just push 0's on the stack. - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) >= 1); - - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regNumber regTmp = genRegNumFromMask(regCntMask); + regNumber regTmp = tree->ExtractTempReg(); instGen_Set_Reg_To_Zero(EA_PTRSIZE, regTmp); // Loop: @@ -1323,9 +1305,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // // Setup the regTmp - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + regNumber regTmp = tree->ExtractTempReg(); BasicBlock* loop = genCreateTempLabel(); BasicBlock* done = genCreateTempLabel(); diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 7f98221..286128c 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -1441,9 +1441,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre else { // Get a temp integer register to compute long address. - regMaskTP addrRegMask = tree->gtRsvdRegs; - regNumber addrReg = genRegNumFromMask(addrRegMask); - noway_assert(addrReg != REG_NA); + regNumber addrReg = tree->GetSingleTempReg(); // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. 
@@ -2854,7 +2852,6 @@ void CodeGen::genLclHeap(GenTreePtr tree) noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); regNumber targetReg = tree->gtRegNum; - regMaskTP tmpRegsMask = tree->gtRsvdRegs; regNumber regCnt = REG_NA; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); @@ -2923,17 +2920,16 @@ void CodeGen::genLclHeap(GenTreePtr tree) // since we don't need any internal registers. if (!hasPspSym && compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); if (regCnt != targetReg) + { inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); + } } // Align to STACK_ALIGN @@ -2950,10 +2946,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~pspSymRegMask; - pspSymReg = genRegNumFromMask(pspSymRegMask); + pspSymReg = tree->ExtractTempReg(); getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif @@ -3021,15 +3014,12 @@ void CodeGen::genLclHeap(GenTreePtr tree) assert(regCnt == REG_NA); if (!hasPspSym && compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); } genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? 
TYP_INT : TYP_LONG); } @@ -3094,9 +3084,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // // Setup the regTmp - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + regNumber regTmp = tree->GetSingleTempReg(); BasicBlock* loop = genCreateTempLabel(); BasicBlock* done = genCreateTempLabel(); @@ -3391,13 +3379,11 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) unsigned offset = 0; // Grab the integer temp register to emit the loads and stores. - regMaskTP tmpMask = genFindLowestBit(cpBlkNode->gtRsvdRegs & RBM_ALLINT); - regNumber tmpReg = genRegNumFromMask(tmpMask); + regNumber tmpReg = cpBlkNode->ExtractTempReg(RBM_ALLINT); if (size >= 2 * REGSIZE_BYTES) { - regMaskTP tmp2Mask = cpBlkNode->gtRsvdRegs & RBM_ALLINT & ~tmpMask; - regNumber tmp2Reg = genRegNumFromMask(tmp2Mask); + regNumber tmp2Reg = cpBlkNode->ExtractTempReg(RBM_ALLINT); size_t slots = size / (2 * REGSIZE_BYTES); @@ -3498,13 +3484,8 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); // Temp register used to perform the sequence of loads and stores. 
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs); - -#ifdef DEBUG - assert(cpObjNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(cpObjNode->gtRsvdRegs) == 1); + regNumber tmpReg = cpObjNode->GetSingleTempReg(); assert(genIsValidIntReg(tmpReg)); -#endif // DEBUG unsigned slots = cpObjNode->gtSlots; emitter* emit = getEmitter(); @@ -3571,7 +3552,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // load the ip-relative offset (which is relative to start of fgFirstBB) getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL); @@ -4022,9 +4003,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) if (offset != 0) { - regMaskTP tmpRegMask = lea->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = lea->GetSingleTempReg(); if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE)) { @@ -4041,7 +4020,6 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) // Then compute target reg from [tmpReg + offset] emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset); - ; } else // large offset { @@ -4091,9 +4069,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) else { // We require a tmpReg to hold the offset - regMaskTP tmpRegMask = lea->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = lea->GetSingleTempReg(); // First load tmpReg with the large offset constant instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); @@ -4484,9 +4460,8 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) emitter* emit = getEmitter(); // Extract exponent into a register. 
- regNumber intReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber intReg = treeNode->GetSingleTempReg(); regNumber fpReg = genConsumeReg(op1); - assert(intReg != REG_NA); emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), intReg, fpReg); emit->emitIns_R_R_I(INS_lsr, emitTypeSize(targetType), intReg, intReg, shiftAmount); diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index af9fdfe..e887a8c 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -212,13 +212,10 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // We will copy this struct to the stack, possibly using a ldp instruction // Setup loReg and hiReg from the internal registers that we reserved in lower. // - regNumber loReg = REG_NA; - regNumber hiReg = REG_NA; + regNumber loReg = treeNode->ExtractTempReg(); + regNumber hiReg = treeNode->GetSingleTempReg(); regNumber addrReg = REG_NA; - // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers - genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); - GenTreeLclVarCommon* varNode = nullptr; GenTreePtr addrNode = nullptr; @@ -709,15 +706,19 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) regNumber tgtReg = arrIndex->gtRegNum; noway_assert(tgtReg != REG_NA); - // We will use a temp register to load the lower bound and dimension size values + // We will use a temp register to load the lower bound and dimension size values. 
// - regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set - tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask' - - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask - noway_assert(tmpReg != REG_NA); - + // This should be simply: + // regNumber tmpReg = arrIndex->GetSingleTempReg(); + // + // However, since LSRA might give us an internal temp register that is the same as the dst + // register, and the codegen here reuses the temp register after a definition of the target + // register, we requested two internal registers. If one is the target register, we simply + // use the other one. We can use ExtractTempReg() since it only asserts that there is at + // least one available temporary register (not that there is exactly one, for instance). + // Here, masking out tgtReg, there will be either 1 or 2. + + regNumber tmpReg = arrIndex->ExtractTempReg(~genRegMask(tgtReg)); assert(tgtReg != tmpReg); unsigned dim = arrIndex->gtCurrDim; @@ -773,9 +774,7 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) noway_assert(indexReg != REG_NA); noway_assert(arrReg != REG_NA); - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = arrOffset->GetSingleTempReg(); unsigned dim = arrOffset->gtCurrDim; unsigned rank = arrOffset->gtArrRank; @@ -1073,12 +1072,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) const regNumber regThis = genGetThisArgReg(call); #if defined(_TARGET_ARM_) - regMaskTP tempMask = genFindLowestBit(call->gtRsvdRegs); - const regNumber tmpReg = genRegNumFromMask(tempMask); - if (genCountBits(call->gtRsvdRegs) > 1) - { - call->gtRsvdRegs &= ~tempMask; - } + const regNumber tmpReg = call->ExtractTempReg(); getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0); 
#elif defined(_TARGET_ARM64_) getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0); @@ -1239,7 +1233,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) #ifdef _TARGET_ARM_ if (!arm_Valid_Imm_For_BL((ssize_t)addr)) { - regNumber tmpReg = genRegNumFromMask(call->gtRsvdRegs); + regNumber tmpReg = call->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr); genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg); } @@ -1388,7 +1382,7 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) regNumber sourceReg = castOp->gtRegNum; // For Long to Int conversion we will have a reserved integer register to hold the immediate mask - regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = (treeNode->AvailableTempRegCount() == 0) ? REG_NA : treeNode->GetSingleTempReg(); assert(genIsValidIntReg(targetReg)); assert(genIsValidIntReg(sourceReg)); diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index 89d6a4c..5b97bca 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -557,30 +557,6 @@ regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree) return regMask; } -//------------------------------------------------------------------------ -// getRegistersFromMask: Given a register mask return the two registers -// specified by the mask. 
-// -// Arguments: -// regPairMask: a register mask that has exactly two bits set -// Return values: -// pLoReg: the address of where to write the first register -// pHiReg: the address of where to write the second register -// -void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg) -{ - assert(genCountBits(regPairMask) == 2); - - regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask - regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in tmpRegMask - - regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask - regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask - - *pLoReg = loReg; - *pHiReg = hiReg; -} - // The given lclVar is either going live (being born) or dying. // It might be both going live and dying (that is, it is a dead store) under MinOpts. // Update regSet.rsMaskVars accordingly. diff --git a/src/coreclr/src/jit/codegeninterface.h b/src/coreclr/src/jit/codegeninterface.h index 08d854e..a993ddb 100644 --- a/src/coreclr/src/jit/codegeninterface.h +++ b/src/coreclr/src/jit/codegeninterface.h @@ -149,8 +149,6 @@ protected: regMaskTP genLiveMask(VARSET_VALARG_TP liveSet); #endif - void genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg); - // The following property indicates whether the current method sets up // an explicit stack frame or not. 
private: diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 23c2a18..a7881de 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -617,7 +617,7 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node) // xor edx, edx // div divisor->gtRegNum // mov eax, temp - const regNumber tempReg = genRegNumFromMask(node->gtRsvdRegs); + const regNumber tempReg = node->GetSingleTempReg(); inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT); inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT); instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); @@ -1924,9 +1924,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) inst_JMP(jmpEqual, skipLabel); // emit the call to the EE-helper that stops for GC (or other reasons) - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); assert(genIsValidIntReg(tmpReg)); genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg); @@ -2431,12 +2429,11 @@ void CodeGen::genLclHeap(GenTreePtr tree) GenTreePtr size = tree->gtOp.gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); - regNumber targetReg = tree->gtRegNum; - regMaskTP tmpRegsMask = tree->gtRsvdRegs; - regNumber regCnt = REG_NA; - var_types type = genActualType(size->gtType); - emitAttr easz = emitTypeSize(type); - BasicBlock* endLabel = nullptr; + regNumber targetReg = tree->gtRegNum; + regNumber regCnt = REG_NA; + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + BasicBlock* endLabel = nullptr; #ifdef DEBUG // Verify ESP @@ -2494,15 +2491,12 @@ void CodeGen::genLclHeap(GenTreePtr tree) // since we don't need any internal registers. 
if (compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); if (regCnt != targetReg) { // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. @@ -2594,15 +2588,12 @@ void CodeGen::genLclHeap(GenTreePtr tree) assert(regCnt == REG_NA); if (compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); } } @@ -2717,9 +2708,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // This is a harmless trick to avoid the emitter trying to track the // decrement of the ESP - we do the subtraction in another reg instead // of adjusting ESP directly. - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + regNumber regTmp = tree->GetSingleTempReg(); inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); @@ -2922,13 +2911,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) // Perform an unroll using SSE2 loads and stores. 
if (size >= XMM_REGSIZE_BYTES) { - regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs); - -#ifdef DEBUG - assert(initBlkNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(initBlkNode->gtRsvdRegs) == 1); + regNumber tmpReg = initBlkNode->GetSingleTempReg(); assert(genIsValidFloatReg(tmpReg)); -#endif // DEBUG if (initVal->gtIntCon.gtIconVal != 0) { @@ -3122,8 +3106,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if (size >= XMM_REGSIZE_BYTES) { - assert(cpBlkNode->gtRsvdRegs != RBM_NONE); - regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT); + regNumber xmmReg = cpBlkNode->GetSingleTempReg(RBM_ALLFLOAT); assert(genIsValidFloatReg(xmmReg)); size_t slots = size / XMM_REGSIZE_BYTES; @@ -3144,7 +3127,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if ((size & 0xf) != 0) { // Grab the integer temp register to emit the remaining loads and stores. - regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT); + regNumber tmpReg = cpBlkNode->GetSingleTempReg(RBM_ALLINT); if ((size & 8) != 0) { @@ -3390,22 +3373,22 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) // less than 16 bytes, we will just be using pushes if (size >= 8) { - xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); longTmpReg = xmmTmpReg; } if ((size & 0x7) != 0) { - intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); } #else // !_TARGET_X86_ // On x64 we use an XMM register only for 16-byte chunks. 
if (size >= XMM_REGSIZE_BYTES) { - xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); } if ((size & 0xf) != 0) { - intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); longTmpReg = intTmpReg; } #endif // !_TARGET_X86_ @@ -3418,7 +3401,6 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) #ifdef _TARGET_X86_ assert(!m_pushStkArg); #endif // _TARGET_X86_ - assert(putArgNode->gtRsvdRegs != RBM_NONE); size_t slots = size / XMM_REGSIZE_BYTES; assert(putArgNode->gtGetOp1()->isContained()); @@ -3578,7 +3560,6 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG - bool isRepMovspUsed = false; assert(dstAddr->isUsedFromReg()); @@ -3629,13 +3610,9 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) { -#ifdef DEBUG // If the destination of the CpObj is on the stack, make sure we allocated // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively). 
assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - regNumber tmpReg = REG_RCX; - isRepMovspUsed = true; -#endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); instGen(INS_r_movsp); @@ -3685,13 +3662,10 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) } else { -#ifdef DEBUG // Otherwise, we can save code-size and improve CQ by emitting // rep movsp (alias for movsd/movsq for x86/x64) assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - regNumber tmpReg = REG_RCX; - isRepMovspUsed = true; -#endif // DEBUG + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); instGen(INS_r_movsp); } @@ -3767,7 +3741,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // load the ip-relative offset (which is relative to start of fgFirstBB) getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0); @@ -4057,8 +4031,7 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) offsetReg = genConsumeReg(offsetNode); // We will use a temp register for the offset*scale+effectiveIndex computation. - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - tmpReg = genRegNumFromMask(tmpRegMask); + tmpReg = arrOffset->GetSingleTempReg(); } else { @@ -6664,26 +6637,16 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) } else { - regNumber tmpReg = REG_NA; - - if (needScratchReg) - { - // We need an additional temp register - // Make sure we have exactly one allocated. 
- assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); - } - // When we are converting from unsigned or to unsigned, we // will only have to check for any bits set using 'typeMask' if (isUnsignedSrc || isUnsignedDst) { if (needScratchReg) { + regNumber tmpReg = treeNode->GetSingleTempReg(); inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Thow if result shift is non-zero + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if result shift is non-zero } else { @@ -7089,9 +7052,7 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) regNumber targetReg = treeNode->gtRegNum; // Extract exponent into a register. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); genConsumeReg(op1); @@ -7397,10 +7358,7 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode) } // We need an additional register for bitmask. - // Make sure we have one allocated. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // Move operand into targetReg only if the reg reserved for // internal purpose is not the same as targetReg. 
@@ -7716,17 +7674,17 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) unsigned prevFieldOffset = currentOffset; regNumber intTmpReg = REG_NA; regNumber simdTmpReg = REG_NA; - if (putArgStk->gtRsvdRegs != RBM_NONE) + if (putArgStk->AvailableTempRegCount() != 0) { regMaskTP rsvdRegs = putArgStk->gtRsvdRegs; if ((rsvdRegs & RBM_ALLINT) != 0) { - intTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLINT); + intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT); assert(genIsValidIntReg(intTmpReg)); } if ((rsvdRegs & RBM_ALLFLOAT) != 0) { - simdTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLFLOAT); + simdTmpReg = putArgStk->GetSingleTempReg(RBM_ALLFLOAT); assert(genIsValidFloatReg(simdTmpReg)); } assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1)); diff --git a/src/coreclr/src/jit/emitarm.cpp b/src/coreclr/src/jit/emitarm.cpp index 53ee88b..30eaca9 100644 --- a/src/coreclr/src/jit/emitarm.cpp +++ b/src/coreclr/src/jit/emitarm.cpp @@ -7568,9 +7568,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (offset != 0) { - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); if (emitIns_valid_imm_for_add(offset, INS_FLAGS_DONT_CARE)) { @@ -7632,9 +7630,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else { // We require a tmpReg to hold the offset - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); // First load/store tmpReg with the large offset constant codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); @@ -7732,9 +7728,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(!src1->isContained()); } } - bool isMulOverflow = false; - bool 
isUnsignedMul = false; - regNumber extraReg = REG_NA; + bool isMulOverflow = false; if (dst->gtOverflowEx()) { NYI_ARM("emitInsTernary overflow"); @@ -7750,7 +7744,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else if (ins == INS_mul) { isMulOverflow = true; - isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); assert(intConst == nullptr); // overflow format doesn't support an int constant operand } else @@ -7769,17 +7762,10 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { NYI_ARM("emitInsTernary overflow"); #if 0 - // Make sure that we have an internal register - assert(genCountBits(dst->gtRsvdRegs) == 2); + regNumber extraReg = dst->GetSingleTempReg(); + assert(extraReg != dst->gtRegNum); - // There will be two bits set in tmpRegsMask. - // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' - regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); - assert(tmpRegsMask != RBM_NONE); - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask - - if (isUnsignedMul) + if ((dst->gtFlags & GTF_UNSIGNED) != 0) { if (attr == EA_4BYTE) { @@ -7799,7 +7785,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); } - // zero-sign bit comparision to detect overflow. + // zero-sign bit comparison to detect overflow. emitIns_R_I(INS_cmp, attr, extraReg, 0); } else @@ -7827,7 +7813,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, bitShift = 63; } - // Sign bit comparision to detect overflow. + // Sign bit comparison to detect overflow. 
emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR); } #endif diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index 93994e7..7ebf420 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -10809,9 +10809,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (offset != 0) { - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); emitAttr addType = varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE; @@ -10833,7 +10831,6 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // Then load/store dataReg from/to [tmpReg + offset] emitIns_R_R_I(ins, ldstAttr, dataReg, tmpReg, offset); - ; } else // large offset { @@ -10874,9 +10871,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else { // We require a tmpReg to hold the offset - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); // First load/store tmpReg with the large offset constant codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); @@ -11047,9 +11042,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(!src1->isContained()); } } - bool isMulOverflow = false; - bool isUnsignedMul = false; - regNumber extraReg = REG_NA; + + bool isMulOverflow = false; if (dst->gtOverflowEx()) { if (ins == INS_add) @@ -11063,7 +11057,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else if (ins == INS_mul) { isMulOverflow = true; - isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); assert(intConst == nullptr); // overflow format doesn't support an int constant operand } else @@ -11079,17 +11072,20 @@ regNumber 
emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (isMulOverflow) { - // Make sure that we have an internal register - assert(genCountBits(dst->gtRsvdRegs) == 2); + // This should be simply: + // regNumber extraReg = dst->GetSingleTempReg(); + // + // However, since LSRA might give us an internal temp register that is the same as the dst + // register, and the codegen here reuses the temp register after a definition of the target + // register, we requested two internal registers. If one is the target register, we simply + // use the other one. We can use ExtractTempReg() since it only asserts that there is at + // least one available temporary register (not that there is exactly one, for instance). + // Here, masking out tgtReg, there will be either 1 or 2. - // There will be two bits set in tmpRegsMask. - // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' - regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); - assert(tmpRegsMask != RBM_NONE); - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask + regNumber extraReg = dst->ExtractTempReg(~genRegMask(dst->gtRegNum)); + assert(extraReg != dst->gtRegNum); - if (isUnsignedMul) + if ((dst->gtFlags & GTF_UNSIGNED) != 0) { if (attr == EA_4BYTE) { @@ -11109,7 +11105,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); } - // zero-sign bit comparision to detect overflow. + // zero-sign bit comparison to detect overflow. emitIns_R_I(INS_cmp, attr, extraReg, 0); } else @@ -11137,7 +11133,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, bitShift = 63; } - // Sign bit comparision to detect overflow. + // Sign bit comparison to detect overflow. 
emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR); } } diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 3374b8c..c385155 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -9124,7 +9124,7 @@ void Compiler::fgSimpleLowering() else { con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL); - con->gtRsvdRegs = 0; + con->gtRsvdRegs = RBM_NONE; add = gtNewOperNode(GT_ADD, TYP_REF, arr, con); add->gtRsvdRegs = arr->gtRsvdRegs; diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index c5733b8..c75b814 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -17403,3 +17403,68 @@ regMaskTP ReturnTypeDesc::GetABIReturnRegs() return resultMask; } + +#ifndef LEGACY_BACKEND + +//------------------------------------------------------------------------ +// The following functions manage the gtRsvdRegs set of temporary registers that is +// created by LSRA and consumed during code generation. + +//------------------------------------------------------------------------ +// AvailableTempRegCount: return the number of available temporary registers in the (optional) given set +// (typically, RBM_ALLINT or RBM_ALLFLOAT). Does not modify gtRsvdRegs. +// +// Arguments: +// mask - (optional) Check for available temporary registers only in this set; defaults to all registers. +// +// Return Value: +// Count of the registers in gtRsvdRegs that are also in the given set. +// +unsigned GenTree::AvailableTempRegCount(regMaskTP mask /* = (regMaskTP)-1 */) const +{ + return genCountBits(gtRsvdRegs & mask); +} + +//------------------------------------------------------------------------ +// GetSingleTempReg: There is expected to be exactly one available temporary register +// in the given mask in the gtRsvdRegs set (this is asserted). Get that register. No future calls to get +// a temporary register from the same mask are expected. Removes the register from the set, but only in +// DEBUG to avoid doing unnecessary work in non-DEBUG builds. 
+// +// Arguments: +// mask - (optional) Get an available temporary register only in this set; defaults to all registers. +// +// Return Value: +// The single available temporary register in the given mask. +// +regNumber GenTree::GetSingleTempReg(regMaskTP mask /* = (regMaskTP)-1 */) +{ + regMaskTP availableSet = gtRsvdRegs & mask; + assert(genCountBits(availableSet) == 1); + regNumber tempReg = genRegNumFromMask(availableSet); + INDEBUG(gtRsvdRegs &= ~availableSet;) // Remove the register from the set, so it can't be used again. + return tempReg; +} + +//------------------------------------------------------------------------ +// ExtractTempReg: Find the lowest-numbered temporary register from the gtRsvdRegs set +// that is also in the optional given mask (typically, RBM_ALLINT or RBM_ALLFLOAT), +// and return it. Remove this register from the temporary register set, so it won't +// be returned again. Asserts that at least one such register is available. +// +// Arguments: +// mask - (optional) Extract an available temporary register only in this set; defaults to all registers. +// +// Return Value: +// The extracted temporary register, which is in the given mask. 
+// +regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */) +{ + regMaskTP availableSet = gtRsvdRegs & mask; + assert(genCountBits(availableSet) >= 1); + regMaskTP tempRegMask = genFindLowestBit(availableSet); + gtRsvdRegs &= ~tempRegMask; + return genRegNumFromMask(tempRegMask); +} + +#endif // !LEGACY_BACKEND \ No newline at end of file diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 1d52248..d3a03ee 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -733,6 +733,13 @@ public: ValueNumPair gtVNPair; regMaskSmall gtRsvdRegs; // set of fixed trashed registers + +#ifndef LEGACY_BACKEND + unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const; + regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1); + regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1); +#endif // !LEGACY_BACKEND + #ifdef LEGACY_BACKEND regMaskSmall gtUsedRegs; // set of used (trashed) registers #endif // LEGACY_BACKEND diff --git a/src/coreclr/src/jit/lsraarm.cpp b/src/coreclr/src/jit/lsraarm.cpp index e35e579..6f00323 100644 --- a/src/coreclr/src/jit/lsraarm.cpp +++ b/src/coreclr/src/jit/lsraarm.cpp @@ -425,7 +425,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) if (tree->gtOverflow()) { // Need a register different from target reg to check for overflow. - info->internalIntCount = 2; + info->internalIntCount = 1; } __fallthrough; diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 3b2d465..87ad4ec 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -288,7 +288,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_MUL: if (tree->gtOverflow()) { - // Need a register different from target reg to check for overflow. + // Need a register different from target reg to check for overflow; + // code generation requires the temp reg to live beyond the definition + // of the target reg. 
Since we have no way to tell LSRA that, we request + // two temp registers, and use one that is not the target reg. + // TODO-ARM64-CQ: Figure out a way to only reserve one. info->internalIntCount = 2; } __fallthrough; @@ -605,9 +609,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->srcCount = 2; info->dstCount = 1; - // We need one internal register when generating code for GT_ARR_INDEX, however the - // register allocator always may just give us the same one as it gives us for the 'dst' - // as a workaround we will just ask for two internal registers. + // We need one internal register when generating code for GT_ARR_INDEX. However, the + // register allocator may give us the same one it gives us for 'dst'. + // As a workaround we will just ask for two internal registers. + // TODO-ARM64-CQ: Figure out a way to only reserve one. // info->internalIntCount = 2; diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 468d302..940ba5f 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -694,9 +694,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT); inst_RV_RV(ins, targetReg, op1loReg, TYP_INT, emitTypeSize(TYP_INT)); - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs); + regNumber tmpReg = simdNode->GetSingleTempReg(); regNumber op1hiReg = genConsumeReg(op1hi); ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT); @@ -863,9 +861,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // Note that we cannot use targetReg before consumed all source operands. Therefore, // Need an internal register to stitch together all the values into a single vector // in an XMM reg. 
- assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regNumber vectorReg = genRegNumFromMask(simdNode->gtRsvdRegs); + regNumber vectorReg = simdNode->GetSingleTempReg(); // Zero out vectorReg if we are constructing a vector whose size is not equal to targetType vector size. // For example in case of Vector4f we don't need to zero when using SSE2. @@ -992,14 +988,9 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { // We need a temporary register that is NOT the same as the target, // and we MAY need another. - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 2); - - regMaskTP tmpRegsMask = simdNode->gtRsvdRegs; - regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~tmpReg1Mask; - regNumber tmpReg = genRegNumFromMask(tmpReg1Mask); - regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); + regNumber tmpReg = simdNode->ExtractTempReg(); + regNumber tmpReg2 = simdNode->GetSingleTempReg(); + // The register allocator guarantees the following conditions: // - the only registers that may be the same among op1Reg, op2Reg, tmpReg // and tmpReg2 are op1Reg and op2Reg. @@ -1269,7 +1260,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) else { // We need one additional SIMD register to store the result of the SIMD compare. - regNumber tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLFLOAT); + regNumber tmpReg1 = simdNode->GetSingleTempReg(RBM_ALLFLOAT); // tmpReg1 = (op1Reg == op2Reg) // Call this value of tmpReg1 as 'compResult' for further reference below. @@ -1305,7 +1296,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { // If we are not materializing result into a register, // we would have reserved an int type internal register. 
- intReg = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLINT); + intReg = simdNode->GetSingleTempReg(RBM_ALLINT); } else { @@ -1313,7 +1304,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) intReg = targetReg; // Must have not reserved any int type internal registers. - assert(genCountBits(simdNode->gtRsvdRegs & RBM_ALLINT) == 0); + assert(simdNode->AvailableTempRegCount(RBM_ALLINT) == 0); } inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType)); @@ -1430,16 +1421,12 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) { if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32)) { - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - - tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs); - assert(tmpReg1 != REG_NA); + tmpReg1 = simdNode->GetSingleTempReg(); assert(tmpReg1 != targetReg); } else { - assert(simdNode->gtRsvdRegs == RBM_NONE); + assert(simdNode->AvailableTempRegCount() == 0); } } else @@ -1449,17 +1436,12 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) if (iset == InstructionSet_SSE3_4) { - // Must have reserved 1 scratch register. - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs); + tmpReg1 = simdNode->GetSingleTempReg(); } else { - // Must have reserved 2 scratch registers. 
- assert(genCountBits(simdNode->gtRsvdRegs) == 2); - regMaskTP tmpRegMask = genFindLowestBit(simdNode->gtRsvdRegs); - tmpReg1 = genRegNumFromMask(tmpRegMask); - tmpReg2 = genRegNumFromMask(simdNode->gtRsvdRegs & ~tmpRegMask); + tmpReg1 = simdNode->ExtractTempReg(); + tmpReg2 = simdNode->GetSingleTempReg(); } } @@ -1803,10 +1785,9 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) } regNumber tmpReg = REG_NA; - if (simdNode->gtRsvdRegs != RBM_NONE) + if (simdNode->AvailableTempRegCount() != 0) { - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs); + tmpReg = simdNode->GetSingleTempReg(); } else { @@ -2011,9 +1992,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2) { // We need one additional int register as scratch - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs); + regNumber tmpReg = simdNode->GetSingleTempReg(); assert(genIsValidIntReg(tmpReg)); // Move the value from xmm reg to an int reg @@ -2103,9 +2082,7 @@ void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) #endif // Need an addtional Xmm register to extract upper 4 bytes from data. 
- assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); genConsumeOperands(treeNode->AsOp()); @@ -2141,10 +2118,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) regNumber operandReg = genConsumeReg(op1); // Need an addtional Xmm register to read upper 4 bytes, which is different from targetReg - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); assert(tmpReg != targetReg); // Load upper 4 bytes in tmpReg @@ -2188,9 +2162,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) regNumber operandReg = genConsumeReg(op1); // Need an addtional Xmm register to extract upper 4 bytes from data. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // store lower 8 bytes getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs); @@ -2227,12 +2199,8 @@ void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode) offs = treeNode->gtLclFld.gtLclOffs; } - // Need an additional Xmm register that is different from - // targetReg to read upper 4 bytes. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + // Need an additional Xmm register that is different from targetReg to read upper 4 bytes. + regNumber tmpReg = treeNode->GetSingleTempReg(); assert(tmpReg != targetReg); // Read upper 4 bytes to tmpReg @@ -2298,9 +2266,7 @@ void CodeGen::genPutArgStkSIMD12(GenTree* treeNode) regNumber operandReg = genConsumeReg(op1); // Need an addtional Xmm register to extract upper 4 bytes from data. 
- assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); genStoreSIMD12ToStack(operandReg, tmpReg); }