1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
10 #ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.
13 #error AMD64 must be !LEGACY_BACKEND
21 // N=normal, R=reverse, P=pop
22 #if FEATURE_STACK_FP_X87
// x87 arithmetic instruction tables, indexed by operation (add, sub, mul, div).
// Suffix decodes per the note above: first letter N=normal / R=reversed operand
// order (fsubr/fdivr), second letter N=no pop / P=pop the stack top (faddp etc.).
23 const static instruction FPmathNN[] = {INS_fadd, INS_fsub, INS_fmul, INS_fdiv};
24 const static instruction FPmathNP[] = {INS_faddp, INS_fsubp, INS_fmulp, INS_fdivp};
25 const static instruction FPmathRN[] = {INS_fadd, INS_fsubr, INS_fmul, INS_fdivr};
26 const static instruction FPmathRP[] = {INS_faddp, INS_fsubrp, INS_fmulp, INS_fdivrp};
// Allocates a new FlatFPStateX87 from the compiler's arena (CMK_FlatFPStateX87
// memory kind) and initializes it from pInitFrom (see FlatFPStateX87::Init).
// NOTE(review): listing is elided here — the `return pNewState;` is not visible.
28 FlatFPStateX87* CodeGenInterface::FlatFPAllocFPState(FlatFPStateX87* pInitFrom)
30 FlatFPStateX87* pNewState;
32 pNewState = new (compiler, CMK_FlatFPStateX87) FlatFPStateX87;
33 pNewState->Init(pInitFrom);
// Checks whether the set of virtual FP registers mapped in pState matches the
// register set described by 'mask'. Walks every virtual FP register; for each
// mapped one it tests its bit in 'mask' (the early-out when the bit is missing,
// and the clearing of matched bits, are in elided lines — presumably each match
// removes its bit so a fully-consumed mask means "same"; TODO confirm).
38 bool CodeGen::FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask)
41 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
43 if (pState->Mapped(i))
45 regMaskTP regmask = genRegMaskFloat((regNumber)i);
46 if ((mask & regmask) == 0)
// Any bits left over in 'mask' mean a register expected live was not mapped.
55 return mask ? false : true;
// Returns true iff virtual register uEntry currently occupies a slot in the
// virtual-to-stack map (i.e. it is live somewhere on the x87 stack).
58 bool FlatFPStateX87::Mapped(unsigned uEntry)
60 return m_uVirtualMap[uEntry] != (unsigned)FP_VRNOTMAPPED;
// Removes virtual register uEntry from the virtual-to-stack map.
// The caller is responsible for fixing up m_uStack; the entry must be mapped.
63 void FlatFPStateX87::Unmap(unsigned uEntry)
65 assert(Mapped(uEntry));
66 m_uVirtualMap[uEntry] = (unsigned)FP_VRNOTMAPPED;
// Structural equality of two FP stack states: same depth and the same virtual
// register in every stack slot. Both states must already be self-consistent.
// (The `return false` / `return true` lines are elided from this listing.)
69 bool FlatFPStateX87::AreEqual(FlatFPStateX87* pA, FlatFPStateX87* pB)
73 assert(pA->IsConsistent());
74 assert(pB->IsConsistent());
76 if (pA->m_uStackSize != pB->m_uStackSize)
81 for (i = 0; i < pA->m_uStackSize; i++)
83 if (pA->m_uStack[i] != pB->m_uStack[i])
// A map entry is valid when it is either unmapped, or mapped to a slot index
// that lies within the current stack depth.
93 bool FlatFPStateX87::IsValidEntry(unsigned uEntry)
95 return (Mapped(uEntry) && (m_uVirtualMap[uEntry] >= 0 && m_uVirtualMap[uEntry] < m_uStackSize)) || !Mapped(uEntry);
// Debug invariant check: (1) every virtual-map entry is valid, and (2) the
// stack array and the virtual map are mutual inverses (m_uVirtualMap[m_uStack[i]]
// == i). m_bIgnoreConsistencyChecks temporarily silences the asserts during
// multi-step updates (see genMovStackFP). Elided lines presumably return
// false/true around the asserts — TODO confirm against full source.
98 bool FlatFPStateX87::IsConsistent()
102 for (i = 0; i < FP_VIRTUALREGISTERS; i++)
104 if (!IsValidEntry(i))
106 if (m_bIgnoreConsistencyChecks)
112 assert(!"Virtual register is marked as mapped but out of the stack range");
118 for (i = 0; i < m_uStackSize; i++)
120 if (m_uVirtualMap[m_uStack[i]] != i)
122 if (m_bIgnoreConsistencyChecks)
128 assert(!"Register File and stack layout don't match!");
// Debug dump: prints each occupied stack slot as "ST(n): FPVm", mapping the
// flat slot index to its x87 ST() position via StackToST.
137 void FlatFPStateX87::Dump()
141 assert(IsConsistent());
143 if (m_uStackSize > 0)
145 printf("Virtual stack state: ");
146 for (i = 0; i < m_uStackSize; i++)
148 printf("ST(%i): FPV%i | ", StackToST(i), m_uStack[i]);
// Rebuilds the virtual-to-slot map purely from the m_uStack array:
// clear everything to FP_VRNOTMAPPED (-1), then record each slot's occupant.
154 void FlatFPStateX87::UpdateMappingFromStack()
156 memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
160 for (i = 0; i < m_uStackSize; i++)
162 m_uVirtualMap[m_uStack[i]] = i;
// Converts a flat (bottom-based) stack slot index into an x87 ST() number:
// the most recently pushed slot (index m_uStackSize-1) is ST(0).
168 unsigned FlatFPStateX87::StackToST(unsigned uEntry)
170 assert(IsValidEntry(uEntry));
171 return m_uStackSize - 1 - uEntry;
// Returns the x87 ST() number currently holding virtual register uEntry
// (map lookup, then flat-index -> ST() conversion).
174 unsigned FlatFPStateX87::VirtualToST(unsigned uEntry)
176 assert(Mapped(uEntry));
178 return StackToST(m_uVirtualMap[uEntry]);
// Inverse of VirtualToST: which virtual register lives in ST(uST)?
181 unsigned FlatFPStateX87::STToVirtual(unsigned uST)
183 assert(uST < m_uStackSize);
185 return m_uStack[m_uStackSize - 1 - uST];
// Initializes this state: when pFrom is supplied the whole object is bit-copied
// from it; otherwise the map and stack arrays are poisoned to -1 (FP_VRNOTMAPPED)
// and the depth reset. The if/else structure between these lines is elided.
// Consistency checking is enabled by default.
188 void FlatFPStateX87::Init(FlatFPStateX87* pFrom)
192 memcpy(this, pFrom, sizeof(*this));
196 memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
199 memset(m_uStack, -1, sizeof(m_uStack));
205 m_bIgnoreConsistencyChecks = false;
// Places virtual register uEntry into stack slot uStack, updating both the
// stack array and the virtual map so they stay inverses of each other.
209 void FlatFPStateX87::Associate(unsigned uEntry, unsigned uStack)
211 assert(uStack < m_uStackSize);
213 m_uStack[uStack] = uEntry;
214 m_uVirtualMap[uEntry] = uStack;
// Flat index of the top-of-stack slot (the one that corresponds to ST(0)).
217 unsigned FlatFPStateX87::TopIndex()
219 return m_uStackSize - 1;
// Virtual register currently at the top of the stack (ST(0)); stack must be
// non-empty.
222 unsigned FlatFPStateX87::TopVirtual()
224 assert(m_uStackSize > 0);
225 return m_uStack[m_uStackSize - 1];
// Renames a live value: the stack slot that held uVirtualFrom is re-associated
// with uVirtualTo (which must not already be mapped). The unmapping of
// uVirtualFrom is in elided lines — presumably via Unmap; TODO confirm.
228 void FlatFPStateX87::Rename(unsigned uVirtualTo, unsigned uVirtualFrom)
230 assert(!Mapped(uVirtualTo));
232 unsigned uSlot = m_uVirtualMap[uVirtualFrom];
235 Associate(uVirtualTo, uSlot);
// Pushes virtual register uEntry onto the virtual stack (bookkeeping only — no
// code is emitted). The m_uStackSize increment sits in an elided line between
// the asserts and the Associate call.
238 void FlatFPStateX87::Push(unsigned uEntry)
240 assert(m_uStackSize <= FP_PHYSICREGISTERS);
241 assert(!Mapped(uEntry));
244 Associate(uEntry, TopIndex());
246 assert(IsConsistent());
// Pops the top of the virtual stack: shrinks the depth, poisons the vacated
// slot, and (in elided lines — presumably after unmapping uVirtual) returns the
// virtual register that was on top.
249 unsigned FlatFPStateX87::Pop()
251 assert(m_uStackSize != 0);
253 unsigned uVirtual = m_uStack[--m_uStackSize];
256 m_uStack[m_uStackSize] = (unsigned)-1;
// True iff no virtual registers are currently on the x87 stack.
264 bool FlatFPStateX87::IsEmpty()
266 return m_uStackSize == 0;
// Emits x87 code that morphs the FP stack from layout pSrc into layout pDst
// (used at block boundaries). Two phases:
//   1. unload everything live in pSrc but dead in pDst (FlatFPX87_Unload);
//   2. permute the remaining slots into pDst's order by decomposing the
//      required permutation into cycles and realizing each cycle with
//      fxch-style swaps against the top of stack (FlatFPX87_SwapStack).
// Works on a local copy (fpState/pTmp) so pSrc itself is not mutated; pTmp's
// initialization to &fpState is in an elided line.
269 void CodeGen::genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst)
271 FlatFPStateX87 fpState;
272 FlatFPStateX87* pTmp;
276 memcpy(&fpState, pSrc, sizeof(FlatFPStateX87));
279 // Make sure everything seems consistent.
280 assert(pSrc->m_uStackSize >= pDst->m_uStackSize);
// Sanity: pDst may only keep registers that pSrc had live.
282 for (i = 0; i < FP_VIRTUALREGISTERS; i++)
284 if (!pTmp->Mapped(i) && pDst->Mapped(i))
286 assert(!"Dst stack state can't have a virtual register live if Src target has it dead");
291 // First we need to get rid of the stuff that's dead in pDst
292 for (i = 0; i < FP_VIRTUALREGISTERS; i++)
294 if (pTmp->Mapped(i) && !pDst->Mapped(i))
296 // We have to get rid of this one
297 JITDUMP("Removing virtual register V%i from stack\n", i);
299 // Don't need this virtual register any more
300 FlatFPX87_Unload(pTmp, i);
304 assert(pTmp->m_uStackSize == pDst->m_uStackSize);
// Phase 2: cycle decomposition. iProcessed is a bitmask of slots already
// placed (its initialization is elided from this listing).
309 // We start with the top of the stack so that we can
310 // easily recognize the cycle that contains it
311 for (i = pTmp->m_uStackSize - 1; i >= 0; i--)
313 // Have we processed this stack element yet?
314 if (((1 << i) & iProcessed) == 0)
// Collect the cycle starting at slot i by following where each slot's
// occupant must go in pDst.
317 int iCycle[FP_VIRTUALREGISTERS];
318 int iCycleLength = 0;
320 int iTOS = pTmp->m_uStackSize - 1;
324 // Mark current stack element as processed
325 iProcessed |= (1 << iCurrent);
328 iCycle[iCycleLength++] = iCurrent;
330 // Next element in cycle
331 iCurrent = pDst->m_uVirtualMap[pTmp->m_uStack[iCurrent]];
333 } while ((iProcessed & (1 << iCurrent)) == 0);
// (Dump of the cycle under JIT dump, elided guards.)
339 for (int l = 0; l < iCycleLength; l++)
341 printf("%i", pTmp->StackToST(iCycle[l]));
342 if (l + 1 < iCycleLength)
350 if (iCycleLength == 1)
352 // Stack element in the same place. Nothing to do
356 if (iCycle[0] == iTOS)
358 // Cycle includes stack element 0
361 for (j = 1; j < iCycleLength; j++)
363 FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS)
368 // Cycle doesn't include stack element 0
// Bring the cycle through TOS: swap each member with TOS, then restore TOS.
371 for (j = 0; j < iCycleLength; j++)
373 FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
376 FlatFPX87_SwapStack(pTmp, iCycle[0], iTOS);
382 assert(FlatFPStateX87::AreEqual(pTmp, pDst));
// Trims pSrc down to the registers named in 'mask': every virtual register
// that is live in pSrc but absent from the mask is unloaded (optionally only
// updating bookkeeping when bEmitCode is false). The Mapped() guard around the
// two branches is in elided lines.
385 void CodeGen::genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode)
388 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
392 if ((mask & genRegMaskFloat((regNumber)i)) == 0)
394 FlatFPX87_Unload(pSrc, i, bEmitCode);
// Unmapped registers must not be requested live by the target block.
399 assert((mask & genRegMaskFloat((regNumber)i)) == 0 &&
400 "A register marked as incoming live in the target block isnt live in the current block");
// Prolog: materializes the FP stack layout that the first basic block expects.
// For each slot of fgFirstBB's recorded state (bottom to top), finds the
// enregistered FP local assigned to that virtual register; if it's live on
// entry, loads it — parameters with a real `fld` from their home slot,
// non-parameters with `fldz` (they are uninitialized at this point). uLoads
// counts the loads and must equal the number of live-in enregistered FP vars
// (its increment sits in elided lines).
405 void CodeGen::genCodeForPrologStackFP()
407 assert(compiler->compGeneratingProlog);
408 assert(compiler->fgFirstBB);
410 FlatFPStateX87* pState = compiler->fgFirstBB->bbFPStateX87;
412 if (pState && pState->m_uStackSize)
414 VARSET_TP liveEnregIn(
415 VarSetOps::Intersection(compiler, compiler->fgFirstBB->bbLiveIn, compiler->optAllFPregVars));
422 assert(pState->m_uStackSize <= FP_VIRTUALREGISTERS);
423 for (i = 0; i < pState->m_uStackSize; i++)
425 // Get the virtual register that matches
426 unsigned iVirtual = pState->STToVirtual(pState->m_uStackSize - i - 1);
// Linear scan of the local table for the var enregistered in iVirtual.
431 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
433 if (varDsc->IsFloatRegType() && varDsc->lvRegister && varDsc->lvRegNum == iVirtual)
435 unsigned varIndex = varDsc->lvVarIndex;
437 // Is this variable live on entry?
438 if (VarSetOps::IsMember(compiler, liveEnregIn, varIndex))
440 if (varDsc->lvIsParam)
442 getEmitter()->emitIns_S(INS_fld, EmitSize(varDsc->TypeGet()), varNum, 0);
446 // uninitialized regvar
447 getEmitter()->emitIns(INS_fldz);
458 assert(varNum != compiler->lvaCount); // We have to find the matching var!!!!
461 assert(uLoads == VarSetOps::Count(compiler, liveEnregIn));
// End-of-block handling for the FP stack, keyed on the block's jump kind:
//  - EH returns / filter returns: stack must already be empty;
//  - return: at most the single return value may remain on the stack;
//  - fall-through / jump: emit a transition to the successor's FP state;
//  - BBJ_LEAVE is unreachable at this stage; CALLFINALLY requires an empty
//    stack (no FP regvars live into finallys).
// The case labels for the return/fall-through/jump arms are in elided lines.
465 void CodeGen::genCodeForEndBlockTransitionStackFP(BasicBlock* block)
467 switch (block->bbJumpKind)
469 case BBJ_EHFINALLYRET:
470 case BBJ_EHFILTERRET:
473 assert(compCurFPState.m_uStackSize == 0);
479 assert((varTypeIsFloating(compiler->info.compRetType) && compCurFPState.m_uStackSize == 1) ||
480 compCurFPState.m_uStackSize == 0);
484 genCodeForBBTransitionStackFP(block->bbNext);
487 genCodeForBBTransitionStackFP(block->bbJumpDest);
490 assert(!"BBJ_LEAVE blocks shouldn't get here");
492 case BBJ_CALLFINALLY:
493 assert(compCurFPState.IsEmpty() && "we don't enregister variables live on entry to finallys");
494 genCodeForBBTransitionStackFP(block->bbJumpDest);
497 // Nothing to do here
500 noway_assert(!"Unexpected bbJumpKind");
// Builds the FP register mask of all enregistered floating-point locals whose
// tracked index is a member of 'varset'. Used when transitioning to a block
// that has no FP state yet. (The `return result;` is in an elided line.)
505 regMaskTP CodeGen::genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset)
509 regMaskTP result = 0;
511 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
513 if (varDsc->IsFloatRegType() && varDsc->lvRegister)
516 unsigned varIndex = varDsc->lvVarIndex;
518 /* Is this variable live on entry? */
520 if (VarSetOps::IsMember(compiler, varset, varIndex))
522 // We should only call this function doing a transition
523 // To a block which hasn't state yet. All incoming live enregistered variables
524 // should have been already initialized.
525 assert(varDsc->lvRegNum != REG_FPNONE);
527 result |= genRegMaskFloat(varDsc->lvRegNum);
// Transitions the current FP stack state to what target block pDst expects.
// If pDst already has a recorded state, emit a full stack permutation to it;
// otherwise seed pDst with a copy of the current state trimmed down to the FP
// registers actually live on entry to pDst (an optimal "subset" transition).
535 void CodeGen::genCodeForBBTransitionStackFP(BasicBlock* pDst)
537 assert(compCurFPState.IsConsistent());
538 if (pDst->bbFPStateX87)
540 // Target block has an associated state. generate transition
541 genCodeForTransitionStackFP(&compCurFPState, pDst->bbFPStateX87);
545 // Target block hasn't got an associated state. As it can only possibly
546 // have a subset of the current state, we'll take advantage of this and
547 // generate the optimal transition
549 // Copy current state
550 pDst->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);
552 regMaskTP liveRegIn =
553 genRegMaskFromLivenessStackFP(VarSetOps::Intersection(compiler, pDst->bbLiveIn, compiler->optAllFPregVars));
555 // Match to live vars
556 genCodeForTransitionFromMask(pDst->bbFPStateX87, liveRegIn);
// Spills FP-stack temporaries (non-regvar values) whose registers fall inside
// canSpillMask. Two passes: first collect the spill set (temps only — regvars
// in rsMaskRegVarFloat are excluded), then spill each one via SpillFloat.
560 void CodeGen::SpillTempsStackFP(regMaskTP canSpillMask)
564 regMaskTP spillMask = 0;
567 // First pass we determine which registers we spill
568 for (i = 0; i < compCurFPState.m_uStackSize; i++)
570 reg = (regNumber)compCurFPState.m_uStack[i];
571 regMaskTP regMask = genRegMaskFloat(reg);
572 if ((regMask & canSpillMask) && (regMask & regSet.rsMaskRegVarFloat) == 0)
574 spillMask |= regMask;
578 // Second pass we do the actual spills
579 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
581 if ((genRegMaskFloat((regNumber)i) & spillMask))
583 JITDUMP("spilling temp in register %s\n", regVarNameStackFP((regNumber)i));
584 SpillFloat((regNumber)i, true);
589 // Spills all the fp stack. We need this to spill
// (continuation of the above comment is elided). Spills every entry currently
// on the FP stack, always taking whatever is on top (TopIndex) — each
// SpillFloat pops one entry, so uSize iterations empty the stack.
591 void CodeGen::SpillForCallStackFP()
594 unsigned uSize = compCurFPState.m_uStackSize;
596 for (i = 0; i < uSize; i++)
598 SpillFloat((regNumber)compCurFPState.m_uStack[compCurFPState.TopIndex()], true);
// Spills one FP virtual register to a temp slot: brings it to the top of the
// x87 stack, records a SpillDsc (either a used tree or an enregistered
// variable), links it onto rsSpillFloat, then emits `fstp` into a freshly
// allocated temp and pops the virtual stack. bIsCall relaxes the "don't spill
// locked/regvar registers" assert for call sites.
602 void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall)
605 regMaskTP mask = genRegMaskFloat(reg);
607 // We can allow spilling regvars, but we don't need it at the moment, and we're
608 // missing code in setupopforflatfp, so assert.
609 assert(bIsCall || (mask & (regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat)) == 0);
612 JITDUMP("SpillFloat spilling register %s\n", regVarNameStackFP(reg));
614 // We take the virtual register to the top of the stack
615 FlatFPX87_MoveToTOS(&compCurFPState, reg);
617 // Allocate spill structure
// NOTE(review): "®Set" below is a mis-encoded "&regSet" — fix the file's
// encoding; left untouched here to keep the listing byte-identical.
618 RegSet::SpillDsc* spill = RegSet::SpillDsc::alloc(compiler, ®Set, TYP_FLOAT);
620 // Fill out spill structure
// Case 1: the register holds a tree temp (genUsedRegsFloat non-null).
622 if (regSet.genUsedRegsFloat[reg])
624 JITDUMP("will spill tree [%08p]\n", dspPtr(regSet.genUsedRegsFloat[reg]));
625 // register used for temp stack
626 spill->spillTree = regSet.genUsedRegsFloat[reg];
627 spill->bEnregisteredVariable = false;
629 regSet.genUsedRegsFloat[reg]->gtFlags |= GTF_SPILLED;
631 type = genActualType(regSet.genUsedRegsFloat[reg]->TypeGet());
634 regSet.SetUsedRegFloat(regSet.genUsedRegsFloat[reg], false);
// Case 2 (else-arm elided): the register holds an enregistered variable.
638 JITDUMP("will spill varDsc [%08p]\n", dspPtr(regSet.genRegVarsFloat[reg]));
640 // enregistered variable
641 spill->spillVarDsc = regSet.genRegVarsFloat[reg];
642 assert(spill->spillVarDsc);
644 spill->bEnregisteredVariable = true;
647 spill->spillVarDsc->lvSpilled = true;
648 type = genActualType(regSet.genRegVarsFloat[reg]->TypeGet());
650 // Clear register flag
651 SetRegVarFloat(reg, type, 0);
// Link into the float spill list and emit the actual store.
655 spill->spillNext = regSet.rsSpillFloat;
656 regSet.rsSpillFloat = spill;
659 TempDsc* temp = spill->spillTemp = compiler->tmpGetTemp(type);
660 emitAttr size = EmitSize(type);
662 getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
663 compCurFPState.Pop();
// Two-argument overload is intentionally unimplemented on x86 (NYI).
666 void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
668 NYI(!"Need not be implemented for x86.");
// Reloads a previously spilled FP value. Enregistered variables are re-birthed
// (genRegVarBirthStackFP) and pushed back on the virtual stack; tree temps get
// a fresh register picked, are re-marked in-reg/used, and pushed. In both
// paths the actual reload is the trailing `fld` from the spill temp.
671 void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
674 if (spillDsc->bEnregisteredVariable)
676 assert(spillDsc->spillVarDsc->lvSpilled);
678 // Do the logic as it was a regvar birth
679 genRegVarBirthStackFP(spillDsc->spillVarDsc);
681 // Mark as not spilled any more
682 spillDsc->spillVarDsc->lvSpilled = false;
684 // Update stack layout.
685 compCurFPState.Push(spillDsc->spillVarDsc->lvRegNum);
// else-arm (elided brace): the spill was a tree temp.
689 assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);
691 spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;
693 regNumber reg = regSet.PickRegFloat();
694 genMarkTreeInReg(spillDsc->spillTree, reg);
695 regSet.SetUsedRegFloat(spillDsc->spillTree, true);
697 compCurFPState.Push(reg);
700 // load from spilled spot
701 emitAttr size = EmitSize(spillDsc->spillTemp->tdTempType());
702 getEmitter()->emitIns_S(INS_fld, size, spillDsc->spillTemp->tdTempNum(), 0);
705 // unspills any reg var that we have in the spill list. We need this
706 // because we can't have any spilled vars across basic blocks
// Walks the float spill list, reloading every entry that is an enregistered
// variable (the UnspillFloat call inside the `if` is in an elided line).
// 'next' is captured before the body because unspilling unlinks 'cur'.
707 void CodeGen::UnspillRegVarsStackFp()
709 RegSet::SpillDsc* cur;
710 RegSet::SpillDsc* next;
712 for (cur = regSet.rsSpillFloat; cur; cur = next)
714 next = cur->spillNext;
716 if (cur->bEnregisteredVariable)
// Printable names for the virtual FP registers, generated from the REGDEF
// entries in registerfp.h (each expands to its short name string).
724 const char* regNamesFP[] = {
725 #define REGDEF(name, rnum, mask, sname) sname,
726 #include "registerfp.h"
// Maps a virtual FP register number to its printable name (debug/dump helper).
730 const char* CodeGenInterface::regVarNameStackFP(regNumber reg)
732 return regNamesFP[reg];
// Post-statement sanity check: the FP state must be internally consistent, no
// FP registers may remain marked used or locked, and the number of FP regvars
// cannot exceed the stack depth. The `return false`/`return true` lines are
// elided from this listing.
735 bool CodeGen::ConsistentAfterStatementStackFP()
737 if (!compCurFPState.IsConsistent())
742 if (regSet.rsMaskUsedFloat != 0)
744 assert(!"FP register marked as used after statement");
747 if (regSet.rsMaskLockedFloat != 0)
749 assert(!"FP register marked as locked after statement");
752 if (genCountBits(regSet.rsMaskRegVarFloat) > compCurFPState.m_uStackSize)
754 assert(!"number of FP regvars in regSet.rsMaskRegVarFloat doesnt match current FP state");
// Number of FP-stack slots holding temporaries (total depth minus slots
// occupied by enregistered variables).
763 int CodeGen::genNumberTemps()
765 return compCurFPState.m_uStackSize - genCountBits(regSet.rsMaskRegVarFloat);
// Discards an in-register floating-point tree value by unloading (popping) its
// virtual register from the x87 stack, emitting code for the pop.
768 void CodeGen::genDiscardStackFP(GenTree* tree)
770 assert(tree->InReg());
771 assert(varTypeIsFloating(tree));
773 FlatFPX87_Unload(&compCurFPState, tree->gtRegNum, true);
// Renames srcReg to dstReg in the FP state AND migrates all the associated
// bookkeeping: the used mask, the locked mask (if srcReg was locked), and the
// genUsedRegsFloat tree pointer (whose gtRegNum is retargeted to dstReg).
776 void CodeGen::genRegRenameWithMasks(regNumber dstReg, regNumber srcReg)
778 regMaskTP dstregmask = genRegMaskFloat(dstReg);
779 regMaskTP srcregmask = genRegMaskFloat(srcReg);
781 // rename use register
782 compCurFPState.Rename(dstReg, srcReg);
784 regSet.rsMaskUsedFloat &= ~srcregmask;
785 regSet.rsMaskUsedFloat |= dstregmask;
787 if (srcregmask & regSet.rsMaskLockedFloat)
789 assert((dstregmask & regSet.rsMaskLockedFloat) == 0);
790 // We will set the new one as locked
791 regSet.rsMaskLockedFloat &= ~srcregmask;
792 regSet.rsMaskLockedFloat |= dstregmask;
// Move the tree that occupied srcReg over to dstReg.
796 assert(!regSet.genUsedRegsFloat[dstReg]);
797 regSet.genUsedRegsFloat[dstReg] = regSet.genUsedRegsFloat[srcReg];
798 regSet.genUsedRegsFloat[dstReg]->gtRegNum = dstReg;
799 regSet.genUsedRegsFloat[srcReg] = NULL;
// A floating-point regvar comes live: claim its assigned virtual register. If
// that register is currently occupied by a tree temp, the temp is first moved
// to a freshly picked register via genRegRenameWithMasks, then the regvar is
// recorded as owning the register (SetRegVarFloat).
802 void CodeGen::genRegVarBirthStackFP(LclVarDsc* varDsc)
804 // Mark the virtual register we're assigning to this local;
805 regNumber reg = varDsc->lvRegNum;
808 regMaskTP regmask = genRegMaskFloat(reg);
811 assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
812 if (regSet.genUsedRegsFloat[reg])
815 // Register was marked as used... will have to rename it so we can put the
816 // regvar where it belongs.
817 JITDUMP("Renaming used register %s\n", regVarNameStackFP(reg));
821 newreg = regSet.PickRegFloat();
824 regMaskTP newregmask = genRegMaskFloat(newreg);
// Old register must be in the used set, new one must be free.
828 assert((regSet.rsMaskUsedFloat & regmask) && (regSet.rsMaskUsedFloat & newregmask) == 0);
830 genRegRenameWithMasks(newreg, reg);
833 // Mark the reg as holding a regvar
834 varDsc->lvSpilled = false;
835 SetRegVarFloat(reg, varDsc->TypeGet(), varDsc);
// GenTree overload: resolves the local var behind the GT_REG_VAR node and
// delegates to the LclVarDsc* overload; verifies the node and descriptor agree
// on the assigned register.
838 void CodeGen::genRegVarBirthStackFP(GenTree* tree)
841 if (compiler->verbose)
843 printf("variable V%i is going live in ", tree->gtLclVarCommon.gtLclNum);
844 Compiler::printTreeID(tree);
849 // Update register in local var
850 LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
852 genRegVarBirthStackFP(varDsc);
853 assert(tree->gtRegNum == tree->gtRegVar.gtRegNum && tree->gtRegNum == varDsc->lvRegNum);
// A floating-point regvar dies: release its register's regvar association
// (the value may remain on the stack as a temp for the consuming expression).
856 void CodeGen::genRegVarDeathStackFP(LclVarDsc* varDsc)
858 regNumber reg = varDsc->lvRegNum;
860 assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
861 SetRegVarFloat(reg, varDsc->TypeGet(), 0);
// GenTree overload: resolves the local var behind the node and delegates to
// the LclVarDsc* overload.
864 void CodeGen::genRegVarDeathStackFP(GenTree* tree)
867 if (compiler->verbose)
869 printf("register %s is going dead in ", regVarNameStackFP(tree->gtRegVar.gtRegNum));
870 Compiler::printTreeID(tree);
875 LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
876 genRegVarDeathStackFP(varDsc);
// Loads 'tree' onto the x87 stack as virtual register 'reg'. For regvars:
// unspill first if needed; a dying regvar is simply renamed to 'reg' (no code
// emitted), otherwise its value is duplicated with `fld ST(n)` and 'reg'
// pushed. Non-regvar operands are loaded from memory with `fld`.
879 void CodeGen::genLoadStackFP(GenTree* tree, regNumber reg)
882 if (compiler->verbose)
884 printf("genLoadStackFP");
885 Compiler::printTreeID(tree);
886 printf(" %s\n", regVarNameStackFP(reg));
890 if (tree->IsRegVar())
892 // if it has been spilled, unspill it.
893 LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
894 if (varDsc->lvSpilled)
896 UnspillFloat(varDsc);
899 // if it's dying, just rename the register, else load it normally
900 if (tree->IsRegVarDeath())
902 genRegVarDeathStackFP(tree);
903 compCurFPState.Rename(reg, tree->gtRegVar.gtRegNum);
// else-arm (elided brace): duplicate the live regvar's value onto the stack.
907 assert(tree->gtRegNum == tree->gtRegVar.gtRegNum);
908 inst_FN(INS_fld, compCurFPState.VirtualToST(tree->gtRegVar.gtRegNum));
909 FlatFPX87_PushVirtual(&compCurFPState, reg);
// Non-regvar path (elided else): load directly from the tree's location.
914 FlatFPX87_PushVirtual(&compCurFPState, reg);
915 inst_FS_TT(INS_fld, tree);
// Moves an FP value from src (register or memory/regvar) to dst. Four broad
// shapes, selected by which side has a register:
//   1. dst in memory: bring src to TOS and `fstp`/`fst` (pop only if src is
//      disposable — a temp or a dying regvar);
//   2. memory/const -> register: push dst's virtual reg and `fld` the source;
//   3. disposable reg -> regvar: pure renaming, no code emitted;
//   4. live regvar -> regvar: duplicate with `fld ST(n)`.
// REG_FPNONE on either side means "not in a register".
919 void CodeGen::genMovStackFP(GenTree* dst, regNumber dstreg, GenTree* src, regNumber srcreg)
921 if (dstreg == REG_FPNONE && !dst->IsRegVar())
926 if (srcreg == REG_FPNONE)
928 assert(src->IsRegVar());
936 // Mov src to top of the stack
937 FlatFPX87_MoveToTOS(&compCurFPState, reg);
939 if (srcreg != REG_FPNONE || (src->IsRegVar() && src->IsRegVarDeath()))
// Source is disposable: store-and-pop.
942 inst_FS_TT(INS_fstp, dst);
945 compCurFPState.Pop();
// Source must stay live: plain store.
949 inst_FS_TT(INS_fst, dst);
954 if (dstreg == REG_FPNONE)
956 assert(dst->IsRegVar());
957 dstreg = dst->gtRegNum;
960 if (srcreg == REG_FPNONE && !src->IsRegVar())
963 assert(dst->IsRegVar() && dst->IsRegVarBirth());
965 FlatFPX87_PushVirtual(&compCurFPState, dstreg);
966 FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
968 if (src->gtOper == GT_CNS_DBL)
970 genConstantLoadStackFP(src);
974 inst_FS_TT(INS_fld, src);
979 // disposable reg to reg, use renaming
980 assert(dst->IsRegVar() && dst->IsRegVarBirth());
981 assert(src->IsRegVar() || (src->InReg()));
982 assert(src->gtRegNum != REG_FPNONE);
984 if ((src->InReg()) || (src->IsRegVar() && src->IsRegVarDeath()))
986 // src is disposable and dst is a regvar, so we'll rename src to dst
988 // SetupOp should have masked out the regvar
989 assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
990 !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
992 // get slot that holds the value
993 unsigned uStack = compCurFPState.m_uVirtualMap[src->gtRegNum];
995 // unlink the slot that holds the value
996 compCurFPState.Unmap(src->gtRegNum);
998 regNumber tgtreg = dst->gtRegVar.gtRegNum;
// The state is deliberately inconsistent while the slot is unlinked;
// suppress the consistency asserts until the re-association below.
1000 compCurFPState.IgnoreConsistencyChecks(true);
1002 if (regSet.genUsedRegsFloat[tgtreg])
1004 // tgtreg is used, we move it to src reg. We do this here as src reg won't be
1005 // marked as used, if tgtreg is used it srcreg will be a candidate for moving
1006 // which is something we don't want, so we do the renaming here.
1007 genRegRenameWithMasks(src->gtRegNum, tgtreg);
1010 compCurFPState.IgnoreConsistencyChecks(false);
1013 genRegVarBirthStackFP(dst);
1015 // Associate target reg with source physical register
1016 compCurFPState.Associate(tgtreg, uStack);
1020 if (src->IsRegVar())
1022 // regvar that isn't dying to regvar
1023 assert(!src->IsRegVarDeath());
1026 genRegVarBirthStackFP(dst);
// Duplicate the source regvar's value onto the top of the stack.
1029 inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegVar.gtRegNum));
1031 // update our logic stack
1032 FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
1039 genRegVarBirthStackFP(dst);
1042 inst_FS_TT(INS_fld, src);
1044 // update our logic stack
1045 FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
// Thin wrapper: finalizes codegen for 'tree' in 'reg' via the common
// genCodeForTree_DONE path.
1052 void CodeGen::genCodeForTreeStackFP_DONE(GenTree* tree, regNumber reg)
1054 return genCodeForTree_DONE(tree, reg);
1057 // Does the setup of the FP stack on entry to block
// If the block has no FP state yet, allocate an empty one and, while walking
// the live-in enregistered FP vars, record their layout (bGenerate). In both
// cases each live-in regvar is re-birthed so the regSet bookkeeping matches,
// and compCurFPState is reloaded from the block's state at the end.
1058 void CodeGen::genSetupStateStackFP(BasicBlock* block)
1060 bool bGenerate = !block->bbFPStateX87;
1063 // Allocate FP state
1064 block->bbFPStateX87 = FlatFPAllocFPState();
1065 block->bbFPStateX87->Init();
1068 // Update liveset and lock enregistered live vars on entry
1069 VARSET_TP liveSet(VarSetOps::Intersection(compiler, block->bbLiveIn, compiler->optAllFPregVars));
1071 if (!VarSetOps::IsEmpty(compiler, liveSet))
1076 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
1078 if (varDsc->IsFloatRegType() && varDsc->lvRegister)
1081 unsigned varIndex = varDsc->lvVarIndex;
1083 // Is this variable live on entry?
1084 if (VarSetOps::IsMember(compiler, liveSet, varIndex))
1086 JITDUMP("genSetupStateStackFP(): enregistered variable V%i is live on entry to block\n", varNum);
1088 assert(varDsc->lvTracked);
1089 assert(varDsc->lvRegNum != REG_FPNONE);
1091 genRegVarBirthStackFP(varDsc);
1095 // If we're generating layout, update it.
1096 block->bbFPStateX87->Push(varDsc->lvRegNum);
1103 compCurFPState.Init(block->bbFPStateX87);
1105 assert(block->bbFPStateX87->IsConsistent());
// Pushes an FP argument onto the x86 stack for a call. Cases by operand kind:
//  - float constant: pushed directly as raw bits;
//  - double constant: materialized in a const pool, `fld` + `fstp [esp]` to
//    avoid store-forwarding stalls;
//  - memory operand: either pushed int-by-int (high to low) or routed through
//    `fld`/`fstp [esp]` (same store-forwarding rationale);
//  - value already on the FP stack: `sub esp` then `fstp [esp]`.
// Returns the address-register mask used while forming the operand's address.
// Several case labels and the final `return addrReg;` are elided.
1108 regMaskTP CodeGen::genPushArgumentStackFP(GenTree* args)
1110 regMaskTP addrReg = 0;
1111 unsigned opsz = genTypeSize(genActualType(args->TypeGet()));
1113 switch (args->gtOper)
1123 if (args->TypeGet() == TYP_FLOAT)
1125 f = (float)args->gtDblCon.gtDconVal;
1126 // *(long*) (&f) used instead of *addr because of strict
1127 // pointer aliasing optimization. According to the ISO C/C++
1128 // standard, an optimizer can assume two pointers of
1129 // non-compatible types do not point to the same memory.
1130 inst_IV(INS_push, *((int*)(&f)));
1136 addr = (int*)&args->gtDblCon.gtDconVal;
1138 // store forwarding fix for pentium 4 and Centrino
1139 // (even for down level CPUs as we don't care about their perf any more)
1140 fval = genMakeConst(&args->gtDblCon.gtDconVal, args->gtType, args, true);
1141 inst_FS_TT(INS_fld, fval);
1143 inst_RV_IV(INS_sub, REG_ESP, flopsz, EA_PTRSIZE);
1144 getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
1156 // Is the value a cast from double ?
1157 if ((args->gtOper == GT_CAST) && (args->CastFromType() == TYP_DOUBLE))
1159 /* Load the value onto the FP stack */
1161 genCodeForTreeFlt(args->gtCast.CastOp(), false);
1163 /* Go push the value as a float/double */
1164 args = args->gtCast.CastOp();
1169 // Fall through to default case....
1173 temp = genMakeAddrOrFPstk(args, &addrReg, false);
1178 // We have the address of the float operand, push its bytes
1180 assert(offs % sizeof(int) == 0);
1184 assert(args->gtType == temp->gtType);
// Push from high address to low so the value lands in memory order.
1187 offs -= sizeof(int);
1188 inst_TT(INS_push, temp, offs);
1194 // store forwarding fix for pentium 4 and Centrino
1195 inst_FS_TT(INS_fld, temp);
1196 flopsz = (size_t)offs;
1197 inst_RV_IV(INS_sub, REG_ESP, (size_t)flopsz, EA_PTRSIZE);
1198 getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
1205 // The argument is on the FP stack -- pop it into [ESP-4/8]
1209 inst_RV_IV(INS_sub, REG_ESP, opsz, EA_PTRSIZE);
1212 if (opsz == 2 * sizeof(unsigned))
1215 // Take reg to top of stack
1216 FlatFPX87_MoveToTOS(&compCurFPState, args->gtRegNum);
1218 // Pop it off to stack
1219 compCurFPState.Pop();
1220 getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(opsz), REG_NA, REG_ESP, 0);
1223 gcInfo.gcMarkRegSetNpt(addrReg);
// Forces 'op' (already on the FP stack) to the precision of 'type' by spilling
// it to a temp of that size and reloading it — x87 registers are 80-bit, so a
// round-trip through memory is the only way to truncate to float/double.
// Skipped when the operand is already at the requested precision.
1231 void CodeGen::genRoundFpExpressionStackFP(GenTree* op, var_types type)
1233 // Do nothing with memory resident opcodes - these are the right precision
1234 // (even if genMakeAddrOrFPstk loads them to the FP stack)
1235 if (type == TYP_UNDEF)
1236 type = op->TypeGet();
1246 if (type == op->TypeGet())
1252 assert(op->gtRegNum != REG_FPNONE);
1254 // Take register to top of stack
1255 FlatFPX87_MoveToTOS(&compCurFPState, op->gtRegNum);
1257 // Allocate a temp for the expression
1258 TempDsc* temp = compiler->tmpGetTemp(type);
1260 // Store the FP value into the temp
1261 inst_FS_ST(INS_fstp, EmitSize(type), temp, 0);
1263 // Load the value back onto the FP stack
1264 inst_FS_ST(INS_fld, EmitSize(type), temp, 0);
1266 // We no longer need the temp
1267 compiler->tmpRlsTemp(temp);
// Codegen for a GT_CNS_DBL node: pick a free virtual FP register, emit the
// constant load (genConstantLoadStackFP), record the push on the virtual
// stack, and finalize via _DONE.
1270 void CodeGen::genCodeForTreeStackFP_Const(GenTree* tree)
1273 if (compiler->verbose)
1275 printf("genCodeForTreeStackFP_Const() ");
1276 Compiler::printTreeID(tree);
1282 if (tree->OperGet() != GT_CNS_DBL)
1284 compiler->gtDispTree(tree);
1285 assert(!"bogus float const");
1289 regNumber reg = regSet.PickRegFloat();
1292 genConstantLoadStackFP(tree);
1294 // Push register to virtual stack
1295 FlatFPX87_PushVirtual(&compCurFPState, reg);
1298 genCodeForTreeStackFP_DONE(tree, reg);
// Codegen for FP leaf nodes. Each visible arm follows the same pattern: pick a
// free virtual register, load the leaf onto the stack (genLoadStackFP), and
// finalize via _DONE; the case labels distinguishing the leaf kinds (local
// var / field / indirection etc.) are in elided lines. Unknown opers assert.
1301 void CodeGen::genCodeForTreeStackFP_Leaf(GenTree* tree)
1304 if (compiler->verbose)
1306 printf("genCodeForTreeStackFP_Leaf() ");
1307 Compiler::printTreeID(tree);
1312 switch (tree->OperGet())
// This arm requires the local NOT to be enregistered (stack-resident load).
1317 assert(!compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvRegister);
1320 regNumber reg = regSet.PickRegFloat();
1323 genLoadStackFP(tree, reg);
1325 genCodeForTreeStackFP_DONE(tree, reg);
1332 regNumber reg = regSet.PickRegFloat();
1334 genLoadStackFP(tree, reg);
1336 genCodeForTreeStackFP_DONE(tree, reg);
1344 regNumber reg = regSet.PickRegFloat();
1347 genLoadStackFP(tree, reg);
1349 genCodeForTreeStackFP_DONE(tree, reg);
1356 compiler->gtDispTree(tree);
1358 assert(!"unexpected leaf");
1361 genUpdateLife(tree);
// Codegen for a GT_ASG whose destination is floating point. Fast paths for a
// constant RHS: load the constant straight into the destination (regvar push,
// fstp to memory, immediate integer moves of the raw bits, or a const-pool
// fld/fstp pair chosen to dodge store-forwarding stalls). Otherwise falls
// through to the generic path: genSetupForOpStackFP + genMovStackFP, honoring
// GTF_REVERSE_OPS for evaluation order. Several case labels, else-arms and the
// declarations of 'size'/'offs'/'cnsaddr' are in elided lines.
1364 void CodeGen::genCodeForTreeStackFP_Asg(GenTree* tree)
1367 if (compiler->verbose)
1369 printf("genCodeForTreeStackFP_Asg() ");
1370 Compiler::printTreeID(tree);
1377 GenTree* op1 = tree->gtOp.gtOp1;
1378 GenTree* op2 = tree->gtGetOp2IfPresent();
1380 assert(tree->OperGet() == GT_ASG);
// A float-to-float cast into a same-typed, non-regvar destination is a no-op.
1382 if (!op1->IsRegVar() && (op2->gtOper == GT_CAST) && (op1->gtType == op2->gtType) &&
1383 varTypeIsFloating(op2->gtCast.CastOp()))
1385 /* We can discard the cast */
1386 op2 = op2->gtCast.CastOp();
1389 size = EmitSize(op1);
1392 // If lhs is a comma expression, evaluate the non-last parts, make op1 be the remainder.
1393 // (But can't do this if the assignment is reversed...)
1394 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
1396 op1 = genCodeForCommaTree(op1);
1399 GenTree* op1NonCom = op1->gtEffectiveVal();
1400 if (op1NonCom->gtOper == GT_LCL_VAR)
1403 LclVarDsc* varDsc = &compiler->lvaTable[op1NonCom->gtLclVarCommon.gtLclNum];
1405 assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH));
1408 /* For non-debuggable code, every definition of a lcl-var has
1409 * to be checked to see if we need to open a new scope for it.
1412 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
1414 siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs);
1419 switch (op2->gtOper)
// Constant-RHS fast path (GT_CNS_DBL case label elided).
1423 assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
1425 regMaskTP addrRegInt;
1427 regMaskTP addrRegFlt;
1430 // op2 is already "evaluated," so doesn't matter if they're reversed or not...
1431 op1 = genCodeForCommaTree(op1);
1432 op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
1434 // We want to 'cast' the constant to the op1'a type
1435 double constantValue;
1436 constantValue = op2->gtDblCon.gtDconVal;
1437 if (op1->gtType == TYP_FLOAT)
1439 float temp = forceCastToFloat(constantValue);
1440 constantValue = (double)temp;
1443 GenTree* constantTree;
1444 constantTree = compiler->gtNewDconNode(constantValue);
// If the constant has a cheap x87 encoding (fldz/fld1 etc.) it is now on
// the stack; store it into the destination.
1445 if (genConstantLoadStackFP(constantTree, true))
1447 if (op1->IsRegVar())
1450 genRegVarBirthStackFP(op1);
1453 compCurFPState.Push(op1->gtRegNum);
1458 inst_FS_TT(INS_fstp, op1);
1463 // Standard constant
1464 if (op1->IsRegVar())
1466 // Load constant to fp stack.
1470 // Create slot for constant
1471 if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
1473 // We're going to use that double as a float, so recompute addr
1474 float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
1475 cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
1479 cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
1483 inst_FS_TT(INS_fld, cnsaddr);
1486 genRegVarBirthStackFP(op1);
1489 compCurFPState.Push(op1->gtRegNum);
// Destination in memory, float-sized: move the raw bits with integer
// stores (loop structure partially elided).
1496 float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
1497 int* addr = (int*)&f;
1501 inst_TT_IV(INS_mov, op1, *addr++, offs);
1502 offs += sizeof(int);
1503 } while (offs < size);
1507 // store forwarding fix for pentium 4 and centrino and also
1508 // fld for doubles that can be represented as floats, saving
1512 // Create slot for constant
1513 if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
1515 // We're going to use that double as a float, so recompute addr
1516 float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
1517 cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
1521 assert(tree->gtType == TYP_DOUBLE);
1522 cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
1525 inst_FS_TT(INS_fld, cnsaddr);
1526 inst_FS_TT(INS_fstp, op1);
1531 genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
1539 // Not one of the easy optimizations. Proceed normally
1540 if (tree->gtFlags & GTF_REVERSE_OPS)
1542 /* Evaluate the RHS onto the FP stack.
1543 We don't need to round it as we will be doing a spill for
1544 the assignment anyway (unless op1 is a GT_REG_VAR). */
1546 genSetupForOpStackFP(op1, op2, true, true, false, true);
1549 genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
1553 // Have to evaluate left side before
1555 // This should never happen
1556 assert(!op1->IsRegVar());
1558 genSetupForOpStackFP(op1, op2, false, true, false, true);
1560 // Do the actual move
1561 genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
// Evaluates/prepares the two operands of an FP operation, honoring evaluation
// order. bMakeOp1Addressable selects which operand becomes a memory address vs.
// a stack value; bReverse swaps the roles (implemented by a recursive call with
// the operands exchanged). Read-only, non-dying regvars are left in place; live
// operands are pinned (SetUsedRegFloat/SetLockedRegFloat) around the other
// operand's evaluation so they can't be clobbered, and re-checked for spilling
// afterwards. op1/op2 are in-out: they are replaced by their addressable forms.
1565 void CodeGen::genSetupForOpStackFP(
1566 GenTree*& op1, GenTree*& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly)
1568 if (bMakeOp1Addressable)
// bReverse arm (elided guard): swap operand roles via recursion.
1572 genSetupForOpStackFP(op2, op1, false, false, bOp2ReadOnly, bOp1ReadOnly);
1576 regMaskTP addrRegInt = 0;
1577 regMaskTP addrRegFlt = 0;
1579 op1 = genCodeForCommaTree(op1);
1581 // Evaluate RHS on FP stack
1582 if (bOp2ReadOnly && op2->IsRegVar() && !op2->IsRegVarDeath())
1584 // read only and not dying, so just make addressable
1585 op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
1586 genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
1591 // Make target addressable
1592 op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
1594 op2 = genCodeForCommaTree(op2);
1596 genCodeForTreeFloat(op2);
// Pin op2 while we re-validate op1's address.
1598 regSet.SetUsedRegFloat(op2, true);
1599 regSet.SetLockedRegFloat(op2, true);
1601 // Make sure target is still addressable
1602 genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
1604 regSet.SetLockedRegFloat(op2, false);
1605 regSet.SetUsedRegFloat(op2, false);
1608 /* Free up anything that was tied up by the target address */
1609 genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
// Mirror path: op2 becomes addressable instead (assert text elided around
// the string below — regvar-dies-in-op1 is unsupported here).
1615 !"Can't do this. if op2 is a reg var and dies in op1, we have a serious problem. For the "
1616 "moment, handle this in the caller");
1618 regMaskTP addrRegInt = 0;
1619 regMaskTP addrRegFlt = 0;
1621 op1 = genCodeForCommaTree(op1);
1623 if (bOp1ReadOnly && op1->IsRegVar() && !op1->IsRegVarDeath() &&
1624 !genRegVarDiesInSubTree(op2, op1->gtRegVar.gtRegNum)) // regvar can't die in op2 either
1626 // First update liveness for op1, since we're "evaluating" it here
1629 op2 = genCodeForCommaTree(op2);
1631 // read only and not dying, we don't have to do anything.
1632 op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
1633 genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
1637 genCodeForTreeFloat(op1);
1639 regSet.SetUsedRegFloat(op1, true);
1641 op2 = genCodeForCommaTree(op2);
1643 op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
1645 // Restore op1 if necessary
1646 if (op1->gtFlags & GTF_SPILLED)
// (UnspillFloat call elided.) Pin op1 while re-validating op2.
1652 regSet.SetLockedRegFloat(op1, true);
1654 genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
1657 regSet.SetLockedRegFloat(op1, false);
1660 regSet.SetUsedRegFloat(op1, false);
1663 genDoneAddressableStackFP(op2, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
// genCodeForTreeStackFP_Arithm: generate x87 code for a floating-point
// GT_ADD/GT_SUB/GT_MUL/GT_DIV node. Picks one of three strategies:
//   1) both operands are regvars and op2 dies: operate in place on op2's reg;
//   2) op1 == op2 (cheap CSE duplicate) with no side effects: evaluate once;
//   3) general case via genSetupForOpStackFP + genArithmStackFP.
// NOTE(review): elided extract — locals (op1/op2/result/bReverse) are declared
// on lines not visible here.
1667 void CodeGen::genCodeForTreeStackFP_Arithm(GenTree* tree)
1670 if (compiler->verbose)
1672 printf("genCodeForTreeStackFP_Arithm() ");
1673 Compiler::printTreeID(tree);
1678 assert(tree->OperGet() == GT_ADD || tree->OperGet() == GT_SUB || tree->OperGet() == GT_MUL ||
1679 tree->OperGet() == GT_DIV);
1681 // We handle the reverse here instead of leaving setupop to do it. As for this case
1686 // and in regvar dies in op1, we would need a load of regvar, instead of a noop. So we handle this
1687 // here and tell genArithmStackFP to do the reverse operation
1693 if (tree->gtFlags & GTF_REVERSE_OPS)
// Swap the operand trees locally; the reversal is compensated for when the
// instruction is selected in genArithmStackFP.
1696 op1 = tree->gtGetOp2IfPresent();
1697 op2 = tree->gtOp.gtOp1;
1702 op1 = tree->gtOp.gtOp1;
1703 op2 = tree->gtGetOp2IfPresent();
1709 genTreeOps oper = tree->OperGet();
1710 if (op1->IsRegVar() && op2->IsRegVar() && !op1->IsRegVarDeath() && op2->IsRegVarDeath())
1712 // In this fastpath, we will save a load by doing the operation directly on the op2
1713 // register, as it's dying.
1716 genRegVarDeathStackFP(op2);
// dst and src are swapped here, hence the extra reversal (!bReverse).
1719 result = genArithmStackFP(oper, op2, op2->gtRegVar.gtRegNum, op1, REG_FPNONE, !bReverse);
1724 else if (!op1->IsRegVar() && // We don't do this for regvars, as we'll need a scratch reg
1725 ((tree->gtFlags & GTF_SIDE_EFFECT) == 0) && // No side effects
1726 GenTree::Compare(op1, op2)) // op1 and op2 are the same
1728 // op1 is same thing as op2. Ideal for CSEs that weren't optimized
1729 // due to their low cost.
1731 // First we need to update lifetimes from op1
1732 VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, genUpdateLiveSetForward(op1));
1733 compiler->compCurLifeTree = op1;
1735 genCodeForTreeFloat(op2);
1737 result = genArithmStackFP(oper, op2, op2->gtRegNum, op2, op2->gtRegNum, bReverse);
1741 genSetupForOpStackFP(op1, op2, false, false, false, true);
1743 result = genArithmStackFP(oper, op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
1744 (op2->InReg()) ? op2->gtRegNum : REG_FPNONE, bReverse);
1747 genCodeForTreeStackFP_DONE(tree, result);
// genArithmStackFP: emit the x87 instruction(s) for a binary FP arithmetic
// operation (add/sub/mul/div).
//   oper           - GT_ADD..GT_DIV, used to index the FPmath* tables.
//   dst/dstreg     - destination tree and its virtual FP register, or
//                    REG_FPNONE if the destination is not in a register.
//   src/srcreg     - source tree and its virtual FP register, or REG_FPNONE
//                    if the source is in memory / an addressable tree.
//   bReverse       - operands were swapped by the caller; selects the
//                    reversed instruction forms (fsubr/fdivr etc.).
// Returns the virtual FP register holding the result.
// Instruction table naming: N=normal, R=reverse, P=pop (see file header).
// NOTE(review): elided extract — the switch validating 'oper' and the
// ins_NN/NP/RN/RP declarations sit on lines not visible here.
1750 regNumber CodeGen::genArithmStackFP(
1751 genTreeOps oper, GenTree* dst, regNumber dstreg, GenTree* src, regNumber srcreg, bool bReverse)
1754 if (compiler->verbose)
1756 printf("genArithmStackFP() dst: ");
1757 Compiler::printTreeID(dst);
1759 Compiler::printTreeID(src);
1760 printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
1761 srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
1765 // Select instruction depending on oper and bReverseOp
1775 assert(!"Unexpected oper");
1781 /* Make sure the instruction tables look correctly ordered */
1782 assert(FPmathNN[GT_ADD - GT_ADD] == INS_fadd);
1783 assert(FPmathNN[GT_SUB - GT_ADD] == INS_fsub);
1784 assert(FPmathNN[GT_MUL - GT_ADD] == INS_fmul);
1785 assert(FPmathNN[GT_DIV - GT_ADD] == INS_fdiv);
1787 assert(FPmathNP[GT_ADD - GT_ADD] == INS_faddp);
1788 assert(FPmathNP[GT_SUB - GT_ADD] == INS_fsubp);
1789 assert(FPmathNP[GT_MUL - GT_ADD] == INS_fmulp);
1790 assert(FPmathNP[GT_DIV - GT_ADD] == INS_fdivp);
1792 assert(FPmathRN[GT_ADD - GT_ADD] == INS_fadd);
1793 assert(FPmathRN[GT_SUB - GT_ADD] == INS_fsubr);
1794 assert(FPmathRN[GT_MUL - GT_ADD] == INS_fmul);
1795 assert(FPmathRN[GT_DIV - GT_ADD] == INS_fdivr);
1797 assert(FPmathRP[GT_ADD - GT_ADD] == INS_faddp);
1798 assert(FPmathRP[GT_SUB - GT_ADD] == INS_fsubrp);
1799 assert(FPmathRP[GT_MUL - GT_ADD] == INS_fmulp);
1800 assert(FPmathRP[GT_DIV - GT_ADD] == INS_fdivrp);
// Reversed case: swap the normal and reverse tables so the code below can be
// written once in terms of ins_NN/NP/RN/RP. (Guarding 'if' is elided —
// presumably 'if (bReverse)'; TODO confirm against the full file.)
1804 ins_NN = FPmathRN[oper - GT_ADD];
1805 ins_NP = FPmathRP[oper - GT_ADD];
1806 ins_RN = FPmathNN[oper - GT_ADD];
1807 ins_RP = FPmathNP[oper - GT_ADD];
1811 ins_NN = FPmathNN[oper - GT_ADD];
1812 ins_NP = FPmathNP[oper - GT_ADD];
1813 ins_RN = FPmathRN[oper - GT_ADD];
1814 ins_RP = FPmathRP[oper - GT_ADD];
1818 regNumber result = REG_FPNONE;
1820 if (dstreg != REG_FPNONE)
1822 if (srcreg == REG_FPNONE)
1824 if (src->IsRegVar())
1826 if (src->IsRegVarDeath())
// src is a dying regvar: operate in place and let src's stack slot become dst.
1828 if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
1830 // Do operation and store in srcreg
1831 inst_FS(ins_RP, compCurFPState.VirtualToST(src->gtRegNum));
1833 // kill current dst and rename src as dst.
1834 FlatFPX87_Kill(&compCurFPState, dstreg);
1835 compCurFPState.Rename(dstreg, src->gtRegNum);
1839 // Take src to top of stack
1840 FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
1842 // do reverse and pop operation
1843 inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));
1845 // Kill the register
1846 FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
1849 assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
1850 !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
// src is a live regvar: non-popping form, src must stay on the stack.
1854 if (compCurFPState.TopVirtual() == (unsigned)src->gtRegNum)
1856 inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
1860 FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
1861 inst_FN(ins_NN, compCurFPState.VirtualToST(src->gtRegNum));
1867 // do operation with memory and store in dest
1868 FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
1869 inst_FS_TT(ins_NN, src);
// Both operands are in virtual FP registers.
1874 if (dstreg == srcreg)
1876 FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
1877 inst_FN(ins_NN, compCurFPState.VirtualToST(dstreg));
1881 if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
1883 // Do operation and store in srcreg
1884 inst_FS(ins_RP, compCurFPState.VirtualToST(srcreg));
1886 // kill current dst and rename src as dst.
1887 FlatFPX87_Kill(&compCurFPState, dstreg);
1888 compCurFPState.Rename(dstreg, srcreg);
1892 FlatFPX87_MoveToTOS(&compCurFPState, srcreg);
1894 // do reverse and pop operation
1895 inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));
1897 // Kill the register
1898 FlatFPX87_Kill(&compCurFPState, srcreg);
1907 assert(!"if we get here it means we didnt load op1 into a temp. Investigate why");
1910 assert(result != REG_FPNONE);
// genCodeForTreeStackFP_AsgArithm: generate x87 code for a compound
// assignment arithmetic node (GT_ASG_ADD/SUB/MUL/DIV). Prepares the operands
// (op1 is made addressable as the assignment target) and delegates the actual
// instruction emission to genAsgArithmStackFP.
1914 void CodeGen::genCodeForTreeStackFP_AsgArithm(GenTree* tree)
1917 if (compiler->verbose)
1919 printf("genCodeForTreeStackFP_AsgArithm() ");
1920 Compiler::printTreeID(tree);
1925 assert(tree->OperGet() == GT_ASG_ADD || tree->OperGet() == GT_ASG_SUB || tree->OperGet() == GT_ASG_MUL ||
1926 tree->OperGet() == GT_ASG_DIV);
1928 GenTree* op1 = tree->gtOp.gtOp1;
1929 GenTree* op2 = tree->gtGetOp2IfPresent();
1931 genSetupForOpStackFP(op1, op2, (tree->gtFlags & GTF_REVERSE_OPS) ? true : false, true, false, true);
1933 regNumber result = genAsgArithmStackFP(tree->OperGet(), op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
1934 (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
1936 genCodeForTreeStackFP_DONE(tree, result);
// genAsgArithmStackFP: emit x87 code for a compound assignment arithmetic
// operation (dst op= src). dst is either an FP regvar or memory — it is never
// a plain FP stack register (asserted below).
//   oper           - GT_ASG_ADD..GT_ASG_DIV, indexes the FPmath* tables.
//   dst/dstreg     - assignment target tree and register (REG_FPNONE if mem).
//   src/srcreg     - source tree and register (REG_FPNONE if mem/addressable).
// Returns the result register (REG_FPNONE when the result went to memory;
// the return statements are on elided lines — TODO confirm).
1939 regNumber CodeGen::genAsgArithmStackFP(genTreeOps oper, GenTree* dst, regNumber dstreg, GenTree* src, regNumber srcreg)
1941 regNumber result = REG_FPNONE;
1944 if (compiler->verbose)
1946 printf("genAsgArithmStackFP() dst: ");
1947 Compiler::printTreeID(dst);
1949 Compiler::printTreeID(src);
1950 printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
1951 srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
1963 assert(!"Unexpected oper");
// Validate the reversed instruction tables against the GT_ASG_* ordering.
1970 assert(FPmathRN[GT_ASG_ADD - GT_ASG_ADD] == INS_fadd);
1971 assert(FPmathRN[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubr);
1972 assert(FPmathRN[GT_ASG_MUL - GT_ASG_ADD] == INS_fmul);
1973 assert(FPmathRN[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivr);
1975 assert(FPmathRP[GT_ASG_ADD - GT_ASG_ADD] == INS_faddp);
1976 assert(FPmathRP[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubrp);
1977 assert(FPmathRP[GT_ASG_MUL - GT_ASG_ADD] == INS_fmulp);
1978 assert(FPmathRP[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivrp);
1980 ins_NN = FPmathNN[oper - GT_ASG_ADD];
1981 ins_NP = FPmathNP[oper - GT_ASG_ADD];
1983 ins_RN = FPmathRN[oper - GT_ASG_ADD];
1984 ins_RP = FPmathRP[oper - GT_ASG_ADD];
1986 if (dstreg != REG_FPNONE)
1988 assert(!"dst should be a regvar or memory");
1992 if (dst->IsRegVar())
1994 if (src->IsRegVar())
1996 if (src->IsRegVarDeath())
1998 // Take src to top of stack
1999 FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum)
2002 inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));
2004 // Kill the register
2005 FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
2007 // SetupOp should mark the regvar as dead
2008 assert((genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
2012 assert(src->gtRegNum == src->gtRegVar.gtRegNum &&
2013 "We shoudnt be loading regvar src on the stack as src is readonly");
2015 // Take src to top of stack
2016 FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
// Live src: use the non-popping reversed form so src stays on the stack.
2019 inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
2024 if (srcreg == REG_FPNONE)
2026 // take enregistered variable to top of stack
2027 FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
2029 // Do operation with mem
2030 inst_FS_TT(ins_NN, src);
2034 // take enregistered variable to top of stack
2035 FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
2038 inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));
2040 // Kill the register
2041 FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
// dst is in memory from here on.
2048 if ((src->IsRegVar()) && !src->IsRegVarDeath())
2050 // We set src as read only, but as dst is in memory, we will need
2051 // an extra physical register (which we should have, as we have a
2052 // spare one for transitions).
2054 // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
2055 // here, but there's actually no reason to assume that. AFAICT, for FP vars under stack FP,
2056 // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
2057 // FP stack position into which that is loaded to represent a particular use of the variable.
2058 inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegNum));
2060 // Do operation with mem
2061 inst_FS_TT(ins_RN, dst);
2064 inst_FS_TT(INS_fstp, dst);
2068 // put src in top of stack
2069 FlatFPX87_MoveToTOS(&compCurFPState, srcreg);
2071 // Do operation with mem
2072 inst_FS_TT(ins_RN, dst);
2075 inst_FS_TT(INS_fstp, dst);
2077 // SetupOp should have marked the regvar as dead in that case
2078 assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
2079 (genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
2081 FlatFPX87_Kill(&compCurFPState, srcreg);
// genCodeForTreeStackFP_SmpOp: dispatch code generation for a "simple"
// (GTK_SMPOP) floating-point node: assignments, arithmetic, asg-arithmetic,
// indirections, returns, commas, casts, negation, math intrinsics, and the
// checked-finite helper. Each case ends via genCodeForTreeStackFP_DONE or an
// equivalent (case labels/breaks sit on elided lines).
2090 void CodeGen::genCodeForTreeStackFP_SmpOp(GenTree* tree)
2093 if (compiler->verbose)
2095 printf("genCodeForTreeStackFP_SmpOp() ");
2096 Compiler::printTreeID(tree);
2101 assert(tree->OperKind() & GTK_SMPOP);
2103 switch (tree->OperGet())
2108 genCodeForTreeStackFP_Asg(tree);
2112 // Arithmetic binops
2118 genCodeForTreeStackFP_Arithm(tree);
2122 // Asg-Arithmetic ops
2128 genCodeForTreeStackFP_AsgArithm(tree);
// GT_IND-style case: load a value from an address onto the FP stack.
2137 // Make sure the address value is 'addressable' */
2138 addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);
2140 // Load the value onto the FP stack
2141 regNumber reg = regSet.PickRegFloat();
2142 genLoadStackFP(tree, reg);
2144 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
2146 genCodeForTreeStackFP_DONE(tree, reg);
// GT_RETURN case: evaluate the return value onto the FP stack, rounding per
// the current float rounding level.
2153 GenTree* op1 = tree->gtOp.gtOp1;
2156 // Compute the result onto the FP stack
2157 if (op1->gtType == TYP_FLOAT)
2160 bool roundOp1 = false;
2162 switch (getRoundFloatLevel())
2165 /* No rounding at all */
2168 case ROUND_CMP_CONST:
2172 /* Round all comparands and return values*/
2177 /* Round everything */
2182 assert(!"Unsupported Round Level");
2186 genCodeForTreeFlt(op1);
2190 assert(op1->gtType == TYP_DOUBLE);
2191 genCodeForTreeFloat(op1);
// A long-to-double cast result must be rounded before returning.
2194 if ((op1->gtOper == GT_CAST) && (op1->CastFromType() == TYP_LONG))
2195 genRoundFpExpressionStackFP(op1);
2199 // kill enregistered variables
2200 compCurFPState.Pop();
2201 assert(compCurFPState.m_uStackSize == 0);
// GT_COMMA case: evaluate side effects of op1, value of op2.
2207 GenTree* op1 = tree->gtOp.gtOp1;
2208 GenTree* op2 = tree->gtGetOp2IfPresent();
2210 if (tree->gtFlags & GTF_REVERSE_OPS)
2212 genCodeForTreeFloat(op2);
2214 regSet.SetUsedRegFloat(op2, true);
2216 genEvalSideEffects(op1);
2218 if (op2->gtFlags & GTF_SPILLED)
2223 regSet.SetUsedRegFloat(op2, false);
2227 genEvalSideEffects(op1);
2228 genCodeForTreeFloat(op2);
2231 genCodeForTreeStackFP_DONE(tree, op2->gtRegNum);
2236 genCodeForTreeStackFP_Cast(tree);
// GT_NEG-style case: evaluate, bring to top of stack, negate in place
// (the negation instruction itself is on an elided line).
2242 GenTree* op1 = tree->gtOp.gtOp1;
2244 // get the tree into a register
2245 genCodeForTreeFloat(op1);
2247 // Take reg to top of stack
2248 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
2253 // mark register that holds tree
2254 genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
// GT_INTRINSIC case: sin/cos/sqrt/abs/round map directly to x87 instructions.
2259 assert(compiler->IsMathIntrinsic(tree));
2261 GenTree* op1 = tree->gtOp.gtOp1;
2263 // get tree into a register
2264 genCodeForTreeFloat(op1);
2266 // Take reg to top of stack
2267 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
2269 static const instruction mathIns[] = {
2270 INS_fsin, INS_fcos, INS_invalid, INS_fsqrt, INS_fabs, INS_frndint,
2273 assert(mathIns[CORINFO_INTRINSIC_Sin] == INS_fsin);
2274 assert(mathIns[CORINFO_INTRINSIC_Cos] == INS_fcos);
2275 assert(mathIns[CORINFO_INTRINSIC_Sqrt] == INS_fsqrt);
2276 assert(mathIns[CORINFO_INTRINSIC_Abs] == INS_fabs);
2277 assert(mathIns[CORINFO_INTRINSIC_Round] == INS_frndint);
2278 assert((unsigned)(tree->gtIntrinsic.gtIntrinsicId) < _countof(mathIns));
2279 instGen(mathIns[tree->gtIntrinsic.gtIntrinsicId]);
2281 // mark register that holds tree
2282 genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
// GT_CKFINITE-style case: store the value to a temp, examine the exponent
// bits in an integer register, and throw if the value is NaN/Inf.
2291 GenTree* op1 = tree->gtOp.gtOp1;
2293 // Offset of the DWord containing the exponent
2294 offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);
2296 // get tree into a register
2297 genCodeForTreeFloat(op1);
2299 // Take reg to top of stack
2300 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
2302 temp = compiler->tmpGetTemp(op1->TypeGet());
2303 emitAttr size = EmitSize(op1);
2305 // Store the value from the FP stack into the temp
2306 getEmitter()->emitIns_S(INS_fst, size, temp->tdTempNum(), 0);
2308 regNumber reg = regSet.rsPickReg();
2310 // Load the DWord containing the exponent into a general reg.
2311 inst_RV_ST(INS_mov, reg, temp, offs, op1->TypeGet(), EA_4BYTE);
2312 compiler->tmpRlsTemp(temp);
2314 // 'reg' now contains the DWord containing the exponent
2315 regTracker.rsTrackRegTrash(reg);
2317 // Mask of exponent with all 1's - appropriate for given type
2320 expMask = (op1->gtType == TYP_FLOAT) ? 0x7F800000 // TYP_FLOAT
2321 : 0x7FF00000; // TYP_DOUBLE
2323 // Check if the exponent is all 1's
2325 inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
2326 inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);
2328 // If exponent was all 1's, we need to throw ArithExcep
2329 genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
2331 genUpdateLife(tree);
2333 genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
// genCodeForTreeStackFP_Cast: generate x87 code for a GT_CAST whose result is
// floating point. Handles:
//   - int/long -> float/double via 'fild'/'fildl' (small ints are widened in a
//     general register first; constants are materialized in memory since
//     'fild' cannot take an immediate);
//   - float -> double and double -> float/double via load + optional rounding.
// Rounding of the result is skipped where overflow is impossible.
// NOTE(review): elided extract — several case labels, returns and the
// condition at inner line 2507 are partially missing.
2341 void CodeGen::genCodeForTreeStackFP_Cast(GenTree* tree)
2344 if (compiler->verbose)
2346 printf("genCodeForTreeStackFP_Cast() ");
2347 Compiler::printTreeID(tree);
2353 bool roundResult = true;
2360 GenTree* op1 = tree->gtOp.gtOp1;
2362 // If op1 is a comma expression, evaluate the non-last parts, make op1 be the rest.
2363 op1 = genCodeForCommaTree(op1);
2365 switch (op1->gtType)
2374 // Operand too small for 'fild', load it into a register
2375 genCodeForTree(op1, 0);
2378 // no need to round, can't overflow float or dbl
2379 roundResult = false;
2388 // Can't 'fild' a constant, it has to be loaded from memory
2389 switch (op1->gtOper)
2392 op1 = genMakeConst(&op1->gtIntCon.gtIconVal, TYP_INT, tree, false);
2396 // Our encoder requires fild on m64int to be 64-bit aligned.
2397 op1 = genMakeConst(&op1->gtLngCon.gtLconVal, TYP_LONG, tree, true);
2403 addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);
2405 // Grab register for the cast
2406 regNumber reg = regSet.PickRegFloat();
2407 genMarkTreeInReg(tree, reg);
2408 compCurFPState.Push(reg);
2410 // Is the value now sitting in a register?
2413 // We'll have to store the value into the stack */
2414 size = EA_ATTR(roundUp(genTypeSize(op1->gtType)));
2415 temp = compiler->tmpGetTemp(op1->TypeGet());
2417 // Move the value into the temp
2418 if (op1->gtType == TYP_LONG)
2420 regPairNo regPair = op1->gtRegPair;
2422 // This code is pretty ugly, but straightforward
// A long lives in a register pair; either half may have been spilled to the
// stack (REG_STK), in which case the live half is used as a transfer register.
2424 if (genRegPairLo(regPair) == REG_STK)
2426 regNumber rg1 = genRegPairHi(regPair);
2428 assert(rg1 != REG_STK);
2430 /* Move enregistered half to temp */
2432 inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);
2434 /* Move lower half to temp via "high register" */
2436 inst_RV_TT(INS_mov, rg1, op1, 0);
2437 inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);
2439 /* Reload transfer register */
2441 inst_RV_ST(INS_mov, rg1, temp, 4, TYP_LONG);
2443 else if (genRegPairHi(regPair) == REG_STK)
2445 regNumber rg1 = genRegPairLo(regPair);
2447 assert(rg1 != REG_STK);
2449 /* Move enregistered half to temp */
2451 inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);
2453 /* Move high half to temp via "low register" */
2455 inst_RV_TT(INS_mov, rg1, op1, 4);
2456 inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);
2458 /* Reload transfer register */
2460 inst_RV_ST(INS_mov, rg1, temp, 0, TYP_LONG);
2464 /* Move the value into the temp */
2466 inst_ST_RV(INS_mov, temp, 0, genRegPairLo(regPair), TYP_LONG);
2467 inst_ST_RV(INS_mov, temp, 4, genRegPairHi(regPair), TYP_LONG);
2469 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
2471 /* Load the long from the temp */
2473 inst_FS_ST(INS_fildl, size, temp, 0);
2477 /* Move the value into the temp */
2479 inst_ST_RV(INS_mov, temp, 0, op1->gtRegNum, TYP_INT);
2481 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
2483 /* Load the integer from the temp */
2485 inst_FS_ST(INS_fild, size, temp, 0);
2488 // We no longer need the temp
2489 compiler->tmpRlsTemp(temp);
2493 // Load the value from its address
2494 if (op1->gtType == TYP_LONG)
2495 inst_TT(INS_fildl, op1);
2497 inst_TT(INS_fild, op1);
2499 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
2503 /* integer to fp conversions can overflow. roundResult
2504 * is cleared above in cases where it can't
2507 ((tree->gtType == TYP_FLOAT) || ((tree->gtType == TYP_DOUBLE) && (op1->gtType == TYP_LONG))))
2508 genRoundFpExpression(tree);
// float source case:
2515 // This is a cast from float to double.
2516 // Note that conv.r(r4/r8) and conv.r8(r4/r9) are indistinguishable
2517 // as we will generate GT_CAST-TYP_DOUBLE for both. This would
2518 // cause us to truncate precision in either case. However,
2519 // conv.r was needless in the first place, and should have
2521 genCodeForTreeFloat(op1); // Truncate its precision
2523 if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
2524 op1->gtOper == GT_IND || op1->gtOper == GT_LEA)
2526 // We take advantage here of the fact that we know that our
2527 // codegen will have just loaded this from memory, and that
2528 // therefore, no cast is really needed.
2529 // Ideally we wouldn't do this optimization here, but in
2530 // morphing, however, we need to do this after regalloc, as
2531 // this optimization doesnt apply if what we're loading is a
2536 genRoundFpExpressionStackFP(op1, tree->TypeGet());
2539 // Assign reg to tree
2540 genMarkTreeInReg(tree, op1->gtRegNum);
// double source case:
2546 // This is a cast from double to float or double
2547 // Load the value, store as destType, load back
2548 genCodeForTreeFlt(op1);
2550 if ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
2551 op1->gtOper == GT_IND || op1->gtOper == GT_LEA) &&
2552 tree->TypeGet() == TYP_DOUBLE)
2554 // We take advantage here of the fact that we know that our
2555 // codegen will have just loaded this from memory, and that
2556 // therefore, no cast is really needed.
2557 // Ideally we wouldn't do this optimization here, but in
2558 // morphing. However, we need to do this after regalloc, as
2559 // this optimization doesnt apply if what we're loading is a
2564 genRoundFpExpressionStackFP(op1, tree->TypeGet());
2567 // Assign reg to tree
2568 genMarkTreeInReg(tree, op1->gtRegNum);
2574 assert(!"unsupported cast");
// genCodeForTreeStackFP_Special: handle FP nodes that are neither constants,
// leaves, nor simple ops. Currently only GT_CALL is implemented; everything
// else hits the NYI.
2580 void CodeGen::genCodeForTreeStackFP_Special(GenTree* tree)
2583 if (compiler->verbose)
2585 printf("genCodeForTreeStackFP_Special() ");
2586 Compiler::printTreeID(tree);
2591 switch (tree->OperGet())
2595 genCodeForCall(tree->AsCall(), true);
2599 NYI("genCodeForTreeStackFP_Special");
// genCodeForTreeFloat: top-level dispatcher for generating x87 code for a
// floating-point tree. Routes by operator kind to the Const/Leaf/SmpOp/Special
// handlers and checks FP-stack-state consistency afterwards.
//   pref - register preference (unused in the visible lines; presumably
//          consumed by the handlers — TODO confirm).
2604 void CodeGen::genCodeForTreeFloat(GenTree* tree, RegSet::RegisterPreference* pref)
2606 // TestTransitions();
2611 assert(tree->gtOper != GT_STMT);
2612 assert(varTypeIsFloating(tree));
2614 // What kind of node do we have?
2615 oper = tree->OperGet();
2616 kind = tree->OperKind();
2618 if (kind & GTK_CONST)
2620 genCodeForTreeStackFP_Const(tree);
2622 else if (kind & GTK_LEAF)
2624 genCodeForTreeStackFP_Leaf(tree);
2626 else if (kind & GTK_SMPOP)
2628 genCodeForTreeStackFP_SmpOp(tree);
2632 genCodeForTreeStackFP_Special(tree);
2640 assert(compCurFPState.IsConsistent());
// genCompInsStackFP: emit the x87 compare between 'tos' (already brought to
// the top of the stack) and 'other' (register, regvar, or memory). Uses
// fcomi/fcomip when the CPU supports it, otherwise fcom/fcomp + fnstsw into
// EAX. Pops operands that are not live regvars.
// Returns bReverse: whether the comparison was emitted with the operands in
// reversed order, so the caller must flip the condition.
2644 bool CodeGen::genCompInsStackFP(GenTree* tos, GenTree* other)
2646 // assume gensetupop done
2648 bool bUseFcomip = genUse_fcomip();
2649 bool bReverse = false;
2651 // Take op1 to top of the stack
2652 FlatFPX87_MoveToTOS(&compCurFPState, tos->gtRegNum);
2654 // We pop top of stack if it's not a live regvar
2655 bool bPopTos = !(tos->IsRegVar() && !tos->IsRegVarDeath()) || (tos->InReg());
2656 bool bPopOther = !(other->IsRegVar() && !other->IsRegVarDeath()) || (other->InReg());
2658 assert(tos->IsRegVar() || (tos->InReg()));
2660 if (!(other->IsRegVar() || (other->InReg())))
// 'other' is in memory.
2667 // We should have space for a load
2668 assert(compCurFPState.m_uStackSize < FP_PHYSICREGISTERS);
2670 // load from mem, now the comparison will be the other way around
2671 inst_FS_TT(INS_fld, other);
2672 inst_FN(INS_fcomip, 1);
2674 // pop if we've been asked to do so
2677 inst_FS(INS_fstp, 0);
2678 FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
2685 // compare directly with memory
2688 inst_FS_TT(INS_fcomp, other);
2689 FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
2693 inst_FS_TT(INS_fcom, other);
// 'other' is on the FP stack; fcomi(p) sets EFLAGS directly.
2703 inst_FN(INS_fcomip, compCurFPState.VirtualToST(other->gtRegNum));
2704 FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
2708 inst_FN(INS_fcomi, compCurFPState.VirtualToST(other->gtRegNum));
2713 FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
// Legacy path without fcomi: fcom(p) + fnstsw.
2720 inst_FN(INS_fcomp, compCurFPState.VirtualToST(other->gtRegNum));
2721 FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
2725 inst_FN(INS_fcom, compCurFPState.VirtualToST(other->gtRegNum));
2730 FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
2737 // oops, we have to put result of compare in eflags
2739 // Grab EAX for the result of the fnstsw
2740 regSet.rsGrabReg(RBM_EAX);
2742 // Generate the 'fnstsw' and test its result
2743 inst_RV(INS_fnstsw, REG_EAX, TYP_INT);
2744 regTracker.rsTrackRegTrash(REG_EAX);
// genCondJumpFltStackFP: emit a conditional jump for a floating-point
// relational node. Prepares the operands, picks which one sits at the top of
// the x87 stack, emits the compare (possibly reversed), then the jump(s):
// an extra 'jpe' handles the unordered (NaN) case per GTF_RELOP_NAN_UN.
//   bDoTransition - whether FP-stack-state transitions are generated at the
//                   jump targets (forwarded to genCondJmpInsStackFP).
2751 void CodeGen::genCondJumpFltStackFP(GenTree* cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition)
2753 assert(jumpTrue && jumpFalse);
2754 assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
2755 assert(varTypeIsFloating(cond->gtOp.gtOp1));
2757 GenTree* op1 = cond->gtOp.gtOp1;
2758 GenTree* op2 = cond->gtOp.gtOp2;
2759 genTreeOps cmp = cond->OperGet();
2761 // Prepare operands.
2762 genSetupForOpStackFP(op1, op2, false, false, true, false);
2766 bool bReverseCmp = false;
// Prefer comparing with op2 on top when it is already there and will be
// popped anyway (tos/other selection lines are elided in this extract).
2768 if ((op2->IsRegVar() || (op2->InReg())) && // op2 is in a reg
2769 (compCurFPState.TopVirtual() == (unsigned)op2->gtRegNum && // Is it already at the top of the stack?
2770 (!op2->IsRegVar() || op2->IsRegVarDeath()))) // are we going to pop it off?
2780 bReverseCmp = false;
2783 if (genCompInsStackFP(tos, other))
2785 // genCompInsStackFP flipped the operand order; flip the condition too.
2785 bReverseCmp = !bReverseCmp;
2788 // do .un comparison
2789 if (cond->gtFlags & GTF_RELOP_NAN_UN)
2791 // Generate the first jump (NaN check)
2792 genCondJmpInsStackFP(EJ_jpe, jumpTrue, NULL, bDoTransition);
2796 jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
2798 // Generate the first jump (NaN check)
2799 genCondJmpInsStackFP(EJ_jpe, jumpFalse, NULL, bDoTransition);
2802 /* Generate the second jump (comparison) */
2803 const static BYTE dblCmpTstJmp2[] = {
2812 // Swap comp order if necessary
2815 cmp = GenTree::SwapRelop(cmp);
2818 genCondJmpInsStackFP((emitJumpKind)dblCmpTstJmp2[cmp - GT_EQ], jumpTrue, jumpFalse, bDoTransition);
// genTransitionBlockStackFP: create (if needed) a BBJ_ALWAYS "transition"
// block between pFrom and pTarget that reconciles the x87 stack state. If the
// target already has a matching FP state, or the current state is empty, no
// block is needed. The new block inherits pFrom's live-out sets, is marked
// cold if either endpoint is cold, and is inserted either at the end of the
// method or at the hot/cold boundary.
// Returns the created block (return statements sit on elided lines — TODO
// confirm the no-transition return value against the full file).
2821 BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget)
2823 // Fast paths where a transition block is not necessary
2824 if ((pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87)) || pState->IsEmpty())
2829 // We shouldn't have any handlers if we're generating transition blocks, as we don't know
2830 // how to recover them
2831 assert(compiler->compMayHaveTransitionBlocks);
2832 assert(compiler->compHndBBtabCount == 0);
// Temporarily allow block creation this late in the phase pipeline.
2835 compiler->fgSafeBasicBlockCreation = true;
2838 // Create a temp block
2839 BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);
2842 compiler->fgSafeBasicBlockCreation = false;
2845 VarSetOps::Assign(compiler, pBlock->bbLiveIn, pFrom->bbLiveOut);
2846 VarSetOps::Assign(compiler, pBlock->bbLiveOut, pFrom->bbLiveOut);
2848 pBlock->bbJumpDest = pTarget;
2849 pBlock->bbFlags |= BBF_JMP_TARGET;
2851 // If either pFrom or pTarget are cold blocks then
2852 // the transition block also must be cold
2854 pBlock->bbFlags |= (pFrom->bbFlags & BBF_COLD);
2855 pBlock->bbFlags |= (pTarget->bbFlags & BBF_COLD);
2857 // The FP state for the block is the same as the current one
2858 pBlock->bbFPStateX87 = FlatFPAllocFPState(pState);
2860 if ((pBlock->bbFlags & BBF_COLD) || (compiler->fgFirstColdBlock == NULL))
2863 // If this block is cold or if all blocks are hot
2864 // then we just insert it at the end of the method.
2866 compiler->fgMoveBlocksAfter(pBlock, pBlock, compiler->fgLastBBInMainFunction());
2871 // This block is hot so we need to insert it in the hot region
2874 BasicBlock* lastHotBlock = compiler->fgFirstColdBlock->bbPrev;
2875 noway_assert(lastHotBlock != nullptr);
// A fall-through last hot block would be broken by inserting after it.
2877 if (lastHotBlock->bbFallsThrough())
2878 NO_WAY("Bad fgFirstColdBlock in genTransitionBlockStackFP()");
2881 // Insert pBlock between lastHotBlock and fgFirstColdBlock
2883 compiler->fgInsertBBafter(lastHotBlock, pBlock);
// genCondJumpLngStackFP: emit a long (64-bit) conditional jump while keeping
// the x87 stack state consistent. Routes the false edge through a temp label
// placed before the end-of-block FP transition so that both false paths
// (fall-through and the high-DWORD-check jump) go through the transition.
2889 void CodeGen::genCondJumpLngStackFP(GenTree* cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
2891 // For the moment, and so we don't have to deal with the amount of special cases
2892 // we have, will insert a dummy block for jumpTrue (if necessary) that will do the
2893 // transition for us. For the jumpFalse case, we play a trick. For the false case ,
2894 // a Long conditional has a fallthrough (least significant DWORD check is false) and
2895 // also has a jump to the fallthrough (bbNext) if the most significant DWORD check
2896 // fails. However, we do want to make an FP transition if we're in the later case,
2897 // So what we do is create a label and make jumpFalse go there. This label is defined
2898 // before doing the FP transition logic at the end of the block, so now both exit paths
2899 // for false condition will go through the transition and then fall through to bbnext.
2900 assert(jumpFalse == compiler->compCurBB->bbNext);
2902 BasicBlock* pTransition = genCreateTempLabel();
2904 genCondJumpLng(cond, jumpTrue, pTransition, true);
2906 genDefineTempLabel(pTransition);
// genQMarkRegVarTransition: at a qmark (?:) control-flow boundary, reconcile
// FP regvars with the given liveness set: generate the FP-stack transition
// for the registers live on entry, kill regvars that are no longer live, and
// give birth to tracked FP regvars that become live.
//   nextNode - the node after the transition (unused in the visible lines).
//   liveset  - variable liveness at the transition point.
2909 void CodeGen::genQMarkRegVarTransition(GenTree* nextNode, VARSET_VALARG_TP liveset)
2911 // Kill any vars that may die in the transition
2912 VARSET_TP newLiveSet(VarSetOps::Intersection(compiler, liveset, compiler->optAllFPregVars));
2914 regMaskTP liveRegIn = genRegMaskFromLivenessStackFP(newLiveSet);
2915 genCodeForTransitionFromMask(&compCurFPState, liveRegIn);
// Kill every currently-enregistered FP var (filtering by the new live set
// happens on elided lines — TODO confirm against the full file).
2920 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
2922 if ((genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat))
2925 genRegVarDeathStackFP(regSet.genRegVarsFloat[i]);
2929 // Born necessary regvars
2930 for (i = 0; i < compiler->lvaTrackedCount; i++)
2932 unsigned lclVar = compiler->lvaTrackedToVarNum[i];
2933 LclVarDsc* varDsc = compiler->lvaTable + lclVar;
2935 assert(varDsc->lvTracked);
2937 if (varDsc->lvRegister && VarSetOps::IsMember(compiler, newLiveSet, i))
2939 genRegVarBirthStackFP(varDsc);
// genQMarkBeforeElseStackFP: at the colon of a qmark expression, snapshot the
// current FP stack state into pState (restored after the else block) and do
// the regvar transition for entry into the 'then' path.
2944 void CodeGen::genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTree* nextNode)
2946 assert(regSet.rsMaskLockedFloat == 0);
2948 // Save current state at colon
2949 pState->stackState.Init(&compCurFPState);
2951 // Kill any vars that may die in the transition to then
2952 genQMarkRegVarTransition(nextNode, varsetCond);
// genQMarkAfterElseBlockStackFP: after generating the else block of a qmark,
// swap the current FP stack state with the one saved at the colon, repair any
// FP regvars the then block modified, and do the regvar transition for the
// continuation.
2955 void CodeGen::genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTree* nextNode)
2957 assert(regSet.rsMaskLockedFloat == 0);
2959 FlatFPStateX87 tempSwap;
2961 // Save current state. Now tempFPState will store the target state for the else block
2962 tempSwap.Init(&compCurFPState);
2964 compCurFPState.Init(&pState->stackState);
2966 pState->stackState.Init(&tempSwap);
2968 // Did any regvars die in the then block that are live on entry to the else block?
2970 for (i = 0; i < compiler->lvaTrackedCount; i++)
2972 if (VarSetOps::IsMember(compiler, varsetCond, i) && VarSetOps::IsMember(compiler, compiler->optAllFPregVars, i))
2974 // This variable should be live
2975 unsigned lclnum = compiler->lvaTrackedToVarNum[i];
2976 LclVarDsc* varDsc = compiler->lvaTable + lclnum;
2978 if (regSet.genRegVarsFloat[varDsc->lvRegNum] != varDsc)
2980 JITDUMP("genQMarkAfterThenBlockStackFP(): Fixing up regvar that was modified in then\n");
// Evict whatever var occupies the slot, then re-birth the expected one.
2981 if (regSet.genRegVarsFloat[varDsc->lvRegNum])
2983 genRegVarDeathStackFP(regSet.genRegVarsFloat[varDsc->lvRegNum]);
2986 genRegVarBirthStackFP(varDsc);
2991 // Kill any vars that may die in the transition
2992 genQMarkRegVarTransition(nextNode, varsetCond);
// genQMarkAfterThenBlockStackFP: after generating the then block of a qmark,
// emit the FP-stack transition back to the state saved at the colon and make
// it the current state.
2995 void CodeGen::genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState)
2997 JITDUMP("genQMarkAfterThenBlockStackFP()\n");
2998 assert(regSet.rsMaskLockedFloat == 0);
3000 // Generate transition to the previous one set by the then block
3001 genCodeForTransitionStackFP(&compCurFPState, &pState->stackState);
3004 compCurFPState.Init(&pState->stackState);
// SetRegVarFloat: mark or unmark an FP register as holding a register
// variable, and record the owning LclVarDsc in the lookup table.
//   reg    - the FP (pseudo) register.
//   type   - the variable's type (used only to compute the register mask).
//   varDsc - the variable now occupying the register, or (per the unmark
//            branch below) presumably null to release it — TODO confirm; the
//            branch condition is on an elided line.
3007 void CodeGenInterface::SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc)
3009 regMaskTP mask = genRegMaskFloat(reg, type);
3013 JITDUMP("marking register %s as a regvar\n", getRegNameFloat(reg, type));
// Register must not already be locked, a regvar, or in use.
3015 assert(mask && ((regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat | regSet.rsMaskUsedFloat) & mask) == 0);
3017 regSet.rsMaskRegVarFloat |= mask;
3021 JITDUMP("unmarking register %s as a regvar\n", getRegNameFloat(reg, type));
3023 assert(mask && (regSet.rsMaskRegVarFloat & mask));
3025 regSet.rsMaskRegVarFloat &= ~mask;
3028 // Update lookup table
3029 regSet.genRegVarsFloat[reg] = varDsc;
3032 // Generates a conditional jump. It will do the appropriate stack matching for the jmpTrue.
3033 // We don't use jumpFalse anywhere and the integer codebase assumes that it will be bbnext, and that is
3034 // taken care of at the end of the bb code generation.
3035 void CodeGen::genCondJmpInsStackFP(emitJumpKind jumpKind,
3036 BasicBlock* jumpTrue,
3037 BasicBlock* jumpFalse,
3040 // Assert the condition above.
3041 assert(!jumpFalse || jumpFalse == compiler->compCurBB->bbNext || !bDoTransition);
3043 // Do the fp stack matching.
3044 if (bDoTransition && !jumpTrue->bbFPStateX87 &&
3045 FlatFPSameRegisters(&compCurFPState, genRegMaskFromLivenessStackFP(jumpTrue->bbLiveIn)))
3047 // Target block doesn't have state yet, but has the same registers, so
3048 // we allocate the block and generate the normal jump
3049 genCodeForBBTransitionStackFP(jumpTrue);
3050 inst_JMP(jumpKind, jumpTrue);
3052 else if (!bDoTransition || compCurFPState.IsEmpty() || // If it's empty, target has to be empty too.
3053 (jumpTrue->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTrue->bbFPStateX87))
3055 // Nothing to do here. Proceed normally and generate the jump
3056 inst_JMP(jumpKind, jumpTrue);
3058 if (jumpFalse && jumpFalse != compiler->compCurBB->bbNext)
3060 inst_JMP(EJ_jmp, jumpFalse);
3065 // temporary workaround for stack matching
3066 // do a forward conditional jump, generate the transition and jump to the target
3067 // The payload is an additional jump instruction, but both jumps will be correctly
3068 // predicted by the processor in the loop case.
// Emit: jcc-inverse endLabel; <transition>; jmp jumpTrue; endLabel:
// so the transition code only runs when the condition is taken.
3069 BasicBlock* endLabel = NULL;
3071 endLabel = genCreateTempLabel();
3073 inst_JMP(emitter::emitReverseJumpKind(jumpKind), endLabel);
3075 genCodeForBBTransitionStackFP(jumpTrue);
3077 inst_JMP(EJ_jmp, jumpTrue);
3079 genDefineTempLabel(endLabel);
// Emits a table switch when the x87 stack is non-empty: each target either
// inherits the current FP state, matches it already, or is replaced with a
// transition block (reusing a previously created one when possible).
3083 void CodeGen::genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
3085 // Only come here when we have to do something special for the FPU stack!
3087 assert(!compCurFPState.IsEmpty());
3088 VARSET_TP liveInFP(VarSetOps::MakeEmpty(compiler));
3089 VARSET_TP liveOutFP(VarSetOps::MakeEmpty(compiler));
3090 for (unsigned i = 0; i < jumpCnt; i++)
// Compare the FP regvar sets live into the target vs live out of this block.
3092 VarSetOps::Assign(compiler, liveInFP, jumpTab[i]->bbLiveIn);
3093 VarSetOps::IntersectionD(compiler, liveInFP, compiler->optAllFPregVars);
3094 VarSetOps::Assign(compiler, liveOutFP, compiler->compCurBB->bbLiveOut);
3095 VarSetOps::IntersectionD(compiler, liveOutFP, compiler->optAllFPregVars);
3097 if (!jumpTab[i]->bbFPStateX87 && VarSetOps::Equal(compiler, liveInFP, liveOutFP))
3099 // Doesn't have state yet and regvar set is the same, so just copy state and don't change the jump
3100 jumpTab[i]->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState)
3102 else if (jumpTab[i]->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTab[i]->bbFPStateX87))
3104 // Same state, don't change the jump
3108 // We have to do a transition. First check if we can reuse another one
3110 for (j = 0; j < i; j++)
3112 // Has to be already forwarded. If not it can't be targeting the same block
3113 if (jumpTab[j]->bbFlags & BBF_FORWARD_SWITCH)
3115 if (jumpTab[i] == jumpTab[j]->bbJumpDest)
3117 // yipee, we can reuse this transition block
3118 jumpTab[i] = jumpTab[j];
3126 // We will have to create a new transition block
3127 jumpTab[i] = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTab[i]);
// Mark so later entries of this same switch can find and reuse it.
3129 jumpTab[i]->bbFlags |= BBF_FORWARD_SWITCH;
// Clear the temporary marker now that the whole table has been processed.
3135 for (unsigned i = 0; i < jumpCnt; i++)
3137 jumpTab[i]->bbFlags &= ~BBF_FORWARD_SWITCH;
3140 // everything's fixed now, so go down the normal path
3141 return genTableSwitch(reg, jumpCnt, jumpTab);
// Loads a double constant onto the x87 stack. Returns via the fast path
// (fldz/fld1, no memory access) when possible; otherwise materializes the
// constant in the constant pool and emits an fld — unless bOnlyNoMemAccess
// forbids memory access, in which case it bails out.
3144 bool CodeGen::genConstantLoadStackFP(GenTree* tree, bool bOnlyNoMemAccess)
3146 assert(tree->gtOper == GT_CNS_DBL);
3148 bool bFastConstant = false;
3149 instruction ins_ConstantNN = INS_fldz; // keep compiler happy
3151 // Both positive 0 and 1 are representable in float and double, beware if we add other constants
// Switch on the raw bit pattern so -0.0 (sign bit set) does NOT match +0.0.
3152 switch (*((__int64*)&(tree->gtDblCon.gtDconVal)))
3155 // CAREFUL here!, -0 is different than +0, a -0 shouldn't issue a fldz.
3156 ins_ConstantNN = INS_fldz;
3157 bFastConstant = true;
3159 case I64(0x3ff0000000000000):
3160 ins_ConstantNN = INS_fld1;
3161 bFastConstant = true;
3164 if (bFastConstant == false && bOnlyNoMemAccess)
3166 // Caller asked only to generate instructions if it didn't involve memory accesses
3172 assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
3173 instGen(ins_ConstantNN);
// Slow path: shrink to a float constant when the value round-trips exactly.
3178 if (tree->gtType == TYP_FLOAT || StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal))
3180 float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
3181 addr = genMakeConst(&f, TYP_FLOAT, tree, false);
3185 addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
3188 inst_FS_TT(INS_fld, addr);
3194 // Function called at the end of every statement. For stack based x87 its mission is to
3195 // remove any remaining temps on the stack.
3196 void CodeGen::genEndOfStatement()
// First (debug-only) pass: count mapped-but-not-enregistered slots; at most
// one temp should ever survive a statement.
3202 unsigned uTemps = 0;
3203 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3205 if (compCurFPState.Mapped(i) && // register is mapped
3206 (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
3211 assert(uTemps <= 1);
// Second pass: actually pop each leftover temp off the x87 stack.
3214 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3216 if (compCurFPState.Mapped(i) && // register is mapped
3217 (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
3219 // remove register from stacks
3220 FlatFPX87_Unload(&compCurFPState, i);
3224 assert(ConsistentAfterStatementStackFP());
// Returns true when the double value survives a round-trip through float
// unchanged, i.e. it can be stored as a 4-byte constant without precision loss.
3227 bool CodeGen::StackFPIsSameAsFloat(double d)
3229 if (forceCastToFloat(d) == d)
3231 JITDUMP("StackFPIsSameAsFloat is true for value %lf\n", d);
3236 JITDUMP("StackFPIsSameAsFloat is false for value %lf\n", d);
// Makes 'tree' addressable for FP codegen and reports the int/float register
// masks it holds. Constants may be collapsed into pool entries (float-sized
// when lossless and bCollapseConstantDoubles allows); indirections get an
// address mode or an address register; everything else is evaluated onto the
// FP stack. NOTE(review): case labels between the numbered lines are elided
// in this extract; code left byte-identical.
3242 GenTree* CodeGen::genMakeAddressableStackFP(GenTree* tree,
3243 regMaskTP* regMaskIntPtr,
3244 regMaskTP* regMaskFltPtr,
3245 bool bCollapseConstantDoubles)
3247 *regMaskIntPtr = *regMaskFltPtr = 0;
3249 switch (tree->OperGet())
// 0.0 / 1.0 load for free via fldz/fld1 — no constant-pool entry needed.
3252 if (tree->gtDblCon.gtDconVal == 0.0 || tree->gtDblCon.gtDconVal == 1.0)
3254 // For constants like 0 or 1 don't waste memory
3255 genCodeForTree(tree, 0);
3256 regSet.SetUsedRegFloat(tree, true);
3258 *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
3264 if (tree->gtType == TYP_FLOAT ||
3265 (bCollapseConstantDoubles && StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal)))
3267 float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
3268 addr = genMakeConst(&f, TYP_FLOAT, tree, true);
3272 addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
3275 if (compiler->verbose)
3277 printf("Generated new constant in tree ");
3278 Compiler::printTreeID(addr);
3279 printf(" with value %lf\n", tree->gtDblCon.gtDconVal);
// The constant node is replaced in place by the pool reference.
3282 tree->ReplaceWith(addr, compiler);
3287 // We take care about this in genKeepAddressableStackFP
3295 if (!genMakeIndAddrMode(tree, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
3299 genUpdateLife(tree);
3303 // Try to make the address directly addressable
3305 if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
3307 genUpdateLife(tree);
// Couldn't form an address mode: evaluate the address into a register.
3312 GenTree* addr = tree;
3313 tree = tree->gtOp.gtOp1;
3315 genCodeForTree(tree, 0);
3316 regSet.rsMarkRegUsed(tree, addr);
3318 *regMaskIntPtr = genRegMask(tree->gtRegNum);
// Default: compute the value onto the FP stack and mark it used.
3325 genCodeForTreeFloat(tree);
3326 regSet.SetUsedRegFloat(tree, true);
3329 *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
// Re-validates an addressable tree before its use: unspills anything spilled
// since genMakeAddressableStackFP, retires dying regvars, and returns the
// refreshed int/float register masks through the out-parameters.
// NOTE(review): case labels between the numbered lines are elided in this
// extract; code left byte-identical.
3336 void CodeGen::genKeepAddressableStackFP(GenTree* tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr)
3338 regMaskTP regMaskInt, regMaskFlt;
3340 regMaskInt = *regMaskIntPtr;
3341 regMaskFlt = *regMaskFltPtr;
3343 *regMaskIntPtr = *regMaskFltPtr = 0;
3345 switch (tree->OperGet())
3348 // If register has been spilled, unspill it
3349 if (tree->gtFlags & GTF_SPILLED)
3351 UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]);
3354 // If regvar is dying, take it out of the regvar mask
3355 if (tree->IsRegVarDeath())
3357 genRegVarDeathStackFP(tree);
3359 genUpdateLife(tree);
3364 if (tree->gtFlags & GTF_SPILLED)
3369 *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
3376 genUpdateLife(tree);
// Indirections defer to the integer-side helper to keep the address alive.
3386 *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0);
3393 if (tree->gtFlags & GTF_SPILLED)
3397 *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
// Releases the registers kept for an addressable tree: integer registers go
// through the common genDoneAddressable path; kept float registers are
// individually marked unused.
3402 void CodeGen::genDoneAddressableStackFP(GenTree* tree,
3403 regMaskTP addrRegInt,
3404 regMaskTP addrRegFlt,
3405 RegSet::KeepReg keptReg)
// A tree holds either integer address registers or a float register — never both.
3407 assert(!(addrRegInt && addrRegFlt));
3411 return genDoneAddressable(tree, addrRegInt, keptReg);
3413 else if (addrRegFlt)
3415 if (keptReg == RegSet::KEEP_REG)
3417 for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
3419 if (genRegMaskFloat((regNumber)i) & addrRegFlt)
3421 regSet.SetUsedRegFloat(tree, false);
// Discards the virtual register at the top of the x87 stack model.
// The register must currently be the top-of-stack entry.
3428 void CodeGen::FlatFPX87_Kill(FlatFPStateX87* pState, unsigned uVirtual)
3430 JITDUMP("Killing %s\n", regVarNameStackFP((regNumber)uVirtual));
3432 assert(pState->TopVirtual() == uVirtual);
// Pushes a virtual register onto the modeled x87 stack (bookkeeping only as
// far as this extract shows; bEmitCode presumably gates instruction emission).
3436 void CodeGen::FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned uRegister, bool bEmitCode)
3438 JITDUMP("Pushing %s to stack\n", regVarNameStackFP((regNumber)uRegister));
3440 pState->Push(uRegister);
// Pops the top entry off the modeled x87 stack and (when emitting) issues
// "fstp st(0)" to discard the physical top-of-stack. Returns the virtual
// register that was popped.
3443 unsigned CodeGen::FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode)
3445 assert(pState->m_uStackSize > 0);
3448 unsigned uVirtual = pState->Pop();
3453 inst_FS(INS_fstp, 0);
// Returns the virtual register currently modeled at the top of the x87 stack.
// (bEmitCode is unused here; kept for signature symmetry with the other
// FlatFPX87_* helpers.)
3459 unsigned CodeGen::FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode)
3461 return pState->TopVirtual();
// Removes a virtual register from anywhere in the modeled x87 stack. If it is
// not at the top, an fstp to its physical slot simultaneously discards it and
// moves the current top into its place; if it is the top, this degenerates to
// a plain pop.
3464 void CodeGen::FlatFPX87_Unload(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
3466 if (uVirtual != pState->TopVirtual())
3468 // We will do an fstp to the right place
3471 unsigned uStack = pState->m_uVirtualMap[uVirtual];
3472 unsigned uPhysic = pState->StackToST(uStack);
// Bookkeeping: the top-of-stack virtual register takes over this slot.
3474 pState->Unmap(uVirtual);
3475 pState->Associate(pState->TopVirtual(), uStack);
3476 pState->m_uStackSize--;
3480 pState->m_uStack[pState->m_uStackSize] = (unsigned)-1;
// fstp st(uPhysic): store top into the vacated slot and pop.
3486 inst_FS(INS_fstp, uPhysic);
3492 FlatFPX87_Pop(pState, bEmitCode);
3495 assert(pState->IsConsistent());
// Brings a virtual register to the top of the modeled x87 stack via an
// exchange with the current top; no-op if it is already there.
3498 void CodeGenInterface::FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
3500 assert(!IsUninitialized(uVirtual));
3502 JITDUMP("Moving %s to top of stack\n", regVarNameStackFP((regNumber)uVirtual));
3504 if (uVirtual != pState->TopVirtual())
3506 FlatFPX87_SwapStack(pState, pState->m_uVirtualMap[uVirtual], pState->TopIndex(), bEmitCode);
3510 JITDUMP("%s already on the top of stack\n", regVarNameStackFP((regNumber)uVirtual));
3513 assert(pState->IsConsistent());
// Exchanges the stack slots i and j of the modeled x87 stack, emitting the
// matching fxch instruction(s) and fixing up both the virtual->slot map and
// the slot->virtual array.
3516 void CodeGenInterface::FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode)
3519 assert(i < pState->m_uStackSize);
3520 assert(j < pState->m_uStackSize);
3522 JITDUMP("Exchanging ST(%i) and ST(%i)\n", pState->StackToST(i), pState->StackToST(j));
3524 // issue actual swaps
3525 int iPhysic = pState->StackToST(i);
3526 int jPhysic = pState->StackToST(j);
// fxch only exchanges with ST(0): a single fxch suffices when one slot is the
// top; otherwise three fxch ops route the swap through ST(0).
3530 if (iPhysic == 0 || jPhysic == 0)
3532 inst_FN(INS_fxch, iPhysic ? iPhysic : jPhysic);
3536 inst_FN(INS_fxch, iPhysic);
3537 inst_FN(INS_fxch, jPhysic);
3538 inst_FN(INS_fxch, iPhysic);
3544 // Swap Register file
3545 pState->m_uVirtualMap[pState->m_uStack[i]] = j;
3546 pState->m_uVirtualMap[pState->m_uStack[j]] = i;
// Swap the slot->virtual entries to match.
3550 temp = pState->m_uStack[i];
3551 pState->m_uStack[i] = pState->m_uStack[j];
3552 pState->m_uStack[j] = temp;
3554 assert(pState->IsConsistent());
// Debug dump of the FP state: lists which virtual registers are in use,
// which hold enregistered variables, then dumps the flat stack model itself.
3559 void CodeGen::JitDumpFPState()
3563 if ((regSet.rsMaskUsedFloat != 0) || (regSet.rsMaskRegVarFloat != 0))
3565 printf("FPSTATE\n");
3566 printf("Used virtual registers: ");
3567 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3569 if (genRegMaskFloat((regNumber)i) & regSet.rsMaskUsedFloat)
3571 printf("FPV%i ", i);
3576 printf("virtual registers holding reg vars: ");
3577 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3579 if (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat)
3581 printf("FPV%i ", i);
3586 compCurFPState.Dump();
3592 // Register allocation
3594 struct ChangeToRegVarCallback
// Resets all FP register-allocation state for this method: clears the
// per-register interference sets, the FP/non-FP tracked-variable sets, the
// stack ref counters and the do-not-enregister mask, then classifies every
// tracked local as FP or non-FP.
3600 void Compiler::raInitStackFP()
3602 // Reset local/reg interference
3603 for (int i = 0; i < REG_FPCOUNT; i++)
3605 VarSetOps::AssignNoCopy(this, raLclRegIntfFloat[i], VarSetOps::MakeEmpty(this));
3608 VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::MakeEmpty(this));
3609 VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
3610 VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
3612 raCntStkStackFP = 0;
3613 raCntWtdStkDblStackFP = 0;
3614 raCntStkParamDblStackFP = 0;
3616 VarSetOps::AssignNoCopy(this, raMaskDontEnregFloat, VarSetOps::MakeEmpty(this));
3618 // Calculate the set of all tracked FP/non-FP variables
3619 // into compiler->optAllFloatVars and compiler->optAllNonFPvars
3623 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
3625 /* Ignore the variable if it's not tracked */
3627 if (!varDsc->lvTracked)
3630 /* Get hold of the index and the interference mask for the variable */
3632 unsigned varNum = varDsc->lvVarIndex;
3634 /* add to the set of all tracked FP/non-FP variables */
3636 if (varDsc->IsFloatRegType())
3637 VarSetOps::AddElemD(this, optAllFloatVars, varNum);
3639 VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
// Debug dump: for each virtual FP register with a non-empty interference set,
// prints the tracked variables it interferes with.
3644 void Compiler::raDumpVariableRegIntfFloat()
3649 for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3651 if (!VarSetOps::IsEmpty(this, raLclRegIntfFloat[i]))
3653 JITDUMP("FPV%u interferes with ", i);
3654 for (j = 0; j < lvaTrackedCount; j++)
// Interference sets may only ever contain FP variables.
3656 assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[i], optAllFloatVars)));
3658 if (VarSetOps::IsMember(this, raLclRegIntfFloat[i], j))
3660 JITDUMP("T%02u/V%02u, ", j, lvaTrackedToVarNum[j]);
3669 // Returns the regnum for the variable passed as param taking into account
3670 // the fpvar to register interference mask. If we can't find anything, we
3671 // will return REG_FPNONE
3672 regNumber Compiler::raRegForVarStackFP(unsigned varTrackedIndex)
// First virtual FP register that does not interfere with the variable wins.
3674 for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
3676 if (!VarSetOps::IsMember(this, raLclRegIntfFloat[i], varTrackedIndex))
3678 return (regNumber)i;
// Adds 'weight' to the enregistration-cost payload of every FP variable in
// maskArg (non-FP members are ignored; early-out when none are FP).
3685 void Compiler::raAddPayloadStackFP(VARSET_VALARG_TP maskArg, unsigned weight)
3687 VARSET_TP mask(VarSetOps::Intersection(this, maskArg, optAllFloatVars));
3688 if (VarSetOps::IsEmpty(this, mask))
3693 for (unsigned i = 0; i < lvaTrackedCount; i++)
3695 if (VarSetOps::IsMember(this, mask, i))
3697 raPayloadStackFP[i] += weight;
// Comparator used when sorting FP enregistration candidates: returns true
// when lv1 is the more valuable variable to enregister. Value = ref count
// (raw for SMALL_CODE, weighted otherwise) minus the payload and overflow
// (spill-height) penalties; ties are broken by raw ref count.
3702 bool Compiler::raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2)
3704 assert(lv1->lvTracked);
3705 assert(lv2->lvTracked);
3707 bool bSmall = (compCodeOpt() == SMALL_CODE);
// Compute in double to avoid overflowing the unsigned counters.
3709 double weight1 = double(bSmall ? lv1->lvRefCnt : lv1->lvRefCntWtd) - double(raPayloadStackFP[lv1->lvVarIndex]) -
3710 double(raHeightsStackFP[lv1->lvVarIndex][FP_VIRTUALREGISTERS]);
3712 double weight2 = double(bSmall ? lv2->lvRefCnt : lv2->lvRefCntWtd) - double(raPayloadStackFP[lv2->lvVarIndex]) -
3713 double(raHeightsStackFP[lv2->lvVarIndex][FP_VIRTUALREGISTERS]);
3715 double diff = weight1 - weight2;
3719 return diff > 0 ? true : false;
// Tie-break on the unweighted ref-count difference.
3723 return int(lv1->lvRefCnt - lv2->lvRefCnt) ? true : false;
3728 // Dumps only interesting vars (the ones that are not enregistered yet
3729 void Compiler::raDumpHeightsStackFP()
3734 JITDUMP("raDumpHeightsStackFP():\n");
3735 JITDUMP("--------------------------------------------------------\n");
3736 JITDUMP("Weighted Height Table Dump\n ");
3737 for (i = 0; i < FP_VIRTUALREGISTERS; i++)
3739 JITDUMP(" %i ", i + 1);
// One row per not-yet-enregistered FP variable; columns are per-stack-height
// weighted spill costs, with the extra column [FP_VIRTUALREGISTERS] holding
// the overflow count.
3744 for (i = 0; i < lvaTrackedCount; i++)
3746 if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
3748 JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
3750 for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
3752 JITDUMP("%5u ", raHeightsStackFP[i][j]);
3758 JITDUMP("\nNonweighted Height Table Dump\n ");
3759 for (i = 0; i < FP_VIRTUALREGISTERS; i++)
3761 JITDUMP(" %i ", i + 1);
// Same layout for the non-weighted table.
3766 for (i = 0; i < lvaTrackedCount; i++)
3768 if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
3770 JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
3772 for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
3774 JITDUMP("%5u ", raHeightsNonWeightedStackFP[i][j]);
3779 JITDUMP("--------------------------------------------------------\n");
3783 // Increases heights for tracked variables given in mask. We call this
3784 // function when we enregister a variable and will cause the heights to
3785 // shift one place to the right.
3786 void Compiler::raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask)
3788 assert(VarSetOps::IsSubset(this, mask, optAllFloatVars));
3790 for (unsigned i = 0; i < lvaTrackedCount; i++)
3792 if (VarSetOps::IsMember(this, mask, i))
// Shift right: occupying one more stack slot moves every height cost up one;
// the last column [FP_VIRTUALREGISTERS] accumulates overflow.
3794 for (unsigned j = FP_VIRTUALREGISTERS; j > 0; j--)
3796 raHeightsStackFP[i][j] = raHeightsStackFP[i][j - 1];
3799 raHeightsNonWeightedStackFP[i][j] = raHeightsNonWeightedStackFP[i][j - 1];
3803 raHeightsStackFP[i][0] = 0;
3805 raHeightsNonWeightedStackFP[i][0] = 0;
3811 raDumpHeightsStackFP();
3815 // This is the prepass we do to adjust refcounts across calls and
3816 // create the height structure.
3817 void Compiler::raEnregisterVarsPrePassStackFP()
3821 assert(!VarSetOps::IsEmpty(this, optAllFloatVars));
3823 // Initialization of the height table
3824 memset(raHeightsStackFP, 0, sizeof(raHeightsStackFP));
3826 // Initialization of the payload table
3827 memset(raPayloadStackFP, 0, sizeof(raPayloadStackFP));
// NOTE(review): clears raHeightsNonWeightedStackFP using
// sizeof(raHeightsStackFP) — harmless only if both arrays have identical
// size; confirm the declarations match before relying on this.
3830 memset(raHeightsNonWeightedStackFP, 0, sizeof(raHeightsStackFP));
3833 // We will have a quick table with the pointers to the interesting varDscs
3834 // so that we don't have to scan for them for each tree.
3835 unsigned FPVars[lclMAX_TRACKED];
3836 unsigned numFPVars = 0;
3837 for (unsigned i = 0; i < lvaTrackedCount; i++)
3839 if (VarSetOps::IsMember(this, optAllFloatVars, i))
3841 FPVars[numFPVars++] = i;
3845 assert(numFPVars == VarSetOps::Count(this, optAllFloatVars));
3847 // Things we check here:
3849 // We subtract 2 for each FP variable that's live across a call, as we will
3850 // have 2 memory accesses to spill and unspill around it.
3854 VARSET_TP blockLiveOutFloats(VarSetOps::MakeEmpty(this));
3855 for (block = fgFirstBB; block; block = block->bbNext)
3859 This opt fails in the case of a variable that has its entire lifetime contained in the 'then' of
3860 a qmark. The use mask for the whole qmark won't contain that variable as the variable's value comes
3861 from a def in the else, and the def can't be set for the qmark if the else side of
3862 the qmark doesn't do a def.
3864 See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
3865 'smart' again in the future
3868 if (((block->bbVarUse |
3870 block->bbLiveIn ) & optAllFloatVars) == 0)
// If any FP vars are live out of this block, conditional/switch exits may
// require transition blocks, which the FP optimizer cannot mix with EH.
3876 VarSetOps::Assign(this, blockLiveOutFloats, block->bbLiveOut);
3877 VarSetOps::IntersectionD(this, blockLiveOutFloats, optAllFloatVars);
3878 if (!VarSetOps::IsEmpty(this, blockLiveOutFloats))
3880 // See comment in compiler.h above declaration of compMayHaveTransitionBlocks
3881 // to understand the reason for this limitation of FP optimizer.
3882 switch (block->bbJumpKind)
3887 stmt = block->bbTreeList->gtPrev;
3888 assert(stmt->gtNext == NULL && stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3890 assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
3891 GenTree* cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
3893 assert(cond->OperIsCompare());
3895 if (cond->gtOp.gtOp1->TypeGet() == TYP_LONG)
3897 if (compHndBBtabCount > 0)
3899 // If we have any handlers we won't enregister whatever is live out of this block
3900 JITDUMP("PERF Warning: Taking out FP candidates due to transition blocks + exception "
3902 VarSetOps::UnionD(this, raMaskDontEnregFloat,
3903 VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
3907 // long conditional jumps can generate transition blocks
3908 compMayHaveTransitionBlocks = true;
3916 if (compHndBBtabCount > 0)
3918 // If we have any handlers we won't enregister whatever is live out of this block
3920 "PERF Warning: Taking out FP candidates due to transition blocks + exception handlers.\n");
3921 VarSetOps::UnionD(this, raMaskDontEnregFloat,
3922 VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
3926 // fp vars are live out of the switch, so we may have transition blocks
3927 compMayHaveTransitionBlocks = true;
// Walk every tree in linear order, tracking liveness, to charge call-spill
// payload and per-stack-height spill costs to live FP variables.
3936 VARSET_TP liveSet(VarSetOps::MakeCopy(this, block->bbLiveIn));
3937 for (GenTree* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
3939 assert(stmt->gtOper == GT_STMT);
3941 unsigned prevHeight = stmt->gtStmt.gtStmtList->gtFPlvl;
3942 for (GenTree* tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
3944 VarSetOps::AssignNoCopy(this, liveSet, fgUpdateLiveSet(liveSet, tree));
3945 switch (tree->gtOper)
// Calls: every live FP var pays 2 memory accesses (spill + unspill).
3948 raAddPayloadStackFP(liveSet, block->getBBWeight(this) * 2);
3951 // For cast from long local var to double, decrement the ref count of the long
3952 // to avoid store forwarding stall
3953 if (tree->gtType == TYP_DOUBLE)
3955 GenTree* op1 = tree->gtOp.gtOp1;
3956 if (op1->gtOper == GT_LCL_VAR && op1->gtType == TYP_LONG)
3958 unsigned int lclNum = op1->gtLclVarCommon.gtLclNum;
3959 assert(lclNum < lvaCount);
3960 LclVarDsc* varDsc = lvaTable + lclNum;
3961 unsigned int weightedRefCnt = varDsc->lvRefCntWtd;
3962 unsigned int refCntDecrement = 2 * block->getBBWeight(this);
3963 if (refCntDecrement > weightedRefCnt)
3965 varDsc->lvRefCntWtd = 0;
3969 varDsc->lvRefCntWtd = weightedRefCnt - refCntDecrement;
// Track FP stack height changes: a rise charges spill cost to all live FP
// candidates at the new height.
3979 unsigned height = tree->gtFPlvl;
3981 if (height != prevHeight)
3983 if (height > prevHeight && height < FP_VIRTUALREGISTERS)
3985 for (unsigned i = 0; i < numFPVars; i++)
3987 if (VarSetOps::IsMember(this, liveSet, FPVars[i]))
3989 // The -1 are because we don't care about stack height 0
3990 // and we will use offset FP_VIRTUALREGISTERS to know what's
3991 // the count when we overflow. we multiply by 2, because that
3992 // is the number of memory accesses we will do for each spill
3993 // (even if we op directly with the spill)
3994 if (compCodeOpt() == SMALL_CODE)
3996 raHeightsStackFP[FPVars[i]][height - 1] += 2;
4000 raHeightsStackFP[FPVars[i]][height - 1] += 2 * block->getBBWeight(this);
4004 raHeightsNonWeightedStackFP[FPVars[i]][height - 1]++;
4010 prevHeight = height;
4019 // Disable enregistering of FP vars for methods with jmp op. We have really no
4021 // The problem with FP enreg vars is that the returning block is marked with having
4022 // all variables live on exit. This works for integer vars, but for FP vars we must
4023 // do the work to unload them. This is fairly straightforward to do, but I'm worried
4024 // by the coverage, so I'll take the conservative approach of disabling FP enregistering
4025 // and we will fix it if there is demand
4026 JITDUMP("PERF Warning: Disabling FP enregistering due to JMP op!!!!!!!.\n");
4027 VarSetOps::UnionD(this, raMaskDontEnregFloat, optAllFloatVars);
4031 raDumpHeightsStackFP();
// Converts a GT_LCL_VAR node for an enregistered FP variable into a
// GT_REG_VAR node and sets its birth/death flags from the surrounding
// liveness. Non-tracked and non-enregistered variables are left alone.
4035 void Compiler::raSetRegLclBirthDeath(GenTree* tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ)
4037 assert(tree->gtOper == GT_LCL_VAR);
4039 unsigned lclnum = tree->gtLclVarCommon.gtLclNum;
4040 assert(lclnum < lvaCount);
4042 LclVarDsc* varDsc = lvaTable + lclnum;
4044 if (!varDsc->lvTracked)
4046 // Not tracked, can't be one of the enreg fp vars
4050 unsigned varIndex = varDsc->lvVarIndex;
4052 if (!VarSetOps::IsMember(this, optAllFPregVars, varIndex))
4054 // Not one of the enreg fp vars
4058 assert(varDsc->lvRegNum != REG_FPNONE);
4059 assert(!VarSetOps::IsMember(this, raMaskDontEnregFloat, varIndex));
// Morph the node in place, preserving its liveness flag bits.
4061 unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
4062 tree->ChangeOper(GT_REG_VAR);
4063 tree->gtFlags |= livenessFlags;
4064 tree->gtRegNum = varDsc->lvRegNum;
4065 tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
4066 tree->gtRegVar.SetLclNum(lclnum);
4068 // A liveset can change in a lclvar even if the lclvar itself is not
4069 // changing its life. This can happen for lclvars inside qmarks,
4070 // where lclvars die across the colon edge.
4072 // it is marked GTF_VAR_DEATH (already set by fgComputeLife)
4073 // OR it is already live
4074 // OR it is becoming live
4076 if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
4078 if ((tree->gtFlags & GTF_VAR_DEF) != 0)
4081 tree->gtFlags |= GTF_REG_BIRTH;
4091 // In this pass we set the regvars and set the birth and death flags. we do it
4092 // for all enregistered variables at once.
4093 void Compiler::raEnregisterVarsPostPassStackFP()
4095 if (VarSetOps::IsEmpty(this, optAllFPregVars))
4097 // Nothing to fix up.
4102 JITDUMP("raEnregisterVarsPostPassStackFP:\n");
4104 for (block = fgFirstBB; block; block = block->bbNext)
4109 This opt fails in the case of a variable that has its entire lifetime contained in the 'then' of
4110 a qmark. The use mask for the whole qmark won't contain that variable as the variable's value comes
4111 from a def in the else, and the def can't be set for the qmark if the else side of
4112 the qmark doesn't do a def.
4114 See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
4115 'smart' again in the future
4119 if (((block->bbVarUse |
4121 block->bbLiveIn ) & optAllFPregVars) == 0)
// Walk all trees in linear order, updating liveness as we go, turning each
// enregistered GT_LCL_VAR into a GT_REG_VAR with birth/death flags.
4128 VARSET_TP lastlife(VarSetOps::MakeCopy(this, block->bbLiveIn));
4129 for (GenTree* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
4131 assert(stmt->gtOper == GT_STMT);
4133 for (GenTree *tree = stmt->gtStmt.gtStmtList; tree;
4134 VarSetOps::AssignNoCopy(this, lastlife, fgUpdateLiveSet(lastlife, tree)), tree = tree->gtNext)
4136 if (tree->gtOper == GT_LCL_VAR)
4138 raSetRegLclBirthDeath(tree, lastlife, false);
4141 // Model implicit use (& hence last use) of frame list root at pinvokes.
4142 if (tree->gtOper == GT_CALL)
4144 GenTreeCall* call = tree->AsCall();
4145 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
4147 LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
4149 if (frameVarDsc->lvTracked && ((call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH) != 0))
4151 // Frame var dies here
4152 unsigned varIndex = frameVarDsc->lvVarIndex;
4153 VarSetOps::RemoveElemD(this, lastlife, varIndex);
// Sanity: the incrementally updated live set must match block liveness.
4160 assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut));
// Accumulates stack ref counts for double (and double-aligned struct) locals
// that did NOT get enregistered, splitting parameters from other stack uses.
4165 void Compiler::raGenerateFPRefCounts()
4167 // Update ref counts to stack
4168 assert(raCntWtdStkDblStackFP == 0);
4169 assert(raCntStkParamDblStackFP == 0);
4170 assert(raCntStkStackFP == 0);
4174 for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
4176 if (varDsc->lvType == TYP_DOUBLE ||
4177 varDsc->lvStructDoubleAlign) // Account for structs (A bit over aggressive here, we should
4178 // account for field accesses, but should be a reasonable
4181 if (varDsc->lvRegister)
4183 assert(varDsc->lvTracked);
4187 // Increment tmp access
4188 raCntStkStackFP += varDsc->lvRefCnt;
4190 if (varDsc->lvIsParam)
4192 // Why is this not weighted?
4193 raCntStkParamDblStackFP += varDsc->lvRefCnt;
4197 raCntWtdStkDblStackFP += varDsc->lvRefCntWtd;
4204 if ((raCntWtdStkDblStackFP > 0) || (raCntStkParamDblStackFP > 0))
4206 JITDUMP("StackFP double stack weighted ref count: %u ; param ref count: %u\n", raCntWtdStkDblStackFP,
4207 raCntStkParamDblStackFP);
4212 void Compiler::raEnregisterVarsStackFP()
4214 const int FPENREGTHRESHOLD = 1;
4215 const unsigned int FPENREGTHRESHOLD_WEIGHTED = FPENREGTHRESHOLD;
4220 if (opts.compDbgCode || opts.MinOpts())
4222 // no enregistering for these options.
4226 if (VarSetOps::IsEmpty(this, optAllFloatVars))
4228 // No floating point vars. bail out
4232 // Do additional pass updating weights and generating height table
4233 raEnregisterVarsPrePassStackFP();
4235 // Vars are ordered by weight
4238 // Set an interference with V0 and V1, which we reserve as a temp registers.
4239 // We need only one temp. but we will take the easy way, as by using
4240 // two, we will need to teach codegen how to operate with spilled variables
4241 VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV0], optAllFloatVars);
4242 VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV1], optAllFloatVars);
4245 if (codeGen->genStressFloat())
4247 // Lock out registers for stress.
4248 regMaskTP locked = codeGen->genStressLockedMaskFloat();
4249 for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
4251 if (locked & genRegMaskFloat((regNumber)i))
4253 VarSetOps::Assign(this, raLclRegIntfFloat[i], optAllFloatVars);
4259 // Build the interesting FP var table
4260 LclVarDsc* fpLclFPVars[lclMAX_TRACKED];
4261 unsigned numFPVars = 0;
4262 for (unsigned i = 0; i < lvaTrackedCount; i++)
4264 if (VarSetOps::IsMember(this, raMaskDontEnregFloat, i))
4266 JITDUMP("Won't enregister V%02i (T%02i) because it's marked as dont enregister\n", lvaTrackedToVarNum[i],
4271 if (VarSetOps::IsMember(this, optAllFloatVars, i))
4273 varDsc = lvaTable + lvaTrackedToVarNum[i];
4275 assert(varDsc->lvTracked);
4277 if (varDsc->lvDoNotEnregister)
4279 JITDUMP("Won't enregister V%02i (T%02i) because it's marked as DoNotEnregister\n",
4280 lvaTrackedToVarNum[i], i);
4283 #if !FEATURE_X87_DOUBLES
4284 if (varDsc->TypeGet() == TYP_FLOAT)
4286 JITDUMP("Won't enregister V%02i (T%02i) because it's a TYP_FLOAT and we have disabled "
4287 "FEATURE_X87_DOUBLES\n",
4288 lvaTrackedToVarNum[i], i);
4293 fpLclFPVars[numFPVars++] = lvaTable + lvaTrackedToVarNum[i];
4297 unsigned maxRegVars = 0; // Max num of regvars at one time
4299 for (unsigned sortNum = 0; sortNum < numFPVars; sortNum++)
4304 JITDUMP("FP regvar candidates:\n");
4306 for (unsigned i = sortNum; i < numFPVars; i++)
4308 varDsc = fpLclFPVars[i];
4309 unsigned lclNum = varDsc - lvaTable;
4311 varIndex = varDsc->lvVarIndex;
4313 JITDUMP("V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n", lclNum, varIndex,
4314 varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
4315 raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
4321 unsigned min = sortNum;
4323 // Find the one that will save us most
4324 for (unsigned i = sortNum + 1; i < numFPVars; i++)
4326 if (raVarIsGreaterValueStackFP(fpLclFPVars[i], fpLclFPVars[sortNum]))
4332 // Put it at the top of the array
4334 temp = fpLclFPVars[min];
4335 fpLclFPVars[min] = fpLclFPVars[sortNum];
4336 fpLclFPVars[sortNum] = temp;
4338 varDsc = fpLclFPVars[sortNum];
4341 unsigned lclNum = varDsc - lvaTable;
4343 unsigned varIndex = varDsc->lvVarIndex;
4345 assert(VarSetOps::IsMember(this, optAllFloatVars, varIndex));
4347 JITDUMP("Candidate for enregistering: V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n",
4348 lclNum, varIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
4349 raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
4351 bool bMeetsThreshold = true;
4353 if (varDsc->lvRefCnt < FPENREGTHRESHOLD || varDsc->lvRefCntWtd < FPENREGTHRESHOLD_WEIGHTED)
4355 bMeetsThreshold = false;
4358 // We don't want to enregister arguments with only one use, as they will be
4359 // loaded in the prolog. Just don't enregister them and load them lazily(
4360 if (varDsc->lvIsParam &&
4361 (varDsc->lvRefCnt <= FPENREGTHRESHOLD || varDsc->lvRefCntWtd <= FPENREGTHRESHOLD_WEIGHTED))
4363 bMeetsThreshold = false;
4366 if (!bMeetsThreshold
4368 && codeGen->genStressFloat() != 1
4372 // Doesn't meet bar, do next
4373 JITDUMP("V%02u/T%02u doesnt meet threshold. Won't enregister\n", lclNum, varIndex);
4377 // We don't want to have problems with overflow (we now have 2 unsigned counters
4378 // that can possibly go to their limits), so we just promote to double here.
4381 double(varDsc->lvRefCntWtd) -
4382 double(raPayloadStackFP[varIndex]) - // Additional costs of enregistering variable
4383 double(raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]) - // Spilling costs of enregistering variable
4384 double(FPENREGTHRESHOLD_WEIGHTED);
4386 JITDUMP("balance = %d - %d - %d - %d\n", varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
4387 raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS], FPENREGTHRESHOLD_WEIGHTED);
4391 && codeGen->genStressFloat() != 1
4395 // Doesn't meet bar, do next
4396 JITDUMP("V%02u/T%02u doesnt meet threshold. Won't enregister\n", lclNum, varIndex);
4400 regNumber reg = raRegForVarStackFP(varDsc->lvVarIndex);
4401 if (reg == REG_FPNONE)
4403 // Didn't make it (interferes with other regvars), do next
4404 JITDUMP("V%02u/T%02u interferes with other enreg vars. Won't enregister\n", lclNum, varIndex);
4409 if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4411 // Do not enregister if this is a floating field in a struct local of
4412 // promotion type PROMOTION_TYPE_DEPENDENT.
4416 // Yipee, we will enregister var.
4417 varDsc->lvRegister = true;
4418 varDsc->lvRegNum = reg;
4419 VarSetOps::AddElemD(this, optAllFPregVars, varIndex);
4422 raDumpVariableRegIntfFloat();
4427 gtDispLclVar(lclNum);
4428 printf("V%02u/T%02u (refcnt=%2u,refwtd=%4u%s) enregistered in %s\n", varIndex, varDsc->lvVarIndex,
4429 varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2, (varDsc->lvRefCntWtd & 1) ? ".5" : "",
4430 CodeGen::regVarNameStackFP(varDsc->lvRegNum));
4436 // Create interferences with other variables.
4437 assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[(int)reg], optAllFloatVars)));
4438 VARSET_TP intfFloats(VarSetOps::Intersection(this, lvaVarIntf[varIndex], optAllFloatVars));
4440 VarSetOps::UnionD(this, raLclRegIntfFloat[reg], intfFloats);
4442 // Update height tables for variables that interfere with this one.
4443 raUpdateHeightsForVarsStackFP(intfFloats);
4445 // Update max number of reg vars at once.
4446 maxRegVars = min(REG_FPCOUNT, max(maxRegVars, VarSetOps::Count(this, intfFloats)));
4449 assert(VarSetOps::IsSubset(this, optAllFPregVars, optAllFloatVars));
4450 assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, optAllFPregVars, raMaskDontEnregFloat)));
4452 // This is a bit conservative, as they may not all go through a call.
4453 // If we have to, we can fix this.
4454 tmpDoubleSpillMax += maxRegVars;
4456 // Do pass marking trees as egvars
4457 raEnregisterVarsPostPassStackFP();
4461 JITDUMP("FP enregistration summary\n");
4464 for (i = 0; i < numFPVars; i++)
4466 varDsc = fpLclFPVars[i];
4468 if (varDsc->lvRegister)
4470 unsigned lclNum = varDsc - lvaTable;
4472 varIndex = varDsc->lvVarIndex;
4474 JITDUMP("Enregistered V%02u/T%02u in FPV%i RefCount: %u Weight: %u \n", lclNum, varIndex,
4475 varDsc->lvRegNum, varDsc->lvRefCnt, varDsc->lvRefCntWtd);
4478 JITDUMP("End of FP enregistration summary\n\n");
4485 regMaskTP CodeGenInterface::genStressLockedMaskFloat()
4487 assert(genStressFloat());
4489 // Don't use REG_FPV0 or REG_FPV1, they're reserved
4490 if (genStressFloat() == 1)
4492 return genRegMaskFloat(REG_FPV4) | genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) |
4493 genRegMaskFloat(REG_FPV7);
4497 return genRegMaskFloat(REG_FPV2) | genRegMaskFloat(REG_FPV3) | genRegMaskFloat(REG_FPV4) |
4498 genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) | genRegMaskFloat(REG_FPV7);
4504 #endif // FEATURE_STACK_FP_X87
4506 #endif // LEGACY_BACKEND