Fix reading Time zone rules using Julian days (#17672)
[platform/upstream/coreclr.git] / src / jit / stackfp.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 #include "jitpch.h"
6 #ifdef _MSC_VER
7 #pragma hdrstop
8 #endif
9
10 #ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.
11
12 #ifdef _TARGET_AMD64_
13 #error AMD64 must be !LEGACY_BACKEND
14 #endif
15
16 #include "compiler.h"
17 #include "emit.h"
18 #include "codegen.h"
19
20 // Instruction list
21 // N=normal, R=reverse, P=pop
22 #if FEATURE_STACK_FP_X87
// x87 arithmetic opcode tables, indexed by operation: add, sub, mul, div.
// Suffix letters: first N=normal / R=reversed operand order,
// second N=no pop / P=pop the x87 stack after the operation.
const static instruction FPmathNN[] = {INS_fadd, INS_fsub, INS_fmul, INS_fdiv};
const static instruction FPmathNP[] = {INS_faddp, INS_fsubp, INS_fmulp, INS_fdivp};
const static instruction FPmathRN[] = {INS_fadd, INS_fsubr, INS_fmul, INS_fdivr};
const static instruction FPmathRP[] = {INS_faddp, INS_fsubrp, INS_fmulp, INS_fdivrp};
27
28 FlatFPStateX87* CodeGenInterface::FlatFPAllocFPState(FlatFPStateX87* pInitFrom)
29 {
30     FlatFPStateX87* pNewState;
31
32     pNewState = new (compiler, CMK_FlatFPStateX87) FlatFPStateX87;
33     pNewState->Init(pInitFrom);
34
35     return pNewState;
36 }
37
38 bool CodeGen::FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask)
39 {
40     int i;
41     for (i = REG_FPV0; i < REG_FPCOUNT; i++)
42     {
43         if (pState->Mapped(i))
44         {
45             regMaskTP regmask = genRegMaskFloat((regNumber)i);
46             if ((mask & regmask) == 0)
47             {
48                 return false;
49             }
50
51             mask &= ~regmask;
52         }
53     }
54
55     return mask ? false : true;
56 }
57
58 bool FlatFPStateX87::Mapped(unsigned uEntry)
59 {
60     return m_uVirtualMap[uEntry] != (unsigned)FP_VRNOTMAPPED;
61 }
62
63 void FlatFPStateX87::Unmap(unsigned uEntry)
64 {
65     assert(Mapped(uEntry));
66     m_uVirtualMap[uEntry] = (unsigned)FP_VRNOTMAPPED;
67 }
68
69 bool FlatFPStateX87::AreEqual(FlatFPStateX87* pA, FlatFPStateX87* pB)
70 {
71     unsigned i;
72
73     assert(pA->IsConsistent());
74     assert(pB->IsConsistent());
75
76     if (pA->m_uStackSize != pB->m_uStackSize)
77     {
78         return false;
79     }
80
81     for (i = 0; i < pA->m_uStackSize; i++)
82     {
83         if (pA->m_uStack[i] != pB->m_uStack[i])
84         {
85             return false;
86         }
87     }
88
89     return true;
90 }
91
92 #ifdef DEBUG
93 bool FlatFPStateX87::IsValidEntry(unsigned uEntry)
94 {
95     return (Mapped(uEntry) && (m_uVirtualMap[uEntry] >= 0 && m_uVirtualMap[uEntry] < m_uStackSize)) || !Mapped(uEntry);
96 }
97
98 bool FlatFPStateX87::IsConsistent()
99 {
100     unsigned i;
101
102     for (i = 0; i < FP_VIRTUALREGISTERS; i++)
103     {
104         if (!IsValidEntry(i))
105         {
106             if (m_bIgnoreConsistencyChecks)
107             {
108                 return true;
109             }
110             else
111             {
112                 assert(!"Virtual register is marked as mapped but out of the stack range");
113                 return false;
114             }
115         }
116     }
117
118     for (i = 0; i < m_uStackSize; i++)
119     {
120         if (m_uVirtualMap[m_uStack[i]] != i)
121         {
122             if (m_bIgnoreConsistencyChecks)
123             {
124                 return true;
125             }
126             else
127             {
128                 assert(!"Register File and stack layout don't match!");
129                 return false;
130             }
131         }
132     }
133
134     return true;
135 }
136
137 void FlatFPStateX87::Dump()
138 {
139     unsigned i;
140
141     assert(IsConsistent());
142
143     if (m_uStackSize > 0)
144     {
145         printf("Virtual stack state: ");
146         for (i = 0; i < m_uStackSize; i++)
147         {
148             printf("ST(%i): FPV%i | ", StackToST(i), m_uStack[i]);
149         }
150         printf("\n");
151     }
152 }
153
154 void FlatFPStateX87::UpdateMappingFromStack()
155 {
156     memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
157
158     unsigned i;
159
160     for (i = 0; i < m_uStackSize; i++)
161     {
162         m_uVirtualMap[m_uStack[i]] = i;
163     }
164 }
165
166 #endif
167
168 unsigned FlatFPStateX87::StackToST(unsigned uEntry)
169 {
170     assert(IsValidEntry(uEntry));
171     return m_uStackSize - 1 - uEntry;
172 }
173
174 unsigned FlatFPStateX87::VirtualToST(unsigned uEntry)
175 {
176     assert(Mapped(uEntry));
177
178     return StackToST(m_uVirtualMap[uEntry]);
179 }
180
181 unsigned FlatFPStateX87::STToVirtual(unsigned uST)
182 {
183     assert(uST < m_uStackSize);
184
185     return m_uStack[m_uStackSize - 1 - uST];
186 }
187
// Initializes this state: either as a bitwise copy of 'pFrom', or, when
// pFrom is null, as an empty stack with every virtual register unmapped.
void FlatFPStateX87::Init(FlatFPStateX87* pFrom)
{
    if (pFrom)
    {
        memcpy(this, pFrom, sizeof(*this));
    }
    else
    {
        // All-ones marks every virtual register as not mapped (the value
        // Mapped() compares against FP_VRNOTMAPPED).
        memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));

#ifdef DEBUG
        // Poison the stack array so stale slots are obvious in dumps.
        memset(m_uStack, -1, sizeof(m_uStack));
#endif
        m_uStackSize = 0;
    }

#ifdef DEBUG
    // Consistency checking defaults to on; callers suppress it only briefly
    // around multi-step updates via IgnoreConsistencyChecks().
    m_bIgnoreConsistencyChecks = false;
#endif
}
208
209 void FlatFPStateX87::Associate(unsigned uEntry, unsigned uStack)
210 {
211     assert(uStack < m_uStackSize);
212
213     m_uStack[uStack]      = uEntry;
214     m_uVirtualMap[uEntry] = uStack;
215 }
216
217 unsigned FlatFPStateX87::TopIndex()
218 {
219     return m_uStackSize - 1;
220 }
221
222 unsigned FlatFPStateX87::TopVirtual()
223 {
224     assert(m_uStackSize > 0);
225     return m_uStack[m_uStackSize - 1];
226 }
227
228 void FlatFPStateX87::Rename(unsigned uVirtualTo, unsigned uVirtualFrom)
229 {
230     assert(!Mapped(uVirtualTo));
231
232     unsigned uSlot = m_uVirtualMap[uVirtualFrom];
233
234     Unmap(uVirtualFrom);
235     Associate(uVirtualTo, uSlot);
236 }
237
238 void FlatFPStateX87::Push(unsigned uEntry)
239 {
240     assert(m_uStackSize <= FP_PHYSICREGISTERS);
241     assert(!Mapped(uEntry));
242
243     m_uStackSize++;
244     Associate(uEntry, TopIndex());
245
246     assert(IsConsistent());
247 }
248
249 unsigned FlatFPStateX87::Pop()
250 {
251     assert(m_uStackSize != 0);
252
253     unsigned uVirtual = m_uStack[--m_uStackSize];
254
255 #ifdef DEBUG
256     m_uStack[m_uStackSize] = (unsigned)-1;
257 #endif
258
259     Unmap(uVirtual);
260
261     return uVirtual;
262 }
263
264 bool FlatFPStateX87::IsEmpty()
265 {
266     return m_uStackSize == 0;
267 }
268
// Emits the code that transforms the x87 stack layout described by pSrc
// into the layout required by pDst. pDst's live set must be a subset of
// pSrc's: entries dead in pDst are unloaded first, then the remaining
// entries are permuted into place by decomposing the permutation into
// swap cycles (each realized with exchanges through the top of stack).
void CodeGen::genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst)
{
    FlatFPStateX87  fpState;
    FlatFPStateX87* pTmp;
    int             i;

    // Make a temp copy (pSrc is left untouched; pTmp tracks our progress)
    memcpy(&fpState, pSrc, sizeof(FlatFPStateX87));
    pTmp = &fpState;

    // Make sure everything seems consistent.
    assert(pSrc->m_uStackSize >= pDst->m_uStackSize);
#ifdef DEBUG
    for (i = 0; i < FP_VIRTUALREGISTERS; i++)
    {
        if (!pTmp->Mapped(i) && pDst->Mapped(i))
        {
            assert(!"Dst stack state can't have a virtual register live if Src target has it dead");
        }
    }
#endif

    // First we need to get rid of the stuff that's dead in pDst
    for (i = 0; i < FP_VIRTUALREGISTERS; i++)
    {
        if (pTmp->Mapped(i) && !pDst->Mapped(i))
        {
            // We have to get rid of this one
            JITDUMP("Removing virtual register V%i from stack\n", i);

            // Don't need this virtual register any more
            FlatFPX87_Unload(pTmp, i);
        }
    }

    // Both stacks now hold the same set of registers, possibly reordered.
    assert(pTmp->m_uStackSize == pDst->m_uStackSize);

    // Extract cycles. iProcessed is a bitmask of the stack slots already
    // placed (the depth is small enough to fit in an int's bits).
    int iProcessed = 0;

    // We start with the top of the stack so that we can
    // easily recognize the cycle that contains it
    for (i = pTmp->m_uStackSize - 1; i >= 0; i--)
    {
        // Have we processed this stack element yet?
        if (((1 << i) & iProcessed) == 0)
        {
            // Extract cycle
            int iCycle[FP_VIRTUALREGISTERS];
            int iCycleLength = 0;
            int iCurrent     = i;
            int iTOS         = pTmp->m_uStackSize - 1;

            // Walk the cycle: from a slot, find where pDst wants the
            // register currently in it, and continue from that slot.
            do
            {
                // Mark current stack element as processed
                iProcessed |= (1 << iCurrent);

                // Update cycle
                iCycle[iCycleLength++] = iCurrent;

                // Next element in cycle
                iCurrent = pDst->m_uVirtualMap[pTmp->m_uStack[iCurrent]];

            } while ((iProcessed & (1 << iCurrent)) == 0);

#ifdef DEBUG
            if (verbose)
            {
                printf("Cycle: (");
                for (int l = 0; l < iCycleLength; l++)
                {
                    printf("%i", pTmp->StackToST(iCycle[l]));
                    if (l + 1 < iCycleLength)
                        printf(", ");
                }
                printf(")\n");
            }
#endif

            // Realize the cycle as swaps through the top of stack.
            if (iCycleLength == 1)
            {
                // Stack element in the same place. Nothing to do
            }
            else
            {
                if (iCycle[0] == iTOS)
                {
                    // Cycle includes stack element 0 (the top of stack), so
                    // swapping each remaining member with TOS suffices.
                    int j;

                    for (j = 1; j < iCycleLength; j++)
                    {
                        FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
                    }
                }
                else
                {
                    // Cycle doesn't include stack element 0: rotate through
                    // TOS, then one extra swap puts TOS's original value back.
                    int j;

                    for (j = 0; j < iCycleLength; j++)
                    {
                        FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
                    }

                    FlatFPX87_SwapStack(pTmp, iCycle[0], iTOS);
                }
            }
        }
    }

    // The emitted code must have brought us exactly to pDst's layout.
    assert(FlatFPStateX87::AreEqual(pTmp, pDst));
}
384
385 void CodeGen::genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode)
386 {
387     unsigned i;
388     for (i = REG_FPV0; i < REG_FPCOUNT; i++)
389     {
390         if (pSrc->Mapped(i))
391         {
392             if ((mask & genRegMaskFloat((regNumber)i)) == 0)
393             {
394                 FlatFPX87_Unload(pSrc, i, bEmitCode);
395             }
396         }
397         else
398         {
399             assert((mask & genRegMaskFloat((regNumber)i)) == 0 &&
400                    "A register marked as incoming live in the target block isnt live in the current block");
401         }
402     }
403 }
404
// Sets up the x87 stack in the prolog so it matches the FP state expected
// on entry to the first basic block: for each stack slot, from the bottom
// of the stack upward, load the enregistered FP local assigned to that
// slot's virtual register.
void CodeGen::genCodeForPrologStackFP()
{
    assert(compiler->compGeneratingProlog);
    assert(compiler->fgFirstBB);

    FlatFPStateX87* pState = compiler->fgFirstBB->bbFPStateX87;

    if (pState && pState->m_uStackSize)
    {
        // Enregistered FP variables live on entry to the first block.
        VARSET_TP liveEnregIn(
            VarSetOps::Intersection(compiler, compiler->fgFirstBB->bbLiveIn, compiler->optAllFPregVars));
        unsigned i;

#ifdef DEBUG
        unsigned uLoads = 0;
#endif

        assert(pState->m_uStackSize <= FP_VIRTUALREGISTERS);
        for (i = 0; i < pState->m_uStackSize; i++)
        {
            // Get the virtual register that matches. We iterate from the
            // highest ST number (the bottom slot) so successive flds build
            // the stack in the right order.
            unsigned iVirtual = pState->STToVirtual(pState->m_uStackSize - i - 1);

            unsigned   varNum;
            LclVarDsc* varDsc;

            // Find the enregistered FP local assigned to this virtual register.
            for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
            {
                if (varDsc->IsFloatRegType() && varDsc->lvRegister && varDsc->lvRegNum == iVirtual)
                {
                    unsigned varIndex = varDsc->lvVarIndex;

                    // Is this variable live on entry?
                    if (VarSetOps::IsMember(compiler, liveEnregIn, varIndex))
                    {
                        if (varDsc->lvIsParam)
                        {
                            // Parameter: load its incoming value from the frame.
                            getEmitter()->emitIns_S(INS_fld, EmitSize(varDsc->TypeGet()), varNum, 0);
                        }
                        else
                        {
                            // uninitialized regvar: push a zero as placeholder
                            getEmitter()->emitIns(INS_fldz);
                        }

#ifdef DEBUG
                        uLoads++;
#endif
                        break;
                    }
                }
            }

            assert(varNum != compiler->lvaCount); // We have to find the matching var!!!!
        }

        // Exactly one load per live enregistered variable.
        assert(uLoads == VarSetOps::Count(compiler, liveEnregIn));
    }
}
464
// Emits whatever FP-stack transition code is needed at the end of 'block',
// based on how the block transfers control to its successor(s).
void CodeGen::genCodeForEndBlockTransitionStackFP(BasicBlock* block)
{
    switch (block->bbJumpKind)
    {
        case BBJ_EHFINALLYRET:
        case BBJ_EHFILTERRET:
        case BBJ_EHCATCHRET:
            // Nothing to do: EH exits must leave the FP stack empty.
            assert(compCurFPState.m_uStackSize == 0);
            break;
        case BBJ_THROW:
            // A throw never falls through; no transition needed.
            break;
        case BBJ_RETURN:
            // Nothing to do: an FP-typed return leaves exactly the return
            // value on the stack, otherwise the stack must be empty.
            assert((varTypeIsFloating(compiler->info.compRetType) && compCurFPState.m_uStackSize == 1) ||
                   compCurFPState.m_uStackSize == 0);
            break;
        case BBJ_COND:
        case BBJ_NONE:
            // Transition to the fall-through successor's incoming state.
            genCodeForBBTransitionStackFP(block->bbNext);
            break;
        case BBJ_ALWAYS:
            // Unconditional jump: transition to the jump target's state.
            genCodeForBBTransitionStackFP(block->bbJumpDest);
            break;
        case BBJ_LEAVE:
            assert(!"BBJ_LEAVE blocks shouldn't get here");
            break;
        case BBJ_CALLFINALLY:
            assert(compCurFPState.IsEmpty() && "we don't enregister variables live on entry to finallys");
            genCodeForBBTransitionStackFP(block->bbJumpDest);
            break;
        case BBJ_SWITCH:
            // Nothing to do here
            break;
        default:
            noway_assert(!"Unexpected bbJumpKind");
            break;
    }
}
504
505 regMaskTP CodeGen::genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset)
506 {
507     unsigned   varNum;
508     LclVarDsc* varDsc;
509     regMaskTP  result = 0;
510
511     for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
512     {
513         if (varDsc->IsFloatRegType() && varDsc->lvRegister)
514         {
515
516             unsigned varIndex = varDsc->lvVarIndex;
517
518             /* Is this variable live on entry? */
519
520             if (VarSetOps::IsMember(compiler, varset, varIndex))
521             {
522                 // We should only call this function doing a transition
523                 // To a block which hasn't state yet. All incoming live enregistered variables
524                 // should have been already initialized.
525                 assert(varDsc->lvRegNum != REG_FPNONE);
526
527                 result |= genRegMaskFloat(varDsc->lvRegNum);
528             }
529         }
530     }
531
532     return result;
533 }
534
// Emits code so the current FP state matches the incoming FP state of the
// successor block pDst, establishing pDst's state if it has none yet.
void CodeGen::genCodeForBBTransitionStackFP(BasicBlock* pDst)
{
    assert(compCurFPState.IsConsistent());
    if (pDst->bbFPStateX87)
    {
        // Target block has an associated state. generate transition
        genCodeForTransitionStackFP(&compCurFPState, pDst->bbFPStateX87);
    }
    else
    {
        // Target block hasn't got an associated state. As it can only possibly
        // have a subset of the current state, we'll take advantage of this and
        // generate the optimal transition

        // Copy current state
        pDst->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);

        // Registers backing the FP locals that are live into pDst.
        regMaskTP liveRegIn =
            genRegMaskFromLivenessStackFP(VarSetOps::Intersection(compiler, pDst->bbLiveIn, compiler->optAllFPregVars));

        // Match to live vars: unload everything not in liveRegIn.
        genCodeForTransitionFromMask(pDst->bbFPStateX87, liveRegIn);
    }
}
559
// Spills to memory every FP temp (a stack entry not backing an enregistered
// variable) whose register appears in 'canSpillMask'.
void CodeGen::SpillTempsStackFP(regMaskTP canSpillMask)
{

    unsigned  i;
    regMaskTP spillMask = 0;
    regNumber reg;

    // First pass we determine which registers we spill: stack entries in
    // the candidate mask that are not regvars.
    for (i = 0; i < compCurFPState.m_uStackSize; i++)
    {
        reg               = (regNumber)compCurFPState.m_uStack[i];
        regMaskTP regMask = genRegMaskFloat(reg);
        if ((regMask & canSpillMask) && (regMask & regSet.rsMaskRegVarFloat) == 0)
        {
            spillMask |= regMask;
        }
    }

    // Second pass we do the actual spills (SpillFloat pops the stack, so we
    // iterate by register number rather than by stack slot).
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if ((genRegMaskFloat((regNumber)i) & spillMask))
        {
            JITDUMP("spilling temp in register %s\n", regVarNameStackFP((regNumber)i));
            SpillFloat((regNumber)i, true);
        }
    }
}
588
589 // Spills all the fp stack. We need this to spill
590 // across calls
591 void CodeGen::SpillForCallStackFP()
592 {
593     unsigned i;
594     unsigned uSize = compCurFPState.m_uStackSize;
595
596     for (i = 0; i < uSize; i++)
597     {
598         SpillFloat((regNumber)compCurFPState.m_uStack[compCurFPState.TopIndex()], true);
599     }
600 }
601
// Spills the value in virtual register 'reg' to a memory temp and pops it
// from the FP stack. Handles both temp trees (regSet.genUsedRegsFloat) and
// enregistered variables (regSet.genRegVarsFloat); the spill is recorded on
// regSet.rsSpillFloat for later unspilling.
void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall)
{
#ifdef DEBUG
    regMaskTP mask = genRegMaskFloat(reg);

    // We can allow spilling regvars, but we don't need it at the moment, and we're
    // missing code in setupopforflatfp, so assert.
    assert(bIsCall || (mask & (regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat)) == 0);
#endif

    JITDUMP("SpillFloat spilling register %s\n", regVarNameStackFP(reg));

    // We take the virtual register to the top of the stack
    FlatFPX87_MoveToTOS(&compCurFPState, reg);

    // Allocate spill structure
    RegSet::SpillDsc* spill = RegSet::SpillDsc::alloc(compiler, &regSet, TYP_FLOAT);

    // Fill out spill structure
    var_types type;
    if (regSet.genUsedRegsFloat[reg])
    {
        JITDUMP("will spill tree [%08p]\n", dspPtr(regSet.genUsedRegsFloat[reg]));
        // register used for temp stack
        spill->spillTree             = regSet.genUsedRegsFloat[reg];
        spill->bEnregisteredVariable = false;

        // Flag the tree so users know its value now lives in memory.
        regSet.genUsedRegsFloat[reg]->gtFlags |= GTF_SPILLED;

        type = genActualType(regSet.genUsedRegsFloat[reg]->TypeGet());

        // Clear used flag
        regSet.SetUsedRegFloat(regSet.genUsedRegsFloat[reg], false);
    }
    else
    {
        JITDUMP("will spill varDsc [%08p]\n", dspPtr(regSet.genRegVarsFloat[reg]));

        // enregistered variable
        spill->spillVarDsc = regSet.genRegVarsFloat[reg];
        assert(spill->spillVarDsc);

        spill->bEnregisteredVariable = true;

        // Mark as spilled
        spill->spillVarDsc->lvSpilled = true;
        type                          = genActualType(regSet.genRegVarsFloat[reg]->TypeGet());

        // Clear register flag
        SetRegVarFloat(reg, type, 0);
    }

    // Add to spill list
    spill->spillNext    = regSet.rsSpillFloat;
    regSet.rsSpillFloat = spill;

    // Obtain space
    TempDsc* temp = spill->spillTemp = compiler->tmpGetTemp(type);
    emitAttr size                    = EmitSize(type);

    // Store-and-pop the top of the x87 stack into the temp's slot, then
    // mirror the pop in the virtual state.
    getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
    compCurFPState.Pop();
}
665
// The 'useSameReg' unspill variant is not needed for the x87 backend.
void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
{
    NYI(!"Need not be implemented for x86.");
}
670
// Reloads a previously spilled FP value (temp tree or enregistered
// variable) from its memory temp back onto the x87 stack.
void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
{
    // Do actual unspill
    if (spillDsc->bEnregisteredVariable)
    {
        assert(spillDsc->spillVarDsc->lvSpilled);

        // Do the logic as it was a regvar birth
        genRegVarBirthStackFP(spillDsc->spillVarDsc);

        // Mark as not spilled any more
        spillDsc->spillVarDsc->lvSpilled = false;

        // Update stack layout.
        compCurFPState.Push(spillDsc->spillVarDsc->lvRegNum);
    }
    else
    {
        assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);

        spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;

        // Pick a fresh virtual register for the reloaded temp tree and mark
        // it in use again.
        regNumber reg = regSet.PickRegFloat();
        genMarkTreeInReg(spillDsc->spillTree, reg);
        regSet.SetUsedRegFloat(spillDsc->spillTree, true);

        compCurFPState.Push(reg);
    }

    // load from spilled spot
    emitAttr size = EmitSize(spillDsc->spillTemp->tdTempType());
    getEmitter()->emitIns_S(INS_fld, size, spillDsc->spillTemp->tdTempNum(), 0);
}
704
705 // unspills any reg var that we have in the spill list. We need this
706 // because we can't have any spilled vars across basic blocks
707 void CodeGen::UnspillRegVarsStackFp()
708 {
709     RegSet::SpillDsc* cur;
710     RegSet::SpillDsc* next;
711
712     for (cur = regSet.rsSpillFloat; cur; cur = next)
713     {
714         next = cur->spillNext;
715
716         if (cur->bEnregisteredVariable)
717         {
718             UnspillFloat(cur);
719         }
720     }
721 }
722
723 #ifdef DEBUG
// Printable names for the virtual FP registers, generated from the REGDEF
// entries in registerfp.h (one short name per register, in register order).
const char* regNamesFP[] = {
#define REGDEF(name, rnum, mask, sname) sname,
#include "registerfp.h"
};
728
729 // static
730 const char* CodeGenInterface::regVarNameStackFP(regNumber reg)
731 {
732     return regNamesFP[reg];
733 }
734
// Debug check: validates the FP-state invariants that must hold at the end
// of every statement — a consistent register map, no used or locked temp
// registers, and a regvar count no larger than the stack depth.
bool CodeGen::ConsistentAfterStatementStackFP()
{
    if (!compCurFPState.IsConsistent())
    {
        return false;
    }

    if (regSet.rsMaskUsedFloat != 0)
    {
        assert(!"FP register marked as used after statement");
        return false;
    }
    if (regSet.rsMaskLockedFloat != 0)
    {
        assert(!"FP register marked as locked after statement");
        return false;
    }
    // Every enregistered FP variable occupies a stack slot, so the regvar
    // count can never exceed the stack depth.
    if (genCountBits(regSet.rsMaskRegVarFloat) > compCurFPState.m_uStackSize)
    {
        assert(!"number of FP regvars in regSet.rsMaskRegVarFloat doesnt match current FP state");
        return false;
    }

    return true;
}
760
761 #endif
762
763 int CodeGen::genNumberTemps()
764 {
765     return compCurFPState.m_uStackSize - genCountBits(regSet.rsMaskRegVarFloat);
766 }
767
768 void CodeGen::genDiscardStackFP(GenTree* tree)
769 {
770     assert(tree->InReg());
771     assert(varTypeIsFloating(tree));
772
773     FlatFPX87_Unload(&compCurFPState, tree->gtRegNum, true);
774 }
775
// Renames virtual register srcReg to dstReg in the FP state and migrates
// all register-set bookkeeping (used mask, locked mask, used-tree table)
// over to the new register. dstReg must be free.
void CodeGen::genRegRenameWithMasks(regNumber dstReg, regNumber srcReg)
{
    regMaskTP dstregmask = genRegMaskFloat(dstReg);
    regMaskTP srcregmask = genRegMaskFloat(srcReg);

    // rename use register
    compCurFPState.Rename(dstReg, srcReg);

    // Move the 'used' bit from src to dst.
    regSet.rsMaskUsedFloat &= ~srcregmask;
    regSet.rsMaskUsedFloat |= dstregmask;

    if (srcregmask & regSet.rsMaskLockedFloat)
    {
        assert((dstregmask & regSet.rsMaskLockedFloat) == 0);
        // We will set the new one as locked
        regSet.rsMaskLockedFloat &= ~srcregmask;
        regSet.rsMaskLockedFloat |= dstregmask;
    }

    // Updated used tree: the tree that lived in srcReg now lives in dstReg.
    assert(!regSet.genUsedRegsFloat[dstReg]);
    regSet.genUsedRegsFloat[dstReg]           = regSet.genUsedRegsFloat[srcReg];
    regSet.genUsedRegsFloat[dstReg]->gtRegNum = dstReg;
    regSet.genUsedRegsFloat[srcReg]           = NULL;
}
801
// Marks the FP local 'varDsc' as live in its assigned virtual register.
// If that register currently holds a temp, the temp is renamed out of the
// way first so the regvar can occupy its home register.
void CodeGen::genRegVarBirthStackFP(LclVarDsc* varDsc)
{
    // Mark the virtual register we're assigning to this local;
    regNumber reg = varDsc->lvRegNum;

#ifdef DEBUG
    regMaskTP regmask = genRegMaskFloat(reg);
#endif

    assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
    if (regSet.genUsedRegsFloat[reg])
    {

        // Register was marked as used... will have to rename it so we can put the
        // regvar where it belongs.
        JITDUMP("Renaming used register %s\n", regVarNameStackFP(reg));

        regNumber newreg;

        newreg = regSet.PickRegFloat();

#ifdef DEBUG
        regMaskTP newregmask = genRegMaskFloat(newreg);
#endif

        // Update used mask: reg must be used and newreg must be free.
        assert((regSet.rsMaskUsedFloat & regmask) && (regSet.rsMaskUsedFloat & newregmask) == 0);

        genRegRenameWithMasks(newreg, reg);
    }

    // Mark the reg as holding a regvar
    varDsc->lvSpilled = false;
    SetRegVarFloat(reg, varDsc->TypeGet(), varDsc);
}
837
// GenTree overload: dumps the regvar birth and forwards to the LclVarDsc
// overload for the local the tree refers to.
void CodeGen::genRegVarBirthStackFP(GenTree* tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("variable V%i is going live in ", tree->gtLclVarCommon.gtLclNum);
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    // Update register in local var
    LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;

    genRegVarBirthStackFP(varDsc);
    // Tree and local must agree on the assigned register.
    assert(tree->gtRegNum == tree->gtRegVar.gtRegNum && tree->gtRegNum == varDsc->lvRegNum);
}
855
856 void CodeGen::genRegVarDeathStackFP(LclVarDsc* varDsc)
857 {
858     regNumber reg = varDsc->lvRegNum;
859
860     assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
861     SetRegVarFloat(reg, varDsc->TypeGet(), 0);
862 }
863
// GenTree overload: dumps the regvar death and forwards to the LclVarDsc
// overload for the local the tree refers to.
void CodeGen::genRegVarDeathStackFP(GenTree* tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("register %s is going dead in ", regVarNameStackFP(tree->gtRegVar.gtRegNum));
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
    genRegVarDeathStackFP(varDsc);
}
878
// Loads 'tree' into virtual FP register 'reg', pushing it onto the x87
// stack. A dying regvar source is simply renamed; a live regvar source is
// duplicated with fld; anything else is loaded from its tree operand.
void CodeGen::genLoadStackFP(GenTree* tree, regNumber reg)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genLoadStackFP");
        Compiler::printTreeID(tree);
        printf(" %s\n", regVarNameStackFP(reg));
    }
#endif // DEBUG

    if (tree->IsRegVar())
    {
        // if it has been spilled, unspill it.
        LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
        if (varDsc->lvSpilled)
        {
            UnspillFloat(varDsc);
        }

        // if it's dying, just rename the register, else load it normally
        if (tree->IsRegVarDeath())
        {
            genRegVarDeathStackFP(tree);
            compCurFPState.Rename(reg, tree->gtRegVar.gtRegNum);
        }
        else
        {
            assert(tree->gtRegNum == tree->gtRegVar.gtRegNum);
            // Duplicate the regvar's value onto the top of the stack.
            inst_FN(INS_fld, compCurFPState.VirtualToST(tree->gtRegVar.gtRegNum));
            FlatFPX87_PushVirtual(&compCurFPState, reg);
        }
    }
    else
    {
        // Non-regvar operand: push the new virtual register and emit the load.
        FlatFPX87_PushVirtual(&compCurFPState, reg);
        inst_FS_TT(INS_fld, tree);
    }
}
918
// Generates an FP move from 'src' to 'dst'. A REG_FPNONE register argument
// means the corresponding operand is not in a temp register (it is a regvar
// or a memory location). Covers reg->mem stores, mem/const->regvar loads,
// and reg->regvar moves (by renaming when the source is disposable).
void CodeGen::genMovStackFP(GenTree* dst, regNumber dstreg, GenTree* src, regNumber srcreg)
{
    if (dstreg == REG_FPNONE && !dst->IsRegVar())
    {
        regNumber reg;

        // reg to mem path
        if (srcreg == REG_FPNONE)
        {
            assert(src->IsRegVar());
            reg = src->gtRegNum;
        }
        else
        {
            reg = srcreg;
        }

        // Mov src to top of the stack
        FlatFPX87_MoveToTOS(&compCurFPState, reg);

        if (srcreg != REG_FPNONE || (src->IsRegVar() && src->IsRegVarDeath()))
        {
            // Source value is disposable: store and pop.
            // Emit instruction
            inst_FS_TT(INS_fstp, dst);

            // Update stack
            compCurFPState.Pop();
        }
        else
        {
            // Live regvar source must keep its value: store without popping.
            inst_FS_TT(INS_fst, dst);
        }
    }
    else
    {
        if (dstreg == REG_FPNONE)
        {
            assert(dst->IsRegVar());
            dstreg = dst->gtRegNum;
        }

        if (srcreg == REG_FPNONE && !src->IsRegVar())
        {
            // mem to reg
            assert(dst->IsRegVar() && dst->IsRegVarBirth());

            FlatFPX87_PushVirtual(&compCurFPState, dstreg);
            FlatFPX87_MoveToTOS(&compCurFPState, dstreg);

            if (src->gtOper == GT_CNS_DBL)
            {
                // Constants may have dedicated load sequences (e.g. fldz).
                genConstantLoadStackFP(src);
            }
            else
            {
                inst_FS_TT(INS_fld, src);
            }
        }
        else
        {
            // disposable reg to reg, use renaming
            assert(dst->IsRegVar() && dst->IsRegVarBirth());
            assert(src->IsRegVar() || (src->InReg()));
            assert(src->gtRegNum != REG_FPNONE);

            if ((src->InReg()) || (src->IsRegVar() && src->IsRegVarDeath()))
            {
                // src is disposable and dst is a regvar, so we'll rename src to dst

                // SetupOp should have masked out the regvar
                assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
                       !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));

                // get slot that holds the value
                unsigned uStack = compCurFPState.m_uVirtualMap[src->gtRegNum];

                // unlink the slot that holds the value
                compCurFPState.Unmap(src->gtRegNum);

                regNumber tgtreg = dst->gtRegVar.gtRegNum;

                // The state is transiently inconsistent while the slot is
                // unmapped, so suppress consistency asserts until re-linked.
                compCurFPState.IgnoreConsistencyChecks(true);

                if (regSet.genUsedRegsFloat[tgtreg])
                {
                    // tgtreg is used, we move it to src reg. We do this here as src reg won't be
                    // marked as used, if tgtreg is used it srcreg will be a candidate for moving
                    // which is something we don't want, so we do the renaming here.
                    genRegRenameWithMasks(src->gtRegNum, tgtreg);
                }

                compCurFPState.IgnoreConsistencyChecks(false);

                // Birth of FP var
                genRegVarBirthStackFP(dst);

                // Associate target reg with source physical register
                compCurFPState.Associate(tgtreg, uStack);
            }
            else
            {
                if (src->IsRegVar())
                {
                    // regvar that isnt dying to regvar
                    assert(!src->IsRegVarDeath());

                    // Birth of FP var
                    genRegVarBirthStackFP(dst);

                    // Load register (duplicate the live regvar's value)
                    inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegVar.gtRegNum));

                    // update our logic stack
                    FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
                }
                else
                {
                    // memory to regvar

                    // Birth of FP var
                    genRegVarBirthStackFP(dst);

                    // load into stack
                    inst_FS_TT(INS_fld, src);

                    // update our logic stack
                    FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
                }
            }
        }
    }
}
1051
1052 void CodeGen::genCodeForTreeStackFP_DONE(GenTree* tree, regNumber reg)
1053 {
1054     return genCodeForTree_DONE(tree, reg);
1055 }
1056
// Does the setup of the FP stack on entry to block:
//  - ensures the block has a recorded x87 stack layout (bbFPStateX87),
//    creating one here if this is the first time we reach the block,
//  - births every enregistered FP variable that is live on entry,
//  - makes compCurFPState mirror the block's recorded layout.
void CodeGen::genSetupStateStackFP(BasicBlock* block)
{
    // If no layout has been recorded for this block yet, we generate one now
    // (and record the pushes we perform below into it).
    bool bGenerate = !block->bbFPStateX87;
    if (bGenerate)
    {
        // Allocate FP state
        block->bbFPStateX87 = FlatFPAllocFPState();
        block->bbFPStateX87->Init();
    }

    // Update liveset and lock enregistered live vars on entry.
    // liveSet = (vars live on entry) intersect (all FP enregistered vars).
    VARSET_TP liveSet(VarSetOps::Intersection(compiler, block->bbLiveIn, compiler->optAllFPregVars));

    if (!VarSetOps::IsEmpty(compiler, liveSet))
    {
        unsigned   varNum;
        LclVarDsc* varDsc;

        // Walk the entire local-var table looking for enregistered FP vars
        // that are members of the live-on-entry set computed above.
        for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
        {
            if (varDsc->IsFloatRegType() && varDsc->lvRegister)
            {

                unsigned varIndex = varDsc->lvVarIndex;

                // Is this variable live on entry?
                if (VarSetOps::IsMember(compiler, liveSet, varIndex))
                {
                    JITDUMP("genSetupStateStackFP(): enregistered variable V%i is live on entry to block\n", varNum);

                    assert(varDsc->lvTracked);
                    assert(varDsc->lvRegNum != REG_FPNONE);

                    // Mark the variable as live in its (virtual) FP register.
                    genRegVarBirthStackFP(varDsc);

                    if (bGenerate)
                    {
                        // If we're generating layout, update it.
                        block->bbFPStateX87->Push(varDsc->lvRegNum);
                    }
                }
            }
        }
    }

    // Adopt the block's recorded layout as the current FP state.
    compCurFPState.Init(block->bbFPStateX87);

    assert(block->bbFPStateX87->IsConsistent());
}
1107
// Pushes the floating-point argument 'args' onto the machine stack (via ESP)
// for an upcoming call. Handles three shapes:
//  - GT_CNS_DBL: push the constant's bits directly (float) or go through an
//    fld/fstp sequence (double),
//  - GT_CAST from double: evaluate the source onto the x87 stack and pop it
//    into the outgoing slot (shared tail at PUSH_FLT),
//  - everything else: either push the operand's memory image word by word, or
//    pop the value from the x87 stack into [ESP].
// Returns the mask of address registers used to form the operand's address
// (0 if none); the caller releases them.
regMaskTP CodeGen::genPushArgumentStackFP(GenTree* args)
{
    regMaskTP addrReg = 0;
    unsigned  opsz    = genTypeSize(genActualType(args->TypeGet()));

    switch (args->gtOper)
    {
        GenTree* temp;
        GenTree* fval;
        size_t   flopsz;

        case GT_CNS_DBL:
        {
            float f    = 0.0;
            int*  addr = NULL;
            if (args->TypeGet() == TYP_FLOAT)
            {
                f = (float)args->gtDblCon.gtDconVal;
                // *(long*) (&f) used instead of *addr because of strict
                // pointer aliasing optimization. According to the ISO C/C++
                // standard, an optimizer can assume two pointers of
                // non-compatible types do not point to the same memory.
                inst_IV(INS_push, *((int*)(&f)));
                genSinglePush();
                addrReg = 0;
            }
            else
            {
                addr = (int*)&args->gtDblCon.gtDconVal;

                // store forwarding fix for pentium 4 and Centrino
                // (even for down level CPUs as we don't care about their perf any more)
                fval = genMakeConst(&args->gtDblCon.gtDconVal, args->gtType, args, true);
                inst_FS_TT(INS_fld, fval);
                flopsz = (size_t)8;
                // Make room for the 8-byte double, then fstp it into [ESP].
                inst_RV_IV(INS_sub, REG_ESP, flopsz, EA_PTRSIZE);
                getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
                // Two stack slots were consumed (8 bytes).
                genSinglePush();
                genSinglePush();

                addrReg = 0;
            }

            break;
        }

        case GT_CAST:
        {
            // Is the value a cast from double ?
            if ((args->gtOper == GT_CAST) && (args->CastFromType() == TYP_DOUBLE))
            {
                /* Load the value onto the FP stack */

                genCodeForTreeFlt(args->gtCast.CastOp(), false);

                /* Go push the value as a float/double */
                args = args->gtCast.CastOp();

                addrReg = 0;
                goto PUSH_FLT;
            }
            // Fall through to default case....
        }
        default:
        {
            // Either form an address for the operand (temp != NULL) or leave
            // its value on the x87 stack (temp == NULL).
            temp = genMakeAddrOrFPstk(args, &addrReg, false);
            if (temp)
            {
                unsigned offs;

                // We have the address of the float operand, push its bytes
                offs = opsz;
                assert(offs % sizeof(int) == 0);

                if (offs == 4)
                {
                    assert(args->gtType == temp->gtType);
                    // Push the words highest-offset first so the value ends up
                    // in memory order on the stack.
                    do
                    {
                        offs -= sizeof(int);
                        inst_TT(INS_push, temp, offs);
                        genSinglePush();
                    } while (offs);
                }
                else
                {
                    // store forwarding fix for pentium 4 and Centrino
                    inst_FS_TT(INS_fld, temp);
                    flopsz = (size_t)offs;
                    inst_RV_IV(INS_sub, REG_ESP, (size_t)flopsz, EA_PTRSIZE);
                    getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
                    genSinglePush();
                    genSinglePush();
                }
            }
            else
            {
            // The argument is on the FP stack -- pop it into [ESP-4/8]

            PUSH_FLT:

                inst_RV_IV(INS_sub, REG_ESP, opsz, EA_PTRSIZE);

                // Account for one slot (float) or two (double).
                genSinglePush();
                if (opsz == 2 * sizeof(unsigned))
                    genSinglePush();

                // Take reg to top of stack
                FlatFPX87_MoveToTOS(&compCurFPState, args->gtRegNum);

                // Pop it off to stack
                compCurFPState.Pop();
                getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(opsz), REG_NA, REG_ESP, 0);
            }

            // Address registers no longer hold live GC pointers.
            gcInfo.gcMarkRegSetNpt(addrReg);
            break;
        }
    }

    return addrReg;
}
1230
// Rounds the x87 value computed for 'op' to the precision of 'type' by
// spilling it to a temp of that size and reloading it (the store/reload
// narrows the wider x87 register value to float/double precision).
// If 'type' is TYP_UNDEF, the operand's own type is used.
void CodeGen::genRoundFpExpressionStackFP(GenTree* op, var_types type)
{
    // Do nothing with memory resident opcodes - these are the right precision
    // (even if genMakeAddrOrFPstk loads them to the FP stack)
    if (type == TYP_UNDEF)
        type = op->TypeGet();

    switch (op->gtOper)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_CLS_VAR:
        case GT_CNS_DBL:
        case GT_IND:
        case GT_LEA:
            // Memory-resident and already the requested precision: no rounding
            // needed. Otherwise fall through to the spill/reload below.
            if (type == op->TypeGet())
                return;
        default:
            break;
    }

    assert(op->gtRegNum != REG_FPNONE);

    // Take register to top of stack
    FlatFPX87_MoveToTOS(&compCurFPState, op->gtRegNum);

    // Allocate a temp for the expression
    TempDsc* temp = compiler->tmpGetTemp(type);

    // Store the FP value into the temp
    inst_FS_ST(INS_fstp, EmitSize(type), temp, 0);

    // Load the value back onto the FP stack
    inst_FS_ST(INS_fld, EmitSize(type), temp, 0);

    // We no longer need the temp
    compiler->tmpRlsTemp(temp);
}
1269
1270 void CodeGen::genCodeForTreeStackFP_Const(GenTree* tree)
1271 {
1272 #ifdef DEBUG
1273     if (compiler->verbose)
1274     {
1275         printf("genCodeForTreeStackFP_Const() ");
1276         Compiler::printTreeID(tree);
1277         printf("\n");
1278     }
1279 #endif // DEBUG
1280
1281 #ifdef DEBUG
1282     if (tree->OperGet() != GT_CNS_DBL)
1283     {
1284         compiler->gtDispTree(tree);
1285         assert(!"bogus float const");
1286     }
1287 #endif
1288     // Pick register
1289     regNumber reg = regSet.PickRegFloat();
1290
1291     // Load constant
1292     genConstantLoadStackFP(tree);
1293
1294     // Push register to virtual stack
1295     FlatFPX87_PushVirtual(&compCurFPState, reg);
1296
1297     // Update tree
1298     genCodeForTreeStackFP_DONE(tree, reg);
1299 }
1300
1301 void CodeGen::genCodeForTreeStackFP_Leaf(GenTree* tree)
1302 {
1303 #ifdef DEBUG
1304     if (compiler->verbose)
1305     {
1306         printf("genCodeForTreeStackFP_Leaf() ");
1307         Compiler::printTreeID(tree);
1308         printf("\n");
1309     }
1310 #endif // DEBUG
1311
1312     switch (tree->OperGet())
1313     {
1314         case GT_LCL_VAR:
1315         case GT_LCL_FLD:
1316         {
1317             assert(!compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvRegister);
1318
1319             // Pick register
1320             regNumber reg = regSet.PickRegFloat();
1321
1322             // Load it
1323             genLoadStackFP(tree, reg);
1324
1325             genCodeForTreeStackFP_DONE(tree, reg);
1326
1327             break;
1328         }
1329
1330         case GT_REG_VAR:
1331         {
1332             regNumber reg = regSet.PickRegFloat();
1333
1334             genLoadStackFP(tree, reg);
1335
1336             genCodeForTreeStackFP_DONE(tree, reg);
1337
1338             break;
1339         }
1340
1341         case GT_CLS_VAR:
1342         {
1343             // Pick register
1344             regNumber reg = regSet.PickRegFloat();
1345
1346             // Load it
1347             genLoadStackFP(tree, reg);
1348
1349             genCodeForTreeStackFP_DONE(tree, reg);
1350
1351             break;
1352         }
1353
1354         default:
1355 #ifdef DEBUG
1356             compiler->gtDispTree(tree);
1357 #endif
1358             assert(!"unexpected leaf");
1359     }
1360
1361     genUpdateLife(tree);
1362 }
1363
// Generates code for a floating-point assignment (GT_ASG).
// Fast path: RHS is a GT_CNS_DBL, handled specially (constant loads, direct
// memory stores of the constant's bits, or fld/fstp via a read-only data
// slot). Otherwise, evaluates LHS/RHS in the order dictated by
// GTF_REVERSE_OPS and performs the move with genMovStackFP.
void CodeGen::genCodeForTreeStackFP_Asg(GenTree* tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Asg() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    emitAttr size;
    unsigned offs;
    GenTree* op1 = tree->gtOp.gtOp1;
    GenTree* op2 = tree->gtGetOp2IfPresent();

    assert(tree->OperGet() == GT_ASG);

    // A cast to the same floating type as the store target is a no-op
    // (the store itself narrows), so skip it -- unless the target is a
    // regvar, which keeps full x87 precision.
    if (!op1->IsRegVar() && (op2->gtOper == GT_CAST) && (op1->gtType == op2->gtType) &&
        varTypeIsFloating(op2->gtCast.CastOp()))
    {
        /* We can discard the cast */
        op2 = op2->gtCast.CastOp();
    }

    size = EmitSize(op1);
    offs = 0;

    // If lhs is a comma expression, evaluate the non-last parts, make op1 be the remainder.
    // (But can't do this if the assignment is reversed...)
    if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
    {
        op1 = genCodeForCommaTree(op1);
    }

    GenTree* op1NonCom = op1->gtEffectiveVal();
    if (op1NonCom->gtOper == GT_LCL_VAR)
    {
#ifdef DEBUG
        LclVarDsc* varDsc = &compiler->lvaTable[op1NonCom->gtLclVarCommon.gtLclNum];
        // No dead stores
        assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH));
#endif

        /* For non-debuggable code, every definition of a lcl-var has
         * to be checked to see if we need to open a new scope for it.
         */

        if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
        {
            siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs);
        }
    }

    assert(op2);
    switch (op2->gtOper)
    {
        case GT_CNS_DBL:

            assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);

            regMaskTP addrRegInt;
            addrRegInt = 0;
            regMaskTP addrRegFlt;
            addrRegFlt = 0;

            // op2 is already "evaluated," so doesn't matter if they're reversed or not...
            op1 = genCodeForCommaTree(op1);
            op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);

            // We want to 'cast' the constant to the op1's type
            double constantValue;
            constantValue = op2->gtDblCon.gtDconVal;
            if (op1->gtType == TYP_FLOAT)
            {
                float temp    = forceCastToFloat(constantValue);
                constantValue = (double)temp;
            }

            // Try the special constant loads first (e.g. fldz/fld1-style);
            // genConstantLoadStackFP returns whether it emitted a load.
            GenTree* constantTree;
            constantTree = compiler->gtNewDconNode(constantValue);
            if (genConstantLoadStackFP(constantTree, true))
            {
                if (op1->IsRegVar())
                {
                    // regvar birth
                    genRegVarBirthStackFP(op1);

                    // Update
                    compCurFPState.Push(op1->gtRegNum);
                }
                else
                {
                    // store in target
                    inst_FS_TT(INS_fstp, op1);
                }
            }
            else
            {
                // Standard constant
                if (op1->IsRegVar())
                {
                    // Load constant to fp stack.

                    GenTree* cnsaddr;

                    // Create slot for constant
                    if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
                    {
                        // We're going to use that double as a float, so recompute addr
                        float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
                        cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
                    }
                    else
                    {
                        cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
                    }

                    // Load into stack
                    inst_FS_TT(INS_fld, cnsaddr);

                    // regvar birth
                    genRegVarBirthStackFP(op1);

                    // Update
                    compCurFPState.Push(op1->gtRegNum);
                }
                else
                {
                    if (size == 4)
                    {
                        // 4-byte target: store the float's bits with integer
                        // moves, no x87 traffic at all.
                        float f    = forceCastToFloat(op2->gtDblCon.gtDconVal);
                        int*  addr = (int*)&f;

                        do
                        {
                            inst_TT_IV(INS_mov, op1, *addr++, offs);
                            offs += sizeof(int);
                        } while (offs < size);
                    }
                    else
                    {
                        // store forwarding fix for pentium 4 and centrino and also
                        // fld for doubles that can be represented as floats, saving
                        // 4 bytes of load
                        GenTree* cnsaddr;

                        // Create slot for constant
                        if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
                        {
                            // We're going to use that double as a float, so recompute addr
                            float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
                            cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
                        }
                        else
                        {
                            assert(tree->gtType == TYP_DOUBLE);
                            cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
                        }

                        inst_FS_TT(INS_fld, cnsaddr);
                        inst_FS_TT(INS_fstp, op1);
                    }
                }
            }

            genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
            genUpdateLife(op1);
            return;

        default:
            break;
    }

    // Not one of the easy optimizations. Proceed normally
    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* Evaluate the RHS onto the FP stack.
           We don't need to round it as we will be doing a spill for
           the assignment anyway (unless op1 is a GT_REG_VAR). */

        genSetupForOpStackFP(op1, op2, true, true, false, true);

        // Do the move
        genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
    }
    else
    {
        // Have to evaluate left side before

        // This should never happen
        assert(!op1->IsRegVar());

        genSetupForOpStackFP(op1, op2, false, true, false, true);

        // Do the actual move
        genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
    }
}
1564
// Prepares the two operands of an FP operation.
//   bReverse           - operands evaluate in reverse order (GTF_REVERSE_OPS)
//   bMakeOp1Addressable - op1 becomes addressable (assignment target);
//                         otherwise op1 is evaluated onto the FP stack and
//                         op2 becomes addressable
//   bOp1ReadOnly/bOp2ReadOnly - the operand is only read, enabling the
//                         "regvar not dying" shortcut that avoids a load
// op1/op2 are taken by reference and may be rewritten (comma collapsing,
// genMakeAddressable*). On return the addressable operand's address regs
// have already been released.
void CodeGen::genSetupForOpStackFP(
    GenTree*& op1, GenTree*& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly)
{
    if (bMakeOp1Addressable)
    {
        if (bReverse)
        {
            // Reversed: swap roles and recurse with the non-addressable path.
            genSetupForOpStackFP(op2, op1, false, false, bOp2ReadOnly, bOp1ReadOnly);
        }
        else
        {
            regMaskTP addrRegInt = 0;
            regMaskTP addrRegFlt = 0;

            op1 = genCodeForCommaTree(op1);

            // Evaluate RHS on FP stack
            if (bOp2ReadOnly && op2->IsRegVar() && !op2->IsRegVarDeath())
            {
                // read only and not dying, so just make addressable
                op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
                genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
                genUpdateLife(op2);
            }
            else
            {
                // Make target addressable
                op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);

                op2 = genCodeForCommaTree(op2);

                genCodeForTreeFloat(op2);

                // Keep op2's register pinned while we re-validate op1's address.
                regSet.SetUsedRegFloat(op2, true);
                regSet.SetLockedRegFloat(op2, true);

                // Make sure target is still adressable
                genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);

                regSet.SetLockedRegFloat(op2, false);
                regSet.SetUsedRegFloat(op2, false);
            }

            /* Free up anything that was tied up by the target address */
            genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
        }
    }
    else
    {
        assert(!bReverse ||
               !"Can't do this. if op2 is a reg var and dies in op1, we have a serious problem. For the "
                "moment, handle this in the caller");

        regMaskTP addrRegInt = 0;
        regMaskTP addrRegFlt = 0;

        op1 = genCodeForCommaTree(op1);

        if (bOp1ReadOnly && op1->IsRegVar() && !op1->IsRegVarDeath() &&
            !genRegVarDiesInSubTree(op2, op1->gtRegVar.gtRegNum)) // regvar can't die in op2 either
        {
            // First update liveness for op1, since we're "evaluating" it here
            genUpdateLife(op1);

            op2 = genCodeForCommaTree(op2);

            // read only and not dying, we dont have to do anything.
            op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
            genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
        }
        else
        {
            // Evaluate op1 onto the FP stack first.
            genCodeForTreeFloat(op1);

            regSet.SetUsedRegFloat(op1, true);

            op2 = genCodeForCommaTree(op2);

            op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);

            // Restore op1 if necessary
            if (op1->gtFlags & GTF_SPILLED)
            {
                UnspillFloat(op1);
            }

            // Lock op1
            regSet.SetLockedRegFloat(op1, true);

            genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);

            // unlock op1
            regSet.SetLockedRegFloat(op1, false);

            // mark as free
            regSet.SetUsedRegFloat(op1, false);
        }

        genDoneAddressableStackFP(op2, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
    }
}
1666
// Generates code for an FP arithmetic node (GT_ADD/GT_SUB/GT_MUL/GT_DIV).
// Handles GTF_REVERSE_OPS itself (by swapping op1/op2 and telling
// genArithmStackFP to use the reversed instruction forms), plus two fast
// paths: operating directly on a dying regvar, and "op1 == op2" CSE-style
// trees where one evaluation serves both operands.
void CodeGen::genCodeForTreeStackFP_Arithm(GenTree* tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Arithm() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    assert(tree->OperGet() == GT_ADD || tree->OperGet() == GT_SUB || tree->OperGet() == GT_MUL ||
           tree->OperGet() == GT_DIV);

    // We handle the reverse here instead of leaving setupop to do it. As for this case
    //
    //              + with reverse
    //          op1    regvar
    //
    // and in regvar dies in op1, we would need a load of regvar, instead of a noop. So we handle this
    // here and tell genArithmStackFP to do the reverse operation
    bool bReverse;

    GenTree* op1;
    GenTree* op2;

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        bReverse = true;
        op1      = tree->gtGetOp2IfPresent();
        op2      = tree->gtOp.gtOp1;
    }
    else
    {
        bReverse = false;
        op1      = tree->gtOp.gtOp1;
        op2      = tree->gtGetOp2IfPresent();
    }

    regNumber result;

    // Fast paths
    genTreeOps oper = tree->OperGet();
    if (op1->IsRegVar() && op2->IsRegVar() && !op1->IsRegVarDeath() && op2->IsRegVarDeath())
    {
        // In this fastpath, we will save a load by doing the operation directly on the op2
        // register, as it's dying.

        // Mark op2 as dead
        genRegVarDeathStackFP(op2);

        // Do operation
        result = genArithmStackFP(oper, op2, op2->gtRegVar.gtRegNum, op1, REG_FPNONE, !bReverse);

        genUpdateLife(op1);
        genUpdateLife(op2);
    }
    else if (!op1->IsRegVar() &&                         // We don't do this for regvars, as we'll need a scratch reg
             ((tree->gtFlags & GTF_SIDE_EFFECT) == 0) && // No side effects
             GenTree::Compare(op1, op2))                 // op1 and op2 are the same
    {
        // op1 is same thing as op2. Ideal for CSEs that werent optimized
        // due to their low cost.

        // First we need to update lifetimes from op1
        VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, genUpdateLiveSetForward(op1));
        compiler->compCurLifeTree = op1;

        // Evaluate once; use the same register for both operands.
        genCodeForTreeFloat(op2);

        result = genArithmStackFP(oper, op2, op2->gtRegNum, op2, op2->gtRegNum, bReverse);
    }
    else
    {
        // General case: evaluate op1 onto the FP stack, op2 addressable.
        genSetupForOpStackFP(op1, op2, false, false, false, true);

        result = genArithmStackFP(oper, op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
                                  (op2->InReg()) ? op2->gtRegNum : REG_FPNONE, bReverse);
    }

    genCodeForTreeStackFP_DONE(tree, result);
}
1749
// Emits the x87 instructions for dst = dst <oper> src (or the reversed form
// when bReverse is set). dst must be in a virtual FP register (dstreg !=
// REG_FPNONE); src may be in a register, a (possibly dying) regvar, or
// memory (srcreg == REG_FPNONE and src not a regvar). The instruction
// variant chosen (normal/reverse, popping/non-popping) depends on which
// operand is at the top of the virtual stack and whether src can be
// consumed. Returns the register holding the result (always dstreg).
regNumber CodeGen::genArithmStackFP(
    genTreeOps oper, GenTree* dst, regNumber dstreg, GenTree* src, regNumber srcreg, bool bReverse)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genArithmStackFP() dst: ");
        Compiler::printTreeID(dst);
        printf(" src: ");
        Compiler::printTreeID(src);
        printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
               srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
    }
#endif // DEBUG

    // Select instruction depending on oper and bReverseOp

    instruction ins_NN;
    instruction ins_RN;
    instruction ins_RP;
    instruction ins_NP;

    switch (oper)
    {
        default:
            assert(!"Unexpected oper");
        case GT_ADD:
        case GT_SUB:
        case GT_MUL:
        case GT_DIV:

            /* Make sure the instruction tables look correctly ordered */
            assert(FPmathNN[GT_ADD - GT_ADD] == INS_fadd);
            assert(FPmathNN[GT_SUB - GT_ADD] == INS_fsub);
            assert(FPmathNN[GT_MUL - GT_ADD] == INS_fmul);
            assert(FPmathNN[GT_DIV - GT_ADD] == INS_fdiv);

            assert(FPmathNP[GT_ADD - GT_ADD] == INS_faddp);
            assert(FPmathNP[GT_SUB - GT_ADD] == INS_fsubp);
            assert(FPmathNP[GT_MUL - GT_ADD] == INS_fmulp);
            assert(FPmathNP[GT_DIV - GT_ADD] == INS_fdivp);

            assert(FPmathRN[GT_ADD - GT_ADD] == INS_fadd);
            assert(FPmathRN[GT_SUB - GT_ADD] == INS_fsubr);
            assert(FPmathRN[GT_MUL - GT_ADD] == INS_fmul);
            assert(FPmathRN[GT_DIV - GT_ADD] == INS_fdivr);

            assert(FPmathRP[GT_ADD - GT_ADD] == INS_faddp);
            assert(FPmathRP[GT_SUB - GT_ADD] == INS_fsubrp);
            assert(FPmathRP[GT_MUL - GT_ADD] == INS_fmulp);
            assert(FPmathRP[GT_DIV - GT_ADD] == INS_fdivrp);

            // For a reversed operation the normal/reverse tables simply swap
            // (fadd<->fadd, fsub<->fsubr, etc.).
            if (bReverse)
            {
                ins_NN = FPmathRN[oper - GT_ADD];
                ins_NP = FPmathRP[oper - GT_ADD];
                ins_RN = FPmathNN[oper - GT_ADD];
                ins_RP = FPmathNP[oper - GT_ADD];
            }
            else
            {
                ins_NN = FPmathNN[oper - GT_ADD];
                ins_NP = FPmathNP[oper - GT_ADD];
                ins_RN = FPmathRN[oper - GT_ADD];
                ins_RP = FPmathRP[oper - GT_ADD];
            }
    }

    regNumber result = REG_FPNONE;

    if (dstreg != REG_FPNONE)
    {
        if (srcreg == REG_FPNONE)
        {
            if (src->IsRegVar())
            {
                if (src->IsRegVarDeath())
                {
                    // src's regvar is dying, so we may consume (pop) it.
                    if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
                    {
                        // Do operation and store in srcreg
                        inst_FS(ins_RP, compCurFPState.VirtualToST(src->gtRegNum));

                        // kill current dst and rename src as dst.
                        FlatFPX87_Kill(&compCurFPState, dstreg);
                        compCurFPState.Rename(dstreg, src->gtRegNum);
                    }
                    else
                    {
                        // Take src to top of stack
                        FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                        // do reverse and pop operation
                        inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));

                        // Kill the register
                        FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
                    }

                    assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
                           !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
                }
                else
                {
                    // src's regvar stays live: non-popping form only.
                    if (compCurFPState.TopVirtual() == (unsigned)src->gtRegNum)
                    {
                        inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
                    }
                    else
                    {
                        FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
                        inst_FN(ins_NN, compCurFPState.VirtualToST(src->gtRegNum));
                    }
                }
            }
            else
            {
                // do operation with memory and store in dest
                FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
                inst_FS_TT(ins_NN, src);
            }
        }
        else
        {
            if (dstreg == srcreg)
            {
                // Same register on both sides (e.g. x <op> x).
                FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
                inst_FN(ins_NN, compCurFPState.VirtualToST(dstreg));
            }
            else
            {
                if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
                {
                    // Do operation and store in srcreg
                    inst_FS(ins_RP, compCurFPState.VirtualToST(srcreg));

                    // kill current dst and rename src as dst.
                    FlatFPX87_Kill(&compCurFPState, dstreg);
                    compCurFPState.Rename(dstreg, srcreg);
                }
                else
                {
                    FlatFPX87_MoveToTOS(&compCurFPState, srcreg);

                    // do reverse and pop operation
                    inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));

                    // Kill the register
                    FlatFPX87_Kill(&compCurFPState, srcreg);
                }
            }
        }

        result = dstreg;
    }
    else
    {
        assert(!"if we get here it means we didnt load op1 into a temp. Investigate why");
    }

    assert(result != REG_FPNONE);
    return result;
}
1913
1914 void CodeGen::genCodeForTreeStackFP_AsgArithm(GenTree* tree)
1915 {
1916 #ifdef DEBUG
1917     if (compiler->verbose)
1918     {
1919         printf("genCodeForTreeStackFP_AsgArithm() ");
1920         Compiler::printTreeID(tree);
1921         printf("\n");
1922     }
1923 #endif // DEBUG
1924
1925     assert(tree->OperGet() == GT_ASG_ADD || tree->OperGet() == GT_ASG_SUB || tree->OperGet() == GT_ASG_MUL ||
1926            tree->OperGet() == GT_ASG_DIV);
1927
1928     GenTree* op1 = tree->gtOp.gtOp1;
1929     GenTree* op2 = tree->gtGetOp2IfPresent();
1930
1931     genSetupForOpStackFP(op1, op2, (tree->gtFlags & GTF_REVERSE_OPS) ? true : false, true, false, true);
1932
1933     regNumber result = genAsgArithmStackFP(tree->OperGet(), op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
1934                                            (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
1935
1936     genCodeForTreeStackFP_DONE(tree, result);
1937 }
1938
// Generates code for a floating-point assign-arithmetic operation
// (GT_ASG_ADD / GT_ASG_SUB / GT_ASG_MUL / GT_ASG_DIV) on the x87 stack.
//
// Arguments:
//    oper   - the assign-arithmetic operator
//    dst    - destination tree (an FP regvar or a memory location)
//    dstreg - virtual FP register holding dst; always REG_FPNONE here (asserted below)
//    src    - source operand tree
//    srcreg - virtual FP register holding src, or REG_FPNONE when src is a regvar
//             or lives in memory
//
// Return Value:
//    Always REG_FPNONE - the result is stored back into dst (the regvar's stack
//    slot or memory) rather than being left in a register for the caller.
regNumber CodeGen::genAsgArithmStackFP(genTreeOps oper, GenTree* dst, regNumber dstreg, GenTree* src, regNumber srcreg)
{
    regNumber result = REG_FPNONE;

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genAsgArithmStackFP() dst: ");
        Compiler::printTreeID(dst);
        printf(" src: ");
        Compiler::printTreeID(src);
        printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
               srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
    }
#endif // DEBUG

    // Instruction variants, named after the tables at the top of the file:
    // N = normal operand order, R = reverse operand order, P = pop after the op.
    instruction ins_NN;
    instruction ins_RN;
    instruction ins_RP;
    instruction ins_NP;

    switch (oper)
    {
        default:
            assert(!"Unexpected oper");
            break;
        case GT_ASG_ADD:
        case GT_ASG_SUB:
        case GT_ASG_MUL:
        case GT_ASG_DIV:

            // Verify the FPmath tables line up with the GT_ASG_* operator order,
            // so they can be indexed with (oper - GT_ASG_ADD).
            assert(FPmathRN[GT_ASG_ADD - GT_ASG_ADD] == INS_fadd);
            assert(FPmathRN[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubr);
            assert(FPmathRN[GT_ASG_MUL - GT_ASG_ADD] == INS_fmul);
            assert(FPmathRN[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivr);

            assert(FPmathRP[GT_ASG_ADD - GT_ASG_ADD] == INS_faddp);
            assert(FPmathRP[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubrp);
            assert(FPmathRP[GT_ASG_MUL - GT_ASG_ADD] == INS_fmulp);
            assert(FPmathRP[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivrp);

            ins_NN = FPmathNN[oper - GT_ASG_ADD];
            ins_NP = FPmathNP[oper - GT_ASG_ADD];

            ins_RN = FPmathRN[oper - GT_ASG_ADD];
            ins_RP = FPmathRP[oper - GT_ASG_ADD];

            if (dstreg != REG_FPNONE)
            {
                assert(!"dst should be a regvar or memory");
            }
            else
            {
                if (dst->IsRegVar())
                {
                    if (src->IsRegVar())
                    {
                        if (src->IsRegVarDeath())
                        {
                            // Take src to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                            // Do op with pop: src dies at this use, so it can be popped
                            inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));

                            // Kill the register
                            FlatFPX87_Kill(&compCurFPState, src->gtRegNum);

                            // SetupOp should mark the regvar as dead
                            assert((genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
                        }
                        else
                        {
                            assert(src->gtRegNum == src->gtRegVar.gtRegNum &&
                                   "We shoudnt be loading regvar src on the stack as src is readonly");

                            // Take src to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                            // Do op without popping: src stays live on the stack
                            inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
                        }
                    }
                    else
                    {
                        if (srcreg == REG_FPNONE)
                        {
                            // src is in memory:
                            // take enregistered variable to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);

                            // Do operation with mem
                            inst_FS_TT(ins_NN, src);
                        }
                        else
                        {
                            // src is in a temp stack register:
                            // take enregistered variable to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                            // do op with pop - src is a temp and no longer needed
                            inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));

                            // Kill the register
                            FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
                        }
                    }
                }
                else
                {
                    // To memory
                    if ((src->IsRegVar()) && !src->IsRegVarDeath())
                    {
                        // We set src as read only, but as dst is in memory, we will need
                        // an extra physical register (which we should have, as we have a
                        // spare one for transitions).
                        //
                        // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
                        // here, but there's actually no reason to assume that.  AFAICT, for FP vars under stack FP,
                        // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
                        // FP stack position into which that is loaded to represent a particular use of the variable.
                        inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegNum));

                        // Do operation with mem
                        inst_FS_TT(ins_RN, dst);

                        // store back
                        inst_FS_TT(INS_fstp, dst);
                    }
                    else
                    {
                        // put src in top of stack
                        FlatFPX87_MoveToTOS(&compCurFPState, srcreg);

                        // Do operation with mem
                        inst_FS_TT(ins_RN, dst);

                        // store back
                        inst_FS_TT(INS_fstp, dst);

                        // SetupOp should have marked the regvar as dead in that case
                        assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
                               (genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);

                        FlatFPX87_Kill(&compCurFPState, srcreg);
                    }
                }
            }
    }

    return result;
}
2089
// Generates code for a "simple operator" (GTK_SMPOP) floating-point node,
// dispatching to specialized routines for assignments, arithmetic, and casts,
// and handling the remaining cases (indirection, return, comma, negation,
// math intrinsics, GT_CKFINITE) inline.
void CodeGen::genCodeForTreeStackFP_SmpOp(GenTree* tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_SmpOp() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    assert(tree->OperKind() & GTK_SMPOP);

    switch (tree->OperGet())
    {
        // Assignment
        case GT_ASG:
        {
            genCodeForTreeStackFP_Asg(tree);
            break;
        }

        // Arithmetic binops
        case GT_ADD:
        case GT_SUB:
        case GT_MUL:
        case GT_DIV:
        {
            genCodeForTreeStackFP_Arithm(tree);
            break;
        }

        // Asg-Arithmetic ops
        case GT_ASG_ADD:
        case GT_ASG_SUB:
        case GT_ASG_MUL:
        case GT_ASG_DIV:
        {
            genCodeForTreeStackFP_AsgArithm(tree);
            break;
        }

        case GT_IND:
        case GT_LEA:
        {
            regMaskTP addrReg;

            // Make sure the address value is 'addressable'
            addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);

            // Load the value onto the FP stack
            regNumber reg = regSet.PickRegFloat();
            genLoadStackFP(tree, reg);

            genDoneAddressable(tree, addrReg, RegSet::FREE_REG);

            genCodeForTreeStackFP_DONE(tree, reg);

            break;
        }

        case GT_RETURN:
        {
            GenTree* op1 = tree->gtOp.gtOp1;
            assert(op1);

            // Compute the result onto the FP stack
            if (op1->gtType == TYP_FLOAT)
            {
#if ROUND_FLOAT
                bool roundOp1 = false;

                switch (getRoundFloatLevel())
                {
                    case ROUND_NEVER:
                        /* No rounding at all */
                        break;

                    case ROUND_CMP_CONST:
                        /* No rounding of return values at this level */
                        break;

                    case ROUND_CMP:
                        /* Round all comparands and return values*/
                        roundOp1 = true;
                        break;

                    case ROUND_ALWAYS:
                        /* Round everything */
                        roundOp1 = true;
                        break;

                    default:
                        assert(!"Unsupported Round Level");
                        break;
                }
#endif
                genCodeForTreeFlt(op1);
            }
            else
            {
                assert(op1->gtType == TYP_DOUBLE);
                genCodeForTreeFloat(op1);

#if ROUND_FLOAT
                // A long-to-double conversion may need explicit rounding of the result
                if ((op1->gtOper == GT_CAST) && (op1->CastFromType() == TYP_LONG))
                    genRoundFpExpressionStackFP(op1);
#endif
            }

            // kill enregistered variables; the return value itself is left on the
            // physical x87 stack, but the virtual FP state must be empty on exit
            compCurFPState.Pop();
            assert(compCurFPState.m_uStackSize == 0);
            break;
        }

        case GT_COMMA:
        {
            GenTree* op1 = tree->gtOp.gtOp1;
            GenTree* op2 = tree->gtGetOp2IfPresent();

            if (tree->gtFlags & GTF_REVERSE_OPS)
            {
                // Evaluate the value (op2) first; keep it marked used (and reload
                // it if spilled) across the side-effect evaluation of op1.
                genCodeForTreeFloat(op2);

                regSet.SetUsedRegFloat(op2, true);

                genEvalSideEffects(op1);

                if (op2->gtFlags & GTF_SPILLED)
                {
                    UnspillFloat(op2);
                }

                regSet.SetUsedRegFloat(op2, false);
            }
            else
            {
                genEvalSideEffects(op1);
                genCodeForTreeFloat(op2);
            }

            genCodeForTreeStackFP_DONE(tree, op2->gtRegNum);
            break;
        }
        case GT_CAST:
        {
            genCodeForTreeStackFP_Cast(tree);
            break;
        }

        case GT_NEG:
        {
            GenTree* op1 = tree->gtOp.gtOp1;

            // get the tree into a register
            genCodeForTreeFloat(op1);

            // Take reg to top of stack
            FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);

            // change the sign
            instGen(INS_fchs);

            // mark register that holds tree
            genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
            return;
        }
        case GT_INTRINSIC:
        {
            assert(compiler->IsMathIntrinsic(tree));

            GenTree* op1 = tree->gtOp.gtOp1;

            // get tree into a register
            genCodeForTreeFloat(op1);

            // Take reg to top of stack
            FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);

            // Table of x87 instructions indexed by CORINFO_INTRINSIC_* id;
            // the asserts below verify that the layout matches the enum.
            static const instruction mathIns[] = {
                INS_fsin, INS_fcos, INS_invalid, INS_fsqrt, INS_fabs, INS_frndint,
            };

            assert(mathIns[CORINFO_INTRINSIC_Sin] == INS_fsin);
            assert(mathIns[CORINFO_INTRINSIC_Cos] == INS_fcos);
            assert(mathIns[CORINFO_INTRINSIC_Sqrt] == INS_fsqrt);
            assert(mathIns[CORINFO_INTRINSIC_Abs] == INS_fabs);
            assert(mathIns[CORINFO_INTRINSIC_Round] == INS_frndint);
            assert((unsigned)(tree->gtIntrinsic.gtIntrinsicId) < _countof(mathIns));
            instGen(mathIns[tree->gtIntrinsic.gtIntrinsicId]);

            // mark register that holds tree
            genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);

            return;
        }
        case GT_CKFINITE:
        {
            // Throw ArithmeticException if the operand is a NaN or infinity.
            // Both are recognized by an all-ones exponent field (IEEE 754).
            TempDsc* temp;
            int      offs;

            GenTree* op1 = tree->gtOp.gtOp1;

            // Offset of the DWord containing the exponent
            offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);

            // get tree into a register
            genCodeForTreeFloat(op1);

            // Take reg to top of stack
            FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);

            temp          = compiler->tmpGetTemp(op1->TypeGet());
            emitAttr size = EmitSize(op1);

            // Store the value from the FP stack into the temp
            getEmitter()->emitIns_S(INS_fst, size, temp->tdTempNum(), 0);

            regNumber reg = regSet.rsPickReg();

            // Load the DWord containing the exponent into a general reg.
            inst_RV_ST(INS_mov, reg, temp, offs, op1->TypeGet(), EA_4BYTE);
            compiler->tmpRlsTemp(temp);

            // 'reg' now contains the DWord containing the exponent
            regTracker.rsTrackRegTrash(reg);

            // Mask of exponent with all 1's - appropriate for given type

            int expMask;
            expMask = (op1->gtType == TYP_FLOAT) ? 0x7F800000  // TYP_FLOAT
                                                 : 0x7FF00000; // TYP_DOUBLE

            // Check if the exponent is all 1's

            inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
            inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);

            // If exponent was all 1's, we need to throw ArithExcep
            genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);

            genUpdateLife(tree);

            genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
            break;
        }
        default:
            NYI("opertype");
    }
}
2340
// Generates code for a floating-point GT_CAST node.
//
// Integer-to-float casts are performed with 'fild', which can only read from
// memory - so register operands (including long register pairs) are first
// spilled to a temp. Float<->double casts round the value to the destination
// precision, except when the value was just loaded from memory (in which case
// it already has the right precision).
void CodeGen::genCodeForTreeStackFP_Cast(GenTree* tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Cast() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

#if ROUND_FLOAT
    bool roundResult = true;
#endif

    regMaskTP addrReg;
    TempDsc*  temp;
    emitAttr  size;

    GenTree* op1 = tree->gtOp.gtOp1;

    // If op1 is a comma expression, evaluate the non-last parts, make op1 be the rest.
    op1 = genCodeForCommaTree(op1);

    switch (op1->gtType)
    {
        case TYP_BOOL:
        case TYP_BYTE:
        case TYP_UBYTE:
        case TYP_USHORT:
        case TYP_SHORT:
        {

            // Operand too small for 'fild', load it into a register
            genCodeForTree(op1, 0);

#if ROUND_FLOAT
            // no need to round, can't overflow float or dbl
            roundResult = false;
#endif

            // fall through
        }
        case TYP_INT:
        case TYP_BYREF:
        case TYP_LONG:
        {
            // Can't 'fild' a constant, it has to be loaded from memory
            switch (op1->gtOper)
            {
                case GT_CNS_INT:
                    op1 = genMakeConst(&op1->gtIntCon.gtIconVal, TYP_INT, tree, false);
                    break;

                case GT_CNS_LNG:
                    // Our encoder requires fild on m64int to be 64-bit aligned.
                    op1 = genMakeConst(&op1->gtLngCon.gtLconVal, TYP_LONG, tree, true);
                    break;
                default:
                    break;
            }

            addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);

            // Grab register for the cast
            regNumber reg = regSet.PickRegFloat();
            genMarkTreeInReg(tree, reg);
            compCurFPState.Push(reg);

            // Is the value now sitting in a register?
            if (op1->InReg())
            {
                // We'll have to store the value into the stack */
                size = EA_ATTR(roundUp(genTypeSize(op1->gtType)));
                temp = compiler->tmpGetTemp(op1->TypeGet());

                // Move the value into the temp
                if (op1->gtType == TYP_LONG)
                {
                    // A long lives in a register pair; either half may have been
                    // spilled to the stack (REG_STK), so move it to the temp one
                    // half at a time, reusing the enregistered half as a transfer
                    // register where needed.
                    regPairNo regPair = op1->gtRegPair;

                    // This code is pretty ugly, but straightforward

                    if (genRegPairLo(regPair) == REG_STK)
                    {
                        regNumber rg1 = genRegPairHi(regPair);

                        assert(rg1 != REG_STK);

                        /* Move enregistered half to temp */

                        inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);

                        /* Move lower half to temp via "high register" */

                        inst_RV_TT(INS_mov, rg1, op1, 0);
                        inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);

                        /* Reload transfer register */

                        inst_RV_ST(INS_mov, rg1, temp, 4, TYP_LONG);
                    }
                    else if (genRegPairHi(regPair) == REG_STK)
                    {
                        regNumber rg1 = genRegPairLo(regPair);

                        assert(rg1 != REG_STK);

                        /* Move enregistered half to temp */

                        inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);

                        /* Move high half to temp via "low register" */

                        inst_RV_TT(INS_mov, rg1, op1, 4);
                        inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);

                        /* Reload transfer register */

                        inst_RV_ST(INS_mov, rg1, temp, 0, TYP_LONG);
                    }
                    else
                    {
                        /* Move the value into the temp */

                        inst_ST_RV(INS_mov, temp, 0, genRegPairLo(regPair), TYP_LONG);
                        inst_ST_RV(INS_mov, temp, 4, genRegPairHi(regPair), TYP_LONG);
                    }
                    genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

                    /* Load the long from the temp */

                    inst_FS_ST(INS_fildl, size, temp, 0);
                }
                else
                {
                    /* Move the value into the temp */

                    inst_ST_RV(INS_mov, temp, 0, op1->gtRegNum, TYP_INT);

                    genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

                    /* Load the integer from the temp */

                    inst_FS_ST(INS_fild, size, temp, 0);
                }

                // We no longer need the temp
                compiler->tmpRlsTemp(temp);
            }
            else
            {
                // Load the value from its address
                if (op1->gtType == TYP_LONG)
                    inst_TT(INS_fildl, op1);
                else
                    inst_TT(INS_fild, op1);

                genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
            }

#if ROUND_FLOAT
            /* integer to fp conversions can overflow. roundResult
            * is cleared above in cases where it can't
            */
            if (roundResult &&
                ((tree->gtType == TYP_FLOAT) || ((tree->gtType == TYP_DOUBLE) && (op1->gtType == TYP_LONG))))
                genRoundFpExpression(tree);
#endif

            break;
        }
        case TYP_FLOAT:
        {
            //  This is a cast from float to double.
            //  Note that conv.r(r4/r8) and conv.r8(r4/r8) are indistinguishable
            //  as we will generate GT_CAST-TYP_DOUBLE for both. This would
            //  cause us to truncate precision in either case. However,
            //  conv.r was needless in the first place, and should have
            //  been removed */
            genCodeForTreeFloat(op1); // Truncate its precision

            if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
                op1->gtOper == GT_IND || op1->gtOper == GT_LEA)
            {
                // We take advantage here of the fact that we know that our
                // codegen will have just loaded this from memory, and that
                // therefore, no cast is really needed.
                // Ideally we wouldn't do this optimization here, but in
                // morphing, however, we need to do this after regalloc, as
                // this optimization doesnt apply if what we're loading is a
                // regvar
            }
            else
            {
                genRoundFpExpressionStackFP(op1, tree->TypeGet());
            }

            // Assign reg to tree
            genMarkTreeInReg(tree, op1->gtRegNum);

            break;
        }
        case TYP_DOUBLE:
        {
            // This is a cast from double to float or double
            // Load the value, store as destType, load back
            genCodeForTreeFlt(op1);

            if ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
                 op1->gtOper == GT_IND || op1->gtOper == GT_LEA) &&
                tree->TypeGet() == TYP_DOUBLE)
            {
                // We take advantage here of the fact that we know that our
                // codegen will have just loaded this from memory, and that
                // therefore, no cast is really needed.
                // Ideally we wouldn't do this optimization here, but in
                // morphing. However, we need to do this after regalloc, as
                // this optimization doesnt apply if what we're loading is a
                // regvar
            }
            else
            {
                genRoundFpExpressionStackFP(op1, tree->TypeGet());
            }

            // Assign reg to tree
            genMarkTreeInReg(tree, op1->gtRegNum);

            break;
        }
        default:
        {
            assert(!"unsupported cast");
            break;
        }
    }
}
2579
2580 void CodeGen::genCodeForTreeStackFP_Special(GenTree* tree)
2581 {
2582 #ifdef DEBUG
2583     if (compiler->verbose)
2584     {
2585         printf("genCodeForTreeStackFP_Special() ");
2586         Compiler::printTreeID(tree);
2587         printf("\n");
2588     }
2589 #endif // DEBUG
2590
2591     switch (tree->OperGet())
2592     {
2593         case GT_CALL:
2594         {
2595             genCodeForCall(tree->AsCall(), true);
2596             break;
2597         }
2598         default:
2599             NYI("genCodeForTreeStackFP_Special");
2600             break;
2601     }
2602 }
2603
2604 void CodeGen::genCodeForTreeFloat(GenTree* tree, RegSet::RegisterPreference* pref)
2605 {
2606     // TestTransitions();
2607     genTreeOps oper;
2608     unsigned   kind;
2609
2610     assert(tree);
2611     assert(tree->gtOper != GT_STMT);
2612     assert(varTypeIsFloating(tree));
2613
2614     // What kind of node do we have?
2615     oper = tree->OperGet();
2616     kind = tree->OperKind();
2617
2618     if (kind & GTK_CONST)
2619     {
2620         genCodeForTreeStackFP_Const(tree);
2621     }
2622     else if (kind & GTK_LEAF)
2623     {
2624         genCodeForTreeStackFP_Leaf(tree);
2625     }
2626     else if (kind & GTK_SMPOP)
2627     {
2628         genCodeForTreeStackFP_SmpOp(tree);
2629     }
2630     else
2631     {
2632         genCodeForTreeStackFP_Special(tree);
2633     }
2634
2635 #ifdef DEBUG
2636     if (verbose)
2637     {
2638         JitDumpFPState();
2639     }
2640     assert(compCurFPState.IsConsistent());
2641 #endif
2642 }
2643
// Emits an x87 comparison between 'tos' (which is brought to the top of the
// FP stack) and 'other' (a regvar, stack register, or memory operand).
// Uses fcomi/fcomip when the target supports it; otherwise falls back to
// fcom/fcomp followed by fnstsw/sahf to move the FPU condition codes into
// EFLAGS.
//
// Return Value:
//    true if the comparison was emitted with the operands reversed (i.e. it
//    compared 'other' against 'tos'), in which case the caller must swap the
//    relational operator; false otherwise.
bool CodeGen::genCompInsStackFP(GenTree* tos, GenTree* other)
{
    // assume gensetupop done

    bool bUseFcomip = genUse_fcomip();
    bool bReverse   = false;

    // Take op1 to top of the stack
    FlatFPX87_MoveToTOS(&compCurFPState, tos->gtRegNum);

    // We pop top of stack if it's not a live regvar
    // (also pop when the operand was loaded into a temp stack register)
    bool bPopTos   = !(tos->IsRegVar() && !tos->IsRegVarDeath()) || (tos->InReg());
    bool bPopOther = !(other->IsRegVar() && !other->IsRegVarDeath()) || (other->InReg());

    assert(tos->IsRegVar() || (tos->InReg()));

    if (!(other->IsRegVar() || (other->InReg())))
    {
        // op2 in memory
        assert(bPopOther);

        if (bUseFcomip)
        {
            // We should have space for a load
            assert(compCurFPState.m_uStackSize < FP_PHYSICREGISTERS);

            // load from mem, now the comparison will be the other way around
            inst_FS_TT(INS_fld, other);
            inst_FN(INS_fcomip, 1);

            // pop if we've been asked to do so
            if (bPopTos)
            {
                inst_FS(INS_fstp, 0);
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }

            bReverse = true;
        }
        else
        {
            // compare directly with memory
            if (bPopTos)
            {
                inst_FS_TT(INS_fcomp, other);
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }
            else
            {
                inst_FS_TT(INS_fcom, other);
            }
        }
    }
    else
    {
        // Both operands are on the FP stack; pick the popping/non-popping
        // form of fcomi or fcom according to whether each operand dies here.
        if (bUseFcomip)
        {
            if (bPopTos)
            {
                inst_FN(INS_fcomip, compCurFPState.VirtualToST(other->gtRegNum));
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }
            else
            {
                inst_FN(INS_fcomi, compCurFPState.VirtualToST(other->gtRegNum));
            }

            if (bPopOther)
            {
                FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
            }
        }
        else
        {
            if (bPopTos)
            {
                inst_FN(INS_fcomp, compCurFPState.VirtualToST(other->gtRegNum));
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }
            else
            {
                inst_FN(INS_fcom, compCurFPState.VirtualToST(other->gtRegNum));
            }

            if (bPopOther)
            {
                FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
            }
        }
    }

    if (!bUseFcomip)
    {
        // oops, we have to put result of compare in eflags

        // Grab EAX for the result of the fnstsw
        regSet.rsGrabReg(RBM_EAX);

        // Generate the 'fnstsw' and test its result
        inst_RV(INS_fnstsw, REG_EAX, TYP_INT);
        regTracker.rsTrackRegTrash(REG_EAX);
        instGen(INS_sahf);
    }

    return bReverse;
}
2750
// Generates a conditional branch for a floating-point relational operator.
// Emits the x87 compare, then a 'jpe' to dispatch the unordered (NaN) case,
// followed by the unsigned-family jump matching the relop (x87 compares set
// ZF/PF/CF like an unsigned integer compare, hence jb/jbe/jae/ja).
//
// Arguments:
//    cond          - the GT_EQ..GT_GT relop node with floating-point operands
//    jumpTrue      - block to branch to when the condition holds
//    jumpFalse     - block to branch to when it does not
//    bDoTransition - passed through to genCondJmpInsStackFP; presumably
//                    controls generation of FP-stack transition blocks at the
//                    jump targets - confirm against that routine
void CodeGen::genCondJumpFltStackFP(GenTree* cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition)
{
    assert(jumpTrue && jumpFalse);
    assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
    assert(varTypeIsFloating(cond->gtOp.gtOp1));

    GenTree*   op1 = cond->gtOp.gtOp1;
    GenTree*   op2 = cond->gtOp.gtOp2;
    genTreeOps cmp = cond->OperGet();

    // Prepare operands.
    genSetupForOpStackFP(op1, op2, false, false, true, false);

    GenTree* tos;
    GenTree* other;
    bool     bReverseCmp = false;

    // Prefer comparing with op2 on top of the stack when it's already there
    // and will be popped, saving a stack shuffle; that reverses the compare.
    if ((op2->IsRegVar() || (op2->InReg())) &&                     // op2 is in a reg
        (compCurFPState.TopVirtual() == (unsigned)op2->gtRegNum && // Is it already at the top of the stack?
         (!op2->IsRegVar() || op2->IsRegVarDeath())))              // are we going to pop it off?
    {
        tos         = op2;
        other       = op1;
        bReverseCmp = true;
    }
    else
    {
        tos         = op1;
        other       = op2;
        bReverseCmp = false;
    }

    // genCompInsStackFP may itself reverse the comparison; fold that in.
    if (genCompInsStackFP(tos, other))
    {
        bReverseCmp = !bReverseCmp;
    }

    // do .un comparison
    if (cond->gtFlags & GTF_RELOP_NAN_UN)
    {
        // Unordered compares succeed on NaN:
        // Generate the first jump (NaN check)
        genCondJmpInsStackFP(EJ_jpe, jumpTrue, NULL, bDoTransition);
    }
    else
    {
        jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;

        // Generate the first jump (NaN check)
        genCondJmpInsStackFP(EJ_jpe, jumpFalse, NULL, bDoTransition);
    }

    /* Generate the second jump (comparison) */
    // Indexed by (cmp - GT_EQ); uses unsigned-compare jumps (see note above).
    const static BYTE dblCmpTstJmp2[] = {
        EJ_je,  // GT_EQ
        EJ_jne, // GT_NE
        EJ_jb,  // GT_LT
        EJ_jbe, // GT_LE
        EJ_jae, // GT_GE
        EJ_ja,  // GT_GT
    };

    // Swap comp order if necessary
    if (bReverseCmp)
    {
        cmp = GenTree::SwapRelop(cmp);
    }

    genCondJmpInsStackFP((emitJumpKind)dblCmpTstJmp2[cmp - GT_EQ], jumpTrue, jumpFalse, bDoTransition);
}
2820
// Returns a block that, when jumped to, leaves the x87 stack in the state
// 'pTarget' expects. If pTarget can already accept 'pState' (same recorded
// state, or nothing on the stack) it is returned directly; otherwise a new
// BBJ_ALWAYS transition block is created, seeded with a copy of the state,
// inserted into the matching hot/cold region, and returned.
BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget)
{
    // Fast paths where a transition block is not necessary
    if ((pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87)) || pState->IsEmpty())
    {
        return pTarget;
    }

    // We shouldn't have any handlers if we're generating transition blocks, as we don't know
    // how to recover them
    assert(compiler->compMayHaveTransitionBlocks);
    assert(compiler->compHndBBtabCount == 0);

#ifdef DEBUG
    compiler->fgSafeBasicBlockCreation = true;
#endif

    // Create a temp block
    BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);

#ifdef DEBUG
    compiler->fgSafeBasicBlockCreation = false;
#endif

    // The transition block computes nothing itself, so it inherits pFrom's
    // live-out set on both ends.
    VarSetOps::Assign(compiler, pBlock->bbLiveIn, pFrom->bbLiveOut);
    VarSetOps::Assign(compiler, pBlock->bbLiveOut, pFrom->bbLiveOut);

    pBlock->bbJumpDest = pTarget;
    pBlock->bbFlags |= BBF_JMP_TARGET;
    //
    // If either pFrom or pTarget are cold blocks then
    // the transition block also must be cold
    //
    pBlock->bbFlags |= (pFrom->bbFlags & BBF_COLD);
    pBlock->bbFlags |= (pTarget->bbFlags & BBF_COLD);

    // The FP state for the block is the same as the current one
    pBlock->bbFPStateX87 = FlatFPAllocFPState(pState);

    if ((pBlock->bbFlags & BBF_COLD) || (compiler->fgFirstColdBlock == NULL))
    {
        //
        // If this block is cold or if all blocks are hot
        // then we just insert it at the end of the method.
        //
        compiler->fgMoveBlocksAfter(pBlock, pBlock, compiler->fgLastBBInMainFunction());
    }
    else
    {
        //
        // This block is hot so we need to insert it in the hot region
        // of the method.
        //
        BasicBlock* lastHotBlock = compiler->fgFirstColdBlock->bbPrev;
        noway_assert(lastHotBlock != nullptr);

        if (lastHotBlock->bbFallsThrough())
            NO_WAY("Bad fgFirstColdBlock in genTransitionBlockStackFP()");

        //
        // Insert pBlock between lastHotBlock and fgFirstColdBlock
        //
        compiler->fgInsertBBafter(lastHotBlock, pBlock);
    }

    return pBlock;
}
2888
// Generates a long (64-bit integer) compare-and-branch when x87 stack
// matching may be needed on the false path too; both false exits are
// funneled through a temp label so they share one FP transition.
void CodeGen::genCondJumpLngStackFP(GenTree* cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
{
    // For the moment, and so we don't have to deal with the amount of special cases
    // we have, will insert a dummy block for jumpTrue (if necessary) that will do the
    // transition for us. For the jumpFalse case, we play a trick. For the false case ,
    // a Long conditional has a fallthrough (least significant DWORD check is false) and
    // also has a jump to the fallthrough (bbNext) if the most significant DWORD check
    // fails. However, we do want to make an FP transition if we're in the later case,
    // So what we do is create a label and make jumpFalse go there. This label is defined
    // before doing the FP transition logic at the end of the block, so now both exit paths
    // for false condition will go through the transition and then fall through to bbnext.
    assert(jumpFalse == compiler->compCurBB->bbNext);

    BasicBlock* pTransition = genCreateTempLabel();

    genCondJumpLng(cond, jumpTrue, pTransition, true);

    genDefineTempLabel(pTransition);
}
2908
// Performs the x87 regvar bookkeeping for a transition between the arms of
// a GT_QMARK: pops registers not live across the edge, kills all current
// regvar mappings, then re-registers the regvars live in 'liveset'.
// NOTE(review): 'nextNode' is not referenced in this body — presumably kept
// for symmetry with the other QMark helpers; confirm before relying on it.
void CodeGen::genQMarkRegVarTransition(GenTree* nextNode, VARSET_VALARG_TP liveset)
{
    // Kill any vars that may die in the transition
    VARSET_TP newLiveSet(VarSetOps::Intersection(compiler, liveset, compiler->optAllFPregVars));

    regMaskTP liveRegIn = genRegMaskFromLivenessStackFP(newLiveSet);
    genCodeForTransitionFromMask(&compCurFPState, liveRegIn);

    unsigned i;

    // Kill all regvars
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if ((genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat))
        {

            genRegVarDeathStackFP(regSet.genRegVarsFloat[i]);
        }
    }

    // Born necessary regvars
    for (i = 0; i < compiler->lvaTrackedCount; i++)
    {
        unsigned   lclVar = compiler->lvaTrackedToVarNum[i];
        LclVarDsc* varDsc = compiler->lvaTable + lclVar;

        assert(varDsc->lvTracked);

        // Re-register only enregistered variables that are in the live set.
        if (varDsc->lvRegister && VarSetOps::IsMember(compiler, newLiveSet, i))
        {
            genRegVarBirthStackFP(varDsc);
        }
    }
}
2943
// Called before generating the 'else' arm of a GT_QMARK: snapshots the
// current FP stack state into 'pState' and performs the regvar transition
// for the variables live at the condition.
void CodeGen::genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTree* nextNode)
{
    assert(regSet.rsMaskLockedFloat == 0);

    // Save current state at colon
    pState->stackState.Init(&compCurFPState);

    // Kill any vars that may die in the transition to then
    genQMarkRegVarTransition(nextNode, varsetCond);
}
2954
// Called after generating the 'else' arm of a GT_QMARK: swaps the saved and
// current FP states (so compCurFPState becomes the state the 'then' arm
// must match), fixes up any regvars the preceding code displaced, and
// performs the regvar transition.
void CodeGen::genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTree* nextNode)
{
    assert(regSet.rsMaskLockedFloat == 0);

    FlatFPStateX87 tempSwap;

    // Save current state. Now tempFPState will store the target state for the else block
    tempSwap.Init(&compCurFPState);

    compCurFPState.Init(&pState->stackState);

    pState->stackState.Init(&tempSwap);

    // Did any regvars die in the then block that are live on entry to the else block?
    unsigned i;
    for (i = 0; i < compiler->lvaTrackedCount; i++)
    {
        if (VarSetOps::IsMember(compiler, varsetCond, i) && VarSetOps::IsMember(compiler, compiler->optAllFPregVars, i))
        {
            // This variable should be live
            unsigned   lclnum = compiler->lvaTrackedToVarNum[i];
            LclVarDsc* varDsc = compiler->lvaTable + lclnum;

            if (regSet.genRegVarsFloat[varDsc->lvRegNum] != varDsc)
            {
                JITDUMP("genQMarkAfterThenBlockStackFP(): Fixing up regvar that was modified in then\n");
                if (regSet.genRegVarsFloat[varDsc->lvRegNum])
                {
                    // Whatever currently occupies the register dies here.
                    genRegVarDeathStackFP(regSet.genRegVarsFloat[varDsc->lvRegNum]);
                }

                genRegVarBirthStackFP(varDsc);
            }
        }
    }

    // Kill any vars that may die in the transition
    genQMarkRegVarTransition(nextNode, varsetCond);
}
2994
// Called after generating the 'then' arm of a GT_QMARK: emits the stack
// transition back to the state saved in 'pState' and adopts it as current.
void CodeGen::genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState)
{
    JITDUMP("genQMarkAfterThenBlockStackFP()\n");
    assert(regSet.rsMaskLockedFloat == 0);

    // Generate transition to the previous one set by the then block
    genCodeForTransitionStackFP(&compCurFPState, &pState->stackState);

    // Update state
    compCurFPState.Init(&pState->stackState);
}
3006
3007 void CodeGenInterface::SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc)
3008 {
3009     regMaskTP mask = genRegMaskFloat(reg, type);
3010
3011     if (varDsc)
3012     {
3013         JITDUMP("marking register %s as a regvar\n", getRegNameFloat(reg, type));
3014
3015         assert(mask && ((regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat | regSet.rsMaskUsedFloat) & mask) == 0);
3016
3017         regSet.rsMaskRegVarFloat |= mask;
3018     }
3019     else
3020     {
3021         JITDUMP("unmarking register %s as a regvar\n", getRegNameFloat(reg, type));
3022
3023         assert(mask && (regSet.rsMaskRegVarFloat & mask));
3024
3025         regSet.rsMaskRegVarFloat &= ~mask;
3026     }
3027
3028     // Update lookup table
3029     regSet.genRegVarsFloat[reg] = varDsc;
3030 }
3031
// Generates a conditional jump. It will do the appropriate stack matching for the jmpTrue.
// We don't use jumpFalse anywhere and the integer codebase assumes that it will be bbnext, and that is
// taken care of at the end of the bb code generation.
void CodeGen::genCondJmpInsStackFP(emitJumpKind jumpKind,
                                   BasicBlock*  jumpTrue,
                                   BasicBlock*  jumpFalse,
                                   bool         bDoTransition)
{
    // Assert the condition above.
    assert(!jumpFalse || jumpFalse == compiler->compCurBB->bbNext || !bDoTransition);

    // Do the fp stack matching.
    if (bDoTransition && !jumpTrue->bbFPStateX87 &&
        FlatFPSameRegisters(&compCurFPState, genRegMaskFromLivenessStackFP(jumpTrue->bbLiveIn)))
    {
        // Target block doesn't have state yet, but has the same registers, so
        // we allocate the block and generate the normal jump
        genCodeForBBTransitionStackFP(jumpTrue);
        inst_JMP(jumpKind, jumpTrue);
    }
    else if (!bDoTransition || compCurFPState.IsEmpty() || // If it's empty, target has to be empty too.
             (jumpTrue->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTrue->bbFPStateX87)))
    {
        // Nothing to do here. Proceed normally and generate the jump
        inst_JMP(jumpKind, jumpTrue);

        if (jumpFalse && jumpFalse != compiler->compCurBB->bbNext)
        {
            inst_JMP(EJ_jmp, jumpFalse);
        }
    }
    else
    {
        // temporal workaround for stack matching
        // do a forward conditional jump, generate the transition and jump to the target
        // The payload is an additional jump instruction, but both jumps will be correctly
        // predicted by the processor in the loop case.
        BasicBlock* endLabel = NULL;

        endLabel = genCreateTempLabel();

        // Skip the transition code when the condition fails.
        inst_JMP(emitter::emitReverseJumpKind(jumpKind), endLabel);

        genCodeForBBTransitionStackFP(jumpTrue);

        inst_JMP(EJ_jmp, jumpTrue);

        genDefineTempLabel(endLabel);
    }
}
3082
// Emits a table switch when the FP stack is non-empty: for each target that
// cannot directly accept the current FP state, either copies the state to
// the target, reuses a transition block already created for another entry,
// or creates a new one — then defers to the normal genTableSwitch.
void CodeGen::genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
{
    // Only come here when we have to do something special for the FPU stack!
    //
    assert(!compCurFPState.IsEmpty());
    VARSET_TP liveInFP(VarSetOps::MakeEmpty(compiler));
    VARSET_TP liveOutFP(VarSetOps::MakeEmpty(compiler));
    for (unsigned i = 0; i < jumpCnt; i++)
    {
        // FP regvars live into the target vs. FP regvars live out of this block.
        VarSetOps::Assign(compiler, liveInFP, jumpTab[i]->bbLiveIn);
        VarSetOps::IntersectionD(compiler, liveInFP, compiler->optAllFPregVars);
        VarSetOps::Assign(compiler, liveOutFP, compiler->compCurBB->bbLiveOut);
        VarSetOps::IntersectionD(compiler, liveOutFP, compiler->optAllFPregVars);

        if (!jumpTab[i]->bbFPStateX87 && VarSetOps::Equal(compiler, liveInFP, liveOutFP))
        {
            // Hasn't state yet and regvar set is the same, so just copy state and don't change the jump
            jumpTab[i]->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);
        }
        else if (jumpTab[i]->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTab[i]->bbFPStateX87))
        {
            // Same state, don't change the jump
        }
        else
        {
            // We have to do a transition. First check if we can reuse another one
            unsigned j;
            for (j = 0; j < i; j++)
            {
                // Has to be already forwarded. If not it can't be targetting the same block
                if (jumpTab[j]->bbFlags & BBF_FORWARD_SWITCH)
                {
                    if (jumpTab[i] == jumpTab[j]->bbJumpDest)
                    {
                        // yipee, we can reuse this transition block
                        jumpTab[i] = jumpTab[j];
                        break;
                    }
                }
            }

            if (j == i)
            {
                // We will have to create a new transition block
                jumpTab[i] = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTab[i]);

                // Tag it so later entries targeting the same block can reuse it.
                jumpTab[i]->bbFlags |= BBF_FORWARD_SWITCH;
            }
        }
    }

    // Clear flag
    for (unsigned i = 0; i < jumpCnt; i++)
    {
        jumpTab[i]->bbFlags &= ~BBF_FORWARD_SWITCH;
    }

    // everything's fixed now, so go down the normal path
    return genTableSwitch(reg, jumpCnt, jumpTab);
}
3143
3144 bool CodeGen::genConstantLoadStackFP(GenTree* tree, bool bOnlyNoMemAccess)
3145 {
3146     assert(tree->gtOper == GT_CNS_DBL);
3147
3148     bool        bFastConstant  = false;
3149     instruction ins_ConstantNN = INS_fldz; // keep compiler happy
3150
3151     // Both positive 0 and 1 are represnetable in float and double, beware if we add other constants
3152     switch (*((__int64*)&(tree->gtDblCon.gtDconVal)))
3153     {
3154         case 0:
3155             // CAREFUL here!, -0 is different than +0, a -0 shouldn't issue a fldz.
3156             ins_ConstantNN = INS_fldz;
3157             bFastConstant  = true;
3158             break;
3159         case I64(0x3ff0000000000000):
3160             ins_ConstantNN = INS_fld1;
3161             bFastConstant  = true;
3162     }
3163
3164     if (bFastConstant == false && bOnlyNoMemAccess)
3165     {
3166         // Caller asked only to generate instructions if it didn't involve memory accesses
3167         return false;
3168     }
3169
3170     if (bFastConstant)
3171     {
3172         assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
3173         instGen(ins_ConstantNN);
3174     }
3175     else
3176     {
3177         GenTree* addr;
3178         if (tree->gtType == TYP_FLOAT || StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal))
3179         {
3180             float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
3181             addr    = genMakeConst(&f, TYP_FLOAT, tree, false);
3182         }
3183         else
3184         {
3185             addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
3186         }
3187
3188         inst_FS_TT(INS_fld, addr);
3189     }
3190
3191     return true;
3192 }
3193
// Function called at the end of every statement. For stack based x87 its mission is to
// remove any remaining temps on the stack.
void CodeGen::genEndOfStatement()
{
    unsigned i;

#ifdef DEBUG
    // Sanity check: at a statement boundary at most one non-enregistered
    // temp should remain on the FP stack.
    unsigned uTemps = 0;
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if (compCurFPState.Mapped(i) &&                                      // register is mapped
            (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
        {
            uTemps++;
        }
    }
    assert(uTemps <= 1);
#endif

    // Unload every mapped register that isn't holding an enregistered variable.
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if (compCurFPState.Mapped(i) &&                                      // register is mapped
            (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
        {
            // remove register from stacks
            FlatFPX87_Unload(&compCurFPState, i);
        }
    }

    assert(ConsistentAfterStatementStackFP());
}
3226
3227 bool CodeGen::StackFPIsSameAsFloat(double d)
3228 {
3229     if (forceCastToFloat(d) == d)
3230     {
3231         JITDUMP("StackFPIsSameAsFloat is true for value %lf\n", d);
3232         return true;
3233     }
3234     else
3235     {
3236         JITDUMP("StackFPIsSameAsFloat is false for value %lf\n", d);
3237     }
3238
3239     return false;
3240 }
3241
// Makes 'tree' addressable for a subsequent FP operation, returning the
// (possibly replaced) tree and reporting through the out parameters the
// integer/FP registers that were kept in the process.
//
// Arguments:
//    tree                     - the operand to make addressable
//    regMaskIntPtr            - [out] integer registers kept for the address
//    regMaskFltPtr            - [out] FP virtual registers kept
//    bCollapseConstantDoubles - if true, double constants exactly
//                               representable as float are emitted as
//                               4-byte constants
GenTree* CodeGen::genMakeAddressableStackFP(GenTree*   tree,
                                            regMaskTP* regMaskIntPtr,
                                            regMaskTP* regMaskFltPtr,
                                            bool       bCollapseConstantDoubles)
{
    *regMaskIntPtr = *regMaskFltPtr = 0;

    switch (tree->OperGet())
    {
        case GT_CNS_DBL:
            if (tree->gtDblCon.gtDconVal == 0.0 || tree->gtDblCon.gtDconVal == 1.0)
            {
                // For constants like 0 or 1 don't waste memory
                genCodeForTree(tree, 0);
                regSet.SetUsedRegFloat(tree, true);

                *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
                return tree;
            }
            else
            {
                GenTree* addr;
                if (tree->gtType == TYP_FLOAT ||
                    (bCollapseConstantDoubles && StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal)))
                {
                    // Value round-trips through float: emit the smaller constant.
                    float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
                    addr    = genMakeConst(&f, TYP_FLOAT, tree, true);
                }
                else
                {
                    addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
                }
#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("Generated new constant in tree ");
                    Compiler::printTreeID(addr);
                    printf(" with value %lf\n", tree->gtDblCon.gtDconVal);
                }
#endif // DEBUG
                tree->ReplaceWith(addr, compiler);
                return tree;
            }
            break;
        case GT_REG_VAR:
            // We take care about this in genKeepAddressableStackFP
            return tree;
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_CLS_VAR:
            // Already directly addressable.
            return tree;

        case GT_LEA:
            if (!genMakeIndAddrMode(tree, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
            {
                assert(false);
            }
            genUpdateLife(tree);
            return tree;

        case GT_IND:
            // Try to make the address directly addressable

            if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
            {
                genUpdateLife(tree);
                return tree;
            }
            else
            {
                // Couldn't form an address mode: evaluate the address into
                // a register and keep it.
                GenTree* addr = tree;
                tree          = tree->gtOp.gtOp1;

                genCodeForTree(tree, 0);
                regSet.rsMarkRegUsed(tree, addr);

                *regMaskIntPtr = genRegMask(tree->gtRegNum);
                return addr;
            }

        // fall through (NOTE(review): unreachable — both GT_IND branches
        // return above)

        default:
            // Anything else: compute the value onto the FP stack and keep it.
            genCodeForTreeFloat(tree);
            regSet.SetUsedRegFloat(tree, true);

            // update mask
            *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);

            return tree;
            break;
    }
}
3335
// Re-establishes addressability of 'tree' (previously made addressable by
// genMakeAddressableStackFP) after intervening code may have spilled
// registers. On return the out parameters describe the registers now held.
void CodeGen::genKeepAddressableStackFP(GenTree* tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr)
{
    regMaskTP regMaskInt, regMaskFlt;

    regMaskInt = *regMaskIntPtr;
    regMaskFlt = *regMaskFltPtr;

    *regMaskIntPtr = *regMaskFltPtr = 0;

    switch (tree->OperGet())
    {
        case GT_REG_VAR:
            // If register has been spilled, unspill it
            if (tree->gtFlags & GTF_SPILLED)
            {
                UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]);
            }

            // If regvar is dying, take it out of the regvar mask
            if (tree->IsRegVarDeath())
            {
                genRegVarDeathStackFP(tree);
            }
            genUpdateLife(tree);

            return;
        case GT_CNS_DBL:
        {
            if (tree->gtFlags & GTF_SPILLED)
            {
                UnspillFloat(tree);
            }

            *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);

            return;
        }
        case GT_LCL_FLD:
        case GT_LCL_VAR:
        case GT_CLS_VAR:
            genUpdateLife(tree);
            return;
        case GT_IND:
        case GT_LEA:
            if (regMaskFlt)
            {
                // Value was kept in an FP register: handled by the default
                // case below (intentional fall through).
            }
            else
            {
                *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0);
                *regMaskFltPtr = 0;
                return;
            }
        default:

            // Value lives in an FP virtual register: unspill if necessary
            // and report the register we hold.
            *regMaskIntPtr = 0;
            if (tree->gtFlags & GTF_SPILLED)
            {
                UnspillFloat(tree);
            }
            *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
            return;
    }
}
3401
3402 void CodeGen::genDoneAddressableStackFP(GenTree*        tree,
3403                                         regMaskTP       addrRegInt,
3404                                         regMaskTP       addrRegFlt,
3405                                         RegSet::KeepReg keptReg)
3406 {
3407     assert(!(addrRegInt && addrRegFlt));
3408
3409     if (addrRegInt)
3410     {
3411         return genDoneAddressable(tree, addrRegInt, keptReg);
3412     }
3413     else if (addrRegFlt)
3414     {
3415         if (keptReg == RegSet::KEEP_REG)
3416         {
3417             for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
3418             {
3419                 if (genRegMaskFloat((regNumber)i) & addrRegFlt)
3420                 {
3421                     regSet.SetUsedRegFloat(tree, false);
3422                 }
3423             }
3424         }
3425     }
3426 }
3427
// Removes virtual register 'uVirtual' from the tracked FP stack without
// emitting any code; only legal when it is the top-of-stack entry.
void CodeGen::FlatFPX87_Kill(FlatFPStateX87* pState, unsigned uVirtual)
{
    JITDUMP("Killing %s\n", regVarNameStackFP((regNumber)uVirtual));

    assert(pState->TopVirtual() == uVirtual);
    pState->Pop();
}
3435
// Records a push of virtual register 'uRegister' in the tracked FP state.
// No instruction is emitted here ('bEmitCode' is unused in this body);
// presumably the caller emits the actual load itself — confirm at call sites.
void CodeGen::FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned uRegister, bool bEmitCode)
{
    JITDUMP("Pushing %s to stack\n", regVarNameStackFP((regNumber)uRegister));

    pState->Push(uRegister);
}
3442
3443 unsigned CodeGen::FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode)
3444 {
3445     assert(pState->m_uStackSize > 0);
3446
3447     // Update state
3448     unsigned uVirtual = pState->Pop();
3449
3450     // Emit instruction
3451     if (bEmitCode)
3452     {
3453         inst_FS(INS_fstp, 0);
3454     }
3455
3456     return (uVirtual);
3457 }
3458
// Returns the virtual register currently at the top of the tracked FP
// stack. 'bEmitCode' is unused: querying the top never emits code.
unsigned CodeGen::FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode)
{
    return pState->TopVirtual();
}
3463
// Removes virtual register 'uVirtual' from the FP stack. If it is not the
// top entry, a single fstp to its physical slot both stores ST(0) into that
// slot and pops the stack — so the old top takes over the victim's position.
void CodeGen::FlatFPX87_Unload(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
{
    if (uVirtual != pState->TopVirtual())
    {
        // We will do an fstp to the right place

        // Update state
        unsigned uStack  = pState->m_uVirtualMap[uVirtual];
        unsigned uPhysic = pState->StackToST(uStack);

        // The victim's flat-stack slot is taken over by the current top.
        pState->Unmap(uVirtual);
        pState->Associate(pState->TopVirtual(), uStack);
        pState->m_uStackSize--;

#ifdef DEBUG
        // Poison the vacated slot so stale reads are obvious.
        pState->m_uStack[pState->m_uStackSize] = (unsigned)-1;
#endif

        // Emit instruction
        if (bEmitCode)
        {
            inst_FS(INS_fstp, uPhysic);
        }
    }
    else
    {
        // Emit fstp
        FlatFPX87_Pop(pState, bEmitCode);
    }

    assert(pState->IsConsistent());
}
3497
// Brings virtual register 'uVirtual' to the top of the x87 stack,
// exchanging it with the current top if it isn't there already.
void CodeGenInterface::FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
{
    assert(!IsUninitialized(uVirtual));

    JITDUMP("Moving %s to top of stack\n", regVarNameStackFP((regNumber)uVirtual));

    if (uVirtual != pState->TopVirtual())
    {
        FlatFPX87_SwapStack(pState, pState->m_uVirtualMap[uVirtual], pState->TopIndex(), bEmitCode);
    }
    else
    {
        JITDUMP("%s already on the top of stack\n", regVarNameStackFP((regNumber)uVirtual));
    }

    assert(pState->IsConsistent());
}
3515
// Exchanges the tracked-stack entries at flat positions 'i' and 'j',
// emitting the fxch instruction(s) needed to mirror the swap on the real
// x87 stack, and updates both the virtual map and the flat stack array.
void CodeGenInterface::FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode)
{
    assert(i != j);
    assert(i < pState->m_uStackSize);
    assert(j < pState->m_uStackSize);

    JITDUMP("Exchanging ST(%i) and ST(%i)\n", pState->StackToST(i), pState->StackToST(j));

    // issue actual swaps
    int iPhysic = pState->StackToST(i);
    int jPhysic = pState->StackToST(j);

    if (bEmitCode)
    {
        if (iPhysic == 0 || jPhysic == 0)
        {
            // One of the two is ST(0): a single fxch with the other suffices.
            inst_FN(INS_fxch, iPhysic ? iPhysic : jPhysic);
        }
        else
        {
            // Neither is ST(0): route the swap through ST(0) with three
            // exchanges (ST(0) ends up back where it started).
            inst_FN(INS_fxch, iPhysic);
            inst_FN(INS_fxch, jPhysic);
            inst_FN(INS_fxch, iPhysic);
        }
    }

    // Update State

    // Swap Register file
    pState->m_uVirtualMap[pState->m_uStack[i]] = j;
    pState->m_uVirtualMap[pState->m_uStack[j]] = i;

    // Swap stack
    int temp;
    temp                = pState->m_uStack[i];
    pState->m_uStack[i] = pState->m_uStack[j];
    pState->m_uStack[j] = temp;

    assert(pState->IsConsistent());
}
3556
3557 #ifdef DEBUG
3558
3559 void CodeGen::JitDumpFPState()
3560 {
3561     int i;
3562
3563     if ((regSet.rsMaskUsedFloat != 0) || (regSet.rsMaskRegVarFloat != 0))
3564     {
3565         printf("FPSTATE\n");
3566         printf("Used virtual registers: ");
3567         for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3568         {
3569             if (genRegMaskFloat((regNumber)i) & regSet.rsMaskUsedFloat)
3570             {
3571                 printf("FPV%i ", i);
3572             }
3573         }
3574         printf("\n");
3575
3576         printf("virtual registers holding reg vars: ");
3577         for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3578         {
3579             if (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat)
3580             {
3581                 printf("FPV%i ", i);
3582             }
3583         }
3584         printf("\n");
3585     }
3586     compCurFPState.Dump();
3587 }
3588 #endif
3589
3590 //
3591 //
3592 //  Register allocation
3593 //
// Parameter record pairing a local (lclnum) with the virtual FP register
// (reg) it is being assigned to. NOTE(review): its uses are outside this
// chunk — presumably passed to a "change to regvar" tree walk; confirm.
struct ChangeToRegVarCallback
{
    unsigned  lclnum;
    regNumber reg;
};
3599
// Per-method initialization for the x87 stack FP register allocator:
// clears the per-register interference sets and the FP regvar sets, resets
// the stack-usage counters, and partitions all tracked variables into
// optAllFloatVars / optAllNonFPvars.
void Compiler::raInitStackFP()
{
    // Reset local/reg interference
    for (int i = 0; i < REG_FPCOUNT; i++)
    {
        VarSetOps::AssignNoCopy(this, raLclRegIntfFloat[i], VarSetOps::MakeEmpty(this));
    }

    VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::MakeEmpty(this));
    VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
    VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));

    raCntStkStackFP         = 0;
    raCntWtdStkDblStackFP   = 0;
    raCntStkParamDblStackFP = 0;

    VarSetOps::AssignNoCopy(this, raMaskDontEnregFloat, VarSetOps::MakeEmpty(this));

    // Calculate the set of all tracked FP/non-FP variables
    //  into compiler->optAllFloatVars and compiler->optAllNonFPvars
    unsigned   lclNum;
    LclVarDsc* varDsc;

    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        /* Ignore the variable if it's not tracked */

        if (!varDsc->lvTracked)
            continue;

        /* Get hold of the index and the interference mask for the variable */

        unsigned varNum = varDsc->lvVarIndex;

        /* add to the set of all tracked FP/non-FP variables */

        if (varDsc->IsFloatRegType())
            VarSetOps::AddElemD(this, optAllFloatVars, varNum);
        else
            VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
    }
}
3642
3643 #ifdef DEBUG
3644 void Compiler::raDumpVariableRegIntfFloat()
3645 {
3646     unsigned i;
3647     unsigned j;
3648
3649     for (i = REG_FPV0; i < REG_FPCOUNT; i++)
3650     {
3651         if (!VarSetOps::IsEmpty(this, raLclRegIntfFloat[i]))
3652         {
3653             JITDUMP("FPV%u interferes with ", i);
3654             for (j = 0; j < lvaTrackedCount; j++)
3655             {
3656                 assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[i], optAllFloatVars)));
3657
3658                 if (VarSetOps::IsMember(this, raLclRegIntfFloat[i], j))
3659                 {
3660                     JITDUMP("T%02u/V%02u, ", j, lvaTrackedToVarNum[j]);
3661                 }
3662             }
3663             JITDUMP("\n");
3664         }
3665     }
3666 }
3667 #endif
3668
3669 // Returns the regnum for the variable passed as param takin in account
3670 // the fpvar to register interference mask. If we can't find anything, we
3671 // will return REG_FPNONE
3672 regNumber Compiler::raRegForVarStackFP(unsigned varTrackedIndex)
3673 {
3674     for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
3675     {
3676         if (!VarSetOps::IsMember(this, raLclRegIntfFloat[i], varTrackedIndex))
3677         {
3678             return (regNumber)i;
3679         }
3680     }
3681
3682     return REG_FPNONE;
3683 }
3684
3685 void Compiler::raAddPayloadStackFP(VARSET_VALARG_TP maskArg, unsigned weight)
3686 {
3687     VARSET_TP mask(VarSetOps::Intersection(this, maskArg, optAllFloatVars));
3688     if (VarSetOps::IsEmpty(this, mask))
3689     {
3690         return;
3691     }
3692
3693     for (unsigned i = 0; i < lvaTrackedCount; i++)
3694     {
3695         if (VarSetOps::IsMember(this, mask, i))
3696         {
3697             raPayloadStackFP[i] += weight;
3698         }
3699     }
3700 }
3701
// Ordering predicate for FP enregistration candidates: returns true when
// lv1 is a "more valuable" candidate than lv2.
bool Compiler::raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2)
{
    assert(lv1->lvTracked);
    assert(lv2->lvTracked);

    bool bSmall = (compCodeOpt() == SMALL_CODE);

    // Value = ref count (raw when optimizing for size, weighted otherwise)
    // minus the penalties accumulated for this variable (payload and the
    // overflow entry of the heights table).
    double weight1 = double(bSmall ? lv1->lvRefCnt : lv1->lvRefCntWtd) - double(raPayloadStackFP[lv1->lvVarIndex]) -
                     double(raHeightsStackFP[lv1->lvVarIndex][FP_VIRTUALREGISTERS]);

    double weight2 = double(bSmall ? lv2->lvRefCnt : lv2->lvRefCntWtd) - double(raPayloadStackFP[lv2->lvVarIndex]) -
                     double(raHeightsStackFP[lv2->lvVarIndex][FP_VIRTUALREGISTERS]);

    double diff = weight1 - weight2;

    if (diff)
    {
        return diff > 0 ? true : false;
    }
    else
    {
        // Tie-break on raw ref counts.
        // NOTE(review): this returns true whenever the counts differ at all,
        // regardless of which is larger — looks suspicious (predicate isn't
        // a strict ordering); confirm intent before changing.
        return int(lv1->lvRefCnt - lv2->lvRefCnt) ? true : false;
    }
}
3726
3727 #ifdef DEBUG
3728 // Dumps only interesting vars (the ones that are not enregistered yet
3729 void Compiler::raDumpHeightsStackFP()
3730 {
3731     unsigned i;
3732     unsigned j;
3733
3734     JITDUMP("raDumpHeightsStackFP():\n");
3735     JITDUMP("--------------------------------------------------------\n");
3736     JITDUMP("Weighted Height Table Dump\n            ");
3737     for (i = 0; i < FP_VIRTUALREGISTERS; i++)
3738     {
3739         JITDUMP(" %i    ", i + 1);
3740     }
3741
3742     JITDUMP("OVF\n");
3743
3744     for (i = 0; i < lvaTrackedCount; i++)
3745     {
3746         if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
3747         {
3748             JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
3749
3750             for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
3751             {
3752                 JITDUMP("%5u ", raHeightsStackFP[i][j]);
3753             }
3754             JITDUMP("\n");
3755         }
3756     }
3757
3758     JITDUMP("\nNonweighted Height Table Dump\n            ");
3759     for (i = 0; i < FP_VIRTUALREGISTERS; i++)
3760     {
3761         JITDUMP(" %i    ", i + 1);
3762     }
3763
3764     JITDUMP("OVF\n");
3765
3766     for (i = 0; i < lvaTrackedCount; i++)
3767     {
3768         if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
3769         {
3770             JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
3771
3772             for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
3773             {
3774                 JITDUMP("%5u ", raHeightsNonWeightedStackFP[i][j]);
3775             }
3776             JITDUMP("\n");
3777         }
3778     }
3779     JITDUMP("--------------------------------------------------------\n");
3780 }
3781 #endif
3782
3783 // Increases heights for tracked variables given in mask. We call this
3784 // function when we enregister a variable and will cause the heights to
3785 // shift one place to the right.
3786 void Compiler::raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask)
3787 {
3788     assert(VarSetOps::IsSubset(this, mask, optAllFloatVars));
3789
3790     for (unsigned i = 0; i < lvaTrackedCount; i++)
3791     {
3792         if (VarSetOps::IsMember(this, mask, i))
3793         {
3794             for (unsigned j = FP_VIRTUALREGISTERS; j > 0; j--)
3795             {
3796                 raHeightsStackFP[i][j] = raHeightsStackFP[i][j - 1];
3797
3798 #ifdef DEBUG
3799                 raHeightsNonWeightedStackFP[i][j] = raHeightsNonWeightedStackFP[i][j - 1];
3800 #endif
3801             }
3802
3803             raHeightsStackFP[i][0] = 0;
3804 #ifdef DEBUG
3805             raHeightsNonWeightedStackFP[i][0] = 0;
3806 #endif
3807         }
3808     }
3809
3810 #ifdef DEBUG
3811     raDumpHeightsStackFP();
3812 #endif
3813 }
3814
3815 // This is the prepass we do to adjust refcounts across calls and
3816 // create the height structure.
3817 void Compiler::raEnregisterVarsPrePassStackFP()
3818 {
3819     BasicBlock* block;
3820
3821     assert(!VarSetOps::IsEmpty(this, optAllFloatVars));
3822
3823     // Initialization of the height table
3824     memset(raHeightsStackFP, 0, sizeof(raHeightsStackFP));
3825
3826     // Initialization of the payload table
3827     memset(raPayloadStackFP, 0, sizeof(raPayloadStackFP));
3828
3829 #ifdef DEBUG
3830     memset(raHeightsNonWeightedStackFP, 0, sizeof(raHeightsStackFP));
3831 #endif
3832
3833     // We will have a quick table with the pointers to the interesting varDscs
3834     // so that we don't have to scan for them for each tree.
3835     unsigned FPVars[lclMAX_TRACKED];
3836     unsigned numFPVars = 0;
3837     for (unsigned i = 0; i < lvaTrackedCount; i++)
3838     {
3839         if (VarSetOps::IsMember(this, optAllFloatVars, i))
3840         {
3841             FPVars[numFPVars++] = i;
3842         }
3843     }
3844
3845     assert(numFPVars == VarSetOps::Count(this, optAllFloatVars));
3846
3847     // Things we check here:
3848     //
3849     // We substract 2 for each FP variable that's live across a call, as we will
3850     // have 2 memory accesses to spill and unpsill around it.
3851     //
3852     //
3853     //
3854     VARSET_TP blockLiveOutFloats(VarSetOps::MakeEmpty(this));
3855     for (block = fgFirstBB; block; block = block->bbNext)
3856     {
3857         compCurBB = block;
3858         /*
3859         This opt fails in the case of a variable that has it's entire lifetime contained in the 'then' of
3860         a qmark. The use mask for the whole qmark won't contain that variable as it variable's value comes
3861         from  a def in the else, and the def can't be set for the qmark if the else side of
3862         the qmark doesn't do a def.
3863
3864         See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
3865         'smart' again in the future
3866
3867
3868         if (((block->bbVarUse |
3869               block->bbVarDef |
3870               block->bbLiveIn   ) & optAllFloatVars) == 0)
3871         {
3872             // Fast way out
3873             continue;
3874         }
3875         */
3876         VarSetOps::Assign(this, blockLiveOutFloats, block->bbLiveOut);
3877         VarSetOps::IntersectionD(this, blockLiveOutFloats, optAllFloatVars);
3878         if (!VarSetOps::IsEmpty(this, blockLiveOutFloats))
3879         {
3880             // See comment in compiler.h above declaration of compMayHaveTransitionBlocks
3881             // to understand the reason for this limitation of FP optimizer.
3882             switch (block->bbJumpKind)
3883             {
3884                 case BBJ_COND:
3885                 {
3886                     GenTree* stmt;
3887                     stmt = block->bbTreeList->gtPrev;
3888                     assert(stmt->gtNext == NULL && stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3889
3890                     assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
3891                     GenTree* cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
3892
3893                     assert(cond->OperIsCompare());
3894
3895                     if (cond->gtOp.gtOp1->TypeGet() == TYP_LONG)
3896                     {
3897                         if (compHndBBtabCount > 0)
3898                         {
3899                             // If we have any handlers we won't enregister whatever is live out of this block
3900                             JITDUMP("PERF Warning: Taking out FP candidates due to transition blocks + exception "
3901                                     "handlers.\n");
3902                             VarSetOps::UnionD(this, raMaskDontEnregFloat,
3903                                               VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
3904                         }
3905                         else
3906                         {
3907                             // long conditional jumps can generate transition bloks
3908                             compMayHaveTransitionBlocks = true;
3909                         }
3910                     }
3911
3912                     break;
3913                 }
3914                 case BBJ_SWITCH:
3915                 {
3916                     if (compHndBBtabCount > 0)
3917                     {
3918                         // If we have any handlers we won't enregister whatever is live out of this block
3919                         JITDUMP(
3920                             "PERF Warning: Taking out FP candidates due to transition blocks + exception handlers.\n");
3921                         VarSetOps::UnionD(this, raMaskDontEnregFloat,
3922                                           VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
3923                     }
3924                     else
3925                     {
3926                         // fp vars are live out of the switch, so we may have transition blocks
3927                         compMayHaveTransitionBlocks = true;
3928                     }
3929                     break;
3930                     default:
3931                         break;
3932                 }
3933             }
3934         }
3935
3936         VARSET_TP liveSet(VarSetOps::MakeCopy(this, block->bbLiveIn));
3937         for (GenTree* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
3938         {
3939             assert(stmt->gtOper == GT_STMT);
3940
3941             unsigned prevHeight = stmt->gtStmt.gtStmtList->gtFPlvl;
3942             for (GenTree* tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
3943             {
3944                 VarSetOps::AssignNoCopy(this, liveSet, fgUpdateLiveSet(liveSet, tree));
3945                 switch (tree->gtOper)
3946                 {
3947                     case GT_CALL:
3948                         raAddPayloadStackFP(liveSet, block->getBBWeight(this) * 2);
3949                         break;
3950                     case GT_CAST:
3951                         // For cast from long local var to double, decrement the ref count of the long
3952                         // to avoid store forwarding stall
3953                         if (tree->gtType == TYP_DOUBLE)
3954                         {
3955                             GenTree* op1 = tree->gtOp.gtOp1;
3956                             if (op1->gtOper == GT_LCL_VAR && op1->gtType == TYP_LONG)
3957                             {
3958                                 unsigned int lclNum = op1->gtLclVarCommon.gtLclNum;
3959                                 assert(lclNum < lvaCount);
3960                                 LclVarDsc*   varDsc          = lvaTable + lclNum;
3961                                 unsigned int weightedRefCnt  = varDsc->lvRefCntWtd;
3962                                 unsigned int refCntDecrement = 2 * block->getBBWeight(this);
3963                                 if (refCntDecrement > weightedRefCnt)
3964                                 {
3965                                     varDsc->lvRefCntWtd = 0;
3966                                 }
3967                                 else
3968                                 {
3969                                     varDsc->lvRefCntWtd = weightedRefCnt - refCntDecrement;
3970                                 }
3971                             }
3972                         }
3973                         break;
3974                     default:
3975                         break;
3976                 }
3977
3978                 // Update heights
3979                 unsigned height = tree->gtFPlvl;
3980
3981                 if (height != prevHeight)
3982                 {
3983                     if (height > prevHeight && height < FP_VIRTUALREGISTERS)
3984                     {
3985                         for (unsigned i = 0; i < numFPVars; i++)
3986                         {
3987                             if (VarSetOps::IsMember(this, liveSet, FPVars[i]))
3988                             {
3989                                 // The -1 are because we don't care about stack height 0
3990                                 // and we will use offset FP_VIRTUALREGISTERS to know what's
3991                                 // the count when we overflow. we multiply by 2, because that
3992                                 // is the number of memory accesses we will do for each spill
3993                                 // (even if we op directly with the spill)
3994                                 if (compCodeOpt() == SMALL_CODE)
3995                                 {
3996                                     raHeightsStackFP[FPVars[i]][height - 1] += 2;
3997                                 }
3998                                 else
3999                                 {
4000                                     raHeightsStackFP[FPVars[i]][height - 1] += 2 * block->getBBWeight(this);
4001                                 }
4002
4003 #ifdef DEBUG
4004                                 raHeightsNonWeightedStackFP[FPVars[i]][height - 1]++;
4005 #endif
4006                             }
4007                         }
4008                     }
4009
4010                     prevHeight = height;
4011                 }
4012             }
4013         }
4014     }
4015     compCurBB = NULL;
4016
4017     if (compJmpOpUsed)
4018     {
4019         // Disable enregistering of FP vars for methods with jmp op. We have really no
4020         // coverage here.
4021         // The problem with FP enreg vars is that the returning block is marked with having
4022         // all variables live on exit. This works for integer vars, but for FP vars we must
4023         // do the work to unload them. This is fairly straightforward to do, but I'm worried
4024         // by the coverage, so I'll take the conservative aproach of disabling FP enregistering
4025         // and we will fix it if there is demand
4026         JITDUMP("PERF Warning: Disabling FP enregistering due to JMP op!!!!!!!.\n");
4027         VarSetOps::UnionD(this, raMaskDontEnregFloat, optAllFloatVars);
4028     }
4029
4030 #ifdef DEBUG
4031     raDumpHeightsStackFP();
4032 #endif
4033 }
4034
4035 void Compiler::raSetRegLclBirthDeath(GenTree* tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ)
4036 {
4037     assert(tree->gtOper == GT_LCL_VAR);
4038
4039     unsigned lclnum = tree->gtLclVarCommon.gtLclNum;
4040     assert(lclnum < lvaCount);
4041
4042     LclVarDsc* varDsc = lvaTable + lclnum;
4043
4044     if (!varDsc->lvTracked)
4045     {
4046         // Not tracked, can't be one of the enreg fp vars
4047         return;
4048     }
4049
4050     unsigned varIndex = varDsc->lvVarIndex;
4051
4052     if (!VarSetOps::IsMember(this, optAllFPregVars, varIndex))
4053     {
4054         // Not one of the enreg fp vars
4055         return;
4056     }
4057
4058     assert(varDsc->lvRegNum != REG_FPNONE);
4059     assert(!VarSetOps::IsMember(this, raMaskDontEnregFloat, varIndex));
4060
4061     unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
4062     tree->ChangeOper(GT_REG_VAR);
4063     tree->gtFlags |= livenessFlags;
4064     tree->gtRegNum          = varDsc->lvRegNum;
4065     tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
4066     tree->gtRegVar.SetLclNum(lclnum);
4067
4068     // A liveset can change in a lclvar even if the lclvar itself is not
4069     // changing its life. This can happen for lclvars inside qmarks,
4070     // where lclvars die across the colon edge.
4071     // SO, either
4072     //     it is marked GTF_VAR_DEATH (already set by fgComputeLife)
4073     //     OR it is already live
4074     //     OR it is becoming live
4075     //
4076     if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
4077     {
4078         if ((tree->gtFlags & GTF_VAR_DEF) != 0)
4079
4080         {
4081             tree->gtFlags |= GTF_REG_BIRTH;
4082         }
4083     }
4084
4085 #ifdef DEBUG
4086     if (verbose)
4087         gtDispTree(tree);
4088 #endif
4089 }
4090
4091 // In this pass we set the regvars and set the birth and death flags. we do it
4092 // for all enregistered variables at once.
4093 void Compiler::raEnregisterVarsPostPassStackFP()
4094 {
4095     if (VarSetOps::IsEmpty(this, optAllFPregVars))
4096     {
4097         // Nothing to fix up.
4098     }
4099
4100     BasicBlock* block;
4101
4102     JITDUMP("raEnregisterVarsPostPassStackFP:\n");
4103
4104     for (block = fgFirstBB; block; block = block->bbNext)
4105     {
4106         compCurBB = block;
4107
4108         /*
4109         This opt fails in the case of a variable that has it's entire lifetime contained in the 'then' of
4110         a qmark. The use mask for the whole qmark won't contain that variable as it variable's value comes
4111         from  a def in the else, and the def can't be set for the qmark if the else side of
4112         the qmark doesn't do a def.
4113
4114         See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
4115         'smart' again in the future
4116
4117
4118
4119         if (((block->bbVarUse |
4120               block->bbVarDef |
4121               block->bbLiveIn   ) & optAllFPregVars) == 0)
4122         {
4123             // Fast way out
4124             continue;
4125         }
4126         */
4127
4128         VARSET_TP lastlife(VarSetOps::MakeCopy(this, block->bbLiveIn));
4129         for (GenTree* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
4130         {
4131             assert(stmt->gtOper == GT_STMT);
4132
4133             for (GenTree *tree = stmt->gtStmt.gtStmtList; tree;
4134                  VarSetOps::AssignNoCopy(this, lastlife, fgUpdateLiveSet(lastlife, tree)), tree = tree->gtNext)
4135             {
4136                 if (tree->gtOper == GT_LCL_VAR)
4137                 {
4138                     raSetRegLclBirthDeath(tree, lastlife, false);
4139                 }
4140
4141                 // Model implicit use (& hence last use) of frame list root at pinvokes.
4142                 if (tree->gtOper == GT_CALL)
4143                 {
4144                     GenTreeCall* call = tree->AsCall();
4145                     if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
4146                     {
4147                         LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
4148
4149                         if (frameVarDsc->lvTracked && ((call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH) != 0))
4150                         {
4151                             // Frame var dies here
4152                             unsigned varIndex = frameVarDsc->lvVarIndex;
4153                             VarSetOps::RemoveElemD(this, lastlife, varIndex);
4154                         }
4155                     }
4156                 }
4157             }
4158         }
4159
4160         assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut));
4161     }
4162     compCurBB = NULL;
4163 }
4164
4165 void Compiler::raGenerateFPRefCounts()
4166 {
4167     // Update ref counts to stack
4168     assert(raCntWtdStkDblStackFP == 0);
4169     assert(raCntStkParamDblStackFP == 0);
4170     assert(raCntStkStackFP == 0);
4171
4172     LclVarDsc* varDsc;
4173     unsigned   lclNum;
4174     for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
4175     {
4176         if (varDsc->lvType == TYP_DOUBLE ||
4177             varDsc->lvStructDoubleAlign) // Account for structs (A bit over aggressive here, we should
4178                                          // account for field accesses, but should be a reasonable
4179                                          // heuristic).
4180         {
4181             if (varDsc->lvRegister)
4182             {
4183                 assert(varDsc->lvTracked);
4184             }
4185             else
4186             {
4187                 // Increment tmp access
4188                 raCntStkStackFP += varDsc->lvRefCnt;
4189
4190                 if (varDsc->lvIsParam)
4191                 {
4192                     // Why is this not weighted?
4193                     raCntStkParamDblStackFP += varDsc->lvRefCnt;
4194                 }
4195                 else
4196                 {
4197                     raCntWtdStkDblStackFP += varDsc->lvRefCntWtd;
4198                 }
4199             }
4200         }
4201     }
4202
4203 #ifdef DEBUG
4204     if ((raCntWtdStkDblStackFP > 0) || (raCntStkParamDblStackFP > 0))
4205     {
4206         JITDUMP("StackFP double stack weighted ref count: %u ; param ref count: %u\n", raCntWtdStkDblStackFP,
4207                 raCntStkParamDblStackFP);
4208     }
4209 #endif
4210 }
4211
// Top-level driver for x87 FP enregistration: runs the prepass to build the
// cost/height tables, then greedily picks candidates in descending estimated
// value, assigns each a virtual FP register if one is free of interference,
// and finally runs the postpass to rewrite trees. Enregistration is skipped
// entirely under debug codegen / MinOpts, or when there are no FP vars.
void Compiler::raEnregisterVarsStackFP()
{
    // Minimum (weighted) ref count a candidate must meet to be considered.
    const int          FPENREGTHRESHOLD          = 1;
    const unsigned int FPENREGTHRESHOLD_WEIGHTED = FPENREGTHRESHOLD;

    // Do init
    raInitStackFP();

    if (opts.compDbgCode || opts.MinOpts())
    {
        // no enregistering for these options.
        return;
    }

    if (VarSetOps::IsEmpty(this, optAllFloatVars))
    {
        // No floating point vars. bail out
        return;
    }

    // Do additional pass updating weights and generating height table
    raEnregisterVarsPrePassStackFP();

    // Vars are ordered by weight
    LclVarDsc* varDsc;

    // Set an interference with V0 and V1, which we reserve as a temp registers.
    // We need only one temp. but we will take the easy way, as by using
    // two, we will need to teach codegen how to operate with spilled variables
    VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV0], optAllFloatVars);
    VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV1], optAllFloatVars);

#ifdef DEBUG
    if (codeGen->genStressFloat())
    {
        // Lock out registers for stress.
        regMaskTP locked = codeGen->genStressLockedMaskFloat();
        for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
        {
            if (locked & genRegMaskFloat((regNumber)i))
            {
                VarSetOps::Assign(this, raLclRegIntfFloat[i], optAllFloatVars);
            }
        }
    }
#endif

    // Build the interesting FP var table: tracked FP vars not excluded by
    // raMaskDontEnregFloat, lvDoNotEnregister, or (without FEATURE_X87_DOUBLES)
    // by being TYP_FLOAT.
    LclVarDsc* fpLclFPVars[lclMAX_TRACKED];
    unsigned   numFPVars = 0;
    for (unsigned i = 0; i < lvaTrackedCount; i++)
    {
        if (VarSetOps::IsMember(this, raMaskDontEnregFloat, i))
        {
            JITDUMP("Won't enregister V%02i (T%02i) because it's marked as dont enregister\n", lvaTrackedToVarNum[i],
                    i);
            continue;
        }

        if (VarSetOps::IsMember(this, optAllFloatVars, i))
        {
            varDsc = lvaTable + lvaTrackedToVarNum[i];

            assert(varDsc->lvTracked);

            if (varDsc->lvDoNotEnregister)
            {
                JITDUMP("Won't enregister V%02i (T%02i) because it's marked as DoNotEnregister\n",
                        lvaTrackedToVarNum[i], i);
                continue;
            }
#if !FEATURE_X87_DOUBLES
            if (varDsc->TypeGet() == TYP_FLOAT)
            {
                JITDUMP("Won't enregister V%02i (T%02i) because it's a TYP_FLOAT and we have disabled "
                        "FEATURE_X87_DOUBLES\n",
                        lvaTrackedToVarNum[i], i);
                continue;
            }
#endif

            fpLclFPVars[numFPVars++] = lvaTable + lvaTrackedToVarNum[i];
        }
    }

    unsigned maxRegVars = 0; // Max num of regvars at one time

    // Selection-sort style loop: each iteration moves one candidate to the
    // front and tries to enregister it.
    for (unsigned sortNum = 0; sortNum < numFPVars; sortNum++)
    {
#ifdef DEBUG
        {
            JITDUMP("\n");
            JITDUMP("FP regvar candidates:\n");

            for (unsigned i = sortNum; i < numFPVars; i++)
            {
                varDsc          = fpLclFPVars[i];
                unsigned lclNum = varDsc - lvaTable;
                unsigned varIndex;
                varIndex = varDsc->lvVarIndex;

                JITDUMP("V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n", lclNum, varIndex,
                        varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
                        raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
            }
            JITDUMP("\n");
        }
#endif

        unsigned min = sortNum;

        // Find the one that will save us most
        // NOTE(review): each element is compared against fpLclFPVars[sortNum]
        // rather than fpLclFPVars[min], so 'min' ends up being the *last*
        // candidate that beats the initial one, not the overall best. Looks
        // like a selection-sort bug; confirm intent before changing, as this
        // affects which vars win registers.
        for (unsigned i = sortNum + 1; i < numFPVars; i++)
        {
            if (raVarIsGreaterValueStackFP(fpLclFPVars[i], fpLclFPVars[sortNum]))
            {
                min = i;
            }
        }

        // Put it at the top of the array
        LclVarDsc* temp;
        temp                 = fpLclFPVars[min];
        fpLclFPVars[min]     = fpLclFPVars[sortNum];
        fpLclFPVars[sortNum] = temp;

        varDsc = fpLclFPVars[sortNum];

#ifdef DEBUG
        unsigned lclNum = varDsc - lvaTable;
#endif
        unsigned varIndex = varDsc->lvVarIndex;

        assert(VarSetOps::IsMember(this, optAllFloatVars, varIndex));

        JITDUMP("Candidate for enregistering: V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n",
                lclNum, varIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
                raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);

        bool bMeetsThreshold = true;

        if (varDsc->lvRefCnt < FPENREGTHRESHOLD || varDsc->lvRefCntWtd < FPENREGTHRESHOLD_WEIGHTED)
        {
            bMeetsThreshold = false;
        }

        // We don't want to enregister arguments with only one use, as they will be
        // loaded in the prolog. Just don't enregister them and load them lazily(
        if (varDsc->lvIsParam &&
            (varDsc->lvRefCnt <= FPENREGTHRESHOLD || varDsc->lvRefCntWtd <= FPENREGTHRESHOLD_WEIGHTED))
        {
            bMeetsThreshold = false;
        }

        // Under FP stress level 1, candidates are enregistered even when they
        // miss the threshold.
        if (!bMeetsThreshold
#ifdef DEBUG
            && codeGen->genStressFloat() != 1
#endif
            )
        {
            // Doesn't meet bar, do next
            JITDUMP("V%02u/T%02u doesnt meet threshold. Won't enregister\n", lclNum, varIndex);
            continue;
        }

        // We don't want to have problems with overflow (we now have 2 unsigned counters
        // that can possibly go to their limits), so we just promote to double here.
        // diff
        double balance =
            double(varDsc->lvRefCntWtd) -
            double(raPayloadStackFP[varIndex]) -                      // Additional costs of enregistering variable
            double(raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]) - // Spilling costs of enregistering variable
            double(FPENREGTHRESHOLD_WEIGHTED);

        JITDUMP("balance = %d - %d - %d - %d\n", varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
                raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS], FPENREGTHRESHOLD_WEIGHTED);

        if (balance < 0.0
#ifdef DEBUG
            && codeGen->genStressFloat() != 1
#endif
            )
        {
            // Doesn't meet bar, do next
            JITDUMP("V%02u/T%02u doesnt meet threshold. Won't enregister\n", lclNum, varIndex);
            continue;
        }

        regNumber reg = raRegForVarStackFP(varDsc->lvVarIndex);
        if (reg == REG_FPNONE)
        {
            // Didn't make if (interferes with other regvars), do next
            JITDUMP("V%02u/T%02u interferes with other enreg vars. Won't enregister\n", lclNum, varIndex);

            continue;
        }

        if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
        {
            // Do not enregister if this is a floating field in a struct local of
            // promotion type PROMOTION_TYPE_DEPENDENT.
            continue;
        }

        // Yipee, we will enregister var.
        varDsc->lvRegister = true;
        varDsc->lvRegNum   = reg;
        VarSetOps::AddElemD(this, optAllFPregVars, varIndex);

#ifdef DEBUG
        raDumpVariableRegIntfFloat();

        if (verbose)
        {
            printf("; ");
            gtDispLclVar(lclNum);
            // NOTE(review): this print passes varIndex where the "V%02u" slot
            // presumably wants lclNum -- debug-output only; verify.
            printf("V%02u/T%02u (refcnt=%2u,refwtd=%4u%s) enregistered in %s\n", varIndex, varDsc->lvVarIndex,
                   varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2, (varDsc->lvRefCntWtd & 1) ? ".5" : "",
                   CodeGen::regVarNameStackFP(varDsc->lvRegNum));
        }

        JITDUMP("\n");
#endif

        // Create interferences with other variables.
        assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[(int)reg], optAllFloatVars)));
        VARSET_TP intfFloats(VarSetOps::Intersection(this, lvaVarIntf[varIndex], optAllFloatVars));

        VarSetOps::UnionD(this, raLclRegIntfFloat[reg], intfFloats);

        // Update height tables for variables that interfere with this one.
        raUpdateHeightsForVarsStackFP(intfFloats);

        // Update max number of reg vars at once.
        maxRegVars = min(REG_FPCOUNT, max(maxRegVars, VarSetOps::Count(this, intfFloats)));
    }

    assert(VarSetOps::IsSubset(this, optAllFPregVars, optAllFloatVars));
    assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, optAllFPregVars, raMaskDontEnregFloat)));

    // This is a bit conservative, as they may not all go through a call.
    // If we have to, we can fix this.
    tmpDoubleSpillMax += maxRegVars;

    // Do pass marking trees as egvars
    raEnregisterVarsPostPassStackFP();

#ifdef DEBUG
    {
        JITDUMP("FP enregistration summary\n");

        unsigned i;
        for (i = 0; i < numFPVars; i++)
        {
            varDsc = fpLclFPVars[i];

            if (varDsc->lvRegister)
            {
                unsigned lclNum = varDsc - lvaTable;
                unsigned varIndex;
                varIndex = varDsc->lvVarIndex;

                JITDUMP("Enregistered V%02u/T%02u in FPV%i RefCount: %u Weight: %u \n", lclNum, varIndex,
                        varDsc->lvRegNum, varDsc->lvRefCnt, varDsc->lvRefCntWtd);
            }
        }
        JITDUMP("End of FP enregistration summary\n\n");
    }
#endif
}
4482
4483 #ifdef DEBUG
4484
4485 regMaskTP CodeGenInterface::genStressLockedMaskFloat()
4486 {
4487     assert(genStressFloat());
4488
4489     // Don't use REG_FPV0 or REG_FPV1, they're reserved
4490     if (genStressFloat() == 1)
4491     {
4492         return genRegMaskFloat(REG_FPV4) | genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) |
4493                genRegMaskFloat(REG_FPV7);
4494     }
4495     else
4496     {
4497         return genRegMaskFloat(REG_FPV2) | genRegMaskFloat(REG_FPV3) | genRegMaskFloat(REG_FPV4) |
4498                genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) | genRegMaskFloat(REG_FPV7);
4499     }
4500 }
4501
4502 #endif
4503
4504 #endif // FEATURE_STACK_FP_X87
4505
4506 #endif // LEGACY_BACKEND