// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                          Morph                                            XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "allocacheck.h" // for alloca

// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for the overflow exception;
// returns the morphed tree.
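//
// Illustrative example (a sketch, not taken from the code below): on x86 a
// non-overflow cast GT_CAST(long <- double) cannot be emitted inline, so it is
// rewritten here as the helper call CORINFO_HELP_DBL2LNG(oper); the cast node
// itself is morphed in place into the call node.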
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
{
    GenTree* result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}

/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtFlags |= GTF_CALL;
    if (args)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }
    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

#if DEBUG
    // Helper calls are never candidates.

    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->Reset();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // (_TARGET_X86_ || _TARGET_ARM_) && !LEGACY_BACKEND

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}

/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
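// For example, on the legacy backend an expression such as 'bool b = (l1 < l2);'
// with long operands cannot be materialized as straight-line code, so the relop
// is rewritten as a qmark tree (GT_QMARK) that selects 1 or 0.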
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else  // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}

/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());
    unsigned  srcSize;

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);

    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types; everybody else can get straight there,
        // except when using helpers.
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through a helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through a helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
                )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }

        // Do we need to do it in two steps: R -> I, then I -> small type?
        CLANG_FORMAT_COMMENT_ANCHOR;
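        // Illustrative example: a cast such as (sbyte)d (R8 -> I1) is rewritten
        // below as (sbyte)((int)d), so the hardware only has to perform the
        // R -> I4/I8 conversion and the narrowing to the small type becomes an
        // ordinary integer cast.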

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we can not morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow-checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                    case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert-to-integer instruction on ARM or x64, so skip this
#ifdef LEGACY_BACKEND
                        // the RyuJIT backend does not use the x87 FPU and therefore
                        // does not support folding the cast conv.i4(round.d(d))
                        if ((oper->gtOper == GT_INTRINSIC) &&
                            (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                        {
                            /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                            oper->gtType = dstType;
                            return fgMorphTree(oper);
                        }
                        // if SSE2 is not enabled, we need the helper
                        else
#endif // LEGACY_BACKEND
                            if (!opts.compCanUseSSE2)
                        {
                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                        }
                        else
#endif // _TARGET_X86_
                        {
                            goto OPTIMIZECAST;
                        }
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                    case TYP_UINT:
                        goto OPTIMIZECAST;
#else  // !(_TARGET_ARM_ || _TARGET_AMD64_)
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // !(_TARGET_ARM_ || _TARGET_AMD64_)

#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                    case TYP_LONG:
                        goto OPTIMIZECAST;
#else
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                switch (dstType)
                {
                    case TYP_INT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                    default:
                        break;
                }
            }
            noway_assert(!"Unexpected dstType");
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long, because there is no long-to-float helper, so it must be
        // a 2-step conversion. This happens semi-frequently because there is no IL 'conv.r4.un'.
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // Convert long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_

#ifdef _TARGET_AMD64_
    // Do we have to do two-step U4/8 -> R4/8 ?
    // Codegen supports the following conversions as one-step operations:
    // a) Long -> R4/R8
    // b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using the above.
    // U4 -> R4/8 = U4 -> Long -> R4/8
    // U8 -> R4   = U8 -> R8 -> R4
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 =  U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two-step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif
#endif //_TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information. We would like to just
        // change the type to int, but this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group and it is not turned non-gc by the code generator.
        // We fix this by copying the GC pointer to a non-gc pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTreePtr asg  = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
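
        // The tree built above is roughly COMMA(ASG(tmp, oper), CAST(dstType <- LCL_VAR tmp)),
        // where tmp is typed TYP_I_IMPL: the GC reference dies at the assignment and the
        // cast then operates on a plain native int.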

        return fgMorphTree(oper);
    }

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. whether
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
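    //
    // For example, if x and y are TYP_LONG then (int)(x + y) is rewritten below as
    // ((int)x + (int)y) computed as a 32-bit add; the inserted casts can then often
    // fold away when the tree is re-morphed.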
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^32 for a cast to uint, or less
            // than 2^31 for a cast to int.
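            // For example, (int)(x & 0xFFL) can never overflow, so the overflow
            // check (and with it the GTF_EXCEPT flag) can be removed below.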
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depend
            // upon the lower 32 bits of the operands.
            //
            if (oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG, GT_LSH))
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
                }

                // Clear the GTF_MUL_64RSLT flag if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* If GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same-sized casts with the same signs, or non-overflow
            // casts, we discard them as well.
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                    else
                    {
                        goto REMOVE_CAST;
                    }
                }
            }

            if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep a signed-to-unsigned widening cast with an overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Casts from signed->unsigned can never overflow while widening

                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                }
            }
            else
            {
                // Try to narrow the operand of the cast and discard the cast.
                // Note: do not narrow a cast that is marked as a CSE,
                // and do not narrow if the oper is marked as a CSE either.
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }

        switch (oper->gtOper)
        {
            /* If the operand is a constant, we'll fold it */
            case GT_CNS_INT:
            case GT_CNS_LNG:
            case GT_CNS_DBL:
            case GT_CNS_STR:
            {
                GenTreePtr oldTree = tree;

                tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

                // Did we get a comma throw as a result of gtFoldExprConst?
                if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
                {
                    noway_assert(fgIsCommaThrow(tree));
                    tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                    fgMorphTreeDone(tree);
                    return tree;
                }
                else if (tree->gtOper != GT_CAST)
                {
                    return tree;
                }

                noway_assert(tree->gtCast.CastOp() == oper); // unchanged
            }
            break;

            case GT_CAST:
                /* Check for two consecutive casts into the same dstType */
                if (!tree->gtOverflow())
                {
                    var_types dstType2 = oper->CastToType();
                    if (dstType == dstType2)
                    {
                        goto REMOVE_CAST;
                    }
                }
                break;

#ifdef LEGACY_BACKEND

            /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
               so that the code generator will know not to convert the result
               of the idiv to a regpair */
            case GT_MOD:
                if (dstType == TYP_INT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }

                break;
            case GT_UMOD:
                if (dstType == TYP_UINT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }
                break;

#endif // LEGACY_BACKEND

            case GT_COMMA:
                // Check for a cast of a GT_COMMA with a throw overflow.
                // Bug 110829: Since this optimization will bash the types,
                // neither oper nor commaOp2 can be CSE candidates.
                if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper cannot be a CSE candidate
                {
                    GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                    if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 cannot be a CSE candidate
                    {
                        // need type of oper to be same as tree
                        if (tree->gtType == TYP_LONG)
                        {
                            commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                            commaOp2->gtIntConCommon.SetLngValue(0);
                            /* Change the types of oper and commaOp2 to TYP_LONG */
                            oper->gtType = commaOp2->gtType = TYP_LONG;
                        }
                        else if (varTypeIsFloating(tree->gtType))
                        {
                            commaOp2->ChangeOperConst(GT_CNS_DBL);
                            commaOp2->gtDblCon.gtDconVal = 0.0;
                            // Change the types of oper and commaOp2.
                            // X87 promotes everything to TYP_DOUBLE,
                            // but others are a little more precise.
                            const var_types newTyp
#if FEATURE_X87_DOUBLES
                                = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                                = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                            oper->gtType = commaOp2->gtType = newTyp;
                        }
                        else
                        {
                            commaOp2->ChangeOperConst(GT_CNS_INT);
                            commaOp2->gtIntCon.gtIconVal = 0;
                            /* Change the types of oper and commaOp2 to TYP_INT */
                            oper->gtType = commaOp2->gtType = TYP_INT;
                        }
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }

                    /* Return the GT_COMMA node as the new tree */
                    return oper;
                }
                break;

            default:
                break;
        } /* end switch (oper->gtOper) */
    }

    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:

    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTreePtr       addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //   Note that each dereference is a GC pointer.
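    //
    //   The resulting tree is roughly:
    //     IND(ADD(IND(ADD(objRef, offsetOfTransparentProxyRP)), offsetOfRealProxyServer))
    //   with both indirections typed TYP_REF and marked GTF_IND_INVARIANT.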

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
    return objRef;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph-altering modifications such as copy / constant propagation.
 */

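// UpdateGT_LISTFlags: recursively recompute the GTF_ALL_EFFECT bits of a GT_LIST
// chain bottom-up, so that each list node's flags reflect the effects of its own
// element (gtOp1) and of the rest of the list (gtOp2). Returns the updated flags.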
unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}

#ifdef DEBUG
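// Dump prints one line per arg table entry; an illustrative (hypothetical) example:
//   fgArgTabEntry[arg 0, rcx, regs=1, align=1, lateArgInx=0]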
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isSplit)
    {
        printf(", isSplit");
    }
    if (needTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (isTmp)
    {
        printf(", isTmp");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isHfaRegArg)
    {
        printf(", isHfa");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif

fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;
    argsSorted   = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
    }
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  newCall is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
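//
// A sketch of the intended use: when a call node is cloned, the clone's argument
// nodes are fresh copies, so each argTable entry must be re-pointed at the copy
// occupying the same position (early arg list, objp, or late arg list) as the
// node the old entry referenced.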
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = nullptr;
    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first
    // argument, so we can iterate over these argument lists more uniformly.
    // Need to provide temporary non-null first arguments to these constructors: if we use them, we'll replace them.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallObjp;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = nullptr;
    GenTreeArgList*   oldParent   = nullptr;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntryPtr oldArgTabEntry = nullptr;
        fgArgTabEntryPtr newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                //  to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }
        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }

    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = nullptr;
            fgArgTabEntryPtr newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}

void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}

fgArgTabEntryPtr fgArgInfo::AddRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned                                                         argNum,
                                      GenTreePtr                                                       node,
                                      GenTreePtr                                                       parent,
                                      regNumber                                                        regNum,
                                      unsigned                                                         numRegs,
                                      unsigned                                                         alignment,
                                      const bool                                                       isStruct,
                                      const regNumber                                                  otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in the case of a needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is correct,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment
                                          FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in the case of a needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is correct,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif                                   // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}

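// RemorphReset: called before re-morphing the arguments of a call so that the
// outgoing stack slot counter starts over; the slots are then re-assigned (and
// re-validated against the table) by the Remorph*Arg methods below.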
1210 void fgArgInfo::RemorphReset()
1211 {
1212     nextSlotNum = INIT_ARG_STACK_SLOT;
1213 }
1214
1215 fgArgTabEntry* fgArgInfo::RemorphRegArg(
1216     unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
1217 {
1218     fgArgTabEntryPtr curArgTabEntry = nullptr;
1219     unsigned         regArgInx      = 0;
1220     unsigned         inx;
1221
1222     for (inx = 0; inx < argCount; inx++)
1223     {
1224         curArgTabEntry = argTable[inx];
1225         if (curArgTabEntry->argNum == argNum)
1226         {
1227             break;
1228         }
1229
1230         bool       isRegArg;
1231         GenTreePtr argx;
1232         if (curArgTabEntry->parent != nullptr)
1233         {
1234             assert(curArgTabEntry->parent->OperIsList());
1235             argx     = curArgTabEntry->parent->Current();
1236             isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
1237         }
1238         else
1239         {
1240             argx     = curArgTabEntry->node;
1241             isRegArg = true;
1242         }
1243
1244         if (isRegArg)
1245         {
1246             regArgInx++;
1247         }
1248     }
1249     // if this was a nonstandard arg the table is definitive
1250     if (curArgTabEntry->isNonStandard)
1251     {
1252         regNum = curArgTabEntry->regNum;
1253     }
1254
1255     assert(curArgTabEntry->argNum == argNum);
1256     assert(curArgTabEntry->regNum == regNum);
1257     assert(curArgTabEntry->alignment == alignment);
1258     assert(curArgTabEntry->parent == parent);
1259
1260     if (curArgTabEntry->node != node)
1261     {
1262         GenTreePtr argx     = nullptr;
1263         unsigned   regIndex = 0;
1264
1265         /* process the register argument list */
1266         for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
1267         {
1268             argx = list->Current();
1269             assert(!argx->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
1270             if (regIndex == regArgInx)
1271             {
1272                 break;
1273             }
1274         }
1275         assert(regIndex == regArgInx);
1276         assert(regArgInx == curArgTabEntry->lateArgInx);
1277
1278         if (curArgTabEntry->node != argx)
1279         {
1280             curArgTabEntry->node = argx;
1281         }
1282     }
1283     return curArgTabEntry;
1284 }
1285
1286 void fgArgInfo::RemorphStkArg(
1287     unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
1288 {
1289     fgArgTabEntryPtr curArgTabEntry = nullptr;
1290     bool             isRegArg       = false;
1291     unsigned         regArgInx      = 0;
1292     GenTreePtr       argx;
1293     unsigned         inx;
1294
1295     for (inx = 0; inx < argCount; inx++)
1296     {
1297         curArgTabEntry = argTable[inx];
1298
1299         if (curArgTabEntry->parent != nullptr)
1300         {
1301             assert(curArgTabEntry->parent->OperIsList());
1302             argx     = curArgTabEntry->parent->Current();
1303             isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
1304         }
1305         else
1306         {
1307             argx     = curArgTabEntry->node;
1308             isRegArg = true;
1309         }
1310
1311         if (curArgTabEntry->argNum == argNum)
1312         {
1313             break;
1314         }
1315
1316         if (isRegArg)
1317         {
1318             regArgInx++;
1319         }
1320     }
1321
1322     nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
1323
1324     assert(curArgTabEntry->argNum == argNum);
1325     assert(curArgTabEntry->slotNum == nextSlotNum);
1326     assert(curArgTabEntry->numSlots == numSlots);
1327     assert(curArgTabEntry->alignment == alignment);
1328     assert(curArgTabEntry->parent == parent);
1329     assert(parent->OperIsList());
1330
1331 #if FEATURE_FIXED_OUT_ARGS
1332     if (curArgTabEntry->node != node)
1333     {
1334         if (isRegArg)
1335         {
1336             GenTreePtr argx     = nullptr;
1337             unsigned   regIndex = 0;
1338
1339             /* process the register argument list */
1340             for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
1341             {
1342                 argx = list->Current();
1343                 assert(!argx->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
1344                 if (regIndex == regArgInx)
1345                 {
1346                     break;
1347                 }
1348             }
1349             assert(regIndex == regArgInx);
1350             assert(regArgInx == curArgTabEntry->lateArgInx);
1351
1352             if (curArgTabEntry->node != argx)
1353             {
1354                 curArgTabEntry->node = argx;
1355             }
1356         }
1357         else
1358         {
1359             assert(parent->Current() == node);
1360             curArgTabEntry->node = node;
1361         }
1362     }
1363 #else
1364     curArgTabEntry->node = node;
1365 #endif
1366
1367     nextSlotNum += numSlots;
1368 }
1369
1370 void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
1371 {
1372     fgArgTabEntryPtr curArgTabEntry = nullptr;
1373     assert(argNum < argCount);
1374     for (unsigned inx = 0; inx < argCount; inx++)
1375     {
1376         curArgTabEntry = argTable[inx];
1377         if (curArgTabEntry->argNum == argNum)
1378         {
1379             break;
1380         }
1381     }
1382
1383     assert(numRegs > 0);
1384     assert(numSlots > 0);
1385
1386     curArgTabEntry->isSplit  = true;
1387     curArgTabEntry->numRegs  = numRegs;
1388     curArgTabEntry->numSlots = numSlots;
1389
1390     nextSlotNum += numSlots;
1391 }
1392
1393 void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
1394 {
1395     fgArgTabEntryPtr curArgTabEntry = nullptr;
1396     assert(argNum < argCount);
1397     for (unsigned inx = 0; inx < argCount; inx++)
1398     {
1399         curArgTabEntry = argTable[inx];
1400         if (curArgTabEntry->argNum == argNum)
1401         {
1402             break;
1403         }
1404     }
1405     assert(curArgTabEntry->parent->Current() == newNode);
1406
1407     curArgTabEntry->node   = newNode;
1408     curArgTabEntry->tmpNum = tmpNum;
1409     curArgTabEntry->isTmp  = true;
1410 }
1411
1412 void fgArgInfo::ArgsComplete()
1413 {
1414     bool hasStackArgs    = false;
1415     bool hasStructRegArg = false;
1416
1417     for (unsigned curInx = 0; curInx < argCount; curInx++)
1418     {
1419         fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1420         assert(curArgTabEntry != nullptr);
1421         GenTreePtr argx = curArgTabEntry->node;
1422
1423         if (curArgTabEntry->regNum == REG_STK)
1424         {
1425             hasStackArgs = true;
1426 #if !FEATURE_FIXED_OUT_ARGS
1427             // On x86 we use push instructions to pass arguments:
1428             //   The non-register arguments are evaluated and pushed in order
1429             //   and they are never evaluated into temps
1430             //
1431             continue;
1432 #endif
1433         }
1434         else // we have a register argument, next we look for a struct type.
1435         {
1436             if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
1437             {
1438                 hasStructRegArg = true;
1439             }
1440         }
1441
1442         /* If the argument tree contains an assignment (GTF_ASG) then the argument and
1443            and every earlier argument (except constants) must be evaluated into temps
1444            since there may be other arguments that follow and they may use the value being assigned.
1445
1446            EXAMPLE: ArgTab is "a, a=5, a"
1447                     -> when we see the second arg "a=5"
1448                        we know the first two arguments "a, a=5" have to be evaluated into temps
1449
1450            For the case of an assignment, we only know that there exist some assignment someplace
1451            in the tree.  We don't know what is being assigned so we are very conservative here
1452            and assume that any local variable could have been assigned.
1453          */
1454
1455         if (argx->gtFlags & GTF_ASG)
1456         {
1457             // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
1458             // a tmp, then we need a temp in the late arg list.
1459             if ((argCount > 1) || argx->OperIsCopyBlkOp()
1460 #ifdef FEATURE_FIXED_OUT_ARGS
1461                 || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
1462                                          // that we only have late non-register args when that feature is on.
1463 #endif                                   // FEATURE_FIXED_OUT_ARGS
1464                 )
1465             {
1466                 curArgTabEntry->needTmp = true;
1467             }
1468
1469             // For all previous arguments, unless they are a simple constant
1470             //  we require that they be evaluated into temps
1471             for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1472             {
1473                 fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
1474                 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1475
1476                 assert(prevArgTabEntry->node);
1477                 if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
1478                 {
1479                     prevArgTabEntry->needTmp = true;
1480                 }
1481             }
1482         }
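        // Illustrative outcome (hypothetical, continuing the "a, a=5, a" example above):
        // both the first "a" and the "a=5" entry get needTmp, so they are evaluated
        // in order into temps and only the final "a" is evaluated in place:
        //     tmp0 = a;  tmp1 = (a = 5);  call(tmp0, tmp1, a)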
1483
1484 #if FEATURE_FIXED_OUT_ARGS
1485         // Like calls, if this argument has a tree that will do an inline throw
1486         // (a call to a jit helper), then we need to treat it like a call (but only
1487         // if there are/were any stack args).
1488         // This means unnesting, sorting, etc.  Technically this is overly
1489         // conservative, but I want to avoid as much special-case debug-only code
1490         // as possible, so leveraging the GTF_CALL flag is the easiest.
1491         if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
1492             compiler->opts.compDbgCode &&
1493             (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
1494         {
1495             for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
1496             {
1497                 if (otherInx == curInx)
1498                 {
1499                     continue;
1500                 }
1501
1502                 if (argTable[otherInx]->regNum == REG_STK)
1503                 {
1504                     argx->gtFlags |= GTF_CALL;
1505                     break;
1506                 }
1507             }
1508         }
1509 #endif // FEATURE_FIXED_OUT_ARGS
1510
1511         /* If it contains a call (GTF_CALL) then the argument itself and everything before it
1512            with a GLOB_EFFECT must be evaluated into temps (this is because everything with a
1513            SIDE_EFFECT has to be kept in the right order, since we will move the call to the first position)
1514
1515            For calls we don't have to be quite as conservative as we are with an assignment
1516            since the call won't be modifying any non-address taken LclVars.
1517          */
1518
1519         if (argx->gtFlags & GTF_CALL)
1520         {
1521             if (argCount > 1) // If this is not the only argument
1522             {
1523                 curArgTabEntry->needTmp = true;
1524             }
1525             else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
1526             {
1527                 // Spill all arguments that are floating point calls
1528                 curArgTabEntry->needTmp = true;
1529             }
1530
1531             // All previous arguments may need to be evaluated into temps
1532             for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1533             {
1534                 fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
1535                 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1536                 assert(prevArgTabEntry->node);
1537
1538                 // For all previous arguments, if they have any GTF_ALL_EFFECT
1539                 //  we require that they be evaluated into a temp
1540                 if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
1541                 {
1542                     prevArgTabEntry->needTmp = true;
1543                 }
1544 #if FEATURE_FIXED_OUT_ARGS
1545                 // Or, if they are stored into the FIXED_OUT_ARG area
1546                 // we require that they be moved to the gtCallLateArgs
1547                 // and replaced with a placeholder node
1548                 else if (prevArgTabEntry->regNum == REG_STK)
1549                 {
1550                     prevArgTabEntry->needPlace = true;
1551                 }
1552 #endif
1553             }
1554         }
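        // Illustrative example (hypothetical argument list): for Foo(x[i], Bar()),
        // the GTF_CALL flag on Bar() marks needTmp both on Bar() itself and on the
        // earlier, possibly-faulting x[i]. An earlier effect-free stack argument
        // would instead get needPlace on FEATURE_FIXED_OUT_ARGS targets, deferring
        // its store into the outgoing area until after the call.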
1555
1556 #ifndef LEGACY_BACKEND
1557 #if FEATURE_MULTIREG_ARGS
1558         // For the RyuJIT backend we will expand a multireg arg into a GT_FIELD_LIST
1559         // with multiple indirections, so here we consider spilling it into a tmp LclVar.
1560         //
1561         // Note that Arm32 uses the LEGACY_BACKEND but still defines FEATURE_MULTIREG_ARGS,
1562         // so we skip this for ARM32 until it is ported to use the RyuJIT backend.
1563         //
1564
1565         bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
1566
1567         if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
1568         {
1569             if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
1570             {
1571                 // Spill multireg struct arguments that have Assignments or Calls embedded in them
1572                 curArgTabEntry->needTmp = true;
1573             }
1574             else
1575             {
1576                 // We call gtPrepareCost to measure the cost of evaluating this tree
1577                 compiler->gtPrepareCost(argx);
1578
1579                 if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
1580                 {
1581                     // Spill multireg struct arguments that are expensive to evaluate twice
1582                     curArgTabEntry->needTmp = true;
1583                 }
1584                 else if (argx->OperGet() == GT_OBJ)
1585                 {
1586                     GenTreeObj*          argObj     = argx->AsObj();
1587                     CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
1588                     unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
1589                     switch (structSize)
1590                     {
1591                         case 3:
1592                         case 5:
1593                         case 6:
1594                         case 7:
1595                             // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
1596                             //
1597                             if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
1598                             {
1599                                 // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
1600                                 // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
1601                                 //
1602                                 curArgTabEntry->needTmp = true;
1603                             }
1604                             break;
1605
1606                         case 11:
1607                         case 13:
1608                         case 14:
1609                         case 15:
1610                             // Spill any GT_OBJ multireg structs that are difficult to extract
1611                             //
1612                             // When we have a GT_OBJ of a struct with the above sizes we would need
1613                             // to use 3 or 4 load instructions to load the exact size of this struct.
1614                             // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
1615                             // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
1616                             // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
1617                             // the argument.
1618                             //
1619                             curArgTabEntry->needTmp = true;
1620                             break;
1621
1622                         default:
1623                             break;
1624                     }
1625                 }
1626             }
1627         }
1628 #endif // FEATURE_MULTIREG_ARGS
1629 #endif // LEGACY_BACKEND
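        // Illustrative sizing note (assumed layouts, not from a real dump): reading a
        // 7-byte struct directly from arbitrary memory takes three loads (4+2+1 bytes),
        // while a stack-based LclVar copy can be read with a single 8-byte load.
        // That is why sizes 3,5,6,7 spill to a temp above unless the source is already
        // a stack LclVar, and sizes 11,13,14,15 always spill.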
1630     }
1631
1632     // We only care because we can't spill structs and qmarks involve a lot of spilling, but
1633     // if we don't have qmarks, then it doesn't matter.
1634     // So check for QMARKs globally once here, instead of inside the loop.
1635     //
1636     const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
1637
1638 #if FEATURE_FIXED_OUT_ARGS
1639
1640     // For Arm/x64 we only care because we can't reorder a register
1641     // argument that uses GT_LCLHEAP.  This is an optimization to
1642     // save a check inside the below loop.
1643     //
1644     const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
1645
1646 #else
1647
1648     const bool hasStackArgsWeCareAbout = hasStackArgs;
1649
1650 #endif // FEATURE_FIXED_OUT_ARGS
1651
1652     // If we have any stack args we have to force the evaluation
1653     // of any arguments passed in registers that might throw an exception
1654     //
1655     // Technically we are only required to handle the following two cases:
1656     //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
1657     //     a GT_LCLHEAP node that allocates stuff on the stack
1658     //
1659     if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
1660     {
1661         for (unsigned curInx = 0; curInx < argCount; curInx++)
1662         {
1663             fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1664             assert(curArgTabEntry != nullptr);
1665             GenTreePtr argx = curArgTabEntry->node;
1666
1667             // Examine the register args that are currently not marked needTmp
1668             //
1669             if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
1670             {
1671                 if (hasStackArgsWeCareAbout)
1672                 {
1673 #if !FEATURE_FIXED_OUT_ARGS
1674                     // On x86 we previously recorded a stack depth of zero when
1675                     // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
1676                     // Thus we cannot reorder the argument after any stack based argument.
1677                     // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
1678                     // check for it explicitly.)
1679                     //
1680                     if (argx->gtFlags & GTF_EXCEPT)
1681                     {
1682                         curArgTabEntry->needTmp = true;
1683                         continue;
1684                     }
1685 #else
1686                     // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
1687                     //
1688                     if (argx->gtFlags & GTF_EXCEPT)
1689                     {
1690                         assert(compiler->compLocallocUsed);
1691
1692                         // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
1693                         //
1694                         if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
1695                         {
1696                             curArgTabEntry->needTmp = true;
1697                             continue;
1698                         }
1699                     }
1700 #endif
1701                 }
1702                 if (hasStructRegArgWeCareAbout)
1703                 {
1704                     // Returns WALK_ABORT if a GT_QMARK node is encountered in the argx tree
1705                     //
1706                     if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
1707                     {
1708                         curArgTabEntry->needTmp = true;
1709                         continue;
1710                     }
1711                 }
1712             }
1713         }
1714     }
1715
1716     argsComplete = true;
1717 }
1718
1719 void fgArgInfo::SortArgs()
1720 {
1721     assert(argsComplete == true);
1722
1723 #ifdef DEBUG
1724     if (compiler->verbose)
1725     {
1726         printf("\nSorting the arguments:\n");
1727     }
1728 #endif
1729
1730     /* Shuffle the arguments around before we build the gtCallLateArgs list.
1731        The idea is to move all "simple" arguments like constants and local vars
1732        to the end of the table, and move the complex arguments towards the beginning
1733        of the table. This will help prevent registers from being spilled by
1734        allowing us to evaluate the more complex arguments before the simpler arguments.
1735        The argTable ends up looking like:
1736            +------------------------------------+  <--- argTable[argCount - 1]
1737            |          constants                 |
1738            +------------------------------------+
1739            |    local var / local field         |
1740            +------------------------------------+
1741            | remaining arguments sorted by cost |
1742            +------------------------------------+
1743            | temps (argTable[].needTmp = true)  |
1744            +------------------------------------+
1745            |  args with calls (GTF_CALL)        |
1746            +------------------------------------+  <--- argTable[0]
1747      */
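    /* Illustrative ordering (hypothetical argument list, not a real dump):
       for Foo(5, Bar(), x, a[i]), where a[i] was marked needTmp, the passes below
       arrange argTable as
           [ Bar() | a[i] (needTmp) | x (local var) | 5 (constant) ]
       so the call and the temp are evaluated first and the cheap args last. */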
1748
1749     /* Set the beginning and end for the new argument table */
1750     unsigned curInx;
1751     int      regCount      = 0;
1752     unsigned begTab        = 0;
1753     unsigned endTab        = argCount - 1;
1754     unsigned argsRemaining = argCount;
1755
1756     // First take care of arguments that are constants.
1757     // [We use a backward iterator pattern]
1758     //
1759     curInx = argCount;
1760     do
1761     {
1762         curInx--;
1763
1764         fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1765
1766         if (curArgTabEntry->regNum != REG_STK)
1767         {
1768             regCount++;
1769         }
1770
1771         // Skip any already processed args
1772         //
1773         if (!curArgTabEntry->processed)
1774         {
1775             GenTreePtr argx = curArgTabEntry->node;
1776
1777             // put constants at the end of the table
1778             //
1779             if (argx->gtOper == GT_CNS_INT)
1780             {
1781                 noway_assert(curInx <= endTab);
1782
1783                 curArgTabEntry->processed = true;
1784
1785                 // place curArgTabEntry at the endTab position by performing a swap
1786                 //
1787                 if (curInx != endTab)
1788                 {
1789                     argTable[curInx] = argTable[endTab];
1790                     argTable[endTab] = curArgTabEntry;
1791                 }
1792
1793                 endTab--;
1794                 argsRemaining--;
1795             }
1796         }
1797     } while (curInx > 0);
1798
1799     if (argsRemaining > 0)
1800     {
1801         // Next take care of arguments that are calls.
1802         // [We use a forward iterator pattern]
1803         //
1804         for (curInx = begTab; curInx <= endTab; curInx++)
1805         {
1806             fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1807
1808             // Skip any already processed args
1809             //
1810             if (!curArgTabEntry->processed)
1811             {
1812                 GenTreePtr argx = curArgTabEntry->node;
1813
1814                 // put calls at the beginning of the table
1815                 //
1816                 if (argx->gtFlags & GTF_CALL)
1817                 {
1818                     curArgTabEntry->processed = true;
1819
1820                     // place curArgTabEntry at the begTab position by performing a swap
1821                     //
1822                     if (curInx != begTab)
1823                     {
1824                         argTable[curInx] = argTable[begTab];
1825                         argTable[begTab] = curArgTabEntry;
1826                     }
1827
1828                     begTab++;
1829                     argsRemaining--;
1830                 }
1831             }
1832         }
1833     }
1834
1835     if (argsRemaining > 0)
1836     {
1837         // Next take care of arguments that are temps.
1838         // These temps come before the arguments that are
1839         // ordinary local vars or local fields
1840         // since this will give them a better chance to become
1841         // enregistered into their actual argument register.
1842         // [We use a forward iterator pattern]
1843         //
1844         for (curInx = begTab; curInx <= endTab; curInx++)
1845         {
1846             fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1847
1848             // Skip any already processed args
1849             //
1850             if (!curArgTabEntry->processed)
1851             {
1852                 if (curArgTabEntry->needTmp)
1853                 {
1854                     curArgTabEntry->processed = true;
1855
1856                     // place curArgTabEntry at the begTab position by performing a swap
1857                     //
1858                     if (curInx != begTab)
1859                     {
1860                         argTable[curInx] = argTable[begTab];
1861                         argTable[begTab] = curArgTabEntry;
1862                     }
1863
1864                     begTab++;
1865                     argsRemaining--;
1866                 }
1867             }
1868         }
1869     }
1870
1871     if (argsRemaining > 0)
1872     {
1873         // Next take care of local var and local field arguments.
1874         // These are moved towards the end of the argument evaluation.
1875         // [We use a backward iterator pattern]
1876         //
1877         curInx = endTab + 1;
1878         do
1879         {
1880             curInx--;
1881
1882             fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1883
1884             // Skip any already processed args
1885             //
1886             if (!curArgTabEntry->processed)
1887             {
1888                 GenTreePtr argx = curArgTabEntry->node;
1889
1890                 if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
1891                 {
1892                     noway_assert(curInx <= endTab);
1893
1894                     curArgTabEntry->processed = true;
1895
1896                     // place curArgTabEntry at the endTab position by performing a swap
1897                     //
1898                     if (curInx != endTab)
1899                     {
1900                         argTable[curInx] = argTable[endTab];
1901                         argTable[endTab] = curArgTabEntry;
1902                     }
1903
1904                     endTab--;
1905                     argsRemaining--;
1906                 }
1907             }
1908         } while (curInx > begTab);
1909     }
1910
1911     // Finally, take care of all the remaining arguments.
1912     // Note that we fill in one arg at a time using a while loop.
1913     bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
1914     while (argsRemaining > 0)
1915     {
1916         /* Find the most expensive arg remaining and evaluate it next */
1917
1918         fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
1919         unsigned         expensiveArg         = UINT_MAX;
1920         unsigned         expensiveArgCost     = 0;
1921
1922         // [We use a forward iterator pattern]
1923         //
1924         for (curInx = begTab; curInx <= endTab; curInx++)
1925         {
1926             fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1927
1928             // Skip any already processed args
1929             //
1930             if (!curArgTabEntry->processed)
1931             {
1932                 GenTreePtr argx = curArgTabEntry->node;
1933
1934                 // We should have already handled these kinds of args
1935                 assert(argx->gtOper != GT_LCL_VAR);
1936                 assert(argx->gtOper != GT_LCL_FLD);
1937                 assert(argx->gtOper != GT_CNS_INT);
1938
1939                 // This arg should either have no persistent side effects or be the last one in our table
1940                 // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
1941
1942                 if (argsRemaining == 1)
1943                 {
1944                     // This is the last arg to place
1945                     expensiveArg         = curInx;
1946                     expensiveArgTabEntry = curArgTabEntry;
1947                     assert(begTab == endTab);
1948                     break;
1949                 }
1950                 else
1951                 {
1952                     if (!costsPrepared)
1953                     {
1954                         /* We call gtPrepareCost to measure the cost of evaluating this tree */
1955                         compiler->gtPrepareCost(argx);
1956                     }
1957
1958                     if (argx->gtCostEx > expensiveArgCost)
1959                     {
1960                         // Remember this arg as the most expensive one that we have yet seen
1961                         expensiveArgCost     = argx->gtCostEx;
1962                         expensiveArg         = curInx;
1963                         expensiveArgTabEntry = curArgTabEntry;
1964                     }
1965                 }
1966             }
1967         }
1968
1969         noway_assert(expensiveArg != UINT_MAX);
1970
1971         // put the most expensive arg towards the beginning of the table
1972
1973         expensiveArgTabEntry->processed = true;
1974
1975         // place expensiveArgTabEntry at the begTab position by performing a swap
1976         //
1977         if (expensiveArg != begTab)
1978         {
1979             argTable[expensiveArg] = argTable[begTab];
1980             argTable[begTab]       = expensiveArgTabEntry;
1981         }
1982
1983         begTab++;
1984         argsRemaining--;
1985
1986         costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
1987     }
1988
1989     // The table should now be completely filled and thus begTab should now be adjacent to endTab
1990     // and argsRemaining should be zero
1991     assert(begTab == (endTab + 1));
1992     assert(argsRemaining == 0);
1993
1994 #if !FEATURE_FIXED_OUT_ARGS
1995     // Finally build the regArgList
1996     //
1997     callTree->gtCall.regArgList      = NULL;
1998     callTree->gtCall.regArgListCount = regCount;
1999
2000     unsigned regInx = 0;
2001     for (curInx = 0; curInx < argCount; curInx++)
2002     {
2003         fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2004
2005         if (curArgTabEntry->regNum != REG_STK)
2006         {
2007             // Record the argument register in the regArgList
2008             //
2009             callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
2010             regInx++;
2011         }
2012     }
2013 #endif // !FEATURE_FIXED_OUT_ARGS
2014
2015     argsSorted = true;
2016 }
2017
2018 //------------------------------------------------------------------------------
2019 // fgMakeTmpArgNode : This function creates the argument tree for a value that has
2020 //                    been evaluated into a temp; we need this in order to enforce
2021 //                    ordering of the evaluation of arguments.
2022 //
2023 // Arguments:
2024 //    tmpVarNum  - the var num of the temp that holds the argument value.
2025 //
2026 // Return Value:
2027 //    the newly created temp var tree.
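//
// Notes:
//    Illustrative result shapes (assuming a struct temp V05; hypothetical, not from
//    a real dump):
//      - passed in registers:   the local is retyped in place, e.g. GT_LCL_FLD long V05 [+0]
//      - passed by reference:   GT_ADDR(byref, GT_LCL_VAR V05), possibly wrapped
//        in a GT_OBJ on multireg targets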
2028
2029 GenTreePtr Compiler::fgMakeTmpArgNode(
2030     unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
2031 {
2032     LclVarDsc* varDsc = &lvaTable[tmpVarNum];
2033     assert(varDsc->lvIsTemp);
2034     var_types type = varDsc->TypeGet();
2035
2036     // Create a copy of the temp to go into the late argument list
2037     GenTreePtr arg      = gtNewLclvNode(tmpVarNum, type);
2038     GenTreePtr addrNode = nullptr;
2039
2040     if (varTypeIsStruct(type))
2041     {
2042
2043 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2044
2045 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2046
2047         arg->gtFlags |= GTF_DONT_CSE;
2048
2049 #else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2050         // Can this type be passed in a single register?
2051         // If so, the following call will return the corresponding primitive type.
2052         // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
2053
2054         bool                 passedInRegisters = false;
2055         structPassingKind    kind;
2056         CORINFO_CLASS_HANDLE clsHnd         = varDsc->lvVerTypeInfo.GetClassHandle();
2057         var_types            structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
2058
2059         if (structBaseType != TYP_UNKNOWN)
2060         {
2061             passedInRegisters = true;
2062             type              = structBaseType;
2063         }
2064 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2065
2066         // If it is passed in registers, don't get the address of the var. Make it a
2067         // field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
2068         if (passedInRegisters)
2069         {
2070             arg->ChangeOper(GT_LCL_FLD);
2071             arg->gtType = type;
2072         }
2073         else
2074         {
2075 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2076             // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
2077             var_types addrType = type;
2078 #else
2079             var_types addrType = TYP_BYREF;
2080 #endif
2081             arg      = gtNewOperNode(GT_ADDR, addrType, arg);
2082             addrNode = arg;
2083
2084 #if FEATURE_MULTIREG_ARGS
2085 #ifdef _TARGET_ARM64_
2086             assert(varTypeIsStruct(type));
2087             if (lvaIsMultiregStruct(varDsc))
2088             {
2089                 // ToDo-ARM64: Consider using:  arg->ChangeOper(GT_LCL_FLD);
2090                 // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
2091                 // We will create a GT_OBJ for the argument below.
2092                 // This will be passed by value in two registers.
2093                 assert(addrNode != nullptr);
2094
2095                 // Create an Obj of the temp to use it as a call argument.
2096                 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2097
2098                 // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
2099                 // this is only to preserve former behavior (though some CSE'ing of struct
2100                 // values can be pessimizing, so enabling this may require some additional tuning).
2101                 arg->gtFlags |= GTF_DONT_CSE;
2102             }
2103 #endif // _TARGET_ARM64_
2104 #endif // FEATURE_MULTIREG_ARGS
2105         }
2106
2107 #else // not (_TARGET_AMD64_ or _TARGET_ARM64_)
2108
2109         // On other targets, we pass the struct by value
2110         assert(varTypeIsStruct(type));
2111
2112         addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2113
2114         // Get a new Obj node temp to use it as a call argument.
2115         // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
2116         arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
2117
2118 #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)
2119
2120     } // (varTypeIsStruct(type))
2121
2122     if (addrNode != nullptr)
2123     {
2124         assert(addrNode->gtOper == GT_ADDR);
2125
2126         // This will prevent this LclVar from being optimized away
2127         lvaSetVarAddrExposed(tmpVarNum);
2128
2129         // the child of a GT_ADDR is required to have this flag set
2130         addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
2131     }
2132
2133     return arg;
2134 }
2135
2136 void fgArgInfo::EvalArgsToTemps()
2137 {
2138     assert(argsSorted == true);
2139
2140     unsigned regArgInx = 0;
2141     // Now go through the argument table and perform the necessary evaluation into temps
2142     GenTreeArgList* tmpRegArgNext = nullptr;
2143     for (unsigned curInx = 0; curInx < argCount; curInx++)
2144     {
2145         fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2146
2147         GenTreePtr argx     = curArgTabEntry->node;
2148         GenTreePtr setupArg = nullptr;
2149         GenTreePtr defArg;
2150
2151 #if !FEATURE_FIXED_OUT_ARGS
2152         // Only ever set for FEATURE_FIXED_OUT_ARGS
2153         assert(curArgTabEntry->needPlace == false);
2154
2155         // On x86 and other archs that use push instructions to pass arguments:
2156         //   Only the register arguments need to be replaced with placeholder nodes.
2157         //   Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2158         //
2159         if (curArgTabEntry->regNum == REG_STK)
2160             continue;
2161 #endif
2162
2163         if (curArgTabEntry->needTmp)
2164         {
2165             unsigned tmpVarNum;
2166
2167             if (curArgTabEntry->isTmp == true)
2168             {
2169                 // Create a copy of the temp to go into the late argument list
2170                 tmpVarNum = curArgTabEntry->tmpNum;
2171                 defArg    = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2172                     argTable[curInx]->structDesc.passedInRegisters));
2173
2174                 // mark the original node as a late argument
2175                 argx->gtFlags |= GTF_LATE_ARG;
2176             }
2177             else
2178             {
2179                 // Create a temp assignment for the argument
2180                 // Put the temp in the gtCallLateArgs list
2181                 CLANG_FORMAT_COMMENT_ANCHOR;
2182
2183 #ifdef DEBUG
2184                 if (compiler->verbose)
2185                 {
2186                     printf("Argument with 'side effect'...\n");
2187                     compiler->gtDispTree(argx);
2188                 }
2189 #endif
2190
2191 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2192                 noway_assert(argx->gtType != TYP_STRUCT);
2193 #endif
2194
2195                 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2196                 if (argx->gtOper == GT_MKREFANY)
2197                 {
2198                     // For GT_MKREFANY, typically the actual struct copying does
2199                     // not have any side-effects and can be delayed. So instead
2200                     // of using a temp for the whole struct, we can just use a temp
2201                     // for the operand that has a side-effect.
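                    // Illustrative transform (hypothetical operands): for
                    //     GT_MKREFANY(op1 /* pointer expr containing a call */, op2 /* type handle */)
                    // only the side-effecting operand is moved to the temp:
                    //     setup (early list):  tmpN = op1
                    //     late argument:       GT_MKREFANY(LCL_VAR tmpN, op2)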
2202                     GenTreePtr operand;
2203                     if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2204                     {
2205                         operand = argx->gtOp.gtOp1;
2206
2207                         // In the early argument evaluation, place an assignment to the temp
2208                         // from the source operand of the mkrefany
2209                         setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2210
2211                         // Replace the operand for the mkrefany with the new temp.
2212                         argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2213                     }
2214                     else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2215                     {
2216                         operand = argx->gtOp.gtOp2;
2217
2218                         // In the early argument evaluation, place an assignment to the temp
2219                         // from the source operand of the mkrefany
2220                         setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2221
2222                         // Replace the operand for the mkrefany with the new temp.
2223                         argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2224                     }
2225                 }
2226
2227                 if (setupArg != nullptr)
2228                 {
2229                     // Now keep the mkrefany for the late argument list
2230                     defArg = argx;
2231
2232                     // Clear the side-effect flags because now both op1 and op2 have no side-effects
2233                     defArg->gtFlags &= ~GTF_ALL_EFFECT;
2234                 }
2235                 else
2236                 {
2237                     setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2238
2239                     LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2240
2241 #ifndef LEGACY_BACKEND
2242                     if (compiler->fgOrder == Compiler::FGOrderLinear)
2243                     {
2244                         // We'll reference this temporary variable just once
2245                         // when we perform the function call after
2246                         // setting up this argument.
2247                         varDsc->lvRefCnt = 1;
2248                     }
2249 #endif // !LEGACY_BACKEND
2250
2251                     var_types lclVarType = genActualType(argx->gtType);
2252                     var_types scalarType = TYP_UNKNOWN;
2253
2254                     if (setupArg->OperIsCopyBlkOp())
2255                     {
2256                         setupArg = compiler->fgMorphCopyBlock(setupArg);
2257 #ifdef _TARGET_ARM64_
2258                         // This scalar LclVar widening step is only performed for ARM64
2259                         //
2260                         CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
2261                         unsigned             structSize = varDsc->lvExactSize;
2262
2263                         scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2264 #endif // _TARGET_ARM64_
2265                     }
2266
2267                     // scalarType can be set to a wider type for ARM64: (3 => 4)  or (5,6,7 => 8)
2268                     if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2269                     {
2270                         // Create a GT_LCL_FLD using the wider type to go to the late argument list
2271                         defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2272                     }
2273                     else
2274                     {
2275                         // Create a copy of the temp to go to the late argument list
2276                         defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2277                     }
2278
2279                     curArgTabEntry->isTmp  = true;
2280                     curArgTabEntry->tmpNum = tmpVarNum;
2281
2282 #ifdef _TARGET_ARM_
2283                     // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2284                     // might have left holes in the used registers (see
2285                     // fgAddSkippedRegsInPromotedStructArg).
2286                     // Too bad we're not that smart for these intermediate temps...
2287                     if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2288                     {
2289                         regNumber argReg      = curArgTabEntry->regNum;
2290                         regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2291                         for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2292                         {
2293                             argReg = genRegArgNext(argReg);
2294                             allUsedRegs |= genRegMask(argReg);
2295                         }
2296 #ifdef LEGACY_BACKEND
2297                         callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2298 #endif // LEGACY_BACKEND
2299                     }
2300 #endif // _TARGET_ARM_
2301                 }
2302
2303                 /* mark the assignment as a late argument */
2304                 setupArg->gtFlags |= GTF_LATE_ARG;
2305
2306 #ifdef DEBUG
2307                 if (compiler->verbose)
2308                 {
2309                     printf("\n  Evaluate to a temp:\n");
2310                     compiler->gtDispTree(setupArg);
2311                 }
2312 #endif
2313             }
2314         }
2315         else // curArgTabEntry->needTmp == false
2316         {
2317             //   On x86 -
2318             //      Only register args are replaced with placeholder nodes
2319             //      and the stack based arguments are evaluated and pushed in order.
2320             //
2321             //   On Arm/x64 - When needTmp is false and needPlace is false,
2322             //      the non-register arguments are evaluated and stored in order.
2323             //      When needPlace is true we have a nested call that comes after
2324             //      this argument so we have to replace it in the gtCallArgs list
2325             //      (the initial argument evaluation list) with a placeholder.
2326             //
2327             if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2328             {
2329                 continue;
2330             }
2331
2332             /* No temp needed - move the whole node to the gtCallLateArgs list */
2333
2334             /* The argument is deferred and put in the late argument list */
2335
2336             defArg = argx;
2337
2338             // Create a placeholder node to put in its place in gtCallLateArgs.
2339
2340             // For a struct type we also need to record the class handle of the arg.
2341             CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2342
2343 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2344
2345             // All structs are either passed (and retyped) as integral types, OR they
2346             // are passed by reference.
2347             noway_assert(argx->gtType != TYP_STRUCT);
2348
2349 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2350
2351             if (varTypeIsStruct(defArg))
2352             {
2353                 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2354                 GenTreePtr defArgTmp = defArg;
2355
2356                 // The GT_OBJ may be a child of a GT_COMMA.
2357                 while (defArgTmp->gtOper == GT_COMMA)
2358                 {
2359                     defArgTmp = defArgTmp->gtOp.gtOp2;
2360                 }
2361                 assert(varTypeIsStruct(defArgTmp));
2362
2363                 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2364                 if (defArgTmp->gtOper == GT_MKREFANY)
2365                 {
2366                     clsHnd = compiler->impGetRefAnyClass();
2367                 }
2368                 else if (defArgTmp->gtOper == GT_OBJ)
2369                 {
2370                     clsHnd = defArgTmp->AsObj()->gtClass;
2371                 }
2372                 else
2373                 {
2374                     BADCODE("Unhandled struct argument tree in fgMorphArgs");
2375                 }
2376             }
2377
2378 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2379
2380             setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2381
2382             /* mark the placeholder node as a late argument */
2383             setupArg->gtFlags |= GTF_LATE_ARG;
2384
2385 #ifdef DEBUG
2386             if (compiler->verbose)
2387             {
2388                 if (curArgTabEntry->regNum == REG_STK)
2389                 {
2390                     printf("Deferred stack argument :\n");
2391                 }
2392                 else
2393                 {
2394                     printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2395                 }
2396
2397                 compiler->gtDispTree(argx);
2398                 printf("Replaced with placeholder node:\n");
2399                 compiler->gtDispTree(setupArg);
2400             }
2401 #endif
2402         }
2403
2404         if (setupArg != nullptr)
2405         {
2406             if (curArgTabEntry->parent)
2407             {
2408                 GenTreePtr parent = curArgTabEntry->parent;
2409                 /* a normal argument from the list */
2410                 noway_assert(parent->OperIsList());
2411                 noway_assert(parent->gtOp.gtOp1 == argx);
2412
2413                 parent->gtOp.gtOp1 = setupArg;
2414             }
2415             else
2416             {
2417                 /* must be the gtCallObjp */
2418                 noway_assert(callTree->gtCall.gtCallObjp == argx);
2419
2420                 callTree->gtCall.gtCallObjp = setupArg;
2421             }
2422         }
2423
2424         /* deferred arg goes into the late argument list */
2425
2426         if (tmpRegArgNext == nullptr)
2427         {
2428             tmpRegArgNext                   = compiler->gtNewArgList(defArg);
2429             callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2430         }
2431         else
2432         {
2433             noway_assert(tmpRegArgNext->OperIsList());
2434             noway_assert(tmpRegArgNext->Current());
2435             tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2436             tmpRegArgNext             = tmpRegArgNext->Rest();
2437         }
2438
2439         curArgTabEntry->node       = defArg;
2440         curArgTabEntry->lateArgInx = regArgInx++;
2441     }
2442
2443 #ifdef DEBUG
2444     if (compiler->verbose)
2445     {
2446         printf("\nShuffled argument table:    ");
2447         for (unsigned curInx = 0; curInx < argCount; curInx++)
2448         {
2449             fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2450
2451             if (curArgTabEntry->regNum != REG_STK)
2452             {
2453                 printf("%s ", getRegName(curArgTabEntry->regNum));
2454             }
2455         }
2456         printf("\n");
2457     }
2458 #endif
2459 }
2460
2461 // Get the late arg for arg at position argIndex.
2462 // argIndex - 0-based position to get late arg for.
2463 //            Caller must ensure this position has a late arg.
2464 GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
2465 {
2466     for (unsigned j = 0; j < this->ArgCount(); j++)
2467     {
2468         if (this->ArgTable()[j]->argNum == argIndex)
2469         {
2470             return this->ArgTable()[j]->node;
2471         }
2472     }
2473     // Caller must ensure late arg exists.
2474     unreached();
2475 }
2476
2477 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2478 {
2479     assert(!IsUninitialized(stkLvl));
2480     this->stkLevel = stkLvl;
2481 }
2482
2483 unsigned fgArgInfo::RetrieveStkLevel()
2484 {
2485     assert(!IsUninitialized(stkLevel));
2486     return stkLevel;
2487 }
2488
2489 // Return a conservative estimate of the stack size in bytes.
2490 // It will be used only on the intercepted-for-host code path to copy the arguments.
2491 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2492 {
2493
2494     int numArgs = 0;
2495     for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2496     {
2497         numArgs++;
2498     }
2499
2500     int numStkArgs;
2501     if (numArgs > MAX_REG_ARG)
2502     {
2503         numStkArgs = numArgs - MAX_REG_ARG;
2504     }
2505     else
2506     {
2507         numStkArgs = 0;
2508     }
2509
2510     return numStkArgs * REGSIZE_BYTES;
2511 }
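// Worked example (assuming MAX_REG_ARG == 4 and REGSIZE_BYTES == 8, as on Windows x64):
// a call with 6 arguments gives numStkArgs = 6 - 4 = 2, so the estimate is 16 bytes.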
2512
2513 //------------------------------------------------------------------------------
2514 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
2515 //                  otherwise insert a comma form temp
2516 //
2517 // Arguments:
2518 //    pOp     - a pointer to the operand that will be used multiple times; if it is
2519 //              not a local, it is replaced with a comma expression that evaluates it to a temp
2520 //
2521 // Return Value:
2522 //    A fresh GT_LCL_VAR node referencing the temp which has not been used
2523 //
2524 // Assumption:
2525 //    The result tree MUST be added to the tree structure since the ref counts are
2526 //    already incremented.
2527
2528 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2529 {
2530     GenTree* tree = *pOp;
2531     if (tree->IsLocal())
2532     {
2533         auto result = gtClone(tree);
2534         if (lvaLocalVarRefCounted)
2535         {
2536             lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2537         }
2538         return result;
2539     }
2540     else
2541     {
2542         GenTree* result = fgInsertCommaFormTemp(pOp);
2543
2544         // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
2545         // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
2546         // be added by the caller.
2547         if (lvaLocalVarRefCounted)
2548         {
2549             lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2550             lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2551             lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2552         }
2553
2554         return result;
2555     }
2556 }
2557
2558 //------------------------------------------------------------------------------
2559 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2560 //                        and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2561 //
2562 // Arguments:
2563 //    ppTree     - a pointer to the child node we will be replacing with the comma expression that
2564 //                 evaluates ppTree to a temp and returns the result
2565 //
2566 //    structType - value type handle if the temp created is of TYP_STRUCT.
2567 //
2568 // Return Value:
2569 //    A fresh GT_LCL_VAR node referencing the temp which has not been used
2570 //
2571
2572 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2573 {
2574     GenTree* subTree = *ppTree;
2575
2576     unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2577
2578     if (varTypeIsStruct(subTree))
2579     {
2580         assert(structType != nullptr);
2581         lvaSetStruct(lclNum, structType, false);
2582     }
2583
2584     // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2585     // The type of GT_COPYBLK is TYP_VOID.  Therefore, we should use subTree->TypeGet() for
2586     // setting type of lcl vars created.
2587     GenTree* asg = gtNewTempAssign(lclNum, subTree);
2588
2589     GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2590
2591     GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2592
2593     *ppTree = comma;
2594
2595     return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2596 }
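// Illustrative use (hypothetical caller; assume the grabbed temp is V07 and the
// original subtree is an int):
//
//     GenTree** pOp  = &parent->gtOp.gtOp1;
//     GenTree*  use2 = fgInsertCommaFormTemp(pOp);
//     // now *pOp is COMMA(ASG(V07, originalTree), LCL_VAR V07)
//     // and use2 is a fresh LCL_VAR V07 that the caller can wire in elsewhere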
2597
2598 //------------------------------------------------------------------------
2599 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2600 //
2601 // Arguments:
2602 //    callNode - the call for which we are doing the argument morphing
2603 //
2604 // Return Value:
2605 //    Like most morph methods, this method returns the morphed node,
2606 //    though in this case there are currently no scenarios where the
2607 //    node itself is re-created.
2608 //
2609 // Notes:
2610 //    This method is even less idempotent than most morph methods.
2611 //    That is, it makes changes that should not be redone. It uses the existence
2612 //    of gtCallLateArgs (the late arguments list) to determine if it has
2613 //    already done that work.
2614 //
2615 //    The first time it is called (i.e. during global morphing), this method
2616 //    computes the "late arguments". This is when it determines which arguments
2617 //    need to be evaluated to temps prior to the main argument setup, and which
2618 //    can be directly evaluated into the argument location. It also creates a
2619 //    second argument list (gtCallLateArgs) that does the final placement of the
2620 //    arguments, e.g. into registers or onto the stack.
2621 //
2622 //    The "non-late arguments", aka the gtCallArgs, are doing the in-order
2623 //    evaluation of the arguments that might have side-effects, such as embedded
2624 //    assignments, calls or possible throws. In these cases, it and earlier
2625 //    arguments must be evaluated to temps.
2626 //
2627 //    On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2628 //    if we have any nested calls, we need to defer the copying of the argument
2629 //    into the fixed argument area until after the call. If the argument did not
2630 //    otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2631 //    replaced in the "early" arg list (gtCallArgs) with a placeholder node.
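//
//    Illustrative shape (hypothetical call, not a real JIT dump): for Foo(x, Bar())
//    on a FEATURE_FIXED_OUT_ARGS target, Bar() is evaluated to a temp by an
//    assignment left in the early list (gtCallArgs), a stack-stored x is replaced
//    there by a placeholder node, and gtCallLateArgs then performs the deferred
//    store of x and passes the temp.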
2632
2633 #ifdef _PREFAST_
2634 #pragma warning(push)
2635 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2636 #endif
2637 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2638 {
2639     GenTreePtr args;
2640     GenTreePtr argx;
2641
2642     unsigned flagsSummary    = 0;
2643     unsigned genPtrArgCntSav = fgPtrArgCntCur;
2644
2645     unsigned argIndex = 0;
2646
2647     unsigned intArgRegNum = 0;
2648     unsigned fltArgRegNum = 0;
2649
2650 #ifdef _TARGET_ARM_
2651     regMaskTP argSkippedRegMask    = RBM_NONE;
2652     regMaskTP fltArgSkippedRegMask = RBM_NONE;
2653 #endif //  _TARGET_ARM_
2654
2655 #if defined(_TARGET_X86_)
2656     unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2657 #else
2658     const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2659 #endif
2660
2661     unsigned argSlots                = 0;
2662     unsigned nonRegPassedStructSlots = 0;
2663     bool     reMorphing              = call->AreArgsComplete();
2664     bool     callHasRetBuffArg       = call->HasRetBufArg();
2665
2666 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2667     bool callIsVararg = call->IsVarargs();
2668 #endif
2669
2670 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2671     // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2672     // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
2673     // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
2674     bool hasStackArgCopy = false;
2675 #endif
2676
2677 #ifndef LEGACY_BACKEND
2678     // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2679     // following the normal calling convention or in the normal argument registers. We either mark existing
2680     // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2681     // non-standard arguments into the argument list, below.
2682     class NonStandardArgs
2683     {
2684         struct NonStandardArg
2685         {
2686             regNumber reg;  // The register to be assigned to this non-standard argument.
2687             GenTree*  node; // The tree node representing this non-standard argument.
2688                             //   Note that this must be updated if the tree node changes due to morphing!
2689         };
2690
2691         ArrayStack<NonStandardArg> args;
2692
2693     public:
2694         NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2695         {
2696         }
2697
2698         //-----------------------------------------------------------------------------
2699         // Add: add a non-standard argument to the table of non-standard arguments
2700         //
2701         // Arguments:
2702         //    node - a GenTree node that has a non-standard argument.
2703         //    reg - the register to assign to this node.
2704         //
2705         // Return Value:
2706         //    None.
2707         //
2708         void Add(GenTree* node, regNumber reg)
2709         {
2710             NonStandardArg nsa = {reg, node};
2711             args.Push(nsa);
2712         }
2713
2714         //-----------------------------------------------------------------------------
2715         // Find: Look for a GenTree* in the set of non-standard args.
2716         //
2717         // Arguments:
2718         //    node - a GenTree node to look for
2719         //
2720         // Return Value:
2721         //    The index of the non-standard argument (a non-negative, unique, stable number).
2722         //    If the node is not a non-standard argument, return -1.
2723         //
2724         int Find(GenTree* node)
2725         {
2726             for (int i = 0; i < args.Height(); i++)
2727             {
2728                 if (node == args.Index(i).node)
2729                 {
2730                     return i;
2731                 }
2732             }
2733             return -1;
2734         }
2735
2736         //-----------------------------------------------------------------------------
2737         // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2738         // set the register to use for the node.
2739         //
2740         // Arguments:
2741         //    node - a GenTree node to look for
2742         //    pReg - an OUT argument. *pReg is set to the non-standard register to use if
2743         //           'node' is found in the non-standard argument set.
2744         //
2745         // Return Value:
2746         //    'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2747         //          register to use.
2748         //    'false' otherwise (in this case, *pReg is unmodified).
2749         //
2750         bool FindReg(GenTree* node, regNumber* pReg)
2751         {
2752             for (int i = 0; i < args.Height(); i++)
2753             {
2754                 NonStandardArg& nsa = args.IndexRef(i);
2755                 if (node == nsa.node)
2756                 {
2757                     *pReg = nsa.reg;
2758                     return true;
2759                 }
2760             }
2761             return false;
2762         }
2763
2764         //-----------------------------------------------------------------------------
2765         // Replace: Replace the non-standard argument node at a given index. This is done when
2766         // the original node was replaced via morphing, but we need to continue to assign a
2767         // particular non-standard arg to it.
2768         //
2769         // Arguments:
2770         //    index - the index of the non-standard arg. It must exist.
2771         //    node - the new GenTree node.
2772         //
2773         // Return Value:
2774         //    None.
2775         //
2776         void Replace(int index, GenTree* node)
2777         {
2778             args.IndexRef(index).node = node;
2779         }
2780
2781     } nonStandardArgs(this);
2782 #endif // !LEGACY_BACKEND
2783
2784     // Count of args. On first morph, this is counted before we've filled in the arg table.
2785     // On remorph, we grab it from the arg table.
2786     unsigned numArgs = 0;
2787
2788     // Process the late arguments (which were determined by a previous caller).
2789     // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2790     // may need to refer to it.
2791     if (reMorphing)
2792     {
2793         // We need to reMorph the gtCallLateArgs early since that is what triggers
2794         // the expression folding and we need to have the final folded gtCallLateArgs
2795         // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2796         // with the folded tree that represents the final optimized argument nodes.
2797         //
2798         // However if a range-check needs to be generated for any of these late
2799         // arguments we also need to "know" what the stack depth will be when we generate
2800         // code to branch to the throw range check failure block as that is part of the
2801         // GC information contract for that block.
2802         //
2803         // Since the late arguments are evaluated last we have pushed all of the
2804         // other arguments on the stack before we evaluate these late arguments,
2805         // so we record the stack depth on the first morph call when reMorphing
2806         // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2807         //
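        // (Sketch of the pairing: the first morph pass records the depth via
        // call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur) once the non-late args are
        // accounted for; RetrieveStkLevel() below simply reads that value back.)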
2808         if (call->gtCallLateArgs != nullptr)
2809         {
2810             unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2811             fgPtrArgCntCur += callStkLevel;
2812             call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2813             flagsSummary |= call->gtCallLateArgs->gtFlags;
2814             fgPtrArgCntCur -= callStkLevel;
2815         }
2816         assert(call->fgArgInfo != nullptr);
2817         call->fgArgInfo->RemorphReset();
2818
2819         numArgs = call->fgArgInfo->ArgCount();
2820     }
2821     else
2822     {
2823         // First we need to count the args
2824         if (call->gtCallObjp)
2825         {
2826             numArgs++;
2827         }
2828         for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2829         {
2830             numArgs++;
2831         }
2832
2833         // Insert or mark non-standard args. These are either outside the normal calling convention, or
2834         // arguments passed in registers that don't follow the normal progression of argument registers in the calling
2835         // convention (such as for the ARM64 fixed return buffer argument x8).
2836         //
2837         // *********** NOTE *************
2838         // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2839         // in the implementation of fast tail call.
2840         // *********** END NOTE *********
2841         CLANG_FORMAT_COMMENT_ANCHOR;
2842
2843 #if !defined(LEGACY_BACKEND)
2844 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2845         // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
2846         // Set the argument registers correctly here.
2847         if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2848         {
2849             GenTreeArgList* args = call->gtCallArgs;
2850             GenTree*        arg1 = args->Current();
2851             assert(arg1 != nullptr);
2852             nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2853         }
2854 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2855 #if defined(_TARGET_X86_)
2856         // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2857         // hi part to be in EDX. This sets the argument registers up correctly.
2858         else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2859                  call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2860         {
2861             GenTreeArgList* args = call->gtCallArgs;
2862             GenTree*        arg1 = args->Current();
2863             assert(arg1 != nullptr);
2864             nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2865
2866             args          = args->Rest();
2867             GenTree* arg2 = args->Current();
2868             assert(arg2 != nullptr);
2869             nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2870         }
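        // For illustration: on x86 a 64-bit shift such as
        //     long r = x << n;
        // becomes a call to CORINFO_HELP_LLSH, where the lo half of 'x' must be in
        // EAX (REG_LNGARG_LO) and the hi half in EDX (REG_LNGARG_HI), rather than
        // following the usual argument register progression.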
2871 #else  // !defined(_TARGET_X86_)
2872         // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2873         // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2874         // convention for x86/SSE.
2875
2876         // If we have a Fixed Return Buffer argument register then we set up a non-standard argument for it
2877         //
2878         if (hasFixedRetBuffReg() && call->HasRetBufArg())
2879         {
2880             args = call->gtCallArgs;
2881             assert(args != nullptr);
2882             assert(args->OperIsList());
2883
2884             argx = call->gtCallArgs->Current();
2885
2886             // We don't increment numArgs here, since we already counted this argument above.
2887
2888             nonStandardArgs.Add(argx, theFixedRetBuffReg());
2889         }
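        // Sketch (ARM64, per the fixed return buffer convention noted above): for a
        // call that returns a large struct via a hidden buffer, the &retBuf argument
        // is pinned to x8 by the Add() above, while the remaining arguments continue
        // to use the normal x0-x7 progression.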
2890
2891         // We are allowed to have a Fixed Return Buffer argument combined
2892         // with any of the remaining non-standard arguments
2893         //
2894         if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2895         {
2896             assert(!call->gtCallCookie);
2897             // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2898             // It will be used only on the intercepted-for-host code path to copy the arguments.
2899
2900             GenTree* cns     = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2901             call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2902             numArgs++;
2903
2904             nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2905         }
2906         else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
2907         {
2908             // indirect VSD stubs need the base of the indirection cell to be
2909             // passed in addition.  At this point that is the value in gtCallAddr.
2910             // The actual call target will be derived from gtCallAddr in call
2911             // lowering.
2912
2913             // If it is a VSD call getting dispatched via tail call helper,
2914             // fgMorphTailCall() would materialize stub addr as an additional
2915             // parameter added to the original arg list and hence no need to
2916             // add as a non-standard arg.
2917
2918             GenTree* arg = call->gtCallAddr;
2919             if (arg->OperIsLocal())
2920             {
2921                 arg = gtClone(arg, true);
2922             }
2923             else
2924             {
2925                 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2926                 call->gtFlags |= GTF_ASG;
2927             }
2928             noway_assert(arg != nullptr);
2929
2930             // And push the stub address onto the list of arguments
2931             call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2932             numArgs++;
2933
2934             nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
2935         }
2936         else
2937 #endif // defined(_TARGET_X86_)
2938         if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2939         {
2940             assert(!call->IsUnmanaged());
2941
2942             GenTree* arg = call->gtCallCookie;
2943             noway_assert(arg != nullptr);
2944             call->gtCallCookie = nullptr;
2945
2946 #if defined(_TARGET_X86_)
2947             // x86 passes the cookie on the stack as the final argument to the call.
2948             GenTreeArgList** insertionPoint = &call->gtCallArgs;
2949             for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2950             {
2951             }
2952             *insertionPoint = gtNewListNode(arg, nullptr);
2953 #else  // !defined(_TARGET_X86_)
2954             // All other architectures pass the cookie in a register.
2955             call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2956 #endif // defined(_TARGET_X86_)
2957
2958             nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2959             numArgs++;
2960
2961             // put destination into R10/EAX
2962             arg              = gtClone(call->gtCallAddr, true);
2963             call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2964             numArgs++;
2965
2966             nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2967
2968             // finally change this call to a helper call
2969             call->gtCallType    = CT_HELPER;
2970             call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2971         }
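        // Net effect (sketch): an indirect call with a cookie becomes
        //     CORINFO_HELP_PINVOKE_CALLI(cookie /* REG_PINVOKE_COOKIE_PARAM */,
        //                                target /* REG_PINVOKE_TARGET_PARAM */, originalArgs...)
        // with the two extra values pinned to their non-standard registers
        // (on x86 the cookie instead goes on the stack, as handled above).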
2972 #endif // !defined(LEGACY_BACKEND)
2973
2974         // Allocate the fgArgInfo for the call node.
2975         //
2976         call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2977     }
2978
2979     if (varTypeIsStruct(call))
2980     {
2981         fgFixupStructReturn(call);
2982     }
2983
2984     /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
2985      * During the first call to fgMorphArgs we also record the
2986      * information about late arguments we have in 'fgArgInfo'.
2987      * This information is used later to construct the gtCallLateArgs */
2988
2989     /* Process the 'this' argument value, if present */
2990
2991     argx = call->gtCallObjp;
2992
2993     if (argx)
2994     {
2995         argx             = fgMorphTree(argx);
2996         call->gtCallObjp = argx;
2997         flagsSummary |= argx->gtFlags;
2998
2999         assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
3000
3001         assert(argIndex == 0);
3002
3003         /* We must fill in or update the argInfo table */
3004
3005         if (reMorphing)
3006         {
3007             /* this is a register argument - possibly update it in the table */
3008             call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
3009         }
3010         else
3011         {
3012             assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
3013
3014             /* this is a register argument - put it in the table */
3015             call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3016 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3017                                        ,
3018                                        false, REG_STK, nullptr
3019 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3020                                        );
3021         }
3022         // this can't be a struct.
3023         assert(argx->gtType != TYP_STRUCT);
3024
3025         /* Increment the argument register count and argument index */
3026         if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3027         {
3028             intArgRegNum++;
3029 #ifdef WINDOWS_AMD64_ABI
3030             // Whenever we pass an integer register argument
3031             // we skip the corresponding floating point register argument
3032             fltArgRegNum++;
3033 #endif // WINDOWS_AMD64_ABI
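            // Example (Windows x64 sketch): with 'this' consuming the first integer
            // register (RCX), a following double argument would be passed in XMM1,
            // not XMM0, because the two register sequences advance in lock step.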
3034         }
3035         else
3036         {
3037             noway_assert(!"the 'this' pointer can not be a floating point type");
3038         }
3039         argIndex++;
3040         argSlots++;
3041     }
3042
3043 #ifdef _TARGET_X86_
3044     // Compute the maximum number of arguments that can be passed in registers.
3045     // For X86 we handle the varargs and unmanaged calling conventions
3046
3047     if (call->gtFlags & GTF_CALL_POP_ARGS)
3048     {
3049         noway_assert(intArgRegNum < MAX_REG_ARG);
3050         // No more register arguments for varargs (CALL_POP_ARGS)
3051         maxRegArgs = intArgRegNum;
3052
3053         // Add in the ret buff arg
3054         if (callHasRetBuffArg)
3055             maxRegArgs++;
3056     }
3057
3058     if (call->IsUnmanaged())
3059     {
3060         noway_assert(intArgRegNum == 0);
3061
3062         if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3063         {
3064             noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3065                          call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3066                          call->gtCallArgs->gtOp.gtOp1->gtOper ==
3067                              GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3068             maxRegArgs = 1;
3069         }
3070         else
3071         {
3072             maxRegArgs = 0;
3073         }
3074
3075         // Add in the ret buff arg
3076         if (callHasRetBuffArg)
3077             maxRegArgs++;
3078     }
3079 #endif // _TARGET_X86_
3080
3081     /* Morph the user arguments */
3082     CLANG_FORMAT_COMMENT_ANCHOR;
3083
3084 #if defined(_TARGET_ARM_)
3085
3086     // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3087     // to the "Procedure Call Standard for the ARM Architecture" document, especially
3088     // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3089     // appear in a lower-numbered register than floating point argument N. That is, argument
3090     // register allocation is not strictly increasing. To support this, we need to keep track of unused
3091     // floating-point argument registers that we can back-fill. We only support 4-byte float and
3092     // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3093     // only back-fill single registers, since there is no way with these types to create
3094     // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3095     // available (with 16 FP argument registers). Consider this code:
3096     //
3097     // struct HFA { float x, y, z; }; // a three element HFA
3098     // void bar(float a1,   // passed in f0
3099     //          double a2,  // passed in f2/f3; skip f1 for alignment
3100     //          HFA a3,     // passed in f4/f5/f6
3101     //          double a4,  // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3102     //          HFA a5,     // passed in f10/f11/f12
3103     //          double a6,  // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
3104     //                      // slots
3105     //          float a7,   // passed in f1 (back-filled)
3106     //          float a8,   // passed in f7 (back-filled)
3107     //          float a9,   // passed in f13 (back-filled)
3108     //          float a10)  // passed on the stack in [OutArg+0]
3109     //
3110     // Note that if we ever support FP types with larger alignment requirements, then there could
3111     // be more than single register back-fills.
3112     //
3113     // Once we assign a floating-point argument to the stack, all subsequent FP arguments must be on the stack.
3114     // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3115     // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3116     // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3117     // and prevent any additional floating-point arguments from going in registers.
3118
3119     bool anyFloatStackArgs = false;
3120
3121 #endif // _TARGET_ARM_
3122
3123 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3124     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3125 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3126
3127     bool hasStructArgument     = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3128     bool hasMultiregStructArgs = false;
3129     for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3130     {
3131         GenTreePtr* parentArgx = &args->gtOp.gtOp1;
3132
3133 #if FEATURE_MULTIREG_ARGS
3134         if (!hasStructArgument)
3135         {
3136             hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3137         }
3138 #endif // FEATURE_MULTIREG_ARGS
3139
3140 #ifndef LEGACY_BACKEND
3141         // Record the index of any nonStandard arg that we may be processing here, as we are
3142         // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3143         GenTreePtr orig_argx         = *parentArgx;
3144         int        nonStandard_index = nonStandardArgs.Find(orig_argx);
3145 #endif // !LEGACY_BACKEND
3146
3147         argx        = fgMorphTree(*parentArgx);
3148         *parentArgx = argx;
3149         flagsSummary |= argx->gtFlags;
3150
3151         assert(args->OperIsList());
3152         assert(argx == args->Current());
3153
3154 #ifndef LEGACY_BACKEND
3155         if ((nonStandard_index != -1) && (argx != orig_argx))
3156         {
3157             // We need to update the node field for this nonStandard arg here
3158             // as it was changed by the call to fgMorphTree
3159             nonStandardArgs.Replace(nonStandard_index, argx);
3160         }
3161 #endif // !LEGACY_BACKEND
3162
3163         /* Change the node to TYP_I_IMPL so we don't report GC info
3164          * NOTE: We deferred this from the importer because of the inliner */
3165
3166         if (argx->IsVarAddr())
3167         {
3168             argx->gtType = TYP_I_IMPL;
3169         }
3170
3171         bool     passUsingFloatRegs;
3172         unsigned argAlign = 1;
3173         // Setup any HFA information about 'argx'
3174         var_types hfaType  = GetHfaType(argx);
3175         bool      isHfaArg = varTypeIsFloating(hfaType);
3176         unsigned  hfaSlots = 0;
3177
3178         if (isHfaArg)
3179         {
3180             hfaSlots = GetHfaCount(argx);
3181
3182             // If we have an HFA struct, it's possible that a method which originally
3183             // had only integer types now starts having FP types.  We have to communicate this
3184             // through this flag since LSRA later on will use this flag to determine whether
3185             // or not to track the FP register set.
3186             //
3187             compFloatingPointUsed = true;
3188         }
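        // (For reference: an HFA is a struct whose fields are all the same floating-point
        // type, with up to four elements; e.g. struct { float x, y, z; } gives
        // hfaType == TYP_FLOAT and hfaSlots == 3.)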
3189
3190         unsigned             size          = 0;
3191         CORINFO_CLASS_HANDLE copyBlkClass  = nullptr;
3192         bool                 isRegArg      = false;
3193         bool                 isNonStandard = false;
3194         regNumber            nonStdRegNum  = REG_NA;
3195
3196         fgArgTabEntryPtr argEntry = nullptr;
3197
3198         if (reMorphing)
3199         {
3200             argEntry = gtArgEntryByArgNum(call, argIndex);
3201         }
3202
3203 #ifdef _TARGET_ARM_
3204
3205         bool passUsingIntRegs;
3206         if (reMorphing)
3207         {
3208             passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3209             passUsingIntRegs   = isValidIntArgReg(argEntry->regNum);
3210         }
3211         else
3212         {
3213             passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3214             passUsingIntRegs   = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3215         }
3216
3217         GenTreePtr curArg = argx;
3218         // If late args have already been computed, use the node in the argument table.
3219         if (argEntry != NULL && argEntry->isTmp)
3220         {
3221             curArg = argEntry->node;
3222         }
3223
3224         if (reMorphing)
3225         {
3226             argAlign = argEntry->alignment;
3227         }
3228         else
3229         {
3230             // We don't use the "size" return value from InferOpSizeAlign().
3231             codeGen->InferOpSizeAlign(curArg, &argAlign);
3232
3233             argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3234             argAlign /= TARGET_POINTER_SIZE;
3235         }
3236
3237         if (argAlign == 2)
3238         {
3239             if (passUsingFloatRegs)
3240             {
3241                 if (fltArgRegNum % 2 == 1)
3242                 {
3243                     fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3244                     fltArgRegNum++;
3245                 }
3246             }
3247             else if (passUsingIntRegs)
3248             {
3249                 if (intArgRegNum % 2 == 1)
3250                 {
3251                     argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3252                     intArgRegNum++;
3253                 }
3254             }
3255
3256             if (argSlots % 2 == 1)
3257             {
3258                 argSlots++;
3259             }
3260         }
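        // Illustration (sketch): for f(int a, long long b) passed in integer registers,
        // 'a' lands in r0; 'b' requires 8-byte alignment, so r1 is recorded in
        // argSkippedRegMask and 'b' takes the aligned pair r2/r3.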
3261
3262 #elif defined(_TARGET_ARM64_)
3263
3264         if (reMorphing)
3265         {
3266             passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3267         }
3268         else
3269         {
3270             passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3271         }
3272
3273 #elif defined(_TARGET_AMD64_)
3274         if (reMorphing)
3275         {
3276             passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3277         }
3278         else
3279         {
3280             passUsingFloatRegs = varTypeIsFloating(argx);
3281         }
3282 #elif defined(_TARGET_X86_)
3283
3284         passUsingFloatRegs = false;
3285
3286 #else
3287 #error Unsupported or unset target architecture
3288 #endif // _TARGET_*
3289
3290         bool      isBackFilled     = false;
3291         unsigned  nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3292         var_types structBaseType   = TYP_STRUCT;
3293         unsigned  structSize       = 0;
3294
3295         bool isStructArg = varTypeIsStruct(argx);
3296
3297         if (reMorphing)
3298         {
3299 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3300             // Get the struct description for the already completed struct argument.
3301             fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3302             assert(fgEntryPtr != nullptr);
3303
3304             // As described in a few other places, this can happen when the argx was morphed
3305             // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
3306             // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3307             // was a struct and the struct classification.
3308             isStructArg = fgEntryPtr->isStruct;
3309
3310             if (isStructArg)
3311             {
3312                 structDesc.CopyFrom(fgEntryPtr->structDesc);
3313             }
3314 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3315
3316             assert(argEntry != nullptr);
3317             if (argEntry->IsBackFilled())
3318             {
3319                 isRegArg         = true;
3320                 size             = argEntry->numRegs;
3321                 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3322                 assert(size == 1);
3323                 isBackFilled = true;
3324             }
3325             else if (argEntry->regNum == REG_STK)
3326             {
3327                 isRegArg = false;
3328                 assert(argEntry->numRegs == 0);
3329                 size = argEntry->numSlots;
3330             }
3331             else
3332             {
3333                 isRegArg = true;
3334                 assert(argEntry->numRegs > 0);
3335                 size = argEntry->numRegs + argEntry->numSlots;
3336             }
3337
3338             // This size has now been computed
3339             assert(size != 0);
3340         }
3341         else // !reMorphing
3342         {
3343             //
3344             // Figure out the size of the argument. This is either in number of registers, or number of
3345             // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
3346             // the stack.
3347             //
3348             if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3349             {
3350 #if defined(_TARGET_AMD64_)
3351 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3352                 if (!isStructArg)
3353                 {
3354                     size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3355                 }
3356                 else
3357                 {
3358                     size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3359                                               TARGET_POINTER_SIZE)) /
3360                            TARGET_POINTER_SIZE;
3361                     eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3362                     if (size > 1)
3363                     {
3364                         hasMultiregStructArgs = true;
3365                     }
3366                 }
3367 #else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3368                 size         = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3369 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3370 #elif defined(_TARGET_ARM64_)
3371                 if (isStructArg)
3372                 {
3373                     if (isHfaArg)
3374                     {
3375                         size = GetHfaCount(argx);
3376                         // HFA structs are passed by value in multiple registers
3377                         hasMultiregStructArgs = true;
3378                     }
3379                     else
3380                     {
3381                         // Structs are either passed in 1 or 2 (64-bit) slots
3382                         size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3383                                                   TARGET_POINTER_SIZE)) /
3384                                TARGET_POINTER_SIZE;
3385
3386                         if (size == 2)
3387                         {
3388                             // Structs that are the size of 2 pointers are passed by value in multiple registers
3389                             hasMultiregStructArgs = true;
3390                         }
3391                         else if (size > 2)
3392                         {
3393                             size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3394                                       // reference (to a copy)
3395                         }
3396                     }
3397                     // Note that there are some additional rules for multireg structs.
3398                     // (i.e. they cannot be split between registers and the stack)
3399                 }
3400                 else
3401                 {
3402                     size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3403                 }
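                // Sketch of the ARM64 sizing above: a 16-byte struct gets size == 2
                // (e.g. x0/x1) and is flagged as a multireg arg, while a 24-byte
                // non-HFA struct is copied to a temp and passed by reference, so its
                // size collapses to a single slot.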
3404 #elif defined(_TARGET_ARM_)
3405                 if (isStructArg)
3406                 {
3407                     size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3408                                               TARGET_POINTER_SIZE)) /
3409                            TARGET_POINTER_SIZE;
3410                     if (isHfaArg)
3411                     {
3412                         hasMultiregStructArgs = true;
3413                     }
3414                     else if (size > 1 && size <= 4)
3415                     {
3416                         hasMultiregStructArgs = true;
3417                     }
3418                 }
3419                 else
3420                 {
3421                     // The typical case
3422                     // long/double type argument(s) will be changed to GT_FIELD_LIST in the lowering phase
3423                     size = genTypeStSz(argx->gtType);
3424                 }
3425 #elif defined(_TARGET_X86_)
3426                 size       = genTypeStSz(argx->gtType);
3427 #else
3428 #error Unsupported or unset target architecture
3429 #endif // _TARGET_XXX_
3430             }
3431 #ifdef _TARGET_ARM_
3432             else if (isHfaArg)
3433             {
3434                 size                  = GetHfaCount(argx);
3435                 hasMultiregStructArgs = true;
3436             }
3437 #endif           // _TARGET_ARM_
3438             else // struct type
3439             {
3440                 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3441                 if (argx->gtOper == GT_MKREFANY)
3442                 {
3443                     if (varTypeIsStruct(argx))
3444                     {
3445                         isStructArg = true;
3446                     }
3447 #ifdef _TARGET_AMD64_
3448 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3449                     if (varTypeIsStruct(argx))
3450                     {
3451                         size                 = info.compCompHnd->getClassSize(impGetRefAnyClass());
3452                         unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3453                         size                 = roundupSize / TARGET_POINTER_SIZE;
3454                         eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3455                     }
3456                     else
3457 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3458                     {
3459                         size = 1;
3460                     }
3461 #else
3462                     size                 = 2;
3463 #endif
3464                 }
3465                 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3466                 {
3467                     GenTreePtr  argObj         = argx;
3468                     GenTreePtr* parentOfArgObj = parentArgx;
3469
3470                     assert(args->OperIsList());
3471                     assert(argx == args->Current());
3472
3473                     /* The GT_OBJ may be a child of a GT_COMMA */
3474                     while (argObj->gtOper == GT_COMMA)
3475                     {
3476                         parentOfArgObj = &argObj->gtOp.gtOp2;
3477                         argObj         = argObj->gtOp.gtOp2;
3478                     }
3479
3480                     // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3481                     if (argObj->gtOper != GT_OBJ)
3482                     {
3483                         BADCODE("illegal argument tree in fgMorphArgs");
3484                     }
3485
3486                     CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3487 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3488                     eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3489 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3490
3491                     unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3492                     originalSize          = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3493                     unsigned roundupSize  = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3494
3495                     structSize = originalSize;
3496
3497                     structPassingKind howToPassStruct;
3498                     structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3499
3500 #ifdef _TARGET_ARM64_
3501                     if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3502                         !isPow2(originalSize))                    // size is 3, 5, 6 or 7 bytes
3503                     {
3504                         if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3505                         {
3506                             // For ARM64, when passing a struct that is 3, 5, 6 or 7 bytes in size,
3507                             // we can read 4 or 8 bytes from the LclVar to pass this arg
3508                             originalSize = genTypeSize(structBaseType);
3509                         }
3510                     }
3511 #endif //  _TARGET_ARM64_
3512
3513 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3514                     // On System V OSes a struct is never passed by reference.
3515                     // It is either passed by value on the stack or in registers.
3516                     bool passStructInRegisters = false;
3517 #else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3518                     bool passStructByRef = false;
3519 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3520
3521                     // The following if-then-else needs to be carefully refactored.
3522                     // Basically the else portion wants to turn a struct load (a GT_OBJ)
3523                     // into a GT_IND of the appropriate size.
3524                     // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
3525                     // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined  (Why?)
3526                     // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3527                     // It also can't do this if we have a HFA arg,
3528                     // unless we have a 1-elem HFA in which case we want to do the optimization.
3529                     CLANG_FORMAT_COMMENT_ANCHOR;
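                    // Example of the intended transformation (sketch): for a 4-byte
                    //     struct S { int x; };
                    // the argument tree GT_OBJ(GT_ADDR(lclVar)) becomes GT_IND<int>(GT_ADDR(lclVar)),
                    // and the *(&X) fold further below reduces it to the lclVar itself.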
3530
3531 #ifndef _TARGET_X86_
3532 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3533                     // Check for struct argument with size 1, 2, 4 or 8 bytes
3534                     // As we can optimize these by turning them into a GT_IND of the correct type
3535                     //
3536                     // Check for cases that we cannot optimize:
3537                     //
3538                     if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
3539                         !isPow2(originalSize) ||                // it is not a power of two (1, 2, 4 or 8)
3540                         (isHfaArg && (hfaSlots != 1)))          // it is an HFA struct with more than one element
3541 #endif                                                          // FEATURE_UNIX_AMD64_STRUCT_PASSING
3542                     {
3543                         // Normalize 'size' to the number of pointer sized items
3544                         // 'size' is the number of register slots that we will use to pass the argument
3545                         size = roundupSize / TARGET_POINTER_SIZE;
3546 #if defined(_TARGET_AMD64_)
3547 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3548                         size            = 1; // This must be copied to a temp and passed by address
3549                         passStructByRef = true;
3550                         copyBlkClass    = objClass;
3551 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3552                         if (!structDesc.passedInRegisters)
3553                         {
3554                             GenTreePtr lclVar     = fgIsIndirOfAddrOfLocal(argObj);
3555                             bool       needCpyBlk = false;
3556                             if (lclVar != nullptr)
3557                             {
3558                                 // If the struct is promoted to registers, it has to be materialized
3559                                 // on the stack. We may want to support promoted structures in
3560                                 // the codegen for putarg_stk instead of creating a copy here.
3561                                 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3562                                 needCpyBlk        = varDsc->lvPromoted;
3563                             }
3564                             else
3565                             {
3566                                 // If simd16 comes from Vector<T>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3567                                 // sets structDesc.passedInRegisters to be false.
3568                                 //
3569                                 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3570                                 // by the rationalizer. For now we will let the SIMD struct arg be copied to
3571                                 // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD)
3572                                 //
3573                                 // +--*  obj       simd16
3574                                 // |  \--*  addr      byref
3575                                 // |     |  /--*  lclVar    simd16 V05 loc4
3576                                 // |     \--*  simd      simd16 int -
3577                                 // |        \--*  lclVar    simd16 V08 tmp1
3578                                 //
3579                                 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3580                                 // so that we don't need to generate a copy here.
3581                                 GenTree* addr = argObj->gtOp.gtOp1;
3582                                 if (addr->OperGet() == GT_ADDR)
3583                                 {
3584                                     GenTree* addrChild = addr->gtOp.gtOp1;
3585                                     if (addrChild->OperGet() == GT_SIMD)
3586                                     {
3587                                         needCpyBlk = true;
3588                                     }
3589                                 }
3590                             }
3591                             passStructInRegisters = false;
3592                             if (needCpyBlk)
3593                             {
3594                                 copyBlkClass = objClass;
3595                             }
3596                             else
3597                             {
3598                                 copyBlkClass = NO_CLASS_HANDLE;
3599                             }
3600                         }
3601                         else
3602                         {
3603                             // The objClass is used to materialize the struct on the stack.
3604                             // For SystemV, the code below generates copies for struct arguments classified
3605                             // as register arguments.
3606                             // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3607                             // can be passed in registers or copied directly to the outgoing area.
3608                             passStructInRegisters = true;
3609                             copyBlkClass          = objClass;
3610                         }
3611
3612 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3613 #elif defined(_TARGET_ARM64_)
3614                         if ((size > 2) && !isHfaArg)
3615                         {
3616                             size            = 1; // This must be copied to a temp and passed by address
3617                             passStructByRef = true;
3618                             copyBlkClass    = objClass;
3619                         }
3620 #endif
3621
3622 #ifdef _TARGET_ARM_
3623                         // If we're passing a promoted struct local var,
3624                         // we may need to skip some registers due to alignment; record those.
3625                         GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3626                         if (lclVar != NULL)
3627                         {
3628                             LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3629                             if (varDsc->lvPromoted)
3630                             {
3631                                 assert(argObj->OperGet() == GT_OBJ);
3632                                 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3633                                 {
3634                                     fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3635                                 }
3636                             }
3637                         }
3638 #endif // _TARGET_ARM_
3639                     }
3640 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3641                     // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3642                     // generated for structs of size 1, 2, 4, or 8 bytes.
3643                     else // We have a struct argument with size 1, 2, 4 or 8 bytes
3644                     {
3645                         // change our GT_OBJ into a GT_IND of the correct type.
3646                         // We've already ensured above that size is a power of 2, and less than or equal to pointer
3647                         // size.
3648
3649                         assert(howToPassStruct == SPK_PrimitiveType);
3650
3651                         // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
3652                         // primitives
3653                         if (isHfaArg)
3654                         {
3655                             // If we reach here with an HFA arg it has to be a one element HFA
3656                             assert(hfaSlots == 1);
3657                             structBaseType = hfaType; // change the indirection type to a floating point type
3658                         }
3659
3660                         noway_assert(structBaseType != TYP_UNKNOWN);
3661
3662                         argObj->ChangeOper(GT_IND);
3663
3664                         // Now see if we can fold *(&X) into X
3665                         if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3666                         {
3667                             GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3668
3669                             // Keep the DONT_CSE flag in sync
3670                             // (as the addr always marks it for its op1)
3671                             temp->gtFlags &= ~GTF_DONT_CSE;
3672                             temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3673                             DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3674                             DEBUG_DESTROY_NODE(argObj);             // GT_IND
3675
3676                             argObj          = temp;
3677                             *parentOfArgObj = temp;
3678
3679                             // If the OBJ had been the top level node, we've now changed argx.
3680                             if (parentOfArgObj == parentArgx)
3681                             {
3682                                 argx = temp;
3683                             }
3684                         }
3685                         if (argObj->gtOper == GT_LCL_VAR)
3686                         {
3687                             unsigned   lclNum = argObj->gtLclVarCommon.gtLclNum;
3688                             LclVarDsc* varDsc = &lvaTable[lclNum];
3689
3690                             if (varDsc->lvPromoted)
3691                             {
3692                                 if (varDsc->lvFieldCnt == 1)
3693                                 {
3694                                     // get the first and only promoted field
3695                                     LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3696                                     if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3697                                     {
3698                                         // we will use the first and only promoted field
3699                                         argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3700
3701                                         if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3702                                             (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3703                                         {
3704                                             // Just use the existing field's type
3705                                             argObj->gtType = fieldVarDsc->TypeGet();
3706                                         }
3707                                         else
3708                                         {
3709                                             // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3710                                             // to a new type
3711                                             argObj->ChangeOper(GT_LCL_FLD);
3712                                             argObj->gtType = structBaseType;
3713                                         }
3714                                         assert(varTypeCanReg(argObj->TypeGet()));
3715                                         assert(copyBlkClass == NO_CLASS_HANDLE);
3716                                     }
3717                                     else
3718                                     {
3719                                         // use GT_LCL_FLD to swizzle the single field struct to a new type
3720                                         lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3721                                         argObj->ChangeOper(GT_LCL_FLD);
3722                                         argObj->gtType = structBaseType;
3723                                     }
3724                                 }
3725                                 else
3726                                 {
3727                                     // The struct fits into a single register, but it has been promoted into its
3728                                     // constituent fields, and so we have to re-assemble it
3729                                     copyBlkClass = objClass;
3730 #ifdef _TARGET_ARM_
3731                                     // Alignment constraints may cause us not to use (to "skip") some argument
3732                                     // registers. Add those, if any, to the skipped (int) arg reg mask.
3733                                     fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3734 #endif // _TARGET_ARM_
3735                                 }
3736                             }
3737                             else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3738                             {
3739                                 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3740                                 argObj->ChangeOper(GT_LCL_FLD);
3741                                 argObj->gtType = structBaseType;
3742                             }
3743                         }
3744                         else
3745                         {
3746                             // Not a GT_LCL_VAR, so we can just change the type on the node
3747                             argObj->gtType = structBaseType;
3748                         }
3749                         assert(varTypeCanReg(argObj->TypeGet()) ||
3750                                ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3751
3752                         size = 1;
3753                     }
3754 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3755
3756 #endif // not _TARGET_X86_
3757                     // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3758                     if ((structBaseType == TYP_STRUCT) &&
3759 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3760                         !passStructInRegisters
3761 #else  // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3762                         !passStructByRef
3763 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3764                         )
3765                     {
3766                         if (isHfaArg && passUsingFloatRegs)
3767                         {
3768                             size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3769                         }
3770                         else
3771                         {
3772                             // If the valuetype size is not a multiple of sizeof(void*),
3773                             // we must copyblk to a temp before doing the obj to avoid
3774                             // the obj reading memory past the end of the valuetype
3775                             CLANG_FORMAT_COMMENT_ANCHOR;
3776
3777                             if (roundupSize > originalSize)
3778                             {
3779                                 copyBlkClass = objClass;
3780
3781                                 // There are a few special cases where we can omit using a CopyBlk
3782                                 // where we normally would need to use one.
3783
3784                                 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3785                                 {
3786                                     copyBlkClass = NO_CLASS_HANDLE;
3787                                 }
3788                             }
3789
3790                             size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3791                         }
3792                     }
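                    // Example (sketch): a 6-byte struct rounds up to 8 bytes, so reading
                    // it with a GT_OBJ could touch 2 bytes past the value; we therefore
                    // copy it to a temp of class objClass first, except when the source is
                    // a local var (IsVarAddr), presumably because a local's stack slot can
                    // safely be over-read.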
3793                 }
3794
3795 #ifdef _TARGET_64BIT_
3796                 if (size > 1)
3797                 {
3798                     hasMultiregStructArgs = true;
3799                 }
3800 #elif defined(_TARGET_ARM_)
3801                 // TODO-Arm: Need to handle the case
3802                 // where structs passed by value can be split between registers and stack.
3803                 if (size > 1 && size <= 4)
3804                 {
3805                     hasMultiregStructArgs = true;
3806                 }
3807 #ifndef LEGACY_BACKEND
3808                 else if (size > 4 && passUsingIntRegs)
3809                 {
3810                     NYI_ARM("Struct can be split between registers and stack");
3811                 }
3812 #endif // !LEGACY_BACKEND
3813 #endif // _TARGET_ARM_
3814             }
3815
3816             // The 'size' value must now have been set. (The original value of zero is an invalid value.)
3817             assert(size != 0);
3818
3819             //
3820             // Figure out if the argument will be passed in a register.
3821             //
3822
3823             if (isRegParamType(genActualType(argx->TypeGet()))
3824 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3825                 && (!isStructArg || structDesc.passedInRegisters)
3826 #endif
3827                     )
3828             {
3829 #ifdef _TARGET_ARM_
3830                 if (passUsingFloatRegs)
3831                 {
3832                     // First, see if it can be back-filled
3833                     if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3834                         (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3835                         (size == 1))                          // The size to back-fill is one float register
3836                     {
3837                         // Back-fill the register.
3838                         isBackFilled              = true;
3839                         regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3840                         fltArgSkippedRegMask &=
3841                             ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3842                         nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3843                         assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3844                     }
3845
3846                     // Does the entire float, double, or HFA fit in the FP arg registers?
3847                     // Check if the last register needed is still in the argument register range.
3848                     isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3849
3850                     if (!isRegArg)
3851                     {
3852                         anyFloatStackArgs = true;
3853                     }
3854                 }
3855                 else
3856                 {
3857                     isRegArg = intArgRegNum < MAX_REG_ARG;
3858                 }
3859 #elif defined(_TARGET_ARM64_)
3860                 if (passUsingFloatRegs)
3861                 {
3862                     // Check if the last register needed is still in the fp argument register range.
3863                     isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3864
3865                     // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3866                     if (isHfaArg && !isRegArg)
3867                     {
3868                         // recompute the 'size' so that it represents the number of stack slots rather than the number of
3869                         // registers
3870                         //
3871                         unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3872                         size                 = roundupSize / TARGET_POINTER_SIZE;
3873
3874                         // We also must update fltArgRegNum so that we no longer try to
3875                         // allocate any new floating point registers for args
3876                         // This prevents us from backfilling a subsequent arg into d7
3877                         //
3878                         fltArgRegNum = MAX_FLOAT_REG_ARG;
3879                     }
3880                 }
3881                 else
3882                 {
3883                     // Check if the last register needed is still in the int argument register range.
3884                     isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3885
3886                     // Did we run out of registers when we had a 16-byte struct (size == 2)?
3887                     // (i.e we only have one register remaining but we needed two registers to pass this arg)
3888                     // This prevents us from backfilling a subsequent arg into x7
3889                     //
3890                     if (!isRegArg && (size > 1))
3891                     {
3892                         // We also must update intArgRegNum so that we no longer try to
3893                         // allocate any new general purpose registers for args
3894                         //
3895                         intArgRegNum = maxRegArgs;
3896                     }
3897                 }
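                // Example (ARM64 sketch): with x0-x6 already assigned, a 16-byte struct
                // would need x7 plus a stack slot; splitting is not allowed, so the
                // struct goes entirely on the stack and intArgRegNum is advanced to
                // maxRegArgs so that no later argument back-fills x7.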
3898 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3899
3900 #if defined(UNIX_AMD64_ABI)
3901
3902 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3903                 // Here a struct can be passed in registers, following the classification of its members and its size.
3904                 // Now make sure there are actually enough registers to do so.
3905                 if (isStructArg)
3906                 {
3907                     unsigned int structFloatRegs = 0;
3908                     unsigned int structIntRegs   = 0;
3909                     for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3910                     {
3911                         if (structDesc.IsIntegralSlot(i))
3912                         {
3913                             structIntRegs++;
3914                         }
3915                         else if (structDesc.IsSseSlot(i))
3916                         {
3917                             structFloatRegs++;
3918                         }
3919                     }
3920
3921                     isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3922                                ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3923                 }
3924                 else
3925 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3926                 {
3927                     if (passUsingFloatRegs)
3928                     {
3929                         isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3930                     }
3931                     else
3932                     {
3933                         isRegArg = intArgRegNum < MAX_REG_ARG;
3934                     }
3935                 }
3936 #else  // !defined(UNIX_AMD64_ABI)
3937                 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3938 #endif // !defined(UNIX_AMD64_ABI)
3939 #endif // _TARGET_ARM_
3940             }
3941             else
3942             {
3943                 isRegArg = false;
3944             }
3945
3946 #ifndef LEGACY_BACKEND
3947             // If there are non-standard args (outside the calling convention), they were inserted above
3948             // and noted in a table so that we can recognize them here and build their argInfo.
3949             //
3950             // They should not affect the placement of any other args or stack space required.
3951             // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3952             isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3953             if (isNonStandard && (nonStdRegNum == REG_STK))
3954             {
3955                 isRegArg = false;
3956             }
3957 #if defined(_TARGET_X86_)
3958             else if (call->IsTailCallViaHelper())
3959             {
3960                 // We have already (before calling fgMorphArgs()) appended the 4 special args
3961                 // required by the x86 tailcall helper. These args are required to go on the
3962                 // stack. Force them to the stack here.
3963                 assert(numArgs >= 4);
3964                 if (argIndex >= numArgs - 4)
3965                 {
3966                     isRegArg = false;
3967                 }
3968             }
3969 #endif    // defined(_TARGET_X86_)
3970 #endif    // !LEGACY_BACKEND
3971         } // end !reMorphing
3972
3973         //
3974         // Now we know whether the argument goes in registers or not and how big it is;
3975         // either we just computed this, or this is a re-morph call and we looked it up.
3976         //
3977         CLANG_FORMAT_COMMENT_ANCHOR;
3978
3979 #ifdef _TARGET_ARM_
3980         // If we ever allocate a floating point argument to the stack, then all
3981         // subsequent HFA/float/double arguments go on the stack.
3982         if (!isRegArg && passUsingFloatRegs)
3983         {
3984             for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3985             {
3986                 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3987             }
3988         }
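        // Illustrative example: if a double must go to the stack while some
        // s-registers are still free, the loop above consumes every remaining float
        // register into fltArgSkippedRegMask, so no later float/double/HFA argument
        // can be placed in a register behind this stack-passed one.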
3989
3990         // If we think we're going to split a struct between integer registers and the stack, check to
3991         // see if we've already assigned a floating-point arg to the stack.
3992         if (isRegArg &&                            // We decided above to use a register for the argument
3993             !passUsingFloatRegs &&                 // We're using integer registers
3994             (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3995             anyFloatStackArgs)                     // We've already used the stack for a floating-point argument
3996         {
3997             isRegArg = false; // Change our mind; don't pass this struct partially in registers
3998
3999             // Skip the rest of the integer argument registers
4000             for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
4001             {
4002                 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
4003             }
4004         }
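        // Illustrative example: with r0-r2 in use, a 16-byte struct (size == 4) would
        // normally split between r3 and the stack; because a float argument already
        // lives on the stack, we instead mark r3 as skipped and pass the whole
        // struct on the stack.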
4005
4006 #endif // _TARGET_ARM_
4007
4008         if (isRegArg)
4009         {
4010             regNumber nextRegNum = REG_STK;
4011 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4012             regNumber    nextOtherRegNum = REG_STK;
4013             unsigned int structFloatRegs = 0;
4014             unsigned int structIntRegs   = 0;
4015
4016             if (isStructArg && structDesc.passedInRegisters)
4017             {
4018                 // It is a struct passed in registers. Assign the next available register.
4019                 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
4020                 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
4021                 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4022                 {
4023                     if (structDesc.IsIntegralSlot(i))
4024                     {
4025                         *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
4026                         structIntRegs++;
4027                     }
4028                     else if (structDesc.IsSseSlot(i))
4029                     {
4030                         *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
4031                         structFloatRegs++;
4032                     }
4033                 }
4034             }
4035             else
4036 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4037             {
4038                 // fill in or update the argInfo table
4039                 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
4040                                                 : genMapIntRegArgNumToRegNum(intArgRegNum);
4041             }
4042
4043 #ifdef _TARGET_AMD64_
4044 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4045             assert(size == 1);
4046 #endif
4047 #endif
4048
4049             fgArgTabEntryPtr newArgEntry;
4050             if (reMorphing)
4051             {
4052                 // This is a register argument - possibly update it in the table
4053                 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4054             }
4055             else
4056             {
4057                 if (isNonStandard)
4058                 {
4059                     nextRegNum = nonStdRegNum;
4060                 }
4061
4062                 // This is a register argument - put it in the table
4063                 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4064 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4065                                                          ,
4066                                                          isStructArg, nextOtherRegNum, &structDesc
4067 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4068                                                          );
4069
4070                 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
4071                                             isHfaArg); // Note that on Arm32 an HFA is passed in int regs for varargs
4072                 newArgEntry->SetIsBackFilled(isBackFilled);
4073                 newArgEntry->isNonStandard = isNonStandard;
4074             }
4075
4076             if (newArgEntry->isNonStandard)
4077             {
4078                 continue;
4079             }
4080
4081             // Set up the next intArgRegNum and fltArgRegNum values.
4082             if (!isBackFilled)
4083             {
4084 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4085                 if (isStructArg)
4086                 {
4087                     intArgRegNum += structIntRegs;
4088                     fltArgRegNum += structFloatRegs;
4089                 }
4090                 else
4091 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4092                 {
4093                     if (passUsingFloatRegs)
4094                     {
4095                         fltArgRegNum += size;
4096
4097 #ifdef WINDOWS_AMD64_ABI
4098                         // Whenever we pass a floating point register argument
4099                         // we skip the corresponding integer register argument
4100                         intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4101 #endif // WINDOWS_AMD64_ABI
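                        // Illustrative example (Windows x64): argument slots are
                        // positional, so for func(double a, int b) 'a' goes in XMM0
                        // and 'b' in RDX; passing 'a' burns RCX, which the line above
                        // models by advancing intArgRegNum in lockstep.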
4102 #ifdef _TARGET_ARM_
4103                         if (fltArgRegNum > MAX_FLOAT_REG_ARG)
4104                         {
4105 #ifndef LEGACY_BACKEND
4106                             NYI_ARM("Struct split between float registers and stack");
4107 #endif // !LEGACY_BACKEND
4108                             // This indicates a partial enregistration of a struct type
4109                             assert(varTypeIsStruct(argx));
4110                             unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
4111                             assert((unsigned char)numRegsPartial == numRegsPartial);
4112                             call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4113                             fltArgRegNum = MAX_FLOAT_REG_ARG;
4114                         }
4115 #endif // _TARGET_ARM_
4116                     }
4117                     else
4118                     {
4119                         if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4120                         {
4121                             // we are setting up the fixed return buffer register argument
4122                             // so don't increment intArgRegNum
4123                             assert(size == 1);
4124                         }
4125                         else
4126                         {
4127                             // Increment intArgRegNum by 'size' registers
4128                             intArgRegNum += size;
4129                         }
4130
4131 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
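                        // Conversely (Windows x64): passing an integer register
                        // argument skips the corresponding floating point register.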
4132                         fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4133 #endif // _TARGET_AMD64_
4134 #ifdef _TARGET_ARM_
4135                         if (intArgRegNum > MAX_REG_ARG)
4136                         {
4137 #ifndef LEGACY_BACKEND
4138                             NYI_ARM("Struct split between integer registers and stack");
4139 #endif // !LEGACY_BACKEND
4140                             // This indicates a partial enregistration of a struct type
4141                             assert((isStructArg) || argx->OperIsCopyBlkOp() ||
4142                                    (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4143                             unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4144                             assert((unsigned char)numRegsPartial == numRegsPartial);
4145                             call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4146                             intArgRegNum = MAX_REG_ARG;
4147                             fgPtrArgCntCur += size - numRegsPartial;
4148                         }
4149 #endif // _TARGET_ARM_
4150                     }
4151                 }
4152             }
4153         }
4154         else // We have an argument that is not passed in a register
4155         {
4156             fgPtrArgCntCur += size;
4157
4158             // If the register arguments have not been determined then we must fill in the argInfo
4159
4160             if (reMorphing)
4161             {
4162                 // This is a stack argument - possibly update it in the table
4163                 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4164             }
4165             else
4166             {
4167                 // This is a stack argument - put it in the table
4168                 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4169                                            argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4170             }
4171         }
4172
4173         if (copyBlkClass != NO_CLASS_HANDLE)
4174         {
4175             noway_assert(!reMorphing);
4176             fgMakeOutgoingStructArgCopy(call, args, argIndex,
4177                                         copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4178
4179             // This can cause a GTF_EXCEPT flag to be set.
4180             // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
4181             // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
4182             // there are no register arguments. Then reMorphing is never true, so we keep re-copying
4183             // any struct arguments.
4184             // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
4185             flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
4186
4187 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4188             hasStackArgCopy = true;
4189 #endif
4190         }
4191
4192 #ifndef LEGACY_BACKEND
4193         if (argx->gtOper == GT_MKREFANY)
4194         {
4195             // 'Lower' the MKREFANY tree and insert it.
4196             noway_assert(!reMorphing);
4197
4198 #ifndef _TARGET_64BIT_
4199
4200             // Build the mkrefany as a GT_FIELD_LIST
4201             GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4202                 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4203             (void)new (this, GT_FIELD_LIST)
4204                 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4205             fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4206             fp->node            = fieldList;
4207             args->gtOp.gtOp1    = fieldList;
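            // The arg is now, conceptually:
            //   FIELD_LIST(dataPtr TYP_BYREF @0, FIELD_LIST(type TYP_I_IMPL @4, nullptr))
            // i.e. the two halves of the TypedReference are passed as separate fields.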
4208
4209 #else  // _TARGET_64BIT_
4210
4211             // Get a new temp.
4212             // We don't need the unsafe value class check here, since the address of the temp is used only in the mkrefany.
4213             unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4214             lvaSetStruct(tmp, impGetRefAnyClass(), false);
4215
4216             // Build the mkrefany as a comma node:
4217             // (tmp.ptr=argx),(tmp.type=handle)
4218             GenTreeLclFld* destPtrSlot  = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4219             GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4220             destPtrSlot->gtFieldSeq     = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4221             destPtrSlot->gtFlags |= GTF_VAR_DEF;
4222             destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4223             destTypeSlot->gtFlags |= GTF_VAR_DEF;
4224
4225             GenTreePtr asgPtrSlot  = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4226             GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4227             GenTreePtr asg         = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4228
4229             // Change the expression to "(tmp=val)"
4230             args->gtOp.gtOp1 = asg;
4231
4232             // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4233             call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4234             lvaSetVarAddrExposed(tmp);
4235 #endif // _TARGET_64BIT_
4236         }
4237 #endif // !LEGACY_BACKEND
4238
4239 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4240         if (isStructArg)
4241         {
4242             GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4243             if ((lclNode != nullptr) &&
4244                 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4245             {
4246                 // Make a GT_FIELD_LIST of the field lclVars.
4247                 GenTreeLclVarCommon* lcl       = lclNode->AsLclVarCommon();
4248                 LclVarDsc*           varDsc    = &(lvaTable[lcl->gtLclNum]);
4249                 GenTreeFieldList*    fieldList = nullptr;
4250                 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4251                      fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4252                 {
4253                     LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4254                     if (fieldList == nullptr)
4255                     {
4256                         lcl->SetLclNum(fieldLclNum);
4257                         lcl->ChangeOper(GT_LCL_VAR);
4258                         lcl->gtType = fieldVarDsc->lvType;
4259                         fieldList   = new (this, GT_FIELD_LIST)
4260                             GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4261                         fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4262                         fp->node            = fieldList;
4263                         args->gtOp.gtOp1    = fieldList;
4264                     }
4265                     else
4266                     {
4267                         GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4268                         fieldList         = new (this, GT_FIELD_LIST)
4269                             GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4270                     }
4271                 }
4272             }
4273         }
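            // Illustrative example (local numbers hypothetical): an independently
            // promoted struct { int a; int b; } passed as V03 becomes
            //   FIELD_LIST(V04 TYP_INT @0, FIELD_LIST(V05 TYP_INT @4, nullptr))
            // where V04 and V05 are the field locals created by promotion.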
4274 #endif // _TARGET_X86_ && !LEGACY_BACKEND
4275
4276 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4277         if (isStructArg && !isRegArg)
4278         {
4279             nonRegPassedStructSlots += size;
4280         }
4281         else
4282 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4283         {
4284             argSlots += size;
4285         }
4286     } // end foreach argument loop
4287
4288     if (!reMorphing)
4289     {
4290         call->fgArgInfo->ArgsComplete();
4291
4292 #ifdef LEGACY_BACKEND
4293         call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4294 #if defined(_TARGET_ARM_)
4295         call->gtCallRegUsedMask &= ~argSkippedRegMask;
4296 #endif
4297         if (fltArgRegNum > 0)
4298         {
4299 #if defined(_TARGET_ARM_)
4300             call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4301 #endif
4302         }
4303 #endif // LEGACY_BACKEND
4304     }
4305
4306     if (call->gtCallArgs)
4307     {
4308         UpdateGT_LISTFlags(call->gtCallArgs);
4309     }
4310
4311     /* Process the function address, if indirect call */
4312
4313     if (call->gtCallType == CT_INDIRECT)
4314     {
4315         call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4316     }
4317
4318     call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4319
4320     if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4321     {
4322         fgPtrArgCntCur++;
4323     }
4324
4325     /* Remember the maximum value we ever see */
4326
4327     if (fgPtrArgCntMax < fgPtrArgCntCur)
4328     {
4329         JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4330         fgPtrArgCntMax = fgPtrArgCntCur;
4331     }
4332
4333     assert(fgPtrArgCntCur >= genPtrArgCntSav);
4334     call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4335
4336     /* The call will pop all the arguments we pushed */
4337
4338     fgPtrArgCntCur = genPtrArgCntSav;
4339
4340 #if FEATURE_FIXED_OUT_ARGS
4341
4342     // Record the outgoing argument size.  If the call is a fast tail
4343     // call, it will set up its arguments in the incoming arg area instead
4344     // of the outgoing arg area, so we don't need to track the
4345     // outgoing arg size.
4346     if (!call->IsFastTailCall())
4347     {
4348         unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4349
4350 #if defined(UNIX_AMD64_ABI)
4351         opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4352
4353         // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4354
4355         // First slots go in registers only, no stack needed.
4356         // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4357         // and ignores floating point args (it is overly conservative in that case).
4358         preallocatedArgCount = nonRegPassedStructSlots;
4359         if (argSlots > MAX_REG_ARG)
4360         {
4361             preallocatedArgCount += argSlots - MAX_REG_ARG;
4362         }
4363 #endif // UNIX_AMD64_ABI
4364
4365         const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4366         call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
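        // Illustrative example: 6 outgoing stack slots on x64 give 6 * 8 = 48 bytes;
        // the max() keeps at least MIN_ARG_AREA_FOR_CALL, the ABI-required minimum
        // (e.g. the 32-byte home area for the four register args on Windows x64).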
4367
4368 #ifdef DEBUG
4369         if (verbose)
4370         {
4371             printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4372                    preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4373         }
4374 #endif
4375     }
4376 #endif // FEATURE_FIXED_OUT_ARGS
4377
4378     /* Update the 'side effect' flags value for the call */
4379
4380     call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4381
4382     // If the register arguments have already been determined
4383     // or we have no register arguments then we don't need to
4384     // call SortArgs() and EvalArgsToTemps()
4385     //
4386     // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4387     // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4388     // is added to make sure EvalArgsToTemps() is called in those cases.
4389     if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4390 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4391                         || hasStackArgCopy
4392 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4393                         ))
4394     {
4395         // This is the first time that we morph this call AND it has register arguments.
4396         // Follow into the code below and do the 'defer or eval to temp' analysis.
4397
4398         call->fgArgInfo->SortArgs();
4399
4400         call->fgArgInfo->EvalArgsToTemps();
4401
4402         // We may have updated the arguments
4403         if (call->gtCallArgs)
4404         {
4405             UpdateGT_LISTFlags(call->gtCallArgs);
4406         }
4407     }
4408
4409 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4410
4411     // Rewrite the struct args to be passed by value on stack or in registers.
4412     fgMorphSystemVStructArgs(call, hasStructArgument);
4413
4414 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4415
4416 #ifndef LEGACY_BACKEND
4417     // In the future we can migrate UNIX_AMD64 to use this
4418     // method instead of fgMorphSystemVStructArgs
4419
4420     // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
4421     if (hasMultiregStructArgs)
4422     {
4423         fgMorphMultiregStructArgs(call);
4424     }
4425 #endif // !LEGACY_BACKEND
4426
4427 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4428
4429 #ifdef DEBUG
4430     if (verbose)
4431     {
4432         fgArgInfoPtr argInfo = call->fgArgInfo;
4433         for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4434         {
4435             fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4436             curArgEntry->Dump();
4437         }
4438     }
4439 #endif
4440
4441     return call;
4442 }
4443 #ifdef _PREFAST_
4444 #pragma warning(pop)
4445 #endif
4446
4447 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4448 // fgMorphSystemVStructArgs:
4449 //   Rewrite the struct args to be passed by value on stack or in registers.
4450 //
4451 // args:
4452 //   call: The call whose arguments need to be morphed.
4453 //   hasStructArgument: Whether this call has struct arguments.
4454 //
4455 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4456 {
4457     unsigned   flagsSummary = 0;
4458     GenTreePtr args;
4459     GenTreePtr argx;
4460
4461     if (hasStructArgument)
4462     {
4463         fgArgInfoPtr allArgInfo = call->fgArgInfo;
4464
4465         for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4466         {
4467             // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4468             // For such late args the gtCallArgList contains the setup arg node (evaluating the arg).
4469             // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4470             // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the arg itself;
4471             // otherwise it points to the matching node in the late arg list.
4472             bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4473             fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4474             assert(fgEntryPtr != nullptr);
4475             GenTreePtr argx     = fgEntryPtr->node;
4476             GenTreePtr lateList = nullptr;
4477             GenTreePtr lateNode = nullptr;
4478
4479             if (isLateArg)
4480             {
4481                 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4482                 {
4483                     assert(list->OperIsList());
4484
4485                     GenTreePtr argNode = list->Current();
4486                     if (argx == argNode)
4487                     {
4488                         lateList = list;
4489                         lateNode = argNode;
4490                         break;
4491                     }
4492                 }
4493                 assert(lateList != nullptr && lateNode != nullptr);
4494             }
4495             GenTreePtr arg            = argx;
4496             bool       argListCreated = false;
4497
4498             var_types type = arg->TypeGet();
4499
4500             if (varTypeIsStruct(type))
4501             {
4502                 var_types originalType = type;
4503                 // If we have already processed the arg...
4504                 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
4505                 {
4506                     continue;
4507                 }
4508
4509                 // If already OBJ it is set properly already.
4510                 // If it is already a GT_OBJ, it is set up properly.
4511                 {
4512                     assert(!fgEntryPtr->structDesc.passedInRegisters);
4513                     continue;
4514                 }
4515
4516                 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4517                        (arg->OperGet() == GT_ADDR &&
4518                         (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4519
4520                 GenTreeLclVarCommon* lclCommon =
4521                     arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4522                 if (fgEntryPtr->structDesc.passedInRegisters)
4523                 {
4524                     if (fgEntryPtr->structDesc.eightByteCount == 1)
4525                     {
4526                         // Change the type; the code below will change the LclVar to a LCL_FLD
4527                         type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4528                                                                  fgEntryPtr->structDesc.eightByteSizes[0]);
4529                     }
4530                     else if (fgEntryPtr->structDesc.eightByteCount == 2)
4531                     {
4532                         // Create LCL_FLD for each eightbyte.
4533                         argListCreated = true;
4534
4535                         // First eightbyte.
4536                         arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4537                         arg->gtType =
4538                             GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4539                                                               fgEntryPtr->structDesc.eightByteSizes[0]);
4540                         GenTreeFieldList* fieldList =
4541                             new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4542                         fieldList->gtType = originalType; // Preserve the type. It is a special case.
4543                         arg               = fieldList;
4544
4545                         // Second eightbyte.
4546                         GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4547                             GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4548                                                                                 .eightByteClassifications[1],
4549                                                                             fgEntryPtr->structDesc.eightByteSizes[1]),
4550                                           lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4551
4552                         fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4553                         fieldList->gtType       = originalType; // Preserve the type. It is a special case.
4554                         newLclField->gtFieldSeq = FieldSeqStore::NotAField();
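                        // Illustrative example: a struct { long l; double d; } held in V02
                        // becomes FIELD_LIST(LCL_FLD long V02 [+0],
                        //                    FIELD_LIST(LCL_FLD double V02 [+8], nullptr)),
                        // one integer eightbyte and one SSE eightbyte, each read from its
                        // own offset within the local.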
4555                     }
4556                     else
4557                     {
4558                         assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
4559                                                                                        // for the CLR.
4560                     }
4561                 }
4562
4563                 // If we didn't change the type of the struct, it means
4564                 // its classification doesn't allow it to be passed directly in
4565                 // registers, so we need to pass a pointer to the destination
4566                 // where we copied the struct.
4567                 if (!argListCreated)
4568                 {
4569                     if (fgEntryPtr->structDesc.passedInRegisters)
4570                     {
4571                         arg->gtType = type;
4572                     }
4573                     else
4574                     {
4575                         // Make sure this is an addr node.
4576                         if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4577                         {
4578                             arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4579                         }
4580
4581                         assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4582
4583                         // Create an Obj of the temp to use it as a call argument.
4584                         arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4585                     }
4586                 }
4587             }
4588
4589             if (argx != arg)
4590             {
4591                 bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4592                 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4593                 assert(fgEntryPtr != nullptr);
4594                 GenTreePtr argx     = fgEntryPtr->node;
4595                 GenTreePtr lateList = nullptr;
4596                 GenTreePtr lateNode = nullptr;
4597                 if (isLateArg)
4598                 {
4599                     for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4600                     {
4601                         assert(list->OperIsList());
4602
4603                         GenTreePtr argNode = list->Current();
4604                         if (argx == argNode)
4605                         {
4606                             lateList = list;
4607                             lateNode = argNode;
4608                             break;
4609                         }
4610                     }
4611                     assert(lateList != nullptr && lateNode != nullptr);
4612                 }
4613
4614                 fgEntryPtr->node = arg;
4615                 if (isLateArg)
4616                 {
4617                     lateList->gtOp.gtOp1 = arg;
4618                 }
4619                 else
4620                 {
4621                     args->gtOp.gtOp1 = arg;
4622                 }
4623             }
4624         }
4625     }
4626
4627     // Update the flags
4628     call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4629 }
4630 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4631
4632 //-----------------------------------------------------------------------------
4633 // fgMorphMultiregStructArgs:  Locate the TYP_STRUCT arguments and
4634 //                             call fgMorphMultiregStructArg on each of them.
4635 //
4636 // Arguments:
4637 //    call:    a GenTreeCall node that has one or more TYP_STRUCT arguments
4638 //
4639 // Notes:
4640 //    We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
4641 //    The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
4642 //    which is only used for struct arguments.
4643 //    If this method fails to find any TYP_STRUCT arguments it will assert.
4644 //
4645 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4646 {
4647     GenTreePtr   args;
4648     GenTreePtr   argx;
4649     bool         foundStructArg = false;
4650     unsigned     initialFlags   = call->gtFlags;
4651     unsigned     flagsSummary   = 0;
4652     fgArgInfoPtr allArgInfo     = call->fgArgInfo;
4653
4654     // Currently ARM64/ARM use this method to morph the MultiReg struct args;
4655     // in the future AMD64_UNIX will also use this method.
4656     CLANG_FORMAT_COMMENT_ANCHOR;
4657
4658 #ifdef _TARGET_X86_
4659     assert(!"Logic error: no MultiregStructArgs for X86");
4660 #endif
4661 #ifdef _TARGET_AMD64_
4662 #if defined(UNIX_AMD64_ABI)
4663     NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4664 #else  // WINDOWS_AMD64_ABI
4665     assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4666 #endif // !UNIX_AMD64_ABI
4667 #endif
4668
4669     for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4670     {
4671         // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4672         // For such late args the gtCallArgList contains the setup arg node (evaluating the arg).
4673         // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4674         // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the arg itself;
4675         // otherwise it points to the matching node in the late arg list.
4676         bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4677         fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4678         assert(fgEntryPtr != nullptr);
4679         GenTreePtr argx     = fgEntryPtr->node;
4680         GenTreePtr lateList = nullptr;
4681         GenTreePtr lateNode = nullptr;
4682
4683         if (isLateArg)
4684         {
4685             for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4686             {
4687                 assert(list->OperIsList());
4688
4689                 GenTreePtr argNode = list->Current();
4690                 if (argx == argNode)
4691                 {
4692                     lateList = list;
4693                     lateNode = argNode;
4694                     break;
4695                 }
4696             }
4697             assert(lateList != nullptr && lateNode != nullptr);
4698         }
4699
4700         GenTreePtr arg = argx;
4701
4702         if (arg->TypeGet() == TYP_STRUCT)
4703         {
4704             foundStructArg = true;
4705
4706             arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4707
4708             // Did we replace 'argx' with a new tree?
4709             if (arg != argx)
4710             {
4711                 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4712
4713                 // link the new arg node into either the late arg list or the gtCallArgs list
4714                 if (isLateArg)
4715                 {
4716                     lateList->gtOp.gtOp1 = arg;
4717                 }
4718                 else
4719                 {
4720                     args->gtOp.gtOp1 = arg;
4721                 }
4722             }
4723         }
4724     }
4725
4726     // We should only call this method when we actually have one or more multireg struct args
4727     assert(foundStructArg);
4728
4729     // Update the flags
4730     call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4731 }
4732
4733 //-----------------------------------------------------------------------------
4734 // fgMorphMultiregStructArg:  Given a multireg TYP_STRUCT arg from a call argument list
4735 //   Morph the argument into a set of GT_FIELD_LIST nodes.
4736 //
4737 // Arguments:
4738 //     arg        - A GenTree node containing a TYP_STRUCT arg that
4739 //                  is to be passed in multiple registers
4740 //     fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4741 //
4742 // Notes:
4743 //    arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4744 //    for passing in multiple registers.
4745 //    If arg is a LclVar, we check whether it is struct promoted with the right number of fields
4746 //    at the appropriate offsets; if so, we use the struct promoted fields
4747 //    in the GT_FIELD_LIST nodes that we create.
4748 //    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4749 //    we use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4750 //    this also forces the struct to be stack allocated in the local frame.
4751 //    For the GT_OBJ case, we clone the address expression and generate two (or more)
4752 //    indirections.
4753 //    Currently the implementation handles ARM64/ARM and will NYI for other architectures.
4754 //
4755 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4756 {
4757     assert(arg->TypeGet() == TYP_STRUCT);
4758
4759 #ifndef _TARGET_ARMARCH_
4760     NYI("fgMorphMultiregStructArg requires implementation for this target");
4761 #endif
4762
4763 #if FEATURE_MULTIREG_ARGS
4764     // Examine 'arg' and setup argValue objClass and structSize
4765     //
4766     CORINFO_CLASS_HANDLE objClass   = NO_CLASS_HANDLE;
4767     GenTreePtr           argValue   = arg; // normally argValue will be arg, but see right below
4768     unsigned             structSize = 0;
4769
4770     if (arg->OperGet() == GT_OBJ)
4771     {
4772         GenTreeObj* argObj = arg->AsObj();
4773         objClass           = argObj->gtClass;
4774         structSize         = info.compCompHnd->getClassSize(objClass);
4775
4776         // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4777         //
4778         if (argObj->gtOp1->OperGet() == GT_ADDR)
4779         {
4780             argValue = argObj->gtOp1->gtOp.gtOp1;
4781         }
4782     }
4783     else if (arg->OperGet() == GT_LCL_VAR)
4784     {
4785         GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4786         unsigned             varNum  = varNode->gtLclNum;
4787         assert(varNum < lvaCount);
4788         LclVarDsc* varDsc = &lvaTable[varNum];
4789
4790         objClass   = lvaGetStruct(varNum);
4791         structSize = varDsc->lvExactSize;
4792     }
4793     noway_assert(objClass != nullptr);
4794
4795     var_types hfaType                 = TYP_UNDEF;
4796     var_types elemType                = TYP_UNDEF;
4797     unsigned  elemCount               = 0;
4798     unsigned  elemSize                = 0;
4799     var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4800
4801     hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4802     if (varTypeIsFloating(hfaType))
4803     {
4804         elemType  = hfaType;
4805         elemSize  = genTypeSize(elemType);
4806         elemCount = structSize / elemSize;
4807         assert(elemSize * elemCount == structSize);
4808         for (unsigned inx = 0; inx < elemCount; inx++)
4809         {
4810             type[inx] = elemType;
4811         }
4812     }
4813     else
4814     {
4815 #ifdef _TARGET_ARM64_
4816         assert(structSize <= 2 * TARGET_POINTER_SIZE);
4817 #elif defined(_TARGET_ARM_)
4818         assert(structSize <= 4 * TARGET_POINTER_SIZE);
4819 #endif
4820
4821 #ifdef _TARGET_ARM64_
4822         BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
4823         info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4824         elemCount = 2;
4825         type[0]   = getJitGCType(gcPtrs[0]);
4826         type[1]   = getJitGCType(gcPtrs[1]);
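        // Illustrative example: a struct { object o; long l; } yields type[0] == TYP_REF
        // and type[1] == TYP_I_IMPL, so the first register load is reported to the GC
        // while the second is not.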
4827 #elif defined(_TARGET_ARM_)
4828         BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
4829         elemCount      = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
4830         info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4831         for (unsigned inx = 0; inx < elemCount; inx++)
4832         {
4833             type[inx] = getJitGCType(gcPtrs[inx]);
4834         }
4835 #endif // _TARGET_ARM_
4836
4837         if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4838         {
4839             elemSize = TARGET_POINTER_SIZE;
4840             // We can safely widen the load to a multiple of TARGET_POINTER_SIZE since we are loading from
4841             // a GT_LCL_VAR or a GT_LCL_FLD, which is properly padded and
4842             // lives in the stack frame or will be a promoted field.
4843             //
4844             structSize = elemCount * TARGET_POINTER_SIZE;
4845         }
4846         else // we must have a GT_OBJ
4847         {
4848             assert(argValue->OperGet() == GT_OBJ);
4849
4850             // We need to load the struct from an arbitrary address
4851             // and we can't read past the end of the structSize
4852             // We adjust the last load type here
4853             //
4854             unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
4855             unsigned lastElem       = elemCount - 1;
4856             if (remainingBytes != 0)
4857             {
4858                 switch (remainingBytes)
4859                 {
4860                     case 1:
4861                         type[lastElem] = TYP_BYTE;
4862                         break;
4863                     case 2:
4864                         type[lastElem] = TYP_SHORT;
4865                         break;
4866 #ifdef _TARGET_ARM64_
4867                     case 4:
4868                         type[lastElem] = TYP_INT;
4869                         break;
4870 #endif // _TARGET_ARM64_
4871                     default:
4872                         noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
4873                         break;
4874                 }
4875             }
4876         }
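            // Illustrative example (ARM64): a 12-byte struct read through a GT_OBJ
            // uses one 8-byte load plus a final 4-byte (TYP_INT) load, so we never
            // read past the end of the struct.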
4877     }
4878     // We should still have a TYP_STRUCT
4879     assert(argValue->TypeGet() == TYP_STRUCT);
4880
4881     GenTreeFieldList* newArg = nullptr;
4882
4883     // Are we passing a struct LclVar?
4884     //
4885     if (argValue->OperGet() == GT_LCL_VAR)
4886     {
4887         GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4888         unsigned             varNum  = varNode->gtLclNum;
4889         assert(varNum < lvaCount);
4890         LclVarDsc* varDsc = &lvaTable[varNum];
4891
4892         // At this point any TYP_STRUCT LclVar must be an aligned struct
4893         // or an HFA struct, both which are passed by value.
4894         // or an HFA struct, both of which are passed by value.
4895         assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4896
4897         varDsc->lvIsMultiRegArg = true;
4898
4899 #ifdef DEBUG
4900         if (verbose)
4901         {
4902             JITDUMP("Multireg struct argument V%02u : ", varNum);
4903             fgEntryPtr->Dump();
4904         }
4905 #endif // DEBUG
4906
4907         // This local variable must match the layout of the 'objClass' type exactly
4908         if (varDsc->lvIsHfa())
4909         {
4910             // We have a HFA struct
4911             noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4912             noway_assert(elemSize == genTypeSize(elemType));
4913             noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4914             noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4915
4916             for (unsigned inx = 0; (inx < elemCount); inx++)
4917             {
4918                 noway_assert(type[inx] == elemType);
4919             }
4920         }
4921         else
4922         {
4923 #ifdef _TARGET_ARM64_
4924             // We must have a 16-byte struct (non-HFA)
4925             noway_assert(elemCount == 2);
4926 #elif defined(_TARGET_ARM_)
4927             noway_assert(elemCount <= 4);
4928 #endif
4929
4930             for (unsigned inx = 0; inx < elemCount; inx++)
4931             {
4932                 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4933
4934                 // We set up the type[inx] value above using the GC info from 'objClass'.
4935                 // This GT_LCL_VAR must have the same GC layout info
4936                 //
4937                 if (currentGcLayoutType != TYPE_GC_NONE)
4938                 {
4939                     noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4940                 }
4941                 else
4942                 {
4943                     // We may have used a small type when we set up the type[inx] values above.
4944                     // We can safely widen this to TYP_I_IMPL
4945                     type[inx] = TYP_I_IMPL;
4946                 }
4947             }
4948         }
4949
4950 #ifdef _TARGET_ARM64_
4951         // Is this LclVar a promoted struct with exactly 2 fields?
4952         // TODO-ARM64-CQ: Support struct promoted HFA types here
4953         if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
4954         {
4955             // See if we have two promoted fields that start at offsets 0 and 8.
4956             unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4957             unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4958
4959             // Did we find the promoted fields at the necessary offsets?
4960             if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4961             {
4962                 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4963                 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4964
4965                 var_types loType = loVarDsc->lvType;
4966                 var_types hiType = hiVarDsc->lvType;
4967
4968                 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4969                 {
4970                     // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4971                     // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4972                     //
4973                     JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4974                             varNum);
4975                     //
4976                     // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4977                     //
4978                 }
4979                 else
4980                 {
4981                     // We can use the struct promoted field as the two arguments
4982
4983                     GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
4984                     GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
4985
4986                     // Create a new tree for 'arg'
4987                     //    replace the existing LDOBJ(ADDR(LCLVAR))
4988                     //    with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
4989                     //
4990                     newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
4991                     (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
4992                 }
4993             }
4994         }
4995         else
4996         {
4997             //
4998             // We will create a list of GT_LCL_FLD nodes to pass this struct
4999             //
5000             lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5001         }
5002 #elif defined(_TARGET_ARM_)
5003         // Is this LclVar a promoted struct with the same number of fields as register slots?
5004         if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
5005         {
5006             // Look up the promoted field locals at each slot offset.
5007             unsigned varNums[4];
5008             bool     hasBadVarNum = false;
5009             for (unsigned inx = 0; inx < elemCount; inx++)
5010             {
5011                 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
5012                 if (varNums[inx] == BAD_VAR_NUM)
5013                 {
5014                     hasBadVarNum = true;
5015                     break;
5016                 }
5017             }
5018
5019             // Did we find the promoted fields at the necessary offsets?
5020             if (!hasBadVarNum)
5021             {
5022                 LclVarDsc* varDscs[4];
5023                 var_types  varType[4];
5024                 bool       varIsFloat = false;
5025
5026                 for (unsigned inx = 0; inx < elemCount; inx++)
5027                 {
5028                     varDscs[inx] = &lvaTable[varNums[inx]];
5029                     varType[inx] = varDscs[inx]->lvType;
5030                     if (varTypeIsFloating(varType[inx]))
5031                     {
5032                         // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
5033                         // integer registers. So for now we will use GT_LCLFLD's to pass this struct
5034                         // (it won't be enregistered)
5035                         //
5036                         JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5037                                 varNum);
5038                         //
5039                         // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5040                         //
5041                         varIsFloat = true;
5042                         break;
5043                     }
5044                 }
5045
5046                 if (!varIsFloat)
5047                 {
5048                     unsigned          offset    = 0;
5049                     GenTreeFieldList* listEntry = nullptr;
5050                     // We can use the struct promoted field as arguments
5051                     for (unsigned inx = 0; inx < elemCount; inx++)
5052                     {
5053                         GenTreePtr lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
5054                         // Create a new tree for 'arg'
5055                         //    replace the existing LDOBJ(ADDR(LCLVAR))
5056                         listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
5057                         if (newArg == nullptr)
5058                         {
5059                             newArg = listEntry;
5060                         }
5061                         offset += TARGET_POINTER_SIZE;
5062                     }
5063                 }
5064             }
5065         }
5066         else
5067         {
5068             //
5069             // We will create a list of GT_LCL_FLD nodes to pass this struct
5070             //
5071             lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5072         }
5073 #endif // _TARGET_ARM_
5074     }
5075
5076     // If we didn't set newArg to a new GT_FIELD_LIST tree
5077     //
5078     if (newArg == nullptr)
5079     {
5080         if (fgEntryPtr->regNum == REG_STK)
5081         {
5082             // We leave this stack passed argument alone
5083             return arg;
5084         }
5085
5086         // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
5087         // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct within it.
5088         //
5089         if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5090         {
5091             GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5092             unsigned             varNum  = varNode->gtLclNum;
5093             assert(varNum < lvaCount);
5094             LclVarDsc* varDsc = &lvaTable[varNum];
5095
5096             unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
5097             unsigned lastOffset = baseOffset + (elemCount * elemSize);
5098
5099             // The allocated size of our LocalVar must be at least as big as lastOffset
5100             assert(varDsc->lvSize() >= lastOffset);
5101
5102             if (varDsc->lvStructGcCount > 0)
5103             {
5104                 // alignment of the baseOffset is required
5105                 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
5106                 noway_assert(elemSize == TARGET_POINTER_SIZE);
5107                 unsigned    baseIndex = baseOffset / TARGET_POINTER_SIZE;
5108                 const BYTE* gcPtrs    = varDsc->lvGcLayout; // Get the GC layout for the local variable
5109                 for (unsigned inx = 0; (inx < elemCount); inx++)
5110                 {
5111                     // The GC information must match what we setup using 'objClass'
5112                     noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
5113                 }
5114             }
5115             else //  this varDsc contains no GC pointers
5116             {
5117                 for (unsigned inx = 0; inx < elemCount; inx++)
5118                 {
5119                     // The GC information must match what we setup using 'objClass'
5120                     noway_assert(!varTypeIsGC(type[inx]));
5121                 }
5122             }
5123
5124             //
5125             // We create a list of GT_LCL_FLD nodes to pass this struct
5126             //
5127             lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5128
5129             // Create a new tree for 'arg'
5130             //    replace the existing LDOBJ(ADDR(LCLVAR))
5131             //    with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
5132             //
5133             unsigned          offset    = baseOffset;
5134             GenTreeFieldList* listEntry = nullptr;
5135             for (unsigned inx = 0; inx < elemCount; inx++)
5136             {
5137                 elemSize              = genTypeSize(type[inx]);
5138                 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
5139                 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
5140                 if (newArg == nullptr)
5141                 {
5142                     newArg = listEntry;
5143                 }
5144                 offset += elemSize;
5145             }
5146         }
5147         // Are we passing a GT_OBJ struct?
5148         //
5149         else if (argValue->OperGet() == GT_OBJ)
5150         {
5151             GenTreeObj* argObj   = argValue->AsObj();
5152             GenTreePtr  baseAddr = argObj->gtOp1;
5153             var_types   addrType = baseAddr->TypeGet();
5154
5155             // Create a new tree for 'arg'
5156             //    replace the existing LDOBJ(EXPR)
5157             //    with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5158             //
5159
5160             unsigned          offset    = 0;
5161             GenTreeFieldList* listEntry = nullptr;
5162             for (unsigned inx = 0; inx < elemCount; inx++)
5163             {
5164                 elemSize           = genTypeSize(type[inx]);
5165                 GenTreePtr curAddr = baseAddr;
5166                 if (offset != 0)
5167                 {
5168                     GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
5169                     noway_assert(baseAddrDup != nullptr);
5170                     curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5171                 }
5172                 else
5173                 {
5174                     curAddr = baseAddr;
5175                 }
5176                 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
5177
5178                 // For safety all GT_IND should have at least GT_GLOB_REF set.
5179                 curItem->gtFlags |= GTF_GLOB_REF;
5180                 if (fgAddrCouldBeNull(curItem))
5181                 {
5182                     // This indirection can cause a GPF if the address could be null.
5183                     curItem->gtFlags |= GTF_EXCEPT;
5184                 }
5185
5186                 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5187                 if (newArg == nullptr)
5188                 {
5189                     newArg = listEntry;
5190                 }
5191                 offset += elemSize;
5192             }
5193         }
5194     }
5195
5196 #ifdef DEBUG
5197     // If we reach here we should have set newArg to something
5198     if (newArg == nullptr)
5199     {
5200         gtDispTree(argValue);
5201         assert(!"Missing case in fgMorphMultiregStructArg");
5202     }
5203
5204     if (verbose)
5205     {
5206         printf("fgMorphMultiregStructArg created tree:\n");
5207         gtDispTree(newArg);
5208     }
5209 #endif
5210
5211     arg = newArg; // consider calling fgMorphTree(newArg);
5212
5213 #endif // FEATURE_MULTIREG_ARGS
5214
5215     return arg;
5216 }
5217
// Make a copy of a struct variable if necessary, to pass to a callee.
// The args entry is updated in place to refer to the copy (or to the original
// local when the copy can be elided); nothing is returned.
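//
// A rough sketch (illustrative, not the exact tree shapes): for a call Foo(s), where
// 's' is a struct local that may still be live after the call, this produces
//     tmp = s;    // struct copy (CopyBlk), morphed by fgMorphCopyBlock
//     Foo(tmp);   // the fgArgInfo entry is retargeted to 'tmp' via EvalToTmp
// When 's' is provably at its last use (single reference, no loops) and this is not a
// tail call via helper, the copy is elided and the local is passed directly.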
5220 void Compiler::fgMakeOutgoingStructArgCopy(
5221     GenTreeCall*         call,
5222     GenTree*             args,
5223     unsigned             argIndex,
5224     CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5225         const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5226 {
5227     GenTree* argx = args->Current();
5228     noway_assert(argx->gtOper != GT_MKREFANY);
5229     // See if we need to insert a copy at all
    // Case 1: we don't need a copy if this is the last use of a local.  We can't determine that in all
    // cases, but if there is only one use and no loops, the use must be the last one.
5232     GenTreeLclVarCommon* lcl = nullptr;
5233     if (argx->OperIsLocal())
5234     {
5235         lcl = argx->AsLclVarCommon();
5236     }
5237     else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5238     {
5239         lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5240     }
5241     if (lcl != nullptr)
5242     {
5243         unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5244         if (lvaIsImplicitByRefLocal(varNum))
5245         {
5246             LclVarDsc* varDsc = &lvaTable[varNum];
5247             // JIT_TailCall helper has an implicit assumption that all tail call arguments live
            // on the caller's frame. If an argument lives on the caller's frame, it may get
5249             // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5250             // struct parameters if they are passed as arguments to a tail call.
5251             if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5252             {
5253                 varDsc->lvRefCnt    = 0;
5254                 args->gtOp.gtOp1    = lcl;
5255                 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
5256                 fp->node            = lcl;
5257
5258                 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5259                 return;
5260             }
5261         }
5262     }
5263
5264     if (fgOutgoingArgTemps == nullptr)
5265     {
5266         fgOutgoingArgTemps = hashBv::Create(this);
5267     }
5268
5269     unsigned tmp   = 0;
5270     bool     found = false;
5271
5272     // Attempt to find a local we have already used for an outgoing struct and reuse it.
5273     // We do not reuse within a statement.
5274     if (!opts.MinOpts())
5275     {
5276         indexType lclNum;
5277         FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5278         {
5279             LclVarDsc* varDsc = &lvaTable[lclNum];
5280             if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5281                 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5282             {
5283                 tmp   = (unsigned)lclNum;
5284                 found = true;
5285                 JITDUMP("reusing outgoing struct arg");
5286                 break;
5287             }
5288         }
5289         NEXT_HBV_BIT_SET;
5290     }
5291
5292     // Create the CopyBlk tree and insert it.
5293     if (!found)
5294     {
5295         // Get a new temp
        // Here we don't need the unsafe value class check, since the address of this temp is used only in the copyblk.
5297         tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5298         lvaSetStruct(tmp, copyBlkClass, false);
5299         fgOutgoingArgTemps->setBit(tmp);
5300     }
5301
5302     fgCurrentlyInUseArgTemps->setBit(tmp);
5303
    // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
    // allocated on the stack and their address to be passed.
5306     if (lclVarIsSIMDType(tmp))
5307     {
5308         lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5309     }
5310
5311     // Create a reference to the temp
5312     GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5313     dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5314
5315     // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5316     // to ref counting of the lclVars.
5317     lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5318
5319     GenTreePtr src;
5320     if (argx->gtOper == GT_OBJ)
5321     {
        // Keep argx's effect flags only where its address expression also has them
        // (the '&=' clears any GTF_ALL_EFFECT bits that are not present on the address).
        argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5323     }
5324     else
5325     {
5326         argx->gtFlags |= GTF_DONT_CSE;
5327     }
5328
5329     // Copy the valuetype to the temp
5330     unsigned   size    = info.compCompHnd->getClassSize(copyBlkClass);
5331     GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5332     copyBlk            = fgMorphCopyBlock(copyBlk);
5333
5334 #if FEATURE_FIXED_OUT_ARGS
5335
    // Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
    // On Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode.
5338     GenTreePtr arg = copyBlk;
5339
5340 #else // FEATURE_FIXED_OUT_ARGS
5341
    // Structs are always on the stack, and thus never need temps,
    // so we have to put the copy and temp all into one expression.
5344     GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5345
5346     // Change the expression to "(tmp=val),tmp"
5347     arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5348
5349 #endif // FEATURE_FIXED_OUT_ARGS
5350
5351     args->gtOp.gtOp1 = arg;
5352     call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5353
5354     return;
5355 }
5356
5357 #ifdef _TARGET_ARM_
5358 // See declaration for specification comment.
5359 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5360                                                    unsigned   firstArgRegNum,
5361                                                    regMaskTP* pArgSkippedRegMask)
5362 {
5363     assert(varDsc->lvPromoted);
5364     // There's no way to do these calculations without breaking abstraction and assuming that
5365     // integer register arguments are consecutive ints.  They are on ARM.
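    // For example (illustrative, with 4-byte registers): for a promoted
    //     struct { int a; double b; }
    // starting at r0, field 'a' ends in r0 while the 8-byte-aligned field 'b' starts
    // at offset 8 (register r2), so r1 is skipped and added to *pArgSkippedRegMask.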
5366
    // To start, figure out what register contains the last byte of the first field.
5368     LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5369     unsigned   lastFldRegOfLastByte =
5370         (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5372
5373     // Now we're keeping track of the register that the last field ended in; see what registers
5374     // subsequent fields start in, and whether any are skipped.
5375     // (We assume here the invariant that the fields are sorted in offset order.)
5376     for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5377     {
5378         unsigned   fldVarNum    = varDsc->lvFieldLclStart + fldVarOffset;
5379         LclVarDsc* fldVarDsc    = &lvaTable[fldVarNum];
5380         unsigned   fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5381         assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
        // This loop enumerates the offsets of any registers skipped: find what reg contains
        // the last byte of the previous field, and start at the first register after that.
        // Every register before the current field's first register must have been skipped.
5385         for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5386              skippedRegOffsets++)
5387         {
5388             // If the register number would not be an arg reg, we're done.
5389             if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5390                 return;
5391             *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5392         }
5393         lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5394     }
5395 }
5396
5397 #endif // _TARGET_ARM_
5398
5399 //****************************************************************************
5400 //  fgFixupStructReturn:
//    The companion to impFixupCallStructReturn.  Now that the importer is done,
//    change the gtType to the precomputed native return type.
//    Requires that callNode currently has a struct type.
5404 //
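//    For example (illustrative): a call whose gtRetClsHnd is struct { int x; } comes back
//    from getReturnTypeForStruct as SPK_PrimitiveType with TYP_INT, so the call node is
//    re-typed from TYP_STRUCT to TYP_INT here.
//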
5405 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5406 {
5407     assert(varTypeIsStruct(callNode));
5408
5409     GenTreeCall* call              = callNode->AsCall();
5410     bool         callHasRetBuffArg = call->HasRetBufArg();
5411     bool         isHelperCall      = call->IsHelperCall();
5412
5413     // Decide on the proper return type for this call that currently returns a struct
5414     //
5415     CORINFO_CLASS_HANDLE        retClsHnd = call->gtRetClsHnd;
5416     Compiler::structPassingKind howToReturnStruct;
5417     var_types                   returnType;
5418
5419     // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5420     // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5421     //
5422     //    CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5423     //    CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5424     //    CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5425     //
5426     if (isHelperCall)
5427     {
5428         assert(!callHasRetBuffArg);
5429         assert(retClsHnd == NO_CLASS_HANDLE);
5430
5431         // Now that we are past the importer, re-type this node
5432         howToReturnStruct = SPK_PrimitiveType;
5433         returnType        = (var_types)call->gtReturnType;
5434     }
5435     else
5436     {
5437         returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5438     }
5439
5440     if (howToReturnStruct == SPK_ByReference)
5441     {
5442         assert(returnType == TYP_UNKNOWN);
5443         assert(callHasRetBuffArg);
5444     }
5445     else
5446     {
5447         assert(returnType != TYP_UNKNOWN);
5448
5449         if (returnType != TYP_STRUCT)
5450         {
5451             // Widen the primitive type if necessary
5452             returnType = genActualType(returnType);
5453         }
5454         call->gtType = returnType;
5455     }
5456
5457 #if FEATURE_MULTIREG_RET
    // Either we no longer have a struct, or, if we do, it is returned in registers or via the return buffer.
5459     assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5460 #else // !FEATURE_MULTIREG_RET
5461     // No more struct returns
5462     assert(call->TypeGet() != TYP_STRUCT);
5463 #endif
5464
5465 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5466     // If it was a struct return, it has been transformed into a call
5467     // with a return buffer (that returns TYP_VOID) or into a return
5468     // of a primitive/enregisterable type
5469     assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5470 #endif
5471 }
5472
5473 /*****************************************************************************
5474  *
5475  *  A little helper used to rearrange nested commutative operations. The
5476  *  effect is that nested associative, commutative operations are transformed
5477  *  into a 'left-deep' tree, i.e. into something like this:
5478  *
5479  *      (((a op b) op c) op d) op...
5480  */
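/*
 *  For example (illustrative): a + (b + (c + d)) becomes ((a + b) + c) + d.
 *  Each iteration of the loop below rewrites "(x op (y op z))" into
 *  "((x op y) op z)" and then continues with the new right-hand operand.
 */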
5481
5482 #if REARRANGE_ADDS
5483
5484 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5485 {
5486     GenTreePtr op1;
5487     GenTreePtr op2;
5488     genTreeOps oper;
5489
5490     do
5491     {
5492         op1  = tree->gtOp.gtOp1;
5493         op2  = tree->gtOp.gtOp2;
5494         oper = tree->OperGet();
5495
5496         noway_assert(GenTree::OperIsCommutative(oper));
5497         noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5498         noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5499         noway_assert(oper == op2->gtOper);
5500
5501         // Commutativity doesn't hold if overflow checks are needed
5502
5503         if (tree->gtOverflowEx() || op2->gtOverflowEx())
5504         {
5505             return;
5506         }
5507
5508         if (gtIsActiveCSE_Candidate(op2))
5509         {
5510             // If we have marked op2 as a CSE candidate,
5511             // we can't perform a commutative reordering
5512             // because any value numbers that we computed for op2
5513             // will be incorrect after performing a commutative reordering
5514             //
5515             return;
5516         }
5517
5518         if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5519         {
5520             return;
5521         }
5522
5523         // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5524         if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5525         {
5526             return;
5527         }
5528
5529         if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5530         {
            // We could deal with this, but this case has always been broken and simply hit
            // the assert below regarding flags, which means it's not frequent, so we just bail out.
            // See #195514
5534             return;
5535         }
5536
5537         noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5538
5539         GenTreePtr ad1 = op2->gtOp.gtOp1;
5540         GenTreePtr ad2 = op2->gtOp.gtOp2;
5541
        // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a TYP_INT.
        // We cannot reorder such GT_OR trees.
5544         //
5545         if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5546         {
5547             break;
5548         }
5549
5550         /* Change "(x op (y op z))" to "(x op y) op z" */
5551         /* ie.    "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5552
5553         GenTreePtr new_op1 = op2;
5554
5555         new_op1->gtOp.gtOp1 = op1;
5556         new_op1->gtOp.gtOp2 = ad1;
5557
5558         /* Change the flags. */
5559
        // Make sure we aren't throwing away any flags.
5561         noway_assert((new_op1->gtFlags &
5562                       ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5563                         GTF_REVERSE_OPS |             // The reverse ops flag also can be set, it will be re-calculated
5564                         GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5565
5566         new_op1->gtFlags =
5567             (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5568             (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5569
        /* Retype new_op1 if it has become, or is no longer, a GC pointer. */
5571
5572         if (varTypeIsGC(op1->TypeGet()))
5573         {
5574             noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5575                           oper == GT_ADD) || // byref(ref + (int+int))
5576                          (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5577                           oper == GT_OR)); // int(gcref | int(gcref|intval))
5578
5579             new_op1->gtType = tree->gtType;
5580         }
5581         else if (varTypeIsGC(ad2->TypeGet()))
5582         {
            // Neither ad1 nor op1 is a GC pointer, so new_op1 isn't either.
5584             noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5585             new_op1->gtType = TYP_I_IMPL;
5586         }
5587
        // If new_op1 is a new expression, assign it a new unique value number.
        // (vnStore is null before the value numbering phase has run.)
5590         if (vnStore != nullptr)
5591         {
5592             // We can only keep the old value number on new_op1 if both op1 and ad2
5593             // have the same non-NoVN value numbers. Since op is commutative, comparing
5594             // only ad2 and op1 is enough.
5595             if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5596                 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5597                 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5598             {
5599                 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5600             }
5601         }
5602
5603         tree->gtOp.gtOp1 = new_op1;
5604         tree->gtOp.gtOp2 = ad2;
5605
5606         /* If 'new_op1' is now the same nested op, process it recursively */
5607
5608         if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5609         {
5610             fgMoveOpsLeft(new_op1);
5611         }
5612
        /* If 'ad2' is now the same nested op, process it as well.
         * Instead of recursing, we set up op1 and op2 for the next loop iteration.
         */
5616
5617         op1 = new_op1;
5618         op2 = ad2;
5619     } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5620
5621     return;
5622 }
5623
5624 #endif
5625
5626 /*****************************************************************************/
5627
5628 void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
5629 {
5630     GenTreeBoundsChk* bndsChk = nullptr;
5631     SpecialCodeKind   kind    = SCK_RNGCHK_FAIL;
5632
5633 #ifdef FEATURE_SIMD
5634     if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5635 #else  // FEATURE_SIMD
5636     if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5637 #endif // FEATURE_SIMD
5638     {
5639         bndsChk = tree->AsBoundsChk();
5640         kind    = tree->gtBoundsChk.gtThrowKind;
5641     }
5642     else
5643     {
5644         noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5645     }
5646
5647 #ifdef _TARGET_X86_
5648     unsigned callStkDepth = fgPtrArgCntCur;
5649 #else
5650     // only x86 pushes args
5651     const unsigned callStkDepth = 0;
5652 #endif
5653
5654     if (opts.MinOpts())
5655     {
5656         delay = false;
5657
5658         // we need to initialize this field
5659         if (fgGlobalMorph && bndsChk != nullptr)
5660         {
5661             bndsChk->gtStkDepth = callStkDepth;
5662         }
5663     }
5664
5665     if (!opts.compDbgCode)
5666     {
5667         if (delay || compIsForInlining())
5668         {
5669             /*  We delay this until after loop-oriented range check
5670                 analysis. For now we merely store the current stack
5671                 level in the tree node.
5672              */
5673             if (bndsChk != nullptr)
5674             {
5675                 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5676                 bndsChk->gtStkDepth = callStkDepth;
5677             }
5678         }
5679         else
5680         {
5681             /* Create/find the appropriate "range-fail" label */
5682
            // fgPtrArgCntCur is only valid during global morph or if we walk the full statement.
5684             noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5685
5686             unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
5687
5688             BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5689
5690             /* Add the label to the indirection node */
5691
5692             if (bndsChk != nullptr)
5693             {
5694                 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5695             }
5696         }
5697     }
5698 }
5699
5700 /*****************************************************************************
5701  *
5702  *  Expand a GT_INDEX node and fully morph the child operands
5703  *
 *  The original GT_INDEX node is bashed into the GT_IND node that accesses
5705  *  the array element.  We expand the GT_INDEX node into a larger tree that
5706  *  evaluates the array base and index.  The simplest expansion is a GT_COMMA
 *  with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
5708  *  For complex array or index expressions one or more GT_COMMA assignments
5709  *  are inserted so that we only evaluate the array or index expressions once.
5710  *
5711  *  The fully expanded tree is then morphed.  This causes gtFoldExpr to
5712  *  perform local constant prop and reorder the constants in the tree and
5713  *  fold them.
5714  *
5715  *  We then parse the resulting array element expression in order to locate
5716  *  and label the constants and variables that occur in the tree.
5717  */
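/*
 *  For example (illustrative), for "a[i]" with 4-byte elements the simple expansion is:
 *
 *      COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
 *            IND(ADD(ADD(a, MUL(i, 4)), elemOffs)))
 *
 *  where elemOffs is the offset of the first element (offsetof(CORINFO_Array, u1Elems)
 *  for a standard array).
 */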
5718
5719 const int MAX_ARR_COMPLEXITY   = 4;
5720 const int MAX_INDEX_COMPLEXITY = 4;
5721
5722 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5723 {
5724     noway_assert(tree->gtOper == GT_INDEX);
5725     GenTreeIndex* asIndex = tree->AsIndex();
5726
5727     var_types            elemTyp        = tree->TypeGet();
5728     unsigned             elemSize       = tree->gtIndex.gtIndElemSize;
5729     CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5730
5731     noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5732
5733 #ifdef FEATURE_SIMD
5734     if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5735     {
5736         // If this is a SIMD type, this is the point at which we lose the type information,
5737         // so we need to set the correct type on the GT_IND.
5738         // (We don't care about the base type here, so we only check, but don't retain, the return value).
5739         unsigned simdElemSize = 0;
5740         if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5741         {
5742             assert(simdElemSize == elemSize);
5743             elemTyp = getSIMDTypeForSize(elemSize);
5744             // This is the new type of the node.
5745             tree->gtType = elemTyp;
5746             // Now set elemStructType to null so that we don't confuse value numbering.
5747             elemStructType = nullptr;
5748         }
5749     }
5750 #endif // FEATURE_SIMD
5751
5752     GenTreePtr arrRef = asIndex->Arr();
5753     GenTreePtr index  = asIndex->Index();
5754
    // Set up the array length's offset into lenOffs
    // and the first element's offset into elemOffs.
5757     ssize_t lenOffs;
5758     ssize_t elemOffs;
5759     if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5760     {
5761         lenOffs  = offsetof(CORINFO_String, stringLen);
5762         elemOffs = offsetof(CORINFO_String, chars);
        tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag, as the same bit is reused for GTF_IND_VOLATILE
5764     }
5765     else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5766     {
5767         lenOffs  = offsetof(CORINFO_RefArray, length);
5768         elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5769     }
5770     else // We have a standard array
5771     {
5772         lenOffs  = offsetof(CORINFO_Array, length);
5773         elemOffs = offsetof(CORINFO_Array, u1Elems);
5774     }
5775
5776     bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5777     bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5778
5779     GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5780     GenTreePtr indexDefn  = nullptr; // non-NULL if we need to allocate a temp for the index expression
5781     GenTreePtr bndsChk    = nullptr;
5782
5783     // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5784     if (chkd)
5785     {
5786         GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5787         GenTreePtr index2  = nullptr;
5788
5789         // If the arrRef expression involves an assignment, a call or reads from global memory,
5790         // then we *must* allocate a temporary in which to "localize" those values,
5791         // to ensure that the same values are used in the bounds check and the actual
5792         // dereference.
5793         // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5794         // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5795         // complexity is not exposed. (Without that condition there are cases of local struct
5796         // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5797         // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5798         //
5799         if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5800             gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5801         {
5802             unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5803             arrRefDefn            = gtNewTempAssign(arrRefTmpNum, arrRef);
5804             arrRef                = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5805             arrRef2               = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5806         }
5807         else
5808         {
5809             arrRef2 = gtCloneExpr(arrRef);
5810             noway_assert(arrRef2 != nullptr);
5811         }
5812
5813         // If the index expression involves an assignment, a call or reads from global memory,
5814         // we *must* allocate a temporary in which to "localize" those values,
5815         // to ensure that the same values are used in the bounds check and the actual
5816         // dereference.
5817         // Also we allocate the temporary when the index is sufficiently complex/expensive.
5818         //
        // Note: check the index expression itself here (cf. the arrRef check above).
        if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
            gtComplexityExceeds(&index, MAX_INDEX_COMPLEXITY) || (index->OperGet() == GT_FIELD))
5821         {
5822             unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5823             indexDefn            = gtNewTempAssign(indexTmpNum, index);
5824             index                = gtNewLclvNode(indexTmpNum, index->TypeGet());
5825             index2               = gtNewLclvNode(indexTmpNum, index->TypeGet());
5826         }
5827         else
5828         {
5829             index2 = gtCloneExpr(index);
5830             noway_assert(index2 != nullptr);
5831         }
5832
5833         // Next introduce a GT_ARR_BOUNDS_CHECK node
5834         var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5835
5836 #ifdef _TARGET_64BIT_
5837         // The CLI Spec allows an array to be indexed by either an int32 or a native int.  In the case
5838         // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
        // the comparison will have to be widened to 64 bits.
5840         if (index->TypeGet() == TYP_I_IMPL)
5841         {
5842             bndsChkType = TYP_I_IMPL;
5843         }
5844 #endif // _TARGET_64BIT_
5845
5846         GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5847
5848         if (bndsChkType != TYP_INT)
5849         {
5850             arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5851         }
5852
5853         GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5854             GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5855
5856         bndsChk = arrBndsChk;
5857
5858         // Make sure to increment ref-counts if already ref-counted.
5859         if (lvaLocalVarRefCounted)
5860         {
5861             lvaRecursiveIncRefCounts(index);
5862             lvaRecursiveIncRefCounts(arrRef);
5863         }
5864
5865         // Now we'll switch to using the second copies for arrRef and index
5866         // to compute the address expression
5867
5868         arrRef = arrRef2;
5869         index  = index2;
5870     }
5871
5872     // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5873
5874     GenTreePtr addr;
5875
5876 #ifdef _TARGET_64BIT_
5877     // Widen 'index' on 64-bit targets
5878     if (index->TypeGet() != TYP_I_IMPL)
5879     {
5880         if (index->OperGet() == GT_CNS_INT)
5881         {
5882             index->gtType = TYP_I_IMPL;
5883         }
5884         else
5885         {
5886             index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
5887         }
5888     }
5889 #endif // _TARGET_64BIT_
5890
5891     /* Scale the index value if necessary */
5892     if (elemSize > 1)
5893     {
5894         GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5895
5896         // Fix 392756 WP7 Crossgen
5897         //
5898         // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5899         // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5900         // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5901         //
5902         size->gtFlags |= GTF_DONT_CSE;
5903
5904         /* Multiply by the array element size */
5905         addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5906     }
5907     else
5908     {
5909         addr = index;
5910     }
5911
5912     /* Add the object ref to the element's offset */
5913
5914     addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5915
5916     /* Add the first element's offset */
5917
5918     GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5919
5920     addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
5921
5922 #if SMALL_TREE_NODES
5923     assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
5924 #endif
5925
    // Change the original GT_INDEX node into a GT_IND node.
5927     tree->SetOper(GT_IND);
5928
    // If the element type is a floating-point type, notify the compiler
    // that we'll potentially use floating point registers at the time of codegen.
5931     if (varTypeIsFloating(tree->gtType))
5932     {
5933         this->compFloatingPointUsed = true;
5934     }
5935
5936     // We've now consumed the GTF_INX_RNGCHK, and the node
5937     // is no longer a GT_INDEX node.
5938     tree->gtFlags &= ~GTF_INX_RNGCHK;
5939
5940     tree->gtOp.gtOp1 = addr;
5941
5942     // This is an array index expression.
5943     tree->gtFlags |= GTF_IND_ARR_INDEX;
5944
5945     /* An indirection will cause a GPF if the address is null */
5946     tree->gtFlags |= GTF_EXCEPT;
5947
5948     if (nCSE)
5949     {
5950         tree->gtFlags |= GTF_DONT_CSE;
5951     }
5952
5953     // Store information about it.
5954     GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5955
5956     // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5957
5958     GenTreePtr indTree = tree;
5959
5960     // Did we create a bndsChk tree?
5961     if (bndsChk)
5962     {
5963         // Use a GT_COMMA node to prepend the array bound check
5964         //
5965         tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5966
5967         /* Mark the indirection node as needing a range check */
5968         fgSetRngChkTarget(bndsChk);
5969     }
5970
5971     if (indexDefn != nullptr)
5972     {
5973         // Use a GT_COMMA node to prepend the index assignment
5974         //
5975         tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5976     }
5977     if (arrRefDefn != nullptr)
5978     {
        // Use a GT_COMMA node to prepend the arrRef assignment
5980         //
5981         tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5982     }
5983
5984     // Currently we morph the tree to perform some folding operations prior
5985     // to attaching fieldSeq info and labeling constant array index contributions
5986     //
5987     fgMorphTree(tree);
5988
5989     // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5990     // constant array index contributions, but the morphing operation may have changed
5991     // the 'tree' into something that now unconditionally throws an exception.
5992     //
    // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
5994     // or it could be left unchanged.  If it is unchanged then we should not return,
5995     // instead we should proceed to attaching fieldSeq info, etc...
5996     //
5997     GenTreePtr arrElem = tree->gtEffectiveVal();
5998
5999     if (fgIsCommaThrow(tree))
6000     {
6001         if ((arrElem != indTree) ||         // A new tree node may have been created
6002             (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
6003         {
6004             return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
6005         }
6006     }
6007
6008     assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
6009
6010     addr = arrElem->gtOp.gtOp1;
6011
6012     assert(addr->TypeGet() == TYP_BYREF);
6013
6014     GenTreePtr cnsOff = nullptr;
6015     if (addr->OperGet() == GT_ADD)
6016     {
6017         if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
6018         {
6019             cnsOff = addr->gtOp.gtOp2;
6020             addr   = addr->gtOp.gtOp1;
6021         }
6022
6023         while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
6024         {
6025             assert(addr->TypeGet() == TYP_BYREF);
6026             GenTreePtr index = addr->gtOp.gtOp2;
6027
6028             // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6029             index->LabelIndex(this);
6030
6031             addr = addr->gtOp.gtOp1;
6032         }
6033         assert(addr->TypeGet() == TYP_REF);
6034     }
6035     else if (addr->OperGet() == GT_CNS_INT)
6036     {
6037         cnsOff = addr;
6038     }
6039
6040     FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
6041
6042     if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
6043     {
6044         // Assign it the [#FirstElem] field sequence
6045         //
6046         cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
6047     }
6048     else //  We have folded the first element's offset with the index expression
6049     {
6050         // Build the [#ConstantIndex, #FirstElem] field sequence
6051         //
6052         FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
6053         FieldSeqNode* fieldSeq          = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
6054
6055         if (cnsOff == nullptr) // It must have folded into a zero offset
6056         {
6057             // Record in the general zero-offset map.
6058             GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6059         }
6060         else
6061         {
6062             cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
6063         }
6064     }
6065
6066     return tree;
6067 }
6068
6069 #ifdef _TARGET_X86_
6070 /*****************************************************************************
6071  *
 *  Wrap fixed stack arguments for varargs functions to go through the varargs
 *  cookie to access them, except for the cookie itself.
 *
 *  Non-x86 platforms are allowed to access all arguments directly,
 *  so we don't need this code there.
6077  *
6078  */
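//
// For example (illustrative): a fixed int argument is rewritten to
//     IND(int, SUB(LCL_VAR lvaVarargsBaseOfStkArgs, <stack offset adjusted for reg args>))
// so the access goes through the varargs base pointer rather than the frame directly.
//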
6079 GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6080 {
6081     /* For the fixed stack arguments of a varargs function, we need to go
        through the varargs cookie to access them, except for the
6083         cookie itself */
6084
6085     LclVarDsc* varDsc = &lvaTable[lclNum];
6086
6087     if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6088     {
6089         // Create a node representing the local pointing to the base of the args
6090         GenTreePtr ptrArg =
6091             gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
6092                           gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
6093                                         lclOffs));
6094
6095         // Access the argument through the local
6096         GenTreePtr tree;
6097         if (varType == TYP_STRUCT)
6098         {
6099             tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6100         }
6101         else
6102         {
6103             tree = gtNewOperNode(GT_IND, varType, ptrArg);
6104         }
6105         tree->gtFlags |= GTF_IND_TGTANYWHERE;
6106
6107         if (varDsc->lvAddrExposed)
6108         {
6109             tree->gtFlags |= GTF_GLOB_REF;
6110         }
6111
6112         return fgMorphTree(tree);
6113     }
6114
    return nullptr;
6116 }
6117 #endif
6118
6119 /*****************************************************************************
6120  *
6121  *  Transform the given GT_LCL_VAR tree for code generation.
6122  */
6123
6124 GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph)
6125 {
6126     noway_assert(tree->gtOper == GT_LCL_VAR);
6127
6128     unsigned   lclNum  = tree->gtLclVarCommon.gtLclNum;
6129     var_types  varType = lvaGetRealType(lclNum);
6130     LclVarDsc* varDsc  = &lvaTable[lclNum];
6131
6132     if (varDsc->lvAddrExposed)
6133     {
6134         tree->gtFlags |= GTF_GLOB_REF;
6135     }
6136
6137 #ifdef _TARGET_X86_
6138     if (info.compIsVarArgs)
6139     {
6140         GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6141         if (newTree != nullptr)
6142         {
6143             if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6144             {
6145                 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6146             }
6147             return newTree;
6148         }
6149     }
6150 #endif // _TARGET_X86_
6151
6152     /* If not during the global morphing phase bail */
6153
6154     if (!fgGlobalMorph && !forceRemorph)
6155     {
6156         return tree;
6157     }
6158
6159     bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6160
6161     noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6162
6163     if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6164     {
6165 #if LOCAL_ASSERTION_PROP
6166         /* Assertion prop can tell us to omit adding a cast here */
6167         if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
6168         {
6169             return tree;
6170         }
6171 #endif
6172         /* Small-typed arguments and aliased locals are normalized on load.
6173            Other small-typed locals are normalized on store.
           Normalize-on-load is also used under the debugger, since the debugger could write to the variable.
6175            If this is one of the former, insert a narrowing cast on the load.
6176                    ie. Convert: var-short --> cast-short(var-int) */
6177
6178         tree->gtType = TYP_INT;
6179         fgMorphTreeDone(tree);
6180         tree = gtNewCastNode(TYP_INT, tree, varType);
6181         fgMorphTreeDone(tree);
6182         return tree;
6183     }
6184
6185     return tree;
6186 }
6187
6188 /*****************************************************************************
6189   Grab a temp for big offset morphing.
  This method will grab a new temp if no temp of this "type" has been created yet,
  or it will return the cached one if it has.
6192 */
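// For example (illustrative): the first request for a TYP_BYREF temp grabs a fresh local
// and caches its number in fgBigOffsetMorphingTemps[TYP_BYREF]; any later TYP_BYREF
// request returns that same local.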
6193 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6194 {
6195     unsigned lclNum = fgBigOffsetMorphingTemps[type];
6196
6197     if (lclNum == BAD_VAR_NUM)
6198     {
6199         // We haven't created a temp for this kind of type. Create one now.
6200         lclNum                         = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6201         fgBigOffsetMorphingTemps[type] = lclNum;
6202     }
6203     else
6204     {
6205         // We better get the right type.
6206         noway_assert(lvaTable[lclNum].TypeGet() == type);
6207     }
6208
6209     noway_assert(lclNum != BAD_VAR_NUM);
6210     return lclNum;
6211 }
6212
6213 /*****************************************************************************
6214  *
6215  *  Transform the given GT_FIELD tree for code generation.
6216  */
6217
6218 GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
6219 {
6220     assert(tree->gtOper == GT_FIELD);
6221
6222     CORINFO_FIELD_HANDLE symHnd          = tree->gtField.gtFldHnd;
6223     unsigned             fldOffset       = tree->gtField.gtFldOffset;
6224     GenTreePtr           objRef          = tree->gtField.gtFldObj;
6225     bool                 fieldMayOverlap = false;
6226     bool                 objIsLocal      = false;
6227
6228     if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6229     {
6230         // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
        // SIMD field rewrites are sensitive to.
6233         fgMorphImplicitByRefArgs(objRef);
6234     }
6235
6236     noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6237                  ((tree->gtFlags & GTF_GLOB_REF) != 0));
6238
6239     if (tree->gtField.gtFldMayOverlap)
6240     {
6241         fieldMayOverlap = true;
6242         // Reset the flag because we may reuse the node.
6243         tree->gtField.gtFldMayOverlap = false;
6244     }
6245
6246 #ifdef FEATURE_SIMD
    // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6248     if (mac == nullptr)
6249     {
6250         GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6251         if (newTree != tree)
6252         {
6253             newTree = fgMorphSmpOp(newTree);
6254             return newTree;
6255         }
6256     }
6257     else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6258     {
6259         GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6260         if (lcl != nullptr)
6261         {
6262             lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6263         }
6264     }
6265 #endif
6266
6267     /* Is this an instance data member? */
6268
6269     if (objRef)
6270     {
6271         GenTreePtr addr;
6272         objIsLocal = objRef->IsLocal();
6273
6274         if (tree->gtFlags & GTF_IND_TLS_REF)
6275         {
6276             NO_WAY("instance field can not be a TLS ref.");
6277         }
6278
6279         /* We'll create the expression "*(objRef + mem_offs)" */
6280
6281         noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6282
6283         // An optimization for Contextful classes:
6284         // we unwrap the proxy when we have a 'this reference'
6285         if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6286         {
6287             objRef = fgUnwrapProxy(objRef);
6288         }
6289
6290         /*
6291             Now we have a tree like this:
6292
6293                                   +--------------------+
6294                                   |      GT_FIELD      |   tree
6295                                   +----------+---------+
6296                                              |
6297                               +--------------+-------------+
6298                               |   tree->gtField.gtFldObj   |
6299                               +--------------+-------------+
6300
6301
6302             We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6303
6304                                   +--------------------+
6305                                   |   GT_IND/GT_OBJ    |   tree
6306                                   +---------+----------+
6307                                             |
6308                                             |
6309                                   +---------+----------+
6310                                   |       GT_ADD       |   addr
6311                                   +---------+----------+
6312                                             |
6313                                           /   \
6314                                         /       \
6315                                       /           \
6316                          +-------------------+  +----------------------+
6317                          |       objRef      |  |     fldOffset        |
6318                          |                   |  | (when fldOffset !=0) |
6319                          +-------------------+  +----------------------+
6320
6321
6322             or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6323
6324
6325                                   +--------------------+
6326                                   |   GT_IND/GT_OBJ    |   tree
6327                                   +----------+---------+
6328                                              |
6329                                   +----------+---------+
6330                                   |       GT_COMMA     |  comma2
6331                                   +----------+---------+
6332                                              |
6333                                             / \
6334                                           /     \
6335                                         /         \
6336                                       /             \
6337                  +---------+----------+               +---------+----------+
6338            comma |      GT_COMMA      |               |  "+" (i.e. GT_ADD) |   addr
6339                  +---------+----------+               +---------+----------+
6340                            |                                     |
6341                          /   \                                  /  \
6342                        /       \                              /      \
6343                      /           \                          /          \
6344          +-----+-----+             +-----+-----+      +---------+   +-----------+
6345      asg |  GT_ASG   |         ind |   GT_IND  |      |  tmpLcl |   | fldOffset |
6346          +-----+-----+             +-----+-----+      +---------+   +-----------+
6347                |                         |
6348               / \                        |
6349             /     \                      |
6350           /         \                    |
6351    +-----+-----+   +-----+-----+   +-----------+
6352    |   tmpLcl  |   |   objRef  |   |   tmpLcl  |
6353    +-----------+   +-----------+   +-----------+
6354
6355
6356         */
6357
6358         var_types objRefType = objRef->TypeGet();
6359
6360         GenTreePtr comma = nullptr;
6361
6362         bool addedExplicitNullCheck = false;
6363
6364         // NULL mac means we encounter the GT_FIELD first.  This denotes a dereference of the field,
6365         // and thus is equivalent to a MACK_Ind with zero offset.
6366         MorphAddrContext defMAC(MACK_Ind);
6367         if (mac == nullptr)
6368         {
6369             mac = &defMAC;
6370         }
6371
6372         // This flag is set to enable the "conservative" style of explicit null-check insertion.
6373         // This means that we insert an explicit null check whenever we create byref by adding a
6374         // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6375         // dereferenced).  The alternative is "aggressive", which would not insert such checks (for
6376         // small offsets); in this plan, we would transfer some null-checking responsibility to
6377         // callee's of methods taking byref parameters.  They would have to add explicit null checks
        // callees of methods taking byref parameters.  They would have to add explicit null checks
6379         // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6380         // large).  To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6381         // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6382         // This is left here to point out how to implement it.
6383         CLANG_FORMAT_COMMENT_ANCHOR;
6384
6385 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
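
        // For example (illustrative): when morphing "&obj.fld" (a MACK_Addr context) with a
        // non-zero field offset, the conservative scheme inserts an explicit NULLCHECK of
        // the object (via the GT_COMMA built below) before the "obj + fldOffset" byref is formed.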
6386
6387         // If the objRef is a GT_ADDR node, it, itself, never requires null checking.  The expression
6388         // whose address is being taken is either a local or static variable, whose address is necessarily
        // non-null, or else it is a field dereference, which will do its own null checking if necessary.
6390         if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
6391                                           (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
6392 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6393                                            || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
6394 #else
6395                                            || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
6396                                                (mac->m_totalOffset + fldOffset > 0))
6397 #endif
6398                                                )))
6399         {
6400 #ifdef DEBUG
6401             if (verbose)
6402             {
6403                 printf("Before explicit null check morphing:\n");
6404                 gtDispTree(tree);
6405             }
6406 #endif
6407
6408             //
6409             // Create the "comma" subtree
6410             //
6411             GenTreePtr asg = nullptr;
6412             GenTreePtr nullchk;
6413
6414             unsigned lclNum;
6415
6416             if (objRef->gtOper != GT_LCL_VAR)
6417             {
6418                 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6419
6420                 // Create the "asg" node
6421                 asg = gtNewTempAssign(lclNum, objRef);
6422             }
6423             else
6424             {
6425                 lclNum = objRef->gtLclVarCommon.gtLclNum;
6426             }
6427
6428             // Create the "nullchk" node.
            // Make it TYP_BYTE so we only dereference 1 byte.
6430             GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
6431             nullchk           = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6432
6433             nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6434
6435             // An indirection will cause a GPF if the address is null.
6436             nullchk->gtFlags |= GTF_EXCEPT;
6437
6438             compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6439             optMethodFlags |= OMF_HAS_NULLCHECK;
6440
6441             if (asg)
6442             {
6443                 // Create the "comma" node.
6444                 comma = gtNewOperNode(GT_COMMA,
6445                                       TYP_VOID, // We don't want to return anything from this "comma" node.
6446                                                 // Set the type to TYP_VOID, so we can select "cmp" instruction
6447                                                 // instead of "mov" instruction later on.
6448                                       asg, nullchk);
6449             }
6450             else
6451             {
6452                 comma = nullchk;
6453             }
6454
6455             addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6456
6457             addedExplicitNullCheck = true;
6458         }
6459         else if (fldOffset == 0)
6460         {
6461             // Generate the "addr" node.
6462             addr = objRef;
6463             FieldSeqNode* fieldSeq =
6464                 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6465             GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6466         }
6467         else
6468         {
6469             addr = objRef;
6470         }
6471
6472 #ifdef FEATURE_READYTORUN_COMPILER
6473         if (tree->gtField.gtFieldLookup.addr != nullptr)
6474         {
6475             GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
6476
6477             if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6478             {
6479                 baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
6480             }
6481
6482             addr =
6483                 gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
6484         }
6485 #endif
6486         if (fldOffset != 0)
6487         {
6488             // Generate the "addr" node.
6489             /* Add the member offset to the object's address */
6490             FieldSeqNode* fieldSeq =
6491                 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6492             addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6493                                  gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6494         }
6495
6496         // Now let's set the "tree" as a GT_IND tree.
6497
6498         tree->SetOper(GT_IND);
6499         tree->gtOp.gtOp1 = addr;
6500
6501         if (fgAddrCouldBeNull(addr))
6502         {
6503             // This indirection can cause a GPF if the address could be null.
6504             tree->gtFlags |= GTF_EXCEPT;
6505         }
6506
6507         if (addedExplicitNullCheck)
6508         {
6509             //
6510             // Create "comma2" node and link it to "tree".
6511             //
6512             GenTreePtr comma2;
6513             comma2 = gtNewOperNode(GT_COMMA,
6514                                    addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6515                                    comma, addr);
6516             tree->gtOp.gtOp1 = comma2;
6517         }
6518
6519 #ifdef DEBUG
6520         if (verbose)
6521         {
6522             if (addedExplicitNullCheck)
6523             {
6524                 printf("After adding explicit null check:\n");
6525                 gtDispTree(tree);
6526             }
6527         }
6528 #endif
6529     }
6530     else /* This is a static data member */
6531     {
6532         if (tree->gtFlags & GTF_IND_TLS_REF)
6533         {
6534             // Thread Local Storage static field reference
6535             //
6536             // Field ref is a TLS 'Thread-Local-Storage' reference
6537             //
6538             // Build this tree:  IND(*) #
6539             //                    |
6540             //                   ADD(I_IMPL)
6541             //                   / \
6542             //                  /  CNS(fldOffset)
6543             //                 /
6544             //                /
6545             //               /
6546             //             IND(I_IMPL) == [Base of this DLL's TLS]
6547             //              |
6548             //             ADD(I_IMPL)
6549             //             / \
6550             //            /   CNS(IdValue*4) or MUL
6551             //           /                      / \
6552             //          IND(I_IMPL)            /  CNS(4)
6553             //           |                    /
6554             //          CNS(TLS_HDL,0x2C)    IND
6555             //                                |
6556             //                               CNS(pIdAddr)
6557             //
6558             // # Denotes the original node
6559             //
6560             void**   pIdAddr = nullptr;
6561             unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
6562
6563             //
6564             // If we can access the TLS DLL index ID value directly,
6565             // then pIdAddr will be NULL and
6566             //      IdValue will be the actual TLS DLL index ID
6567             //
6568             GenTreePtr dllRef = nullptr;
6569             if (pIdAddr == nullptr)
6570             {
6571                 if (IdValue != 0)
6572                 {
6573                     dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6574                 }
6575             }
6576             else
6577             {
6578                 dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
6579                 dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
6580                 dllRef->gtFlags |= GTF_IND_INVARIANT;
6581
6582                 /* Multiply by 4 */
6583
6584                 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6585             }
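                 // dllRef now holds the byte offset of this module's slot in the TLS slot
                 // array (the DLL index scaled by the size of a 32-bit pointer).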
6586
6587 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
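     // (On x86 Windows, fs:[0x2C] is the ThreadLocalStoragePointer field of the TEB.)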
6588
6589             // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6590
6591             GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6592
6593             // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6594             if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6595             {
6596                 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6597                 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
6598             }
6599
6600             tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6601
6602             if (dllRef != nullptr)
6603             {
6604                 /* Add the dllRef */
6605                 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6606             }
6607
6608             /* indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
6609             tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6610
6611             if (fldOffset != 0)
6612             {
6613                 FieldSeqNode* fieldSeq =
6614                     fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6615                 GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6616
6617                 /* Add the TLS static field offset to the address */
6618
6619                 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6620             }
6621
6622             // Final indirect to get to actual value of TLS static field
6623
6624             tree->SetOper(GT_IND);
6625             tree->gtOp.gtOp1 = tlsRef;
6626
6627             noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6628         }
6629         else
6630         {
6631             // Normal static field reference
6632
6633             //
6634             // If we can access the static's address directly,
6635             // then pFldAddr will be NULL and
6636             //      fldAddr will be the actual address of the static field
6637             //
6638             void** pFldAddr = nullptr;
6639             void*  fldAddr  = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6640
6641             if (pFldAddr == nullptr)
6642             {
6643 #ifdef _TARGET_64BIT_
6644                 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
6645                 {
6646                     // The address is not directly addressable, so force it into a
6647                     // constant so that we handle it properly
6648
6649                     GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6650                     addr->gtType    = TYP_I_IMPL;
6651                     FieldSeqNode* fieldSeq =
6652                         fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6653                     addr->gtIntCon.gtFieldSeq = fieldSeq;
6654                     // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6655                     if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6656                     {
6657                         tree->gtFlags &= ~GTF_FLD_INITCLASS;
6658                         addr->gtFlags |= GTF_ICON_INITCLASS;
6659                     }
6660
6661                     tree->SetOper(GT_IND);
6662                     // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6663                     // We must clear it when we transform the node.
6664                     // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6665                     // that the logic above does its own checking to determine whether a nullcheck is needed.
6666                     tree->gtFlags &= ~GTF_IND_ARR_LEN;
6667                     tree->gtOp.gtOp1 = addr;
6668
6669                     return fgMorphSmpOp(tree);
6670                 }
6671                 else
6672 #endif // _TARGET_64BIT_
6673                 {
6674                     // Only volatile or classinit could be set, and they map over to the GT_CLS_VAR flags
6675                     noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
6676                     static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
6677                     static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
6678                     tree->SetOper(GT_CLS_VAR);
6679                     tree->gtClsVar.gtClsVarHnd = symHnd;
6680                     FieldSeqNode* fieldSeq =
6681                         fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6682                     tree->gtClsVar.gtFieldSeq = fieldSeq;
6683                 }
6684
6685                 return tree;
6686             }
6687             else
6688             {
6689                 GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6690
6691                 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6692                 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6693                 {
6694                     tree->gtFlags &= ~GTF_FLD_INITCLASS;
6695                     addr->gtFlags |= GTF_ICON_INITCLASS;
6696                 }
6697
6698                 // There are two cases here: either the static is RVA-based,
6699                 // in which case the type of the FIELD node is not a GC type
6700                 // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
6701                 // a GC type and the handle to it is a TYP_BYREF in the GC heap,
6702                 // because handles to statics now go into the large object heap.
6703
6704                 var_types  handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6705                 GenTreePtr op1       = gtNewOperNode(GT_IND, handleTyp, addr);
6706                 op1->gtFlags |= GTF_IND_INVARIANT;
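                     // The first indirection merely loads the static's address from the cell
                     // at pFldAddr; that cell never changes, so the load is marked invariant.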
6707
6708                 tree->SetOper(GT_IND);
6709                 tree->gtOp.gtOp1 = op1;
6710             }
6711         }
6712     }
6713     noway_assert(tree->gtOper == GT_IND);
6714     // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6715     // We must clear it when we transform the node.
6716     // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6717     // that the logic above does its own checking to determine whether a nullcheck is needed.
6718     tree->gtFlags &= ~GTF_IND_ARR_LEN;
6719
6720     GenTreePtr res = fgMorphSmpOp(tree);
6721
6722     // If we have a struct type, this node would previously have been under a GT_ADDR,
6723     // and therefore would have been marked GTF_DONT_CSE.
6724     // TODO-1stClassStructs: revisit this.
6725     if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
6726     {
6727         res->gtFlags |= GTF_DONT_CSE;
6728     }
6729
6730     if (fldOffset == 0 && res->OperGet() == GT_IND)
6731     {
6732         GenTreePtr addr = res->gtOp.gtOp1;
6733         // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6734         FieldSeqNode* fieldSeq =
6735             fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6736         fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6737     }
6738
6739     return res;
6740 }
6741
6742 //------------------------------------------------------------------------------
6743 // fgMorphCallInline: attempt to inline a call
6744 //
6745 // Arguments:
6746 //    call         - call expression to inline, inline candidate
6747 //    inlineResult - result tracking and reporting
6748 //
6749 // Notes:
6750 //    Attempts to inline the call.
6751 //
6752 //    If successful, callee's IR is inserted in place of the call, and
6753 //    is marked with an InlineContext.
6754 //
6755 //    If unsuccessful, the transformations done in anticipation of a
6756 //    possible inline are undone, and the candidate flag on the call
6757 //    is cleared.
6758
6759 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
6760 {
6761     // The call must be a candidate for inlining.
6762     assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
6763
6764     // Attempt the inline
6765     fgMorphCallInlineHelper(call, inlineResult);
6766
6767     // We should have made up our minds one way or another....
6768     assert(inlineResult->IsDecided());
6769
6770     // If we failed to inline, we have a bit of work to do to clean up
6771     if (inlineResult->IsFailure())
6772     {
6773
6774 #ifdef DEBUG
6775
6776         // Before we do any cleanup, create a failing InlineContext to
6777         // capture details of the inlining attempt.
6778         m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6779
6780 #endif
6781
6782         // It was an inline candidate, but we haven't expanded it.
6783         if (call->gtCall.gtReturnType != TYP_VOID)
6784         {
6785             // Detach the GT_CALL tree from the original statement by
6786             // hanging a "nothing" node onto it. Later the "nothing" node will be removed
6787             // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6788
6789             noway_assert(fgMorphStmt->gtStmtExpr == call);
6790             fgMorphStmt->gtStmtExpr = gtNewNothingNode();
6791         }
6792
6793         // Clear the Inline Candidate flag so that later we can verify that we
6794         // tried to inline all candidates.
6795         //
6796         call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6797     }
6798 }
6799
6800 /*****************************************************************************
6801  *  Helper to attempt to inline a call
6802  *  Sets success/failure in inline result
6803  *  If success, modifies current method's IR with inlinee's IR
6804  *  If failed, undoes any speculative modifications to current method
6805  */
6806
6807 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6808 {
6809     // Don't expect any surprises here.
6810     assert(result->IsCandidate());
6811
6812     if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6813     {
6814         // For now, attributing this to call site, though it's really
6815         // more of a budget issue (lvaCount currently includes all
6816         // caller and prospective callee locals). We still might be
6817         // able to inline other callees into this caller, or inline
6818         // this callee in other callers.
6819         result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6820         return;
6821     }
6822
6823     if (call->IsVirtual())
6824     {
6825         result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6826         return;
6827     }
6828
6829     // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6830     // and recursive tail calls as inline candidates.
6831     noway_assert(!call->IsTailPrefixedCall());
6832     noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6833
6834     /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6835        Although we have checked this in impCanInline, it is possible that later IL instructions
6836        might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6837     */
6838
6839     if (opts.compNeedSecurityCheck)
6840     {
6841         result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6842         return;
6843     }
6844
6845     //
6846     // Calling inlinee's compiler to inline the method.
6847     //
6848
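         // Remember the current number of locals; if the inline fails, any locals the
         // inlinee's compiler added are reset and discarded below.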
6849     unsigned startVars = lvaCount;
6850
6851 #ifdef DEBUG
6852     if (verbose)
6853     {
6854         printf("Expanding INLINE_CANDIDATE in statement ");
6855         printTreeID(fgMorphStmt);
6856         printf(" in BB%02u:\n", compCurBB->bbNum);
6857         gtDispTree(fgMorphStmt);
6858         if (call->IsImplicitTailCall())
6859         {
6860             printf("Note: candidate is implicit tail call\n");
6861         }
6862     }
6863 #endif
6864
6865     impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6866
6867     //
6868     // Invoke the compiler to inline the call.
6869     //
6870
6871     fgInvokeInlineeCompiler(call, result);
6872
6873     if (result->IsFailure())
6874     {
6875         // Undo some changes made in anticipation of inlining...
6876
6877         // Zero out the used locals
6878         memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6879         for (unsigned i = startVars; i < lvaCount; i++)
6880         {
6881             new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
6882         }
6883
6884         lvaCount = startVars;
6885
6886 #ifdef DEBUG
6887         if (verbose)
6888         {
6889             printf("Inlining failed. Restore lvaCount to %u.\n", lvaCount);
6890         }
6891 #endif
6892
6893         return;
6894     }
6895
6896 #ifdef DEBUG
6897     if (verbose)
6898     {
6899         printf("After inlining lvaCount=%u.\n", lvaCount);
6900     }
6901 #endif
6902 }
6903
6904 /*****************************************************************************
6905  *
6906  * Performs checks to see if this tail call can be optimized as epilog+jmp.
6907  */
6908 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6909 {
6910 #if FEATURE_FASTTAILCALL
6911     // Reaching here means that the return types of caller and callee are tail call compatible.
6912     // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6913     //
6914     // In the implicit tail call case callSig may not be available, but it is guaranteed to be available
6915     // for explicit tail call cases.  The reason callSig may not be available for an implicit tail call is
6916     // that a call node might be marked as an inline candidate and could fail to be inlined, in which case
6917     // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
6918     // currently does not copy/set callSig.
6919     CLANG_FORMAT_COMMENT_ANCHOR;
6920
6921 #ifdef DEBUG
6922     if (callee->IsTailPrefixedCall())
6923     {
6924         assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6925                                             (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6926     }
6927 #endif
6928
6929     // Note on vararg methods:
6930     // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
6931     // But we can be sure that the in-coming arg area of a vararg caller would be sufficient to hold its
6932     // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
6933     // out-going area required for the callee is bounded by the caller's fixed argument space.
6934     //
6935     // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
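     //
     // For example (hypothetical counts): a vararg caller with four fixed args can fast
     // tail call a callee taking at most four args, regardless of how many additional
     // args the caller's caller actually pushed.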
6936
6937     // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6938     unsigned nCallerArgs = info.compArgsCount;
6939
6940     // Count the callee args including implicit and hidden.
6941     // Note that GenericContext and VarargCookie are added by the importer to the
6942     // gtCallArgs list, along with the explicit user args, while importing the call.
6943     unsigned nCalleeArgs = 0;
6944     if (callee->gtCallObjp) // thisPtr
6945     {
6946         nCalleeArgs++;
6947     }
6948
6949     if (callee->HasRetBufArg()) // RetBuf
6950     {
6951         nCalleeArgs++;
6952
6953         // If callee has RetBuf param, caller too must have it.
6954         // Otherwise go the slow route.
6955         if (info.compRetBuffArg == BAD_VAR_NUM)
6956         {
6957             return false;
6958         }
6959     }
6960
6961     // Count user args while tracking whether any of them is a multi-byte param
6962     // that cannot be passed in a register. Note that we don't need to count
6963     // non-standard and secret params passed in registers (e.g. R10, R11) since
6964     // these won't contribute to out-going arg size.
6965     bool hasMultiByteArgs = false;
6966     for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6967     {
6968         nCalleeArgs++;
6969
6970         assert(args->OperIsList());
6971         GenTreePtr argx = args->gtOp.gtOp1;
6972
6973         if (varTypeIsStruct(argx))
6974         {
6975             // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
6976             while (argx->gtOper == GT_COMMA)
6977             {
6978                 argx = argx->gtOp.gtOp2;
6979             }
6980
6981             // Get the size of the struct and see if it is register passable.
6982             CORINFO_CLASS_HANDLE objClass = nullptr;
6983
6984             if (argx->OperGet() == GT_OBJ)
6985             {
6986                 objClass = argx->AsObj()->gtClass;
6987             }
6988             else if (argx->IsLocal())
6989             {
6990                 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
6991             }
6992             if (objClass != nullptr)
6993             {
6994 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
6995
6996                 unsigned typeSize = 0;
6997                 hasMultiByteArgs  = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
6998
6999 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
7000                 // On System V/arm64 an arg could be a two-eightbyte struct that is passed in two registers.
7001                 // Account for the second eightbyte in the nCalleeArgs.
7002                 // https://github.com/dotnet/coreclr/issues/2666
7003                 // TODO-CQ-Amd64-Unix/arm64:  Structs of size between 9 and 16 bytes are conservatively estimated
7004                 //                            as two args, since they need two registers, whereas nCallerArgs counts
7005                 //                            such an arg as one. This means we will not be optimizing certain calls
7006                 //                            even though it would technically be possible.
7007
7008                 if (typeSize > TARGET_POINTER_SIZE)
7009                 {
7010                     unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
7011                     nCalleeArgs += extraArgRegsToAdd;
7012                 }
7013 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
7014
7015 #else
7016                 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7017                 unreached();
7018 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7019             }
7020             else
7021             {
7022                 hasMultiByteArgs = true;
7023             }
7024         }
7025     }
7026
7027     // Go the slow route if it has multi-byte params
7028     if (hasMultiByteArgs)
7029     {
7030         return false;
7031     }
7032
7033     // Reaching here means that the callee has only argument types which can be passed in
7034     // a register and which, if passed on the stack, occupy exactly one stack slot in the out-going arg area.
7035     // If we are passing args on the stack for the callee and it has more args passed on the stack than
7036     // the caller, then a fast tail call cannot be performed.
7037     //
7038     // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7039     // as non-interruptible for fast tail calls.
7040     if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
7041     {
7042         return false;
7043     }
7044
7045     return true;
7046 #else
7047     return false;
7048 #endif
7049 }
7050
7051 /*****************************************************************************
7052  *
7053  *  Transform the given GT_CALL tree for tail call code generation.
7054  */
7055 void Compiler::fgMorphTailCall(GenTreeCall* call)
7056 {
7057     JITDUMP("fgMorphTailCall (before):\n");
7058     DISPTREE(call);
7059
7060 #if defined(_TARGET_ARM_)
7061     // For the helper-assisted tail calls, we need to push all the arguments
7062     // into a single list, and then add a few extra at the beginning
7063
7064     // Check for PInvoke call types that we don't handle in codegen yet.
7065     assert(!call->IsUnmanaged());
7066     assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7067
7068     // First move the this pointer (if any) onto the regular arg list
7069     GenTreePtr thisPtr = NULL;
7070     if (call->gtCallObjp)
7071     {
7072         GenTreePtr objp  = call->gtCallObjp;
7073         call->gtCallObjp = NULL;
7074
7075         if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7076         {
7077             thisPtr      = gtClone(objp, true);
7078             var_types vt = objp->TypeGet();
7079             if (thisPtr == NULL)
7080             {
7081                 // Too complex, so use a temp
7082                 unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7083                 GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
7084                 if (!call->IsVirtualVtable())
7085                 {
7086                     // Add an indirection to get the nullcheck
7087                     GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7088                     GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7089                     asg            = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7090                 }
7091                 objp    = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7092                 thisPtr = gtNewLclvNode(lclNum, vt);
7093             }
7094             else if (!call->IsVirtualVtable())
7095             {
7096                 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7097                 objp           = gtNewOperNode(GT_COMMA, vt, ind, objp);
7098                 thisPtr        = gtClone(thisPtr, true);
7099             }
7100
7101             call->gtFlags &= ~GTF_CALL_NULLCHECK;
7102         }
7103
7104         call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7105     }
7106
7107     // Add the extra VSD parameter if needed
7108     CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7109     if (call->IsVirtualStub())
7110     {
7111         flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7112
7113         GenTreePtr arg;
7114         if (call->gtCallType == CT_INDIRECT)
7115         {
7116             arg = gtClone(call->gtCallAddr, true);
7117             noway_assert(arg != NULL);
7118         }
7119         else
7120         {
7121             noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7122             ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7123             arg          = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7124
7125             // Change the call type, so we can add the extra indirection here, rather than in codegen
7126             call->gtCallAddr         = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7127             call->gtStubCallStubAddr = NULL;
7128             call->gtCallType         = CT_INDIRECT;
7129         }
7130         // Add the extra indirection to generate the real target
7131         call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
7132         call->gtFlags |= GTF_EXCEPT;
7133
7134         // And push the stub address onto the list of arguments
7135         call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7136     }
7137     else if (call->IsVirtualVtable())
7138     {
7139         // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
7140
7141         noway_assert(thisPtr != NULL);
7142
7143         GenTreePtr add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7144         GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7145         vtbl->gtFlags |= GTF_EXCEPT;
7146
7147         unsigned vtabOffsOfIndirection;
7148         unsigned vtabOffsAfterIndirection;
7149         info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
7150
7151         /* Get the appropriate vtable chunk */
7152
7153         add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7154         vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7155
7156         /* Now the appropriate vtable slot */
7157
7158         add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7159         vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7160
7161         // Switch this to a plain indirect call
7162         call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7163         assert(!call->IsVirtual());
7164         call->gtCallType = CT_INDIRECT;
7165
7166         call->gtCallAddr   = vtbl;
7167         call->gtCallCookie = NULL;
7168         call->gtFlags |= GTF_EXCEPT;
7169     }
7170
7171     // Now inject a placeholder for the real call target that codegen
7172     // will generate
7173     GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7174     codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
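     // The placeholder is marked as living in REG_TAILCALL_ADDR so that codegen knows
     // which register the real call target must end up in.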
7175     call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7176
7177     // Lastly inject the pointer for the copy routine
7178     noway_assert(call->callSig != NULL);
7179     void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7180     arg               = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7181     call->gtCallArgs  = gtNewListNode(arg, call->gtCallArgs);
7182
7183     // It is now a varargs tail call
7184     call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7185     call->gtFlags &= ~GTF_CALL_POP_ARGS;
7186
7187 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7188
7189     // x86 classic codegen doesn't require any morphing
7190
7191     // For the helper-assisted tail calls, we need to push all the arguments
7192     // into a single list, and then add a few extra at the beginning or end.
7193     //
7194     // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7195     //
7196     //      JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7197     //
7198     // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7199     // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7200     // for callTarget here which will be replaced later with callTarget in tail call lowering.
7201     //
7202     // For x86, the tailcall helper is defined as:
7203     //
7204     //      JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
7205     //      callTarget)
7206     //
7207     // Note that the special arguments are on the stack, whereas the function arguments follow
7208     // the normal convention: there might be register arguments in ECX and EDX. The stack will
7209     // look like (highest address at the top):
7210     //      first normal stack argument
7211     //      ...
7212     //      last normal stack argument
7213     //      numberOfOldStackArgs
7214     //      numberOfNewStackArgs
7215     //      flags
7216     //      callTarget
7217     //
7218     // Each special arg is 4 bytes.
7219     //
7220     // 'flags' is a bitmask where:
7221     //      1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7222     //          callee-saved registers for tailcall functions. Note that the helper assumes
7223     //          that the callee-saved registers live immediately below EBP, and must have been
7224     //          pushed in this order: EDI, ESI, EBX.
7225     //      2 == call target is a virtual stub dispatch.
7226     //
7227     // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7228     // on the custom calling convention.
7229
7230     // Check for PInvoke call types that we don't handle in codegen yet.
7231     assert(!call->IsUnmanaged());
7232     assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7233
7234     // Don't support tail calling helper methods
7235     assert(call->gtCallType != CT_HELPER);
7236
7237     // We come down this route only for tail-prefixed calls that cannot be dispatched as
7238     // fast tail calls
7239     assert(!call->IsImplicitTailCall());
7240     assert(!fgCanFastTailCall(call));
7241
7242     // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7243     // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7244     // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7245     // addition, for all platforms, we are going to change the call into a helper call. Our code
7246     // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7247     // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7248     // since special 'this' pointer handling will no longer kick in.
7249     //
7250     // Some call types, such as virtual vtable calls, require creating a call address expression
7251     // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7252     // to create a temporary that is assigned to the "this" pointer expression, and then use
7253     // that temp to create the call address expression. This temp creation embedded statement
7254     // will occur immediately before the "this" pointer argument, and then will be used for both
7255     // the "this" pointer argument as well as the call address expression. In the normal ordering,
7256     // the embedded statement establishing the "this" pointer temp will execute before both uses
7257     // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7258     // normal call argument list, and insert a placeholder which will hold the call address
7259     // expression. For non-x86, things are ok, because the order of execution of these is not
7260     // altered. However, for x86, the call address expression is inserted as the *last* argument
7261     // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7262     // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7263     // for those cases where call lowering creates an embedded form temp of "this", we will
7264     // create a temp here, early, that will later get morphed correctly.
7265
7266     if (call->gtCallObjp)
7267     {
7268         GenTreePtr thisPtr = nullptr;
7269         GenTreePtr objp    = call->gtCallObjp;
7270         call->gtCallObjp   = nullptr;
7271
7272 #ifdef _TARGET_X86_
7273         if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7274         {
7275             // tmp = "this"
7276             unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7277             GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
7278
7279             // COMMA(tmp = "this", tmp)
7280             var_types  vt  = objp->TypeGet();
7281             GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7282             thisPtr        = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7283
7284             objp = thisPtr;
7285         }
7286 #endif // _TARGET_X86_
7287
7288 #if defined(_TARGET_X86_)
7289         // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
7290         // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
7291         // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
7292         if (call->NeedsNullCheck() || call->IsVirtualStub())
7293 #else
7294         if (call->NeedsNullCheck())
7295 #endif // defined(_TARGET_X86_)
7296         {
7297             // clone "this" if "this" has no side effects.
7298             if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7299             {
7300                 thisPtr = gtClone(objp, true);
7301             }
7302
7303             var_types vt = objp->TypeGet();
7304             if (thisPtr == nullptr)
7305             {
7306                 // create a temp if either "this" has side effects or "this" is too complex to clone.
7307
7308                 // tmp = "this"
7309                 unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7310                 GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
7311
7312                 // COMMA(tmp = "this", deref(tmp))
7313                 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7314                 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7315                 asg            = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7316
7317                 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7318                 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7319             }
7320             else
7321             {
7322                 // thisPtr = COMMA(deref("this"), "this")
7323                 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7324                 thisPtr        = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7325             }
7326
7327             call->gtFlags &= ~GTF_CALL_NULLCHECK;
7328         }
7329         else
7330         {
7331             thisPtr = objp;
7332         }
7333
7334         // During rationalization the tmp="this" assignment and the null check will
7335         // materialize as embedded stmts in the right execution order.
7336         assert(thisPtr != nullptr);
7337         call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7338     }
7339
7340 #if defined(_TARGET_AMD64_)
7341
7342     // Add the extra VSD parameter to arg list in case of VSD calls.
7343     // Tail call arg copying thunk will move this extra VSD parameter
7344     // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7345     // in Stublinkerx86.cpp for more details.
7346     CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7347     if (call->IsVirtualStub())
7348     {
7349         GenTreePtr stubAddrArg;
7350
7351         flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7352
7353         if (call->gtCallType == CT_INDIRECT)
7354         {
7355             stubAddrArg = gtClone(call->gtCallAddr, true);
7356             noway_assert(stubAddrArg != nullptr);
7357         }
7358         else
7359         {
7360             noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
7361
7362             ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7363             stubAddrArg  = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7364         }
7365
7366         // Push the stub address onto the list of arguments
7367         call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7368     }
7369
7370     // Now inject a placeholder for the real call target that Lower phase will generate.
7371     GenTreePtr arg   = gtNewIconNode(0, TYP_I_IMPL);
7372     call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7373
7374     // Inject the pointer for the copy routine to be used for struct copying
7375     noway_assert(call->callSig != nullptr);
7376     void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7377     arg               = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7378     call->gtCallArgs  = gtNewListNode(arg, call->gtCallArgs);
7379
7380 #else // !_TARGET_AMD64_
7381
7382     // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7383     // append to the list.
7384     GenTreeArgList** ppArg = &call->gtCallArgs;
7385     for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7386     {
7387         ppArg = (GenTreeArgList**)&args->gtOp2;
7388     }
7389     assert(ppArg != nullptr);
7390     assert(*ppArg == nullptr);
7391
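     // compArgSize counts all in-coming args, including the register-passed ones, so
     // subtracting the register args leaves the in-coming stack arg area, in REGSIZE_BYTES words.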
7392     unsigned nOldStkArgsWords =
7393         (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7394     GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7395     *ppArg        = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7396     ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);
7397
7398     // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7399     // The constant will be replaced.
7400     GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7401     *ppArg        = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7402     ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);
7403
7404     // Inject a placeholder for the flags.
7405     // The constant will be replaced.
7406     GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7407     *ppArg        = gtNewListNode(arg1, nullptr);
7408     ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);
7409
7410     // Inject a placeholder for the real call target that the Lowering phase will generate.
7411     // The constant will be replaced.
7412     GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7413     *ppArg        = gtNewListNode(arg0, nullptr);
7414
7415 #endif // !_TARGET_AMD64_
7416
7417     // It is now a varargs tail call dispatched via helper.
7418     call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7419     call->gtFlags &= ~GTF_CALL_POP_ARGS;
7420
7421 #endif // _TARGET_*
7422
7423     JITDUMP("fgMorphTailCall (after):\n");
7424     DISPTREE(call);
7425 }
7426
7427 //------------------------------------------------------------------------------
7428 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7429 //
7430 //
7431 // Arguments:
7432 //    block  - basic block ending with a recursive fast tail call
7433 //    recursiveTailCall - recursive tail call to transform
7434 //
7435 // Notes:
7436 //    The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
7437
7438 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7439 {
7440     assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7441     GenTreePtr last = block->lastStmt();
7442     assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7443
7444     // Transform recursive tail call into a loop.
7445
7446     GenTreePtr earlyArgInsertionPoint = last;
7447     IL_OFFSETX callILOffset           = last->gtStmt.gtStmtILoffsx;
7448
7449     // Hoist arg setup statement for the 'this' argument.
7450     GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
7451     if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7452     {
7453         GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
7454         fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7455     }
7456
7457     // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7458     // then the temps need to be assigned to the method parameters. This is done so that the caller
7459     // parameters are not reassigned before call arguments depending on them are evaluated.
7460     // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7461     // where the next temp or parameter assignment should be inserted.
7462
7463     // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7464     // while the second call argument (const 1) doesn't.
7465     // Basic block before tail recursion elimination:
7466     //  ***** BB04, stmt 1 (top level)
7467     //  [000037] ------------             *  stmtExpr  void  (top level) (IL 0x00A...0x013)
7468     //  [000033] --C - G------ - \--*  call      void   RecursiveMethod
7469     //  [000030] ------------ | / --*  const     int - 1
7470     //  [000031] ------------arg0 in rcx + --*  +int
7471     //  [000029] ------------ | \--*  lclVar    int    V00 arg1
7472     //  [000032] ------------arg1 in rdx    \--*  const     int    1
7473     //
7474     //
7475     //  Basic block after tail recursion elimination :
7476     //  ***** BB04, stmt 1 (top level)
7477     //  [000051] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
7478     //  [000030] ------------ | / --*  const     int - 1
7479     //  [000031] ------------ | / --*  +int
7480     //  [000029] ------------ | | \--*  lclVar    int    V00 arg1
7481     //  [000050] - A----------             \--* = int
7482     //  [000049] D------N----                \--*  lclVar    int    V02 tmp0
7483     //
7484     //  ***** BB04, stmt 2 (top level)
7485     //  [000055] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
7486     //  [000052] ------------ | / --*  lclVar    int    V02 tmp0
7487     //  [000054] - A----------             \--* = int
7488     //  [000053] D------N----                \--*  lclVar    int    V00 arg0
7489
7490     //  ***** BB04, stmt 3 (top level)
7491     //  [000058] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
7492     //  [000032] ------------ | / --*  const     int    1
7493     //  [000057] - A----------             \--* = int
7494     //  [000056] D------N----                \--*  lclVar    int    V01 arg1
7495
7496     GenTreePtr tmpAssignmentInsertionPoint   = last;
7497     GenTreePtr paramAssignmentInsertionPoint = last;
7498
7499     // Process early args. They may contain both setup statements for late args and actual args.
7500     // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7501     // below has the correct second argument.
7502     int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7503     for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7504          (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7505     {
7506         GenTreePtr earlyArg = earlyArgs->Current();
7507         if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7508         {
7509             if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7510             {
7511                 // This is a setup node so we need to hoist it.
7512                 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7513                 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7514             }
7515             else
7516             {
7517                 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7518                 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7519                 GenTreePtr       paramAssignStmt =
7520                     fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7521                                                           tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7522                 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7523                 {
7524                     // All temp assignments will happen before the first param assignment.
7525                     tmpAssignmentInsertionPoint = paramAssignStmt;
7526                 }
7527             }
7528         }
7529     }
7530
7531     // Process late args.
7532     int lateArgIndex = 0;
7533     for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7534          (lateArgIndex++, lateArgs = lateArgs->Rest()))
7535     {
7536         // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7537         GenTreePtr       lateArg        = lateArgs->Current();
7538         fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7539         GenTreePtr       paramAssignStmt =
7540             fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7541                                                   tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7542
7543         if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7544         {
7545             // All temp assignments will happen before the first param assignment.
7546             tmpAssignmentInsertionPoint = paramAssignStmt;
7547         }
7548     }
7549
7550     // If the method has starg.s 0 or ldarga.s 0, a special local (lvaArg0Var) is created so that
7551     // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7552     // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7553     if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7554     {
7555         var_types  thisType           = lvaTable[info.compThisArg].TypeGet();
7556         GenTreePtr arg0               = gtNewLclvNode(lvaArg0Var, thisType);
7557         GenTreePtr arg0Assignment     = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7558         GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7559         fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7560     }
7561
7562     // Remove the call
7563     fgRemoveStmt(block, last);
7564
7565     // Set the loop edge.
7566     block->bbJumpKind = BBJ_ALWAYS;
7567     block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
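     // A scratch entry block must keep having no predecessors, so when one exists the
     // loop targets the next block, which holds the method's first real statements.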
7568     fgAddRefPred(block->bbJumpDest, block);
7569     block->bbFlags &= ~BBF_HAS_JMP;
7570 }
7571
7572 //------------------------------------------------------------------------------
7573 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7574 //
7575 //
7576 // Arguments:
7577 //    arg  -  argument to assign
7578 //    argTabEntry  -  argument table entry corresponding to arg
7579 //    block  -  basic block the call is in
7580 //    callILOffset  -  IL offset of the call
7581 //    tmpAssignmentInsertionPoint  -  tree before which temp assignment should be inserted (if necessary)
7582 //    paramAssignmentInsertionPoint  -  tree before which parameter assignment should be inserted
7583 //
7584 // Return Value:
7585 //    parameter assignment statement if one was inserted; nullptr otherwise.
7586
7587 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr       arg,
7588                                                            fgArgTabEntryPtr argTabEntry,
7589                                                            BasicBlock*      block,
7590                                                            IL_OFFSETX       callILOffset,
7591                                                            GenTreePtr       tmpAssignmentInsertionPoint,
7592                                                            GenTreePtr       paramAssignmentInsertionPoint)
7593 {
7594     // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7595     // some argument trees may reference parameters directly.
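     //
     // For example (hypothetical source): for a recursive tail call "return Foo(y, x)", the
     // argument trees read the parameters directly; assigning "x = y" before "y = x" would
     // clobber the value of 'x' that the second assignment still needs, so each such
     // argument goes through a temp.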
7596
7597     GenTreePtr argInTemp             = nullptr;
7598     unsigned   originalArgNum        = argTabEntry->argNum;
7599     bool       needToAssignParameter = true;
7600
7601     // TODO-CQ: enable calls with struct arguments passed in registers.
7602     noway_assert(!varTypeIsStruct(arg->TypeGet()));
7603
7604     if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7605     {
7606         // The argument is already assigned to a temp or is a const.
7607         argInTemp = arg;
7608     }
7609     else if (arg->OperGet() == GT_LCL_VAR)
7610     {
7611         unsigned   lclNum = arg->AsLclVar()->gtLclNum;
7612         LclVarDsc* varDsc = &lvaTable[lclNum];
7613         if (!varDsc->lvIsParam)
7614         {
7615             // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7616             argInTemp = arg;
7617         }
7618         else if (lclNum == originalArgNum)
7619         {
7620             // The argument is the same parameter local that we were about to assign so
7621             // we can skip the assignment.
7622             needToAssignParameter = false;
7623         }
7624     }
7625
7626     // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7627     // any caller parameters. Some common cases are handled above but we may be able to eliminate
7628     // more temp assignments.
7629
7630     GenTreePtr paramAssignStmt = nullptr;
7631     if (needToAssignParameter)
7632     {
7633         if (argInTemp == nullptr)
7634         {
7635             // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7636             // TODO: we can avoid a temp assignment if we can prove that the argument tree
7637             // doesn't involve any caller parameters.
7638             unsigned   tmpNum        = lvaGrabTemp(true DEBUGARG("arg temp"));
7639             GenTreePtr tempSrc       = arg;
7640             GenTreePtr tempDest      = gtNewLclvNode(tmpNum, tempSrc->gtType);
7641             GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7642             GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7643             fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7644             argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7645         }
7646
7647         // Now assign the temp to the parameter.
7648         LclVarDsc* paramDsc = lvaTable + originalArgNum;
7649         assert(paramDsc->lvIsParam);
7650         GenTreePtr paramDest       = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7651         GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7652         paramAssignStmt            = gtNewStmt(paramAssignNode, callILOffset);
7653
7654         fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7655     }
7656     return paramAssignStmt;
7657 }
7658
7659 /*****************************************************************************
7660  *
7661  *  Transform the given GT_CALL tree for code generation.
7662  */
7663
7664 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
7665 {
7666     if (call->CanTailCall())
7667     {
7668         // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7669         assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7670
7671         // It cannot be an inline candidate
7672         assert(!call->IsInlineCandidate());
7673
7674         const char* szFailReason   = nullptr;
7675         bool        hasStructParam = false;
7676         if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7677         {
7678             szFailReason = "Might turn into an intrinsic";
7679         }
7680
7681         if (opts.compNeedSecurityCheck)
7682         {
7683             szFailReason = "Needs security check";
7684         }
7685         else if (compLocallocUsed)
7686         {
7687             szFailReason = "Localloc used";
7688         }
7689 #ifdef _TARGET_AMD64_
7690         // Needed for Jit64 compat.
7691         // In future, enabling tail calls from methods that need GS cookie check
7692         // would require codegen side work to emit GS cookie check before a tail
7693         // call.
7694         else if (getNeedsGSSecurityCookie())
7695         {
7696             szFailReason = "GS Security cookie check";
7697         }
7698 #endif
7699 #ifdef DEBUG
7700         // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7701         else if (opts.compGcChecks)
7702         {
7703             szFailReason = "GcChecks";
7704         }
7705 #endif
7706 #if FEATURE_TAILCALL_OPT
7707         else
7708         {
7709             // We are still not sure whether it can be a tail call, because when converting
7710             // a call to an implicit tail call, we must check that there are no locals with
7711             // their address taken.  If there are, we have to assume that the address
7712             // has been leaked and the current stack frame must live until after the final
7713             // call.
7714
7715             // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
7716             // that lvHasLdAddrOp is much more conservative.  We cannot just base this on
7717             // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
7718             // during the morph stage. The reason for also checking lvAddrExposed is that in the case
7719             // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
7720             // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
7721             // never to be incorrect.
7722             //
7723             // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
7724             // address is taken. Such a flag could be set whenever lvHasLdAddrOp or lvAddrExposed
7725             // is set. This avoids the need for iterating through all lcl vars of the current
7726             // method.  Right now throughout the code base we are not consistently using 'set'
7727             // method to set lvHasLdAddrOp and lvAddrExposed flags.
7728             unsigned   varNum;
7729             LclVarDsc* varDsc;
7730             bool       hasAddrExposedVars     = false;
7731             bool       hasStructPromotedParam = false;
7732             bool       hasPinnedVars          = false;
7733
7734             for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7735             {
7736                 // If the method is marked as an explicit tail call we will skip the
7737                 // following three hazard checks.
7738                 // We still must check for any struct parameters and set 'hasStructParam'
7739                 // so that we won't transform the recursive tail call into a loop.
7740                 //
7741                 if (call->IsImplicitTailCall())
7742                 {
7743                     if (varDsc->lvHasLdAddrOp)
7744                     {
7745                         hasAddrExposedVars = true;
7746                         break;
7747                     }
7748                     if (varDsc->lvAddrExposed)
7749                     {
7750                         if (lvaIsImplicitByRefLocal(varNum))
7751                         {
7752                             // The address of the implicit-byref is a non-address use of the pointer parameter.
7753                         }
7754                         else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
7755                         {
7756                             // The address of the implicit-byref's field is likewise a non-address use of the pointer
7757                             // parameter.
7758                         }
7759                         else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
7760                         {
7761                             // This temp was used for struct promotion bookkeeping.  It will not be used, and will have
7762                             // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
7763                             assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
7764                             assert(fgGlobalMorph);
7765                         }
7766                         else
7767                         {
7768                             hasAddrExposedVars = true;
7769                             break;
7770                         }
7771                     }
7772                     if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
7773                     {
7774                         hasStructPromotedParam = true;
7775                         break;
7776                     }
7777                     if (varDsc->lvPinned)
7778                     {
7779                         // A tail call removes the method from the stack, which means the pinning
7780                         // goes away for the callee.  We can't allow that.
7781                         hasPinnedVars = true;
7782                         break;
7783                     }
7784                 }
7785                 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7786                 {
7787                     hasStructParam = true;
7788                     // This prevents transforming a recursive tail call into a loop
7789                     // but doesn't prevent tail call optimization so we need to
7790                     // look at the rest of the parameters.
7791                     continue;
7792                 }
7793             }
7794
7795             if (hasAddrExposedVars)
7796             {
7797                 szFailReason = "Local address taken";
7798             }
7799             if (hasStructPromotedParam)
7800             {
7801                 szFailReason = "Has Struct Promoted Param";
7802             }
7803             if (hasPinnedVars)
7804             {
7805                 szFailReason = "Has Pinned Vars";
7806             }
7807         }
7808 #endif // FEATURE_TAILCALL_OPT
7809
7810         if (varTypeIsStruct(call))
7811         {
7812             fgFixupStructReturn(call);
7813         }
7814
7815         var_types callType = call->TypeGet();
7816
7817         // We have to ensure that we pass the incoming retValBuf as the
7818         // outgoing one. Using a temp will not do, as this function will
7819         // not regain control to do the copy.
7820
7821         if (info.compRetBuffArg != BAD_VAR_NUM)
7822         {
7823             noway_assert(callType == TYP_VOID);
7824             GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7825             if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7826             {
7827                 szFailReason = "Need to copy return buffer";
7828             }
7829         }
7830
7831         // If this is an opportunistic tail call that cannot be dispatched as a
7832         // fast tail call, go the non-tail-call route.  This is done for perf
7833         // reasons.
7834         //
7835         // Avoid the cost of determining whether the call can be dispatched as a fast
7836         // tail call if we already know that the tail call cannot be honored for other
7837         // reasons.
7838         bool canFastTailCall = false;
7839         if (szFailReason == nullptr)
7840         {
7841             canFastTailCall = fgCanFastTailCall(call);
7842             if (!canFastTailCall)
7843             {
7844                 // Implicit or opportunistic tail calls are always dispatched via the fast tail
7845                 // call mechanism and never via the tail call helper, for perf.
7846                 if (call->IsImplicitTailCall())
7847                 {
7848                     szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7849                 }
7850 #ifndef LEGACY_BACKEND
7851                 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
7852                 {
7853             // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
7854             // dispatched as a fast tail call.
7855
7856             // Methods with non-standard args will have an indirection cell or cookie param passed
7857             // in a callee-trash register (e.g. R11). The tail call helper doesn't preserve it before
7858             // tail calling the target method, and hence the ".tail" prefix on such calls needs to be
7859             // ignored.
7860                     //
7861             // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require an
7862             // extra stub param (e.g. in R11 on Amd64), they can still be called via the tail call helper.
7863             // This is done by adding stubAddr as an additional arg before the original list of
7864                     // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7865                     // in Stublinkerx86.cpp.
7866                     szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
7867                                    "called via helper";
7868                 }
7869 #ifdef _TARGET_ARM64_
7870                 else
7871                 {
7872                     // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7873                     // So, bail out if we can't make a fast tail call.
7874                     szFailReason = "Non-qualified fast tail call";
7875                 }
7876 #endif
7877 #endif // LEGACY_BACKEND
7878             }
7879         }
7880
7881         // Clear these flags before calling fgMorphCall() to avoid recursion.
7882         bool isTailPrefixed = call->IsTailPrefixedCall();
7883         call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7884
7885 #if FEATURE_TAILCALL_OPT
7886         call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
7887 #endif
7888
7889 #ifdef FEATURE_PAL
7890         if (!canFastTailCall && szFailReason == nullptr)
7891         {
7892             szFailReason = "Non fast tail calls disabled for PAL based systems.";
7893         }
7894 #endif // FEATURE_PAL
7895
7896         if (szFailReason != nullptr)
7897         {
7898 #ifdef DEBUG
7899             if (verbose)
7900             {
7901                 printf("\nRejecting tail call late for call ");
7902                 printTreeID(call);
7903                 printf(": %s\n", szFailReason);
7904             }
7905 #endif
7906
7907             // For non-user funcs, we have no handles to report
7908             info.compCompHnd->reportTailCallDecision(nullptr,
7909                                                      (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7910                                                      isTailPrefixed, TAILCALL_FAIL, szFailReason);
7911
7912             goto NO_TAIL_CALL;
7913         }
7914
7915 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7916         // We enable shared-ret tail call optimization for recursive calls even if
7917         // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7918         if (gtIsRecursiveCall(call))
7919 #endif
7920         {
7921             // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7922             // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7923             if (compCurBB->bbJumpKind != BBJ_RETURN)
7924             {
7925                 compCurBB->bbJumpKind = BBJ_RETURN;
7926             }
7927         }
7928
7929         // Set this flag before calling fgMorphCall() to prevent inlining this call.
7930         call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7931
7932         bool fastTailCallToLoop = false;
7933 #if FEATURE_TAILCALL_OPT
7934         // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7935         // or the return type is a struct that can be passed in a register.
7936         //
7937         // TODO-CQ: if the method being compiled requires the generic context to be reported in gc-info (either
7938         // through a hidden generic context param or through a keep-alive thisptr), then transforming a recursive
7939         // call to such a method requires that the generic context stored in the stack slot be updated.  Right now,
7940         // fgMorphRecursiveFastTailCallIntoLoop() does not handle updating the generic context while transforming
7941         // a recursive call into a loop.  Another option is to modify gtIsRecursiveCall() to check that the
7942         // generic type parameters of both the caller and the callee generic method are the same.
7943         if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
7944             !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
7945         {
7946             call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7947             fastTailCallToLoop = true;
7948         }
7949 #endif
7950
7951         // Do some target-specific transformations (before we process the args, etc.).
7952         // This is needed only for tail-prefixed calls that cannot be dispatched as
7953         // fast tail calls.
7954         if (!canFastTailCall)
7955         {
7956             fgMorphTailCall(call);
7957         }
7958
7959         // Implementation note: If we optimize the tailcall to do a direct jump
7960         // to the target function (after stomping on the return address, etc.),
7961         // without using CORINFO_HELP_TAILCALL, we have to make certain that
7962         // we don't starve the hijacking logic (by stomping on the hijacked
7963         // return address, etc.).
7964
7965         // At this point, we are committed to doing the tailcall.
7966         compTailCallUsed = true;
7967
7968         CorInfoTailCall tailCallResult;
7969
7970         if (fastTailCallToLoop)
7971         {
7972             tailCallResult = TAILCALL_RECURSIVE;
7973         }
7974         else if (canFastTailCall)
7975         {
7976             tailCallResult = TAILCALL_OPTIMIZED;
7977         }
7978         else
7979         {
7980             tailCallResult = TAILCALL_HELPER;
7981         }
7982
7983         // For non-user funcs, we have no handles to report
7984         info.compCompHnd->reportTailCallDecision(nullptr,
7985                                                  (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7986                                                  isTailPrefixed, tailCallResult, nullptr);
7987
7988         // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
7989         // to avoid doing any extra work for the return value.
7990         call->gtType = TYP_VOID;
7991
7992 #ifdef DEBUG
7993         if (verbose)
7994         {
7995             printf("\nGTF_CALL_M_TAILCALL bit set for call ");
7996             printTreeID(call);
7997             printf("\n");
7998             if (fastTailCallToLoop)
7999             {
8000                 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8001                 printTreeID(call);
8002                 printf("\n");
8003             }
8004         }
8005 #endif
8006
8007         GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
8008
8009 #ifdef DEBUG
8010         // A tail call needs to be in one of the following IR forms:
8011         //    Either a call stmt, or
8012         //    GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8013         //    var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8014         //    GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8015         // In the above,
8016         //    GT_CASTS may be nested.
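        // For example, a tail call whose result is cast before being returned
        // appears as GT_RETURN(GT_CAST(GT_CALL(..))); the loop below peels the
        // cast(s) to reach the call itself.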
8017         genTreeOps stmtOper = stmtExpr->gtOper;
8018         if (stmtOper == GT_CALL)
8019         {
8020             noway_assert(stmtExpr == call);
8021         }
8022         else
8023         {
8024             noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8025             GenTreePtr treeWithCall;
8026             if (stmtOper == GT_RETURN)
8027             {
8028                 treeWithCall = stmtExpr->gtGetOp1();
8029             }
8030             else if (stmtOper == GT_COMMA)
8031             {
8032                 // The second operand must be a nop.
8033                 noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
8034                 treeWithCall = stmtExpr->gtGetOp1();
8035             }
8036             else
8037             {
8038                 treeWithCall = stmtExpr->gtGetOp2();
8039             }
8040
8041             // Peel off casts
8042             while (treeWithCall->gtOper == GT_CAST)
8043             {
8044                 noway_assert(!treeWithCall->gtOverflow());
8045                 treeWithCall = treeWithCall->gtGetOp1();
8046             }
8047
8048             noway_assert(treeWithCall == call);
8049         }
8050 #endif
8051
8052         // For void calls, we would have created a GT_CALL in the stmt list.
8053         // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
8054         // For calls returning structs, we would have a void call, followed by a void return.
8055         // For debuggable code, it would be an assignment of the call to a temp.
8056         // We want to get rid of any of these extra trees, and just leave
8057         // the call.
8058         GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8059
8060 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
8061         // Legacy Jit64 Compat:
8062         // There could be any number of GT_NOPs between the tail call and the GT_RETURN.
8063         // That is, the tail call pattern could be one of the following:
8064         //  1) tail.call, nop*, ret
8065         //  2) tail.call, nop*, pop, nop*, ret
8066         //  3) var=tail.call, nop*, ret(var)
8067         //  4) var=tail.call, nop*, pop, ret
8068         //  5) comma(tail.call, nop), nop*, ret
8069         //
8070         // See impIsTailCallILPattern() for details on tail call IL patterns
8071         // that are supported.
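        // For example (pattern 2 above, with a hypothetical value-returning Foo):
        //     tail. call int32 Foo()
        //     nop
        //     pop
        //     ret
        // imports as the call stmt followed by nop stmt(s) and a side-effect-free
        // pop stmt, all of which are cleaned up below.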
8072         if (stmtExpr->gtOper != GT_RETURN)
8073         {
8074             // First delete all GT_NOPs after the call
8075             GenTreeStmt* morphStmtToRemove = nullptr;
8076             while (nextMorphStmt != nullptr)
8077             {
8078                 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8079                 if (!nextStmtExpr->IsNothingNode())
8080                 {
8081                     break;
8082                 }
8083
8084                 morphStmtToRemove = nextMorphStmt;
8085                 nextMorphStmt     = morphStmtToRemove->gtNextStmt;
8086                 fgRemoveStmt(compCurBB, morphStmtToRemove);
8087             }
8088
8089             // Check to see if there is a pop.
8090             // Since the tail call is honored, we can get rid of the stmt corresponding to the pop.
8091             if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
8092             {
8093                 // Note that the pop opcode may or may not result in a new stmt (for details see
8094                 // impImportBlockCode()). Hence, it is not possible to assert about the IR
8095                 // form generated by pop, but the pop tree must be side-effect free so that we
8096                 // can delete it safely.
8097                 GenTreeStmt* popStmt = nextMorphStmt;
8098                 nextMorphStmt        = nextMorphStmt->gtNextStmt;
8099
8100                 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
8101                 // the constituent nodes.
8102                 GenTreePtr popExpr          = popStmt->gtStmtExpr;
8103                 bool       isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
8104                 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
8105                 {
8106                     isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
8107                                        ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
8108                 }
8109                 noway_assert(isSideEffectFree);
8110                 fgRemoveStmt(compCurBB, popStmt);
8111             }
8112
8113             // Next delete any GT_NOP nodes after the pop
8114             while (nextMorphStmt != nullptr)
8115             {
8116                 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8117                 if (!nextStmtExpr->IsNothingNode())
8118                 {
8119                     break;
8120                 }
8121
8122                 morphStmtToRemove = nextMorphStmt;
8123                 nextMorphStmt     = morphStmtToRemove->gtNextStmt;
8124                 fgRemoveStmt(compCurBB, morphStmtToRemove);
8125             }
8126         }
8127 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
8128
8129         // Delete the GT_RETURN, if any
8130         if (nextMorphStmt != nullptr)
8131         {
8132             GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
8133             noway_assert(retExpr->gtOper == GT_RETURN);
8134
8135             // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
8136             // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
8137             if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
8138             {
8139                 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
8140                 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
8141                              retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
8142             }
8143
8144             fgRemoveStmt(compCurBB, nextMorphStmt);
8145         }
8146
8147         fgMorphStmt->gtStmtExpr = call;
8148
8149         // Tail call via helper: The VM can't use return address hijacking if we're
8150         // not going to return and the helper doesn't have enough info to safely poll,
8151         // so we poll before the tail call, if the block isn't already safe.  Since
8152         // tail call via helper is a slow mechanism it doesn't matter whether we emit a
8153         // GC poll.  This is done to be in parity with Jit64. Also this avoids a GC info
8154         // size increase if almost all methods are expected to be tail calls (e.g. F#).
8155         //
8156         // Note that we can avoid emitting a GC poll if we know that the current BB is
8157         // dominated by a GC-SafePoint block.  But we don't have dominator info at this
8158         // point.  One option is to just add a placeholder node for the GC poll (e.g. GT_GCPOLL)
8159         // here and remove it in lowering if the block is dominated by a GC-SafePoint.  For
8160         // now it is not clear whether optimizing slow tail calls is worth the effort.  As a
8161         // low-cost check, we check whether the first and current basic blocks are
8162         // GC-SafePoints.
8163         //
8164         // Fast tail call as epilog+jmp - no need to insert a GC poll. Instead, fgSetBlockOrder()
8165         // is going to mark the method as fully interruptible if the block containing this tail
8166         // call is reachable without executing any call.
8167         if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8168             !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8169         {
8170             // We didn't insert a poll block, so we need to morph the call now
8171             // (Normally it will get morphed when we get to the split poll block)
8172             GenTreePtr temp = fgMorphCall(call);
8173             noway_assert(temp == call);
8174         }
8175
8176         // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8177         // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8178         //
8179         // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8180         // hence mark it as BBJ_RETURN with the BBF_HAS_JMP flag set.
8181         noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8182
8183         if (canFastTailCall)
8184         {
8185             compCurBB->bbFlags |= BBF_HAS_JMP;
8186         }
8187         else
8188         {
8189             compCurBB->bbJumpKind = BBJ_THROW;
8190         }
8191
8192         // For non-void calls, we return a placeholder which will be
8193         // used by the parent GT_RETURN node of this call.
8194
8195         GenTree* result = call;
8196         if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8197         {
8198 #ifdef FEATURE_HFA
8199             // Return a dummy node, as the return is already removed.
8200             if (callType == TYP_STRUCT)
8201             {
8202                 // This is a HFA, use float 0.
8203                 callType = TYP_FLOAT;
8204             }
8205 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8206             // Return a dummy node, as the return is already removed.
8207             if (varTypeIsStruct(callType))
8208             {
8209                 // This is a register-returned struct. Return a 0.
8210                 // The actual return registers are hacked in lowering and the register allocator.
8211                 callType = TYP_INT;
8212             }
8213 #endif
8214 #ifdef FEATURE_SIMD
8215             // Return a dummy node, as the return is already removed.
8216             if (varTypeIsSIMD(callType))
8217             {
8218                 callType = TYP_DOUBLE;
8219             }
8220 #endif
8221             result = gtNewZeroConNode(genActualType(callType));
8222             result = fgMorphTree(result);
8223         }
8224
8225         return result;
8226     }
8227
8228 NO_TAIL_CALL:
8229
8230     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8231         (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8232 #ifdef FEATURE_READYTORUN_COMPILER
8233          || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8234 #endif
8235              ) &&
8236         (call == fgMorphStmt->gtStmtExpr))
8237     {
8238         // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8239         // Transform it into a null check.
8240
8241         GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
8242
8243         GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8244         nullCheck->gtFlags |= GTF_EXCEPT;
8245
8246         return fgMorphTree(nullCheck);
8247     }
8248
8249     noway_assert(call->gtOper == GT_CALL);
8250
8251     //
8252     // Only count calls once (only in the global morph phase)
8253     //
8254     if (fgGlobalMorph)
8255     {
8256         if (call->gtCallType == CT_INDIRECT)
8257         {
8258             optCallCount++;
8259             optIndirectCallCount++;
8260         }
8261         else if (call->gtCallType == CT_USER_FUNC)
8262         {
8263             optCallCount++;
8264             if (call->IsVirtual())
8265             {
8266                 optIndirectCallCount++;
8267             }
8268         }
8269     }
8270
8271     // Couldn't inline - remember that this BB contains method calls
8272
8273     // If this is a 'regular' call, mark the basic block as
8274     // having a call (for computing full interruptibility).
8275     CLANG_FORMAT_COMMENT_ANCHOR;
8276
8277 #ifdef _TARGET_AMD64_
8278     // Amd64 note: If this is a fast tail call then don't count it as a call
8279     // since we don't insert GC-polls but instead make the method fully GC
8280     // interruptible.
8281     if (!call->IsFastTailCall())
8282 #endif
8283     {
8284         if (call->gtCallType == CT_INDIRECT)
8285         {
8286             compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8287         }
8288         else if (call->gtCallType == CT_USER_FUNC)
8289         {
8290             if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
8291             {
8292                 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8293             }
8294         }
8295         // otherwise we have a CT_HELPER
8296     }
8297
8298     // Morph Type.op_Equality and Type.op_Inequality
8299     // We need to do this before the arguments are morphed
8300     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8301     {
8302         CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
8303
8304         genTreeOps simpleOp = GT_CALL;
8305         if (methodID == CORINFO_INTRINSIC_TypeEQ)
8306         {
8307             simpleOp = GT_EQ;
8308         }
8309         else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
8310         {
8311             simpleOp = GT_NE;
8312         }
8313
8314         if (simpleOp == GT_EQ || simpleOp == GT_NE)
8315         {
8316             noway_assert(call->TypeGet() == TYP_INT);
8317
8318             // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType
8319             // objects. Then, if either operand is one of these two calls, we can simplify op_Equality/op_Inequality
8320             // to GT_EQ/GT_NE. One important invariant that should never change is that type equivalency is always
8321             // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
8322             // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
8323             // optimization below.
8324
8325             GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
8326             GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
8327
8328             if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
8329             {
8330                 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
8331
8332                 // fgMorphSmpOp will further optimize the following patterns:
8333                 //  1. typeof(...) == typeof(...)
8334                 //  2. typeof(...) == obj.GetType()
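                //  e.g. C# 'o.GetType() == typeof(string)' becomes a GT_EQ here
                //  and can then be reduced further to a type-handle comparison.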
8335                 return fgMorphTree(compare);
8336             }
8337         }
8338     }
8339
8340     // Make sure that return buffers for structs containing GC pointers (that aren't too large) are pointers into the stack.
8341     GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
8342                                    // copy-back).
8343     unsigned             retValTmpNum = BAD_VAR_NUM;
8344     CORINFO_CLASS_HANDLE structHnd    = nullptr;
8345     if (call->HasRetBufArg() &&
8346         call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8347     {
8348         // We're enforcing the invariant that return buffer pointers (at least for
8349         // struct return types containing GC pointers) are never pointers into the heap.
8350         // The large majority of cases are addresses of local variables, which are OK.
8351         // Otherwise, allocate a local of the given struct type, pass its address,
8352         // then assign from that into the proper destination.  (We don't need to do this
8353         // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8354         // will maintain the same invariant.)
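        // Sketch of the transformation (pseudo-trees; retValTmp stands for the
        // temp grabbed below):
        //     CALL(dest, ...)        // dest may point into the heap
        // becomes
        //     COMMA(CALL(&retValTmp, ...), [dest] = retValTmp)
        // where the copy-back is built from 'origDest' via gtNewCpObjNode later
        // in this function.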
8355
8356         GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
8357         assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8358         if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8359         {
8360             // We'll exempt helper calls from this, assuming that the helper implementation
8361             // follows the old convention, and does whatever barrier is required.
8362             if (call->gtCallType != CT_HELPER)
8363             {
8364                 structHnd = call->gtRetClsHnd;
8365                 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8366                     !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
8367                       dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8368                 {
8369                     origDest = dest;
8370
8371                     retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8372                     lvaSetStruct(retValTmpNum, structHnd, true);
8373                     dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8374                 }
8375             }
8376         }
8377
8378         call->gtCallArgs->gtOp.gtOp1 = dest;
8379     }
8380
8381     /* Process the "normal" argument list */
8382     call = fgMorphArgs(call);
8383     noway_assert(call->gtOper == GT_CALL);
8384
8385     // Morph a stelem.ref helper call that stores a null value into a direct store into the array, without the helper.
8386     // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
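    // For example, C# 'arr[i] = null;' imports as a call
    //     CORINFO_HELP_ARRADDR_ST(arr, i, null)
    // which is rewritten below into a plain array store; storing null can never
    // fail the array covariance check, so the helper is unnecessary.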
8387     if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8388     {
8389         GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8390         if (value->IsIntegralConst(0))
8391         {
8392             assert(value->OperGet() == GT_CNS_INT);
8393
8394             GenTree* arr   = gtArgEntryByArgNum(call, 0)->node;
8395             GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8396
8397             // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8398             // the spill trees as well if necessary.
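            // e.g. if fgMorphArgs spilled the array arg to a temp, the early arg
            // list holds a GT_ASG(tmpN, arr) setup tree; such setup trees are
            // chained with GT_COMMA below so they still execute before the store.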
8399             GenTreeOp* argSetup = nullptr;
8400             for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8401             {
8402                 GenTree* const arg = earlyArgs->Current();
8403                 if (arg->OperGet() != GT_ASG)
8404                 {
8405                     continue;
8406                 }
8407
8408                 assert(arg != arr);
8409                 assert(arg != index);
8410
8411                 arg->gtFlags &= ~GTF_LATE_ARG;
8412
8413                 GenTree* op1 = argSetup;
8414                 if (op1 == nullptr)
8415                 {
8416                     op1 = gtNewNothingNode();
8417 #if DEBUG
8418                     op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8419 #endif // DEBUG
8420                 }
8421
8422                 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8423
8424 #if DEBUG
8425                 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8426 #endif // DEBUG
8427             }
8428
8429 #ifdef DEBUG
8430             auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8431                 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8432                 return WALK_CONTINUE;
8433             };
8434
8435             fgWalkTreePost(&arr, resetMorphedFlag);
8436             fgWalkTreePost(&index, resetMorphedFlag);
8437             fgWalkTreePost(&value, resetMorphedFlag);
8438 #endif // DEBUG
8439
8440             GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8441             GenTree* const arrIndexNode   = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8442             GenTree* const arrStore       = gtNewAssignNode(arrIndexNode, value);
8443             arrStore->gtFlags |= GTF_ASG;
8444
8445             GenTree* result = fgMorphTree(arrStore);
8446             if (argSetup != nullptr)
8447             {
8448                 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8449 #if DEBUG
8450                 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8451 #endif // DEBUG
8452             }
8453
8454             return result;
8455         }
8456     }
8457
8458     // Optimize get_ManagedThreadId(get_CurrentThread)
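    // i.e. C# 'Thread.CurrentThread.ManagedThreadId': both getters are special
    // intrinsics, and the nested pair is folded below into a single
    // CORINFO_HELP_GETCURRENTMANAGEDTHREADID helper call.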
8459     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8460         info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8461     {
8462         noway_assert(origDest == nullptr);
8463         noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8464
8465         GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8466
8467         if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8468             info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8469                 CORINFO_INTRINSIC_GetCurrentManagedThread)
8470         {
8471             // Substitute the expression with a call to the helper.
8472             GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
8473             JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8474             return fgMorphTree(newCall);
8475         }
8476     }
8477
8478     if (origDest != nullptr)
8479     {
8480         GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8481         // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8482         // var, which would allow the whole assignment to be optimized away to a NOP.  So in that case, make the
8483         // origDest into a comma that uses the var.  Note that the var doesn't have to be a temp for this to
8484         // be correct.
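        // i.e. GT_ASG(v, e) becomes GT_COMMA(GT_ASG(v, e), v); the trailing use
        // of 'v' keeps the assignment from being optimized away.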
8485         if (origDest->OperGet() == GT_ASG)
8486         {
8487             if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8488             {
8489                 GenTreePtr var = origDest->gtOp.gtOp1;
8490                 origDest       = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8491                                          gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8492             }
8493         }
8494         GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8495         copyBlk            = fgMorphTree(copyBlk);
8496         GenTree* result    = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8497 #ifdef DEBUG
8498         result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8499 #endif
8500         return result;
8501     }
8502
8503     if (call->IsNoReturn())
8504     {
8505         //
8506         // If we know that the call does not return then we can set fgRemoveRestOfBlock
8507         // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8508         // As a result the compiler won't need to preserve live registers across the call.
8509         //
8510         // This isn't needed for tail calls, as there shouldn't be any code after the call anyway.
8511         // Besides, the tail call code is part of the epilog and converting the block to
8512         // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8513         // only for BBJ_RETURN blocks.
8514         //
8515         // Currently this doesn't work for non-void callees. Some of the code that handles
8516         // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8517         // do not have this flag by default. We could add the flag here but the proper solution
8518         // would be to replace the return expression with a local var node during inlining
8519         // so the rest of the call tree stays in a separate statement. That statement can then
8520         // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8521         //
8522
8523         if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8524         {
8525             fgRemoveRestOfBlock = true;
8526         }
8527     }
8528
8529     return call;
8530 }
8531
8532 /*****************************************************************************
8533  *
8534  *  Transform the given GTK_CONST tree for code generation.
8535  */
8536
8537 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
8538 {
8539     noway_assert(tree->OperKind() & GTK_CONST);
8540
8541     /* Clear any exception flags or other unnecessary flags
8542      * that may have been set before folding this node to a constant */
8543
8544     tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8545
8546     if (tree->OperGet() != GT_CNS_STR)
8547     {
8548         return tree;
8549     }
8550
8551     // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8552     // guarantee slow performance for that block. Instead, cache the return value
8553     // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
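    // (A hypothetical cache-first shape for that TODO, in pseudo-code:
    //      str = s_strCache[sconCPX];
    //      if (str == nullptr) { str = CORINFO_HELP_STRCNS(...); s_strCache[sconCPX] = str; }
    //  where s_strCache is illustrative and does not exist today.)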
8554
8555     if (compCurBB->bbJumpKind == BBJ_THROW)
8556     {
8557         CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8558         if (helper != CORINFO_HELP_UNDEF)
8559         {
8560             // For unimportant blocks, we want to construct the string lazily
8561
8562             GenTreeArgList* args;
8563             if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8564             {
8565                 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8566             }
8567             else
8568             {
8569                 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8570                                     gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8571             }
8572
8573             tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
8574             return fgMorphTree(tree);
8575         }
8576     }
8577
8578     assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8579
8580     LPVOID         pValue;
8581     InfoAccessType iat =
8582         info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8583
8584     tree = gtNewStringLiteralNode(iat, pValue);
8585
8586     return fgMorphTree(tree);
8587 }
8588
8589 /*****************************************************************************
8590  *
8591  *  Transform the given GTK_LEAF tree for code generation.
8592  */
8593
8594 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
8595 {
8596     noway_assert(tree->OperKind() & GTK_LEAF);
8597
8598     if (tree->gtOper == GT_LCL_VAR)
8599     {
8600         const bool forceRemorph = false;
8601         return fgMorphLocalVar(tree, forceRemorph);
8602     }
8603 #ifdef _TARGET_X86_
8604     else if (tree->gtOper == GT_LCL_FLD)
8605     {
8606         if (info.compIsVarArgs)
8607         {
8608             GenTreePtr newTree =
8609                 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8610             if (newTree != nullptr)
8611             {
8612                 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8613                 {
8614                     fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8615                 }
8616                 return newTree;
8617             }
8618         }
8619     }
8620 #endif // _TARGET_X86_
8621     else if (tree->gtOper == GT_FTN_ADDR)
8622     {
8623         CORINFO_CONST_LOOKUP addrInfo;
8624
8625 #ifdef FEATURE_READYTORUN_COMPILER
8626         if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8627         {
8628             addrInfo = tree->gtFptrVal.gtEntryPoint;
8629         }
8630         else
8631 #endif
8632         {
8633             info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8634         }
8635
8636         // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8637         //
8638         tree->SetOper(GT_CNS_INT);
8639         tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8640         tree->gtFlags |= GTF_ICON_FTN_ADDR;
8641
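        // accessType recap: IAT_VALUE means 'handle' is the entry point itself;
        // IAT_PVALUE means 'handle' points to the entry point (one GT_IND);
        // IAT_PPVALUE means 'handle' points to a slot that holds that pointer
        // (two GT_INDs, via the fallthrough below).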
8642         switch (addrInfo.accessType)
8643         {
8644             case IAT_PPVALUE:
8645                 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8646                 tree->gtFlags |= GTF_IND_INVARIANT;
8647
8648                 __fallthrough;
8649
8650             case IAT_PVALUE:
8651                 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8652                 break;
8653
8654             case IAT_VALUE:
8655                 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8656                 break;
8657
8658             default:
8659                 noway_assert(!"Unknown addrInfo.accessType");
8660         }
8661
8662         return fgMorphTree(tree);
8663     }
8664
8665     return tree;
8666 }
8667
8668 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
8669 {
8670     GenTreeLclVarCommon* lclVarCmnTree;
8671     bool                 isEntire = false;
8672     if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
8673     {
8674         if (isEntire)
8675         {
8676             lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8677         }
8678         else
8679         {
8680             // We consider partial definitions to be modeled as uses followed by definitions.
8681             // This captures the idea that preceding defs are not necessarily made redundant
8682             // by this definition.
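            // e.g. a store that writes only 2 bytes of a 4-byte local leaves the
            // remaining bytes live, so an earlier full def of the local is still
            // needed.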
8683             lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8684         }
8685     }
8686 }
8687
8688 //------------------------------------------------------------------------
8689 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8690 //
8691 // Arguments:
8692 //    tree - The block assignment to be possibly morphed
8693 //
8694 // Return Value:
8695 //    The modified tree if successful, nullptr otherwise.
8696 //
8697 // Assumptions:
8698 //    'tree' must be a block assignment.
8699 //
8700 // Notes:
8701 //    If successful, this method always returns the incoming tree, modifying only
8702 //    its arguments.
8703
8704 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
8705 {
8706     // This must be a block assignment.
8707     noway_assert(tree->OperIsBlkOp());
8708     var_types asgType = tree->TypeGet();
8709
8710     GenTreePtr asg         = tree;
8711     GenTreePtr dest        = asg->gtGetOp1();
8712     GenTreePtr src         = asg->gtGetOp2();
8713     unsigned   destVarNum  = BAD_VAR_NUM;
8714     LclVarDsc* destVarDsc  = nullptr;
8715     GenTreePtr lclVarTree  = nullptr;
8716     bool       isCopyBlock = asg->OperIsCopyBlkOp();
8717     bool       isInitBlock = !isCopyBlock;
8718
8719     unsigned             size;
8720     CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8721 #ifdef FEATURE_SIMD
8722     // The importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD).
8723     // The SIMD type in question could be Vector2f, which is 8 bytes in size.
8724     // The check below is to make sure that we don't turn that copyblk
8725     // into an assignment, since the rationalizer logic will transform the
8726     // copyblk appropriately. Otherwise, the transformation made in this
8727     // routine would prevent the rationalizer logic and we might end up with a
8728     // GT_ADDR(GT_SIMD) node post-rationalization, leading to a noway assert
8729     // in codegen.
8730     // TODO-1stClassStructs: This is here to preserve old behavior.
8731     // It should be eliminated.
8732     if (src->OperGet() == GT_SIMD)
8733     {
8734         return nullptr;
8735     }
8736 #endif
8737
8738     if (dest->gtEffectiveVal()->OperIsBlk())
8739     {
8740         GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8741         size               = lhsBlk->Size();
8742         if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
8743         {
8744             destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8745             destVarDsc = &(lvaTable[destVarNum]);
8746         }
8747         if (lhsBlk->OperGet() == GT_OBJ)
8748         {
8749             clsHnd = lhsBlk->AsObj()->gtClass;
8750         }
8751     }
8752     else
8753     {
8754         // Is this an enregisterable struct that is already a simple assignment?
8755         // This can happen if we are re-morphing.
8756         if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8757         {
8758             return tree;
8759         }
8760         noway_assert(dest->OperIsLocal());
8761         lclVarTree = dest;
8762         destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8763         destVarDsc = &(lvaTable[destVarNum]);
8764         if (isCopyBlock)
8765         {
8766             clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8767             size   = info.compCompHnd->getClassSize(clsHnd);
8768         }
8769         else
8770         {
8771             size = destVarDsc->lvExactSize;
8772         }
8773     }
8774
8775     //
8776     //  See if we can do a simple transformation:
8777     //
8778     //          GT_ASG <TYP_size>
8779     //          /   \
8780     //      GT_IND GT_IND or CNS_INT
8781     //         |      |
8782     //       [dest] [src]
8783     //
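    //  For example, on a 64-bit target an 8-byte copyblk between non-GC operands
    //  becomes a single GT_ASG<TYP_I_IMPL> of one GT_IND into the other.
    //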
8784
8785     if (size == REGSIZE_BYTES)
8786     {
8787         if (clsHnd == NO_CLASS_HANDLE)
8788         {
8789             // A register-sized cpblk can be treated as an integer assignment.
8790             asgType = TYP_I_IMPL;
8791         }
8792         else
8793         {
8794             BYTE gcPtr;
8795             info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8796             asgType = getJitGCType(gcPtr);
8797         }
8798     }
8799     else
8800     {
8801         switch (size)
8802         {
8803             case 1:
8804                 asgType = TYP_BYTE;
8805                 break;
8806             case 2:
8807                 asgType = TYP_SHORT;
8808                 break;
8809
8810 #ifdef _TARGET_64BIT_
8811             case 4:
8812                 asgType = TYP_INT;
8813                 break;
8814 #endif // _TARGET_64BIT_
8815         }
8816     }
8817
8818     // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
8819     if (!varTypeIsStruct(asgType))
8820     {
8821         // For initBlk, a non-constant source is not going to allow us to fiddle
8822         // with the bits to create a single assignment.
8823         noway_assert(size <= REGSIZE_BYTES);
8824
8825         if (isInitBlock && !src->IsConstInitVal())
8826         {
8827             return nullptr;
8828         }
8829
8830         if (destVarDsc != nullptr)
8831         {
8832 #if LOCAL_ASSERTION_PROP
8833             // Kill everything about dest
8834             if (optLocalAssertionProp)
8835             {
8836                 if (optAssertionCount > 0)
8837                 {
8838                     fgKillDependentAssertions(destVarNum DEBUGARG(tree));
8839                 }
8840             }
8841 #endif // LOCAL_ASSERTION_PROP
8842
8843             // A previous incarnation of this code also required the local not to be
8844             // address-exposed(=taken).  That seems orthogonal to the decision of whether
8845             // to do field-wise assignments: being address-exposed will cause it to be
8846             // "dependently" promoted, so it will be in the right memory location.  One possible
8847             // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8848             // holes, whose contents could be meaningful in unsafe code.  If we decide that's a valid
8849             // concern, then we could compromise, and say that being address-exposed, combined with fields that do
8850             // not completely cover the memory of the struct, prevents field-wise assignments.  The same situation
8851             // exists for the "src" decision.
8851             if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
8852             {
8853                 // Let fgMorphInitBlock handle it.  (Since we'll need to do field-var-wise assignments.)
8854                 return nullptr;
8855             }
8856             else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
8857             {
8858                 // Use the dest local var directly, as well as its type.
8859                 dest    = lclVarTree;
8860                 asgType = destVarDsc->lvType;
8861
8862                 // If the block operation had been a write to a local var of a small int type,
8863                 // of the exact size of the small int type, and the var is NormalizeOnStore,
8864                 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8865                 // have done that normalization.  If we're now making it into an assignment,
8866                 // the NormalizeOnStore will work, and it can be a full def.
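                // e.g. a 2-byte block store to a TYP_SHORT normalize-on-store
                // local: as a scalar assignment the store performs the required
                // normalization itself, so the GTF_VAR_USEASG marker can go.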
8867                 if (destVarDsc->lvNormalizeOnStore())
8868                 {
8869                     dest->gtFlags &= (~GTF_VAR_USEASG);
8870                 }
8871             }
8872             else
8873             {
8874                 // Could be a non-promoted struct, or a floating point type local, or
8875                 // an int subject to a partial write.  Don't enregister.
8876                 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
8877
8878                 // Mark the local var tree as a definition point of the local.
8879                 lclVarTree->gtFlags |= GTF_VAR_DEF;
8880                 if (size < destVarDsc->lvExactSize)
8881                 { // If it's not a full-width assignment...
8882                     lclVarTree->gtFlags |= GTF_VAR_USEASG;
8883                 }
8884
8885                 if (dest == lclVarTree)
8886                 {
8887                     dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
8888                 }
8889             }
8890         }
8891
8892         // Check to ensure we don't have a reducible *(& ... )
8893         if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
8894         {
8895             GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
8896             // Ignore reinterpret casts between int/gc
8897             if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
8898             {
8899                 dest    = addrOp;
8900                 asgType = addrOp->TypeGet();
8901             }
8902         }
8903
8904         if (dest->gtEffectiveVal()->OperIsIndir())
8905         {
8906             // If we have no information about the destination, we have to assume it could
8907             // live anywhere (not just in the GC heap).
8908             // Mark the GT_IND node so that we use the correct write barrier helper in case
8909             // the field is a GC ref.
8910
8911             if (!fgIsIndirOfAddrOfLocal(dest))
8912             {
8913                 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8914                 tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8915             }
8916         }
8917
8918         LclVarDsc* srcVarDsc = nullptr;
8919         if (isCopyBlock)
8920         {
8921             if (src->OperGet() == GT_LCL_VAR)
8922             {
8923                 lclVarTree = src;
8924                 srcVarDsc  = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
8925             }
8926             else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
8927             {
8928                 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
8929             }
8930             if (srcVarDsc != nullptr)
8931             {
8932                 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
8933                 {
8934                     // Let fgMorphCopyBlock handle it.
8935                     return nullptr;
8936                 }
8937                 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8938                          size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8939                 {
8940                     // Use the src local var directly.
8941                     src = lclVarTree;
8942                 }
8943                 else
8944                 {
8945 #ifndef LEGACY_BACKEND
8946
8947                     // The source argument of the copyblk can potentially
8948                     // be accessed only through indir(addr(lclVar))
8949                     // or indir(lclVarAddr) in rational form and liveness
8950                     // won't account for these uses. Therefore,
8951                     // we have to mark this local as address exposed so
8952                     // we don't delete it as a dead store later on.
8953                     unsigned lclVarNum                = lclVarTree->gtLclVarCommon.gtLclNum;
8954                     lvaTable[lclVarNum].lvAddrExposed = true;
8955                     lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8956
8957 #else  // LEGACY_BACKEND
8958                     lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8959 #endif // LEGACY_BACKEND
8960                     GenTree* srcAddr;
8961                     if (src == lclVarTree)
8962                     {
8963                         srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
8964                         src     = gtNewOperNode(GT_IND, asgType, srcAddr);
8965                     }
8966                     else
8967                     {
8968                         assert(src->OperIsIndir());
8969                     }
8970                 }
8971             }
8972             // If we have no information about the src, we have to assume it could
8973             // live anywhere (not just in the GC heap).
8974             // Mark the GT_IND node so that we use the correct write barrier helper in case
8975             // the field is a GC ref.
8976
8977             if (!fgIsIndirOfAddrOfLocal(src))
8978             {
8979                 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8980             }
8981         }
8982         else
8983         {
8984 // InitBlk
8985 #if FEATURE_SIMD
8986             if (varTypeIsSIMD(asgType))
8987             {
8988                 assert(!isCopyBlock); // Else we would have returned the tree above.
8989                 noway_assert(src->IsIntegralConst(0));
8990                 noway_assert(destVarDsc != nullptr);
8991
8992                 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
8993                 tree->gtOp.gtOp2 = src;
8994                 return tree;
8995             }
8996             else
8997 #endif
8998             {
8999                 if (src->OperIsInitVal())
9000                 {
9001                     src = src->gtGetOp1();
9002                 }
9003                 assert(src->IsCnsIntOrI());
9004                 // This will mutate the integer constant, in place, to be the correct
9005                 // value for the type we are using in the assignment.
9006                 src->AsIntCon()->FixupInitBlkValue(asgType);
9007             }
9008         }
9009
9010         // Ensure that the dest is setup appropriately.
9011         if (dest->gtEffectiveVal()->OperIsIndir())
9012         {
9013             dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
9014         }
9015
9016         // Ensure that the rhs is setup appropriately.
9017         if (isCopyBlock)
9018         {
9019             src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
9020         }
9021
9022         // Set the lhs and rhs on the assignment.
9023         if (dest != tree->gtOp.gtOp1)
9024         {
9025             asg->gtOp.gtOp1 = dest;
9026         }
9027         if (src != asg->gtOp.gtOp2)
9028         {
9029             asg->gtOp.gtOp2 = src;
9030         }
9031
9032         asg->ChangeType(asgType);
9033         dest->gtFlags |= GTF_DONT_CSE;
9034         asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9035         // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9036         asg->gtFlags &= ~GTF_REVERSE_OPS;
9037
9038 #ifdef DEBUG
9039         if (verbose)
9040         {
9041             printf("fgMorphOneAsgBlock (after):\n");
9042             gtDispTree(tree);
9043         }
9044 #endif
9045         return tree;
9046     }
9047
9048     return nullptr;
9049 }
9050
9051 //------------------------------------------------------------------------
9052 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
9053 //
9054 // Arguments:
9055 //    tree - a tree node with a gtOper of GT_INITBLK
9056 //           the child nodes for tree have already been Morphed
9057 //
9058 // Return Value:
9059 //    We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9060 //    We can return a single assignment when fgMorphOneAsgBlockOp transforms it (most desirable)
9061 //    If we have performed struct promotion of the Dest() then we will try to
9062 //    perform a field by field assignment for each of the promoted struct fields
9063 //
9064 // Notes:
9065 //    If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
9066 //    If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9067 //    cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
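//
//    For example (a sketch; the local and field numbers are illustrative), zero-initializing
//    a promoted struct local V02 with fields { int; double } as
//        ASG(BLK(ADDR(V02)), 0)
//    becomes
//        COMMA(ASG(V02.intField, 0), ASG(V02.doubleField, 0.0))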
9068
9069 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
9070 {
9071     // We must have the GT_ASG form of InitBlkOp.
9072     noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9073 #ifdef DEBUG
9074     bool morphed = false;
9075 #endif // DEBUG
9076
9077     GenTree* asg      = tree;
9078     GenTree* src      = tree->gtGetOp2();
9079     GenTree* origDest = tree->gtGetOp1();
9080
9081     GenTree* dest = fgMorphBlkNode(origDest, true);
9082     if (dest != origDest)
9083     {
9084         tree->gtOp.gtOp1 = dest;
9085     }
9086     tree->gtType = dest->TypeGet();
9087     // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
9088     // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
9089     if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9090     {
9091         src->gtType = TYP_INT;
9092     }
9093     JITDUMP("\nfgMorphInitBlock:");
9094
9095     GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9096     if (oneAsgTree)
9097     {
9098         JITDUMP(" using oneAsgTree.\n");
9099         tree = oneAsgTree;
9100     }
9101     else
9102     {
9103         GenTree*             destAddr          = nullptr;
9104         GenTree*             initVal           = src->OperIsInitVal() ? src->gtGetOp1() : src;
9105         GenTree*             blockSize         = nullptr;
9106         unsigned             blockWidth        = 0;
9107         FieldSeqNode*        destFldSeq        = nullptr;
9108         LclVarDsc*           destLclVar        = nullptr;
9109         bool                 destDoFldAsg      = false;
9110         unsigned             destLclNum        = BAD_VAR_NUM;
9111         bool                 blockWidthIsConst = false;
9112         GenTreeLclVarCommon* lclVarTree        = nullptr;
9113         if (dest->IsLocal())
9114         {
9115             lclVarTree = dest->AsLclVarCommon();
9116         }
9117         else
9118         {
9119             if (dest->OperIsBlk())
9120             {
9121                 destAddr   = dest->AsBlk()->Addr();
9122                 blockWidth = dest->AsBlk()->gtBlkSize;
9123             }
9124             else
9125             {
9126                 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
9127                 destAddr   = dest->gtGetOp1();
9128                 blockWidth = genTypeSize(dest->TypeGet());
9129             }
9130         }
9131         if (lclVarTree != nullptr)
9132         {
9133             destLclNum        = lclVarTree->gtLclNum;
9134             destLclVar        = &lvaTable[destLclNum];
9135             blockWidth        = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
9136             blockWidthIsConst = true;
9137         }
9138         else
9139         {
9140             if (dest->gtOper == GT_DYN_BLK)
9141             {
9142                 // The size must be an integer type
9143                 blockSize = dest->AsDynBlk()->gtDynamicSize;
9144                 assert(varTypeIsIntegral(blockSize->gtType));
9145             }
9146             else
9147             {
9148                 assert(blockWidth != 0);
9149                 blockWidthIsConst = true;
9150             }
9151
9152             if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9153             {
9154                 destLclNum = lclVarTree->gtLclNum;
9155                 destLclVar = &lvaTable[destLclNum];
9156             }
9157         }
9158         if (destLclNum != BAD_VAR_NUM)
9159         {
9160 #if LOCAL_ASSERTION_PROP
9161             // Kill everything about destLclNum (and its field locals)
9162             if (optLocalAssertionProp)
9163             {
9164                 if (optAssertionCount > 0)
9165                 {
9166                     fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9167                 }
9168             }
9169 #endif // LOCAL_ASSERTION_PROP
9170
9171             if (destLclVar->lvPromoted && blockWidthIsConst)
9172             {
9173                 assert(initVal->OperGet() == GT_CNS_INT);
9174                 noway_assert(varTypeIsStruct(destLclVar));
9175                 noway_assert(!opts.MinOpts());
9176                 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
9177                 {
9178                     JITDUMP(" dest is address exposed and contains holes");
9179                 }
9180                 else
9181                 {
9182                     if (blockWidth == destLclVar->lvExactSize)
9183                     {
9184                         JITDUMP(" (destDoFldAsg=true)");
9185                         // We may decide later that a copyblk is required when this struct has holes
9186                         destDoFldAsg = true;
9187                     }
9188                     else
9189                     {
9190                         JITDUMP(" with mismatched size");
9191                     }
9192                 }
9193             }
9194         }
9195
9196         // Can we use field by field assignment for the dest?
9197         if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9198         {
9199             JITDUMP(" dest contains holes");
9200             destDoFldAsg = false;
9201         }
9202
9203         JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9204
9205         // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9206         // we need to change it back.
9207         if (!destDoFldAsg && !dest->OperIsBlk())
9208         {
9209             noway_assert(blockWidth != 0);
9210             tree->gtOp.gtOp1 = origDest;
9211             tree->gtType     = origDest->gtType;
9212         }
9213
9214         if (!destDoFldAsg && (destLclVar != nullptr))
9215         {
9216             // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9217             if (!destLclVar->lvRegStruct)
9218             {
9219                 // Mark it as DoNotEnregister.
9220                 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9221             }
9222         }
9223
9224         // If we are keeping the InitBlock (i.e. not doing the field by field assignment),
9225         // canonicalize the dest as a block operand and propagate its side-effect flags
9226         // up to the assignment.
9227         //
9228         if (!destDoFldAsg)
9229         {
9230 #if CPU_USES_BLOCK_MOVE
9231             compBlkOpUsed = true;
9232 #endif
9233             dest             = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9234             tree->gtOp.gtOp1 = dest;
9235             tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9236         }
9237         else
9238         {
9239             // The initVal must be a constant of TYP_INT
9240             noway_assert(initVal->OperGet() == GT_CNS_INT);
9241             noway_assert(genActualType(initVal->gtType) == TYP_INT);
9242
9243             // The dest must be of a struct type.
9244             noway_assert(varTypeIsStruct(destLclVar));
9245
9246             //
9247             // Now, convert InitBlock to individual assignments
9248             //
9249
9250             tree = nullptr;
9251             INDEBUG(morphed = true);
9252
9253             GenTreePtr dest;
9254             GenTreePtr srcCopy;
9255             unsigned   fieldLclNum;
9256             unsigned   fieldCnt = destLclVar->lvFieldCnt;
9257
9258             for (unsigned i = 0; i < fieldCnt; ++i)
9259             {
9260                 fieldLclNum = destLclVar->lvFieldLclStart + i;
9261                 dest        = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9262
9263                 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9264                 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9265                 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9266
9267                 srcCopy = gtCloneExpr(initVal);
9268                 noway_assert(srcCopy != nullptr);
9269
9270                 // The type of srcCopy needs to match the type of the field being assigned.
9271                 if (dest->gtType == TYP_LONG)
9272                 {
9273                     srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9274                     // copy and extend the value
9275                     srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9276                     /* Change the type of srcCopy to TYP_LONG */
9277                     srcCopy->gtType = TYP_LONG;
9278                 }
9279                 else if (varTypeIsFloating(dest->gtType))
9280                 {
9281                     srcCopy->ChangeOperConst(GT_CNS_DBL);
9282                     // setup the bit pattern
9283                     memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9284                            sizeof(srcCopy->gtDblCon.gtDconVal));
9285                     /* Change the type of srcCopy to TYP_DOUBLE */
9286                     srcCopy->gtType = TYP_DOUBLE;
9287                 }
9288                 else
9289                 {
9290                     noway_assert(srcCopy->gtOper == GT_CNS_INT);
9291                     noway_assert(srcCopy->TypeGet() == TYP_INT);
9292                     // setup the bit pattern
9293                     memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9294                            sizeof(srcCopy->gtIntCon.gtIconVal));
9295                 }
9296
9297                 srcCopy->gtType = dest->TypeGet();
9298
9299                 asg = gtNewAssignNode(dest, srcCopy);
9300
9301 #if LOCAL_ASSERTION_PROP
9302                 if (optLocalAssertionProp)
9303                 {
9304                     optAssertionGen(asg);
9305                 }
9306 #endif // LOCAL_ASSERTION_PROP
9307
9308                 if (tree)
9309                 {
9310                     tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9311                 }
9312                 else
9313                 {
9314                     tree = asg;
9315                 }
9316             }
9317         }
9318     }
9319
9320 #ifdef DEBUG
9321     if (morphed)
9322     {
9323         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9324
9325         if (verbose)
9326         {
9327             printf("fgMorphInitBlock (after):\n");
9328             gtDispTree(tree);
9329         }
9330     }
9331 #endif
9332
9333     return tree;
9334 }
9335
9336 //------------------------------------------------------------------------
9337 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9338 //
9339 // Arguments:
9340 //    tree - the node to be modified.
9341 //    type - the type of indirection to change it to.
9342 //
9343 // Return Value:
9344 //    Returns the node, modified in place.
9345 //
9346 // Notes:
9347 //    This doesn't really warrant a separate method, but is here to abstract
9348 //    the fact that these nodes can be modified in-place.
9349
9350 GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9351 {
9352     tree->SetOper(GT_IND);
9353     tree->gtType = type;
9354     return tree;
9355 }
9356
9357 //------------------------------------------------------------------------
9358 // fgMorphGetStructAddr: Gets the address of a struct object
9359 //
9360 // Arguments:
9361 //    pTree    - the parent's pointer to the struct object node
9362 //    clsHnd   - the class handle for the struct type
9363 //    isRValue - true if this is a source (not dest)
9364 //
9365 // Return Value:
9366 //    Returns the address of the struct value, possibly modifying the existing tree to
9367 //    sink the address below any comma nodes (this is to canonicalize for value numbering).
9368 //    If this is a source, it will morph it to a GT_IND before taking its address,
9369 //    since it may not be remorphed (and we don't want blk nodes as rvalues).
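//
//    For example (a sketch): a source of the form COMMA(sideEffect, OBJ(addr)) comes back
//    as COMMA(sideEffect, addr), with the COMMA retyped to TYP_BYREF so that the comma
//    itself is the address expression.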
9370
9371 GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9372 {
9373     GenTree* addr;
9374     GenTree* tree = *pTree;
9375     // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9376     // need to hang onto that for the purposes of value numbering.
9377     if (tree->OperIsIndir())
9378     {
9379         if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9380         {
9381             addr = tree->gtOp.gtOp1;
9382         }
9383         else
9384         {
9385             if (isRValue && tree->OperIsBlk())
9386             {
9387                 tree->ChangeOper(GT_IND);
9388             }
9389             addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9390         }
9391     }
9392     else if (tree->gtOper == GT_COMMA)
9393     {
9394         // If this is a comma, we're going to "sink" the GT_ADDR below it.
9395         (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9396         tree->gtType = TYP_BYREF;
9397         addr         = tree;
9398     }
9399     else
9400     {
9401         switch (tree->gtOper)
9402         {
9403             case GT_LCL_FLD:
9404             case GT_LCL_VAR:
9405             case GT_INDEX:
9406             case GT_FIELD:
9407             case GT_ARR_ELEM:
9408                 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9409                 break;
9410             default:
9411             {
9412                 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9413                 // not going to use "temp"
9414                 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9415                 addr          = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9416                 break;
9417             }
9418         }
9419     }
9420     *pTree = addr;
9421     return addr;
9422 }
9423
9424 //------------------------------------------------------------------------
9425 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9426 //
9427 // Arguments:
9428 //    tree   - The struct type node
9429 //    isDest - True if this is the destination of the assignment
9430 //
9431 // Return Value:
9432 //    Returns the possibly-morphed node. The caller is responsible for updating
9433 //    the parent of this node.
9434
9435 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
9436 {
9437     if (tree->gtOper == GT_COMMA)
9438     {
9439         GenTree* effectiveVal = tree->gtEffectiveVal();
9440         GenTree* addr         = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9441 #ifdef DEBUG
9442         addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9443 #endif
9444         // In order to CSE and value number array index expressions and bounds checks,
9445         // the commas in which they are contained need to match.
9446         // The pattern is that the COMMA should be the address expression.
9447         // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9448         // TODO-1stClassStructs: Consider whether this can be improved.
9449         // Also consider whether some of this can be included in gtNewBlockVal (though note
9450         // that doing so may cause us to query the type system before we otherwise would).
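        // For example (a sketch): a dest of the form COMMA(bndsChk, IND(addr)) becomes
        // OBJ(COMMA(bndsChk, ADDR(IND(addr)))), with each COMMA in the chain retyped to
        // TYP_BYREF.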
9451         GenTree* lastComma = nullptr;
9452         for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
9453         {
9454             next->gtType = TYP_BYREF;
9455             lastComma    = next;
9456         }
9457         if (lastComma != nullptr)
9458         {
9459             noway_assert(lastComma->gtGetOp2() == effectiveVal);
9460             lastComma->gtOp.gtOp2 = addr;
9461             addr                  = tree;
9462         }
9463         var_types structType = effectiveVal->TypeGet();
9464         if (structType == TYP_STRUCT)
9465         {
9466             CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
9467             if (structHnd == NO_CLASS_HANDLE)
9468             {
9469                 tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
9470             }
9471             else
9472             {
9473                 tree = gtNewObjNode(structHnd, addr);
9474                 if (tree->OperGet() == GT_OBJ)
9475                 {
9476                     gtSetObjGcInfo(tree->AsObj());
9477                 }
9478             }
9479         }
9480         else
9481         {
9482             tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9483         }
9484 #ifdef DEBUG
9485         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9486 #endif
9487     }
9488
9489     if (!tree->OperIsBlk())
9490     {
9491         return tree;
9492     }
9493     GenTreeBlk* blkNode = tree->AsBlk();
9494     if (blkNode->OperGet() == GT_DYN_BLK)
9495     {
9496         if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9497         {
9498             unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9499             // A GT_BLK with size of zero is not supported,
9500             // so if we encounter such a thing we just leave it as a GT_DYN_BLK
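            // (E.g. a GT_DYN_BLK whose size operand is the constant 32 becomes a GT_BLK
            // with gtBlkSize == 32, which can then be treated like any fixed-size block.)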
9501             if (size != 0)
9502             {
9503                 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9504                 blkNode->ChangeOper(GT_BLK);
9505                 blkNode->gtBlkSize = size;
9506             }
9507             else
9508             {
9509                 return tree;
9510             }
9511         }
9512         else
9513         {
9514             return tree;
9515         }
9516     }
9517     if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9518         (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9519     {
9520         GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9521         if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9522         {
9523             lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUGARG(DNER_VMNeedsStackAddr));
9524         }
9525     }
9526
9527     return tree;
9528 }
9529
9530 //------------------------------------------------------------------------
9531 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9532 //
9533 // Arguments:
9534 //    tree     - The block operand
9535 //    asgType  - The type of the assignment
9536 //    blockWidth - The size of the block
9537 //    isDest     - true iff this is the destination of the assignment
9538 //
9539 // Return Value:
9540 //    Returns the morphed block operand
9541 //
9542 // Notes:
9543 //    This does the following:
9544 //    - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
9545 //    - Ensures that any COMMAs are above ADDR nodes.
9546 //    Although 'tree' WAS an operand of a block assignment, the assignment
9547 //    may have been retyped to be a scalar assignment.
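//
//    For example (a sketch): when the assignment has been retyped to a scalar asgType, an
//    operand of the form IND(ADDR(lclVarX)) where lclVarX already has type asgType is
//    folded down to just lclVarX.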
9548
9549 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9550 {
9551     GenTree* effectiveVal = tree->gtEffectiveVal();
9552
9553     if (!varTypeIsStruct(asgType))
9554     {
9555         if (effectiveVal->OperIsIndir())
9556         {
9557             GenTree* addr = effectiveVal->AsIndir()->Addr();
9558             if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9559             {
9560                 effectiveVal = addr->gtGetOp1();
9561             }
9562             else if (effectiveVal->OperIsBlk())
9563             {
9564                 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9565             }
9566             else
9567             {
9568                 effectiveVal->gtType = asgType;
9569             }
9570         }
9571         else if (effectiveVal->TypeGet() != asgType)
9572         {
9573             GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9574             effectiveVal  = gtNewOperNode(GT_IND, asgType, addr);
9575         }
9576     }
9577     else
9578     {
9579         GenTreeIndir*        indirTree        = nullptr;
9580         GenTreeLclVarCommon* lclNode          = nullptr;
9581         bool                 needsIndirection = true;
9582
9583         if (effectiveVal->OperIsIndir())
9584         {
9585             indirTree     = effectiveVal->AsIndir();
9586             GenTree* addr = effectiveVal->AsIndir()->Addr();
9587             if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9588             {
9589                 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9590             }
9591         }
9592         else if (effectiveVal->OperGet() == GT_LCL_VAR)
9593         {
9594             lclNode = effectiveVal->AsLclVarCommon();
9595         }
9596 #ifdef FEATURE_SIMD
9597         if (varTypeIsSIMD(asgType))
9598         {
9599             if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9600                 (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
9601             {
9602                 assert(!isDest);
9603                 needsIndirection = false;
9604                 effectiveVal     = indirTree->Addr()->gtGetOp1();
9605             }
9606             if (effectiveVal->OperIsSIMD())
9607             {
9608                 needsIndirection = false;
9609             }
9610         }
9611 #endif // FEATURE_SIMD
9612         if (lclNode != nullptr)
9613         {
9614             LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
9615             if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
9616             {
9617 #ifndef LEGACY_BACKEND
9618                 effectiveVal     = lclNode;
9619                 needsIndirection = false;
9620 #endif // !LEGACY_BACKEND
9621             }
9622             else
9623             {
9624                 // This may be a lclVar that was determined to be address-exposed.
9625                 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9626             }
9627         }
9628         if (needsIndirection)
9629         {
9630             if (indirTree != nullptr)
9631             {
9632                 // We should never find a struct indirection on the lhs of an assignment.
9633                 assert(!isDest || indirTree->OperIsBlk());
9634                 if (!isDest && indirTree->OperIsBlk())
9635                 {
9636                     (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9637                 }
9638             }
9639             else
9640             {
9641                 GenTree* newTree;
9642                 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9643                 if (isDest)
9644                 {
9645                     CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9646                     if (clsHnd == NO_CLASS_HANDLE)
9647                     {
9648                         newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9649                     }
9650                     else
9651                     {
9652                         newTree = gtNewObjNode(clsHnd, addr);
9653                         if (isDest && (newTree->OperGet() == GT_OBJ))
9654                         {
9655                             gtSetObjGcInfo(newTree->AsObj());
9656                         }
9657                         if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9658                         {
9659                             // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9660                             // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9661                             // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9662                             // separately now to avoid excess diffs.
9663                             newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9664                         }
9665                     }
9666                 }
9667                 else
9668                 {
9669                     newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
9670                 }
9671                 effectiveVal = newTree;
9672             }
9673         }
9674     }
9675     tree = effectiveVal;
9676     return tree;
9677 }
9678
9679 //------------------------------------------------------------------------
9680 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9681 //
9682 // Arguments:
9683 //    dest - the GT_OBJ or GT_STORE_OBJ
9684 //
9685 // Assumptions:
9686 //    The destination must be known (by the caller) to be on the stack.
9687 //
9688 // Notes:
9689 //    If we have a CopyObj with a dest on the stack, and its size is small enough
9690 //    to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9691 //    GC Unsafe CopyBlk that is non-interruptible.
9692 //    This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
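//    For example (a sketch), on x64 a 32-byte CopyObj whose dest is known to be a stack
//    local is retyped from GT_OBJ to GT_BLK with gtBlkOpGcUnsafe set, allowing codegen to
//    emit an unrolled, non-interruptible copy.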
9693 //
9694 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
9695 {
9696 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9697     assert(dest->gtGcPtrCount != 0);
9698     unsigned blockWidth = dest->AsBlk()->gtBlkSize;
9699 #ifdef DEBUG
9700     GenTree* destAddr = dest->Addr();
9701     // The caller guarantees that the dest is on the stack.
9702     assert(destAddr->IsLocalAddrExpr() != nullptr);
9703 #endif
9704     if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
9705     {
9706         genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
9707         dest->SetOper(newOper);
9708         dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
9709     }
9710 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9711 }
9712
9713 //------------------------------------------------------------------------
9714 // fgMorphCopyBlock: Perform the Morphing of block copy
9715 //
9716 // Arguments:
9717 //    tree - a block copy (i.e. an assignment with a block op on the lhs).
9718 //
9719 // Return Value:
9720 //    We can return the original block copy unmodified (least desirable, but always correct)
9721 //    We can return a single assignment when fgMorphOneAsgBlockOp transforms it (most desirable).
9722 //    If we have performed struct promotion of the Source() or the Dest() then we will try to
9723 //    perform a field by field assignment for each of the promoted struct fields.
9724 //
9725 // Assumptions:
9726 //    The child nodes for tree have already been Morphed.
9727 //
9728 // Notes:
9729 //    If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
9730 //    When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes,
9731 //    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
9732 //    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9733 //    cannot use a field by field assignment and must leave the original block copy unmodified.
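//
//    For example (a sketch; the local numbers are illustrative), copying between two promoted
//    locals V03 and V04 of the same two-field struct type as
//        ASG(BLK(ADDR(V03)), BLK(ADDR(V04)))
//    becomes
//        COMMA(ASG(V03.f0, V04.f0), ASG(V03.f1, V04.f1))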
9734
9735 GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
9736 {
9737     noway_assert(tree->OperIsCopyBlkOp());
9738
9739     JITDUMP("\nfgMorphCopyBlock:");
9740
9741     bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
9742
9743     GenTree* asg  = tree;
9744     GenTree* rhs  = asg->gtGetOp2();
9745     GenTree* dest = asg->gtGetOp1();
9746
9747 #if FEATURE_MULTIREG_RET
9748     // If this is a multi-reg return, we will not do any morphing of this node.
9749     if (rhs->IsMultiRegCall())
9750     {
9751         assert(dest->OperGet() == GT_LCL_VAR);
9752         JITDUMP(" not morphing a multireg call return\n");
9753         return tree;
9754     }
9755 #endif // FEATURE_MULTIREG_RET
9756
9757     // If we have an array index on the lhs, we need to create an obj node.
9758
9759     dest = fgMorphBlkNode(dest, true);
9760     if (dest != asg->gtGetOp1())
9761     {
9762         asg->gtOp.gtOp1 = dest;
9763         if (dest->IsLocal())
9764         {
9765             dest->gtFlags |= GTF_VAR_DEF;
9766         }
9767     }
9768     asg->gtType = dest->TypeGet();
9769     rhs         = fgMorphBlkNode(rhs, false);
9770
9771     asg->gtOp.gtOp2 = rhs;
9772
9773     GenTreePtr oldTree    = tree;
9774     GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9775
9776     if (oneAsgTree)
9777     {
9778         JITDUMP(" using oneAsgTree.\n");
9779         tree = oneAsgTree;
9780     }
9781     else
9782     {
9783         unsigned             blockWidth;
9784         bool                 blockWidthIsConst = false;
9785         GenTreeLclVarCommon* lclVarTree        = nullptr;
9786         GenTreeLclVarCommon* srcLclVarTree     = nullptr;
9787         unsigned             destLclNum        = BAD_VAR_NUM;
9788         LclVarDsc*           destLclVar        = nullptr;
9789         FieldSeqNode*        destFldSeq        = nullptr;
9790         bool                 destDoFldAsg      = false;
9791         GenTreePtr           destAddr          = nullptr;
9792         GenTreePtr           srcAddr           = nullptr;
9793         bool                 destOnStack       = false;
9794         bool                 hasGCPtrs         = false;
9795
9796         JITDUMP("block assignment to morph:\n");
9797         DISPTREE(asg);
9798
9799         if (dest->IsLocal())
9800         {
9801             blockWidthIsConst = true;
9802             destOnStack       = true;
9803             if (dest->gtOper == GT_LCL_VAR)
9804             {
9805                 lclVarTree = dest->AsLclVarCommon();
9806                 destLclNum = lclVarTree->gtLclNum;
9807                 destLclVar = &lvaTable[destLclNum];
9808                 if (destLclVar->lvType == TYP_STRUCT)
9809                 {
9810                     // It would be nice if lvExactSize always corresponded to the size of the struct,
9811                     // but it doesn't always for the temps that the importer creates when it spills side
9812                     // effects.
9813                     // TODO-Cleanup: Determine when this happens, and whether it can be changed.
9814                     blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
9815                 }
9816                 else
9817                 {
9818                     blockWidth = genTypeSize(destLclVar->lvType);
9819                 }
9820                 hasGCPtrs = destLclVar->lvStructGcCount != 0;
9821             }
9822             else
9823             {
9824                 assert(dest->TypeGet() != TYP_STRUCT);
9825                 assert(dest->gtOper == GT_LCL_FLD);
9826                 blockWidth = genTypeSize(dest->TypeGet());
9827                 destAddr   = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
9828                 destFldSeq = dest->AsLclFld()->gtFieldSeq;
9829             }
9830         }
9831         else
9832         {
9833             GenTree* effectiveDest = dest->gtEffectiveVal();
9834             if (effectiveDest->OperGet() == GT_IND)
9835             {
9836                 assert(dest->TypeGet() != TYP_STRUCT);
9837                 blockWidth        = genTypeSize(effectiveDest->TypeGet());
9838                 blockWidthIsConst = true;
9839                 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9840                 {
9841                     destAddr = dest->gtGetOp1();
9842                 }
9843             }
9844             else
9845             {
9846                 assert(effectiveDest->OperIsBlk());
9847                 GenTreeBlk* blk = effectiveDest->AsBlk();
9848
9849                 blockWidth        = blk->gtBlkSize;
9850                 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
9851                 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9852                 {
9853                     destAddr = blk->Addr();
9854                 }
9855             }
9856             if (destAddr != nullptr)
9857             {
9858                 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
9859                 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9860                 {
9861                     destOnStack = true;
9862                     destLclNum  = lclVarTree->gtLclNum;
9863                     destLclVar  = &lvaTable[destLclNum];
9864                 }
9865             }
9866         }
9867
9868         if (destLclVar != nullptr)
9869         {
9870 #if LOCAL_ASSERTION_PROP
9871             // Kill everything about destLclNum (and its field locals)
9872             if (optLocalAssertionProp)
9873             {
9874                 if (optAssertionCount > 0)
9875                 {
9876                     fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9877                 }
9878             }
9879 #endif // LOCAL_ASSERTION_PROP
9880
9881             if (destLclVar->lvPromoted && blockWidthIsConst)
9882             {
9883                 noway_assert(varTypeIsStruct(destLclVar));
9884                 noway_assert(!opts.MinOpts());
9885
9886                 if (blockWidth == destLclVar->lvExactSize)
9887                 {
9888                     JITDUMP(" (destDoFldAsg=true)");
9889                     // We may decide later that a copyblk is required when this struct has holes
9890                     destDoFldAsg = true;
9891                 }
9892                 else
9893                 {
9894                     JITDUMP(" with mismatched dest size");
9895                 }
9896             }
9897         }
9898
9899         FieldSeqNode* srcFldSeq   = nullptr;
9900         unsigned      srcLclNum   = BAD_VAR_NUM;
9901         LclVarDsc*    srcLclVar   = nullptr;
9902         bool          srcDoFldAsg = false;
9903
9904         if (rhs->IsLocal())
9905         {
9906             srcLclVarTree = rhs->AsLclVarCommon();
9907             srcLclNum     = srcLclVarTree->gtLclNum;
9908             if (rhs->OperGet() == GT_LCL_FLD)
9909             {
9910                 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
9911             }
9912         }
9913         else if (rhs->OperIsIndir())
9914         {
9915             if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
9916             {
9917                 srcLclNum = srcLclVarTree->gtLclNum;
9918             }
9919             else
9920             {
9921                 srcAddr = rhs->gtOp.gtOp1;
9922             }
9923         }
9924
9925         if (srcLclNum != BAD_VAR_NUM)
9926         {
9927             srcLclVar = &lvaTable[srcLclNum];
9928
9929             if (srcLclVar->lvPromoted && blockWidthIsConst)
9930             {
9931                 noway_assert(varTypeIsStruct(srcLclVar));
9932                 noway_assert(!opts.MinOpts());
9933
9934                 if (blockWidth == srcLclVar->lvExactSize)
9935                 {
9936                     JITDUMP(" (srcDoFldAsg=true)");
9937                     // We may decide later that a copyblk is required when this struct has holes
9938                     srcDoFldAsg = true;
9939                 }
9940                 else
9941                 {
9942                     JITDUMP(" with mismatched src size");
9943                 }
9944             }
9945         }
9946
9947         // Check to see if we are required to do a copy block because the struct contains holes
9948         // and either the src or dest is externally visible
9949         //
9950         bool requiresCopyBlock   = false;
9951         bool srcSingleLclVarAsg  = false;
9952         bool destSingleLclVarAsg = false;
9953
9954         if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
9955         {
9956             // Self-assign; no effect.
9957             GenTree* nop = gtNewNothingNode();
9958             INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9959             return nop;
9960         }
9961
9962         // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
9963         if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
9964         {
9965             requiresCopyBlock = true;
9966         }
9967
9968         // Can we use field by field assignment for the dest?
9969         if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9970         {
9971             JITDUMP(" dest contains custom layout and contains holes");
9972             // C++ style CopyBlock with holes
9973             requiresCopyBlock = true;
9974         }
9975
9976         // Can we use field by field assignment for the src?
9977         if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
9978         {
9979             JITDUMP(" src contains custom layout and contains holes");
9980             // C++ style CopyBlock with holes
9981             requiresCopyBlock = true;
9982         }
9983
9984 #if defined(_TARGET_ARM_)
9985         if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
9986         {
9987             JITDUMP(" rhs is unaligned");
9988             requiresCopyBlock = true;
9989         }
9990
9991         if (asg->gtFlags & GTF_BLK_UNALIGNED)
9992         {
9993             JITDUMP(" asg is unaligned");
9994             requiresCopyBlock = true;
9995         }
9996 #endif // _TARGET_ARM_
9997
9998         if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
9999         {
10000             requiresCopyBlock = true;
10001         }
10002
10003         // Can't use field by field assignment if the src is a call.
10004         if (rhs->OperGet() == GT_CALL)
10005         {
10006             JITDUMP(" src is a call");
10007             // A call produces its result as a whole, so it cannot be split into field assignments.
10008             requiresCopyBlock = true;
10009         }
10010
10011         // If we passed the above checks, then we will check these two
10012         if (!requiresCopyBlock)
10013         {
10014             // Are both dest and src promoted structs?
10015             if (destDoFldAsg && srcDoFldAsg)
10016             {
10017                 // Both structs should be of the same type, or each have a single field of the same type.
10018                 // If not we will use a copy block.
10019                 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10020                     lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10021                 {
10022                     unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10023                     unsigned srcFieldNum  = lvaTable[srcLclNum].lvFieldLclStart;
10024                     if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10025                         (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10026                     {
10027                         requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10028                         JITDUMP(" with mismatched types");
10029                     }
10030                 }
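                    // (E.g. two distinct promoted structs that each wrap a single int can
                    // still be copied with one int-to-int field assignment.)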
10031             }
10032             // Are neither dest nor src promoted structs?
10033             else if (!destDoFldAsg && !srcDoFldAsg)
10034             {
10035                 requiresCopyBlock = true; // Leave as a CopyBlock
10036                 JITDUMP(" with no promoted structs");
10037             }
10038             else if (destDoFldAsg)
10039             {
10040                 // Match the following kinds of trees:
10041                 //  fgMorphTree BB01, stmt 9 (before)
10042                 //   [000052] ------------        const     int    8
10043                 //   [000053] -A--G-------     copyBlk   void
10044                 //   [000051] ------------           addr      byref
10045                 //   [000050] ------------              lclVar    long   V07 loc5
10046                 //   [000054] --------R---        <list>    void
10047                 //   [000049] ------------           addr      byref
10048                 //   [000048] ------------              lclVar    struct(P) V06 loc4
10049                 //                                              long   V06.h (offs=0x00) -> V17 tmp9
10050                 // Yields this transformation
10051                 //  fgMorphCopyBlock (after):
10052                 //   [000050] ------------        lclVar    long   V07 loc5
10053                 //   [000085] -A----------     =         long
10054                 //   [000083] D------N----        lclVar    long   V17 tmp9
10055                 //
10056                 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10057                     (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10058                 {
10059                     // Reject the following tree:
10060                     //  - seen on x86chk    jit\jit64\hfa\main\hfa_sf3E_r.exe
10061                     //
10062                     //  fgMorphTree BB01, stmt 6 (before)
10063                     //   [000038] -------------        const     int    4
10064                     //   [000039] -A--G--------     copyBlk   void
10065                     //   [000037] -------------           addr      byref
10066                     //   [000036] -------------              lclVar    int    V05 loc3
10067                     //   [000040] --------R----        <list>    void
10068                     //   [000035] -------------           addr      byref
10069                     //   [000034] -------------              lclVar    struct(P) V04 loc2
10070                     //                                          float  V04.f1 (offs=0x00) -> V13 tmp6
10071                     // As this would transform into
10072                     //   float V13 = int V05
10073                     //
10074                     unsigned  fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10075                     var_types destType    = lvaTable[fieldLclNum].TypeGet();
10076                     if (srcLclVar->TypeGet() == destType)
10077                     {
10078                         srcSingleLclVarAsg = true;
10079                     }
10080                 }
10081             }
10082             else
10083             {
10084                 assert(srcDoFldAsg);
10085                 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10086                 //
10087                 //               [000240] -----+------             /--*  lclVar    struct(P) V18 tmp9
10088                 //                                                  /--*    byref  V18._value (offs=0x00) -> V30 tmp21
10089                 //               [000245] -A------R---             *  =         struct (copy)
10090                 //               [000244] -----+------             \--*  obj(8)    struct
10091                 //               [000243] -----+------                \--*  addr      byref
10092                 //               [000242] D----+-N----                   \--*  lclVar    byref  V28 tmp19
10093                 //
10094                 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10095                     (blockWidth == genTypeSize(destLclVar->TypeGet())))
10096                 {
10097                     // Check for type agreement
10098                     unsigned  fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10099                     var_types srcType     = lvaTable[fieldLclNum].TypeGet();
10100                     if (destLclVar->TypeGet() == srcType)
10101                     {
10102                         destSingleLclVarAsg = true;
10103                     }
10104                 }
10105             }
10106         }
10107
10108         // If we require a copy block, then set both of the field assign bools to false.
10109         if (requiresCopyBlock)
10110         {
10111             // If a copy block is required then we won't do field by field assignments
10112             destDoFldAsg = false;
10113             srcDoFldAsg  = false;
10114         }
10115
10116         JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10117
10118         // Mark the dest/src structs as DoNotEnreg when they are not reg-sized
10119         // non-field-addressed structs, and either we are using a CopyBlock
10120         // or the struct is not promoted.
10121         //
10122         if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10123         {
10124             if (!destLclVar->lvRegStruct)
10125             {
10126                 // Mark it as DoNotEnregister.
10127                 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10128             }
10129         }
10130
10131         if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10132         {
10133             if (!srcLclVar->lvRegStruct)
10134             {
10135                 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10136             }
10137         }
10138
10139         if (requiresCopyBlock)
10140         {
10141 #if CPU_USES_BLOCK_MOVE
10142             compBlkOpUsed = true;
10143 #endif
10144             var_types asgType = dest->TypeGet();
10145             dest              = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
10146             asg->gtOp.gtOp1   = dest;
10147             asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10148
10149             // Note that the unrolling of CopyBlk is only implemented on some platforms.
10150             // Currently that includes x64 and ARM but not x86: the code generation for this
10151             // construct requires the ability to mark certain regions of the generated code
10152             // as non-interruptible, and the GC encoding for the latter platform does not
10153             // have this capability.
10154
10155             // If we have a CopyObj with a dest on the stack
10156             // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
10157             // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10158             // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10159             //
10160             if (destOnStack && (dest->OperGet() == GT_OBJ))
10161             {
10162                 fgMorphUnsafeBlk(dest->AsObj());
10163             }
10164
10165             // Eliminate the "OBJ or BLK" node on the rhs.
10166             rhs             = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
10167             asg->gtOp.gtOp2 = rhs;
10168
10169 #ifdef LEGACY_BACKEND
10170             if (!rhs->OperIsIndir())
10171             {
10172                 noway_assert(rhs->gtOper == GT_LCL_VAR);
10173                 GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
10174                 rhs              = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
10175             }
10176 #endif // LEGACY_BACKEND
10177             // Formerly, liveness did not consider copyblk arguments of simple types as being
10178             // a use or def, so these variables were marked as address-exposed.
10179             // TODO-1stClassStructs: This should no longer be needed.
10180             if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
10181             {
10182                 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10183                 lvaTable[srcLclNum].lvAddrExposed = true;
10184             }
10185
10186             if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
10187             {
10188                 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10189                 lvaTable[destLclNum].lvAddrExposed = true;
10190             }
10191
10192             goto _Done;
10193         }
10194
10195         //
10196         // Otherwise we convert this CopyBlock into individual field by field assignments
10197         //
10198         tree = nullptr;
10199
10200         GenTreePtr src;
10201         GenTreePtr addrSpill            = nullptr;
10202         unsigned   addrSpillTemp        = BAD_VAR_NUM;
10203         bool       addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10204
10205         unsigned fieldCnt = DUMMY_INIT(0);
10206
10207         if (destDoFldAsg && srcDoFldAsg)
10208         {
10209             // To do fieldwise assignments for both sides, they'd better be the same struct type!
10210             // All of these conditions were checked above...
10211             assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10212             assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10213
10214             fieldCnt = destLclVar->lvFieldCnt;
10215             goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10216                                 // assignments.
10217         }
10218         else if (destDoFldAsg)
10219         {
10220             fieldCnt = destLclVar->lvFieldCnt;
10221             rhs      = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10222             if (srcAddr == nullptr)
10223             {
10224                 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10225             }
10226         }
10227         else
10228         {
10229             assert(srcDoFldAsg);
10230             fieldCnt = srcLclVar->lvFieldCnt;
10231             dest     = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10232             if (dest->OperIsBlk())
10233             {
10234                 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10235             }
10236             destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10237         }
10238
10239         if (destDoFldAsg)
10240         {
10241             noway_assert(!srcDoFldAsg);
10242             if (gtClone(srcAddr))
10243             {
10244                 // srcAddr is a simple expression. No need to spill.
10245                 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10246             }
10247             else
10248             {
10249                 // srcAddr is a complex expression. Clone and spill it (unless the destination is
10250                 // a struct local that only has one field, in which case we'd only use the
10251                 // address value once...)
10252                 if (destLclVar->lvFieldCnt > 1)
10253                 {
10254                     addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10255                     noway_assert(addrSpill != nullptr);
10256                 }
10257             }
10258         }
10259
10260         if (srcDoFldAsg)
10261         {
10262             noway_assert(!destDoFldAsg);
10263
10264             // If we're doing field-wise stores, to an address within a local, and we copy
10265             // the address into "addrSpill", do *not* declare the original local var node in the
10266             // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10267             // field-wise assignments as an "indirect" assignment to the local.
10268             // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10269             // we clone it.)
10270             if (lclVarTree != nullptr)
10271             {
10272                 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10273             }
10274
10275             if (gtClone(destAddr))
10276             {
10277                 // destAddr is a simple expression. No need to spill.
10278                 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10279             }
10280             else
10281             {
10282                 // destAddr is a complex expression. Clone and spill it (unless
10283                 // the source is a struct local that only has one field, in which case we'd only
10284                 // use the address value once...)
10285                 if (srcLclVar->lvFieldCnt > 1)
10286                 {
10287                     addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10288                     noway_assert(addrSpill != nullptr);
10289                 }
10290
10291                 // TODO-CQ: this should be based on a more general
10292                 // "BaseAddress" method, that handles fields of structs, before or after
10293                 // morphing.
10294                 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10295                 {
10296                     if (addrSpill->gtOp.gtOp1->IsLocal())
10297                     {
10298                         // We will *not* consider this to define the local, but rather have each individual field assign
10299                         // be a definition.
10300                         addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10301                         assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10302                                PROMOTION_TYPE_INDEPENDENT);
10303                         addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10304                                                      // local stack frame
10305                     }
10306                 }
10307             }
10308         }
10309
10310         if (addrSpill != nullptr)
10311         {
10312             // Spill the (complex) address to a BYREF temp.
10313             // Note, at most one address may need to be spilled.
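            // (Sketch: this emits ASG(tmpN, addr) up front, and each per-field assignment
            // below then addresses its field off tmpN instead of re-evaluating the complex
            // address expression.)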
10314             addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10315
10316             lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10317
10318             if (addrSpillIsStackDest)
10319             {
10320                 lvaTable[addrSpillTemp].lvStackByref = true;
10321             }
10322
10323             tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10324
10325 #ifndef LEGACY_BACKEND
10326             // If we are assigning the address of a LclVar here,
10327             // liveness does not account for this kind of address-taken use.
10328             //
10329             // We have to mark this local as address exposed so
10330             // that we don't delete the definition for this LclVar
10331             // as a dead store later on.
10332             //
10333             if (addrSpill->OperGet() == GT_ADDR)
10334             {
10335                 GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
10336                 if (addrOp->IsLocal())
10337                 {
10338                     unsigned lclVarNum                = addrOp->gtLclVarCommon.gtLclNum;
10339                     lvaTable[lclVarNum].lvAddrExposed = true;
10340                     lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10341                 }
10342             }
10343 #endif // !LEGACY_BACKEND
10344         }
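        // To illustrate the spill (a hypothetical sketch, not from an actual dump): for a copy whose
        // destination address is a complex expression such as "p->q", the field-wise stores
        //
        //     *(p->q + 0) = src.f0;  *(p->q + 4) = src.f1;
        //
        // are conceptually rewritten as
        //
        //     tmpByref = p->q;
        //     *(tmpByref + 0) = src.f0;  *(tmpByref + 4) = src.f1;
        //
        // so the address computation (and any side effects it contains) is evaluated exactly once.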
10345
10346     _AssignFields:
10347
10348         for (unsigned i = 0; i < fieldCnt; ++i)
10349         {
10350             FieldSeqNode* curFieldSeq = nullptr;
10351             if (destDoFldAsg)
10352             {
10353                 noway_assert(destLclNum != BAD_VAR_NUM);
10354                 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10355                 dest                 = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10356                 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10357                 if (destAddr != nullptr)
10358                 {
10359                     noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10360                     dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10361                 }
10362                 else
10363                 {
10364                     noway_assert(lclVarTree != nullptr);
10365                     dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10366                 }
10367                 // Don't CSE the lhs of an assignment.
10368                 dest->gtFlags |= GTF_DONT_CSE;
10369             }
10370             else
10371             {
10372                 noway_assert(srcDoFldAsg);
10373                 noway_assert(srcLclNum != BAD_VAR_NUM);
10374                 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10375
10376                 if (destSingleLclVarAsg)
10377                 {
10378                     noway_assert(fieldCnt == 1);
10379                     noway_assert(destLclVar != nullptr);
10380                     noway_assert(addrSpill == nullptr);
10381
10382                     dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10383                 }
10384                 else
10385                 {
10386                     if (addrSpill)
10387                     {
10388                         assert(addrSpillTemp != BAD_VAR_NUM);
10389                         dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10390                     }
10391                     else
10392                     {
10393                         dest = gtCloneExpr(destAddr);
10394                         noway_assert(dest != nullptr);
10395
10396                         // Is the address of a local?
10397                         GenTreeLclVarCommon* lclVarTree = nullptr;
10398                         bool                 isEntire   = false;
10399                         bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
10400                         if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10401                         {
10402                             lclVarTree->gtFlags |= GTF_VAR_DEF;
10403                             if (!isEntire)
10404                             {
10405                                 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10406                             }
10407                         }
10408                     }
10409
10410                     GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10411                     // Have to set the field sequence -- which means we need the field handle.
10412                     CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10413                     CORINFO_FIELD_HANDLE fieldHnd =
10414                         info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10415                     curFieldSeq                          = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10416                     fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10417
10418                     dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10419
10420                     dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
10421
10422                     // !!! The destination could be on the stack. !!!
10423                     // This flag will let us choose the correct write barrier.
10424                     dest->gtFlags |= GTF_IND_TGTANYWHERE;
10425                 }
10426             }
10427
10428             if (srcDoFldAsg)
10429             {
10430                 noway_assert(srcLclNum != BAD_VAR_NUM);
10431                 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10432                 src                  = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10433
10434                 noway_assert(srcLclVarTree != nullptr);
10435                 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10436                 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
10437                 // but they are when they are under a GT_ADDR.
10438                 src->gtFlags |= GTF_DONT_CSE;
10439             }
10440             else
10441             {
10442                 noway_assert(destDoFldAsg);
10443                 noway_assert(destLclNum != BAD_VAR_NUM);
10444                 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10445
10446                 if (srcSingleLclVarAsg)
10447                 {
10448                     noway_assert(fieldCnt == 1);
10449                     noway_assert(srcLclVar != nullptr);
10450                     noway_assert(addrSpill == nullptr);
10451
10452                     src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
10453                 }
10454                 else
10455                 {
10456                     if (addrSpill)
10457                     {
10458                         assert(addrSpillTemp != BAD_VAR_NUM);
10459                         src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10460                     }
10461                     else
10462                     {
10463                         src = gtCloneExpr(srcAddr);
10464                         noway_assert(src != nullptr);
10465                     }
10466
10467                     CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10468                     CORINFO_FIELD_HANDLE fieldHnd =
10469                         info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10470                     curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10471
10472                     src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10473                                         new (this, GT_CNS_INT)
10474                                             GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
10475
10476                     src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
10477                 }
10478             }
10479
10480             noway_assert(dest->TypeGet() == src->TypeGet());
10481
10482             asg = gtNewAssignNode(dest, src);
10483
10484             // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10485             // and the address was of a local, record the assignment as an indirect update of a local.
10486             if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10487             {
10488                 curFieldSeq   = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
10489                 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
10490                 IndirectAssignmentAnnotation* pIndirAnnot =
10491                     new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
10492                 GetIndirAssignMap()->Set(asg, pIndirAnnot);
10493             }
10494
10495 #if LOCAL_ASSERTION_PROP
10496             if (optLocalAssertionProp)
10497             {
10498                 optAssertionGen(asg);
10499             }
10500 #endif // LOCAL_ASSERTION_PROP
10501
10502             if (tree)
10503             {
10504                 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10505             }
10506             else
10507             {
10508                 tree = asg;
10509             }
10510         }
10511     }
10512
10513     if (isLateArg)
10514     {
10515         tree->gtFlags |= GTF_LATE_ARG;
10516     }
10517
10518 #ifdef DEBUG
10519     if (tree != oldTree)
10520     {
10521         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10522     }
10523
10524     if (verbose)
10525     {
10526         printf("\nfgMorphCopyBlock (after):\n");
10527         gtDispTree(tree);
10528     }
10529 #endif
10530
10531 _Done:
10532     return tree;
10533 }
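// A rough sketch of the overall effect (hypothetical types, not from an actual dump): for a
// promoted struct "struct S { int a; int b; }", the copy "d = s" is expanded by the loop above
// into a GT_COMMA chain of per-field assignments, conceptually:
//
//     COMMA(ASG(d.a, s.a), ASG(d.b, s.b))
//
// When only one side is promoted, the other side's fields are accessed through a (possibly
// spilled) address plus the per-field offsets recorded in the local variable table.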
10534
10535 // Insert conversions and normalize the tree to make it amenable to
10536 // register-based FP architectures.
10537 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
10538 {
10539     if (tree->OperIsArithmetic())
10540     {
10541         if (varTypeIsFloating(tree))
10542         {
10543             GenTreePtr op1 = tree->gtOp.gtOp1;
10544             GenTreePtr op2 = tree->gtGetOp2();
10545
10546             if (op1->TypeGet() != tree->TypeGet())
10547             {
10548                 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
10549             }
10550             if (op2->TypeGet() != tree->TypeGet())
10551             {
10552                 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
10553             }
10554         }
10555     }
10556     else if (tree->OperIsCompare())
10557     {
10558         GenTreePtr op1 = tree->gtOp.gtOp1;
10559
10560         if (varTypeIsFloating(op1))
10561         {
10562             GenTreePtr op2 = tree->gtGetOp2();
10563             assert(varTypeIsFloating(op2));
10564
10565             if (op1->TypeGet() != op2->TypeGet())
10566             {
10567                 // both had better be floating, just one bigger than other
10568                 if (op1->TypeGet() == TYP_FLOAT)
10569                 {
10570                     assert(op2->TypeGet() == TYP_DOUBLE);
10571                     tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
10572                 }
10573                 else if (op2->TypeGet() == TYP_FLOAT)
10574                 {
10575                     assert(op1->TypeGet() == TYP_DOUBLE);
10576                     tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
10577                 }
10578             }
10579         }
10580     }
10581
10582     return tree;
10583 }
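// For example (a sketch): a comparison of a TYP_FLOAT value against a TYP_DOUBLE value,
// such as "f < d", is normalized by casting the narrower operand, yielding
// "((double)f) < d", so both operands of the compare have the same floating-point type.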
10584
10585 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
10586 {
10587     GenTree*     op1 = compare->gtOp.gtOp1;
10588     GenTree*     op2 = compare->gtOp.gtOp2;
10589     GenTree*     opCns;
10590     GenTreeCall* opCall;
10591
10592     // recognize this pattern:
10593     //
10594     // stmtExpr  void  (IL 0x000...  ???)
10595     //     return    int
10596     //             const     ref    null
10597     //         ==        int
10598     //             call help ref    HELPER.CORINFO_HELP_BOX_NULLABLE
10599     //                 const(h)  long   0x7fed96836c8 class
10600     //                 addr      byref
10601     //                     ld.lclVar struct V00 arg0
10602     //
10603     //
10604     // which comes from this code (reported by a customer as being slow):
10605     //
10606     // private static bool IsNull<T>(T arg)
10607     // {
10608     //    return arg==null;
10609     // }
10610     //
10611
10612     if (op1->IsCnsIntOrI() && op2->IsHelperCall())
10613     {
10614         opCns  = op1;
10615         opCall = op2->AsCall();
10616     }
10617     else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
10618     {
10619         opCns  = op2;
10620         opCall = op1->AsCall();
10621     }
10622     else
10623     {
10624         return compare;
10625     }
10626
10627     if (!opCns->IsIntegralConst(0))
10628     {
10629         return compare;
10630     }
10631
10632     if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
10633     {
10634         return compare;
10635     }
10636
10637     // Replace the box with an access of the nullable 'hasValue' field, which is at offset zero.
10638     GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
10639
10640     if (opCall == op1)
10641     {
10642         compare->gtOp.gtOp1 = newOp;
10643     }
10644     else
10645     {
10646         compare->gtOp.gtOp2 = newOp;
10647     }
10648
10649     return compare;
10650 }
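// Net effect (a sketch): for "arg == null" where arg is a Nullable<T>, the expensive
// CORINFO_HELP_BOX_NULLABLE call is replaced with a direct TYP_BOOL load of the
// 'hasValue' field, so the compare conceptually becomes "*(bool*)&arg == 0".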
10651
10652 #ifdef FEATURE_SIMD
10653
10654 //--------------------------------------------------------------------------------------------------------------
10655 // getSIMDStructFromField:
10656 //   Check whether the field belongs to a simd struct. If it does, return the GenTreePtr for
10657 //   the struct node, along with the base type, field index and simd size. If it does not, return nullptr.
10658 //   Usually, if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, we
10659 //   should return nullptr, since in that case we should treat the SIMD struct as a regular struct.
10660 //   However, if you want the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
10661 //   to true. Then the IsUsedInSIMDIntrinsic check is skipped, and the SIMD struct node is returned
10662 //   whenever the struct is a SIMD struct.
10663 //
10664 // Arguments:
10665 //       tree - GenTreePtr. This node will be checked to see whether it is a field of a simd
10666 //               struct used in a simd intrinsic.
10667 //       pBaseTypeOut - var_types pointer; if the tree node is the tree we want, we set *pBaseTypeOut
10668 //                      to the simd lclvar's base type.
10669 //       indexOut - unsigned pointer; if the tree is used in a simd intrinsic, we set *indexOut
10670 //                  to the index of this field.
10671 //       simdSizeOut - unsigned pointer; if the tree is used in a simd intrinsic, we set *simdSizeOut
10672 //                     to the size of the simd struct this tree belongs to.
10673 //      ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will skip
10674 //                                  the UsedInSIMDIntrinsic check.
10675 //
10676 // return value:
10677 //       A GenTreePtr pointing to the simd lclvar that the field belongs to. If the tree is not a
10678 //       simd-intrinsic-related field, return nullptr.
10679 //
10680
10681 GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
10682                                             var_types* pBaseTypeOut,
10683                                             unsigned*  indexOut,
10684                                             unsigned*  simdSizeOut,
10685                                             bool       ignoreUsedInSIMDIntrinsic /*false*/)
10686 {
10687     GenTreePtr ret = nullptr;
10688     if (tree->OperGet() == GT_FIELD)
10689     {
10690         GenTreePtr objRef = tree->gtField.gtFldObj;
10691         if (objRef != nullptr)
10692         {
10693             GenTreePtr obj = nullptr;
10694             if (objRef->gtOper == GT_ADDR)
10695             {
10696                 obj = objRef->gtOp.gtOp1;
10697             }
10698             else if (ignoreUsedInSIMDIntrinsic)
10699             {
10700                 obj = objRef;
10701             }
10702             else
10703             {
10704                 return nullptr;
10705             }
10706
10707             if (isSIMDTypeLocal(obj))
10708             {
10709                 unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
10710                 LclVarDsc* varDsc = &lvaTable[lclNum];
10711                 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
10712                 {
10713                     *simdSizeOut  = varDsc->lvExactSize;
10714                     *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
10715                     ret           = obj;
10716                 }
10717             }
10718             else if (obj->OperGet() == GT_SIMD)
10719             {
10720                 ret                   = obj;
10721                 GenTreeSIMD* simdNode = obj->AsSIMD();
10722                 *simdSizeOut          = simdNode->gtSIMDSize;
10723                 *pBaseTypeOut         = simdNode->gtSIMDBaseType;
10724             }
10725         }
10726     }
10727     if (ret != nullptr)
10728     {
10729         unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
10730         *indexOut             = tree->gtField.gtFldOffset / BaseTypeSize;
10731     }
10732     return ret;
10733 }
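// For example (a hypothetical sketch): for a read of "v.Y", where "v" is a local of a
// 12-byte SIMD type with a float base type, this returns the lclvar node for "v" with
// *pBaseTypeOut = TYP_FLOAT, *simdSizeOut = 12, and *indexOut computed from the field
// offset as 4 / genTypeSize(TYP_FLOAT) == 1.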
10734
10735 /*****************************************************************************
10736 *  If a read operation tries to access a simd struct field, then transform the
10737 *  operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
10738 *  Otherwise, return the old tree.
10739 *  Argument:
10740 *   tree - GenTreePtr. If this pointer points to a simd struct which is used in a simd
10741 *          intrinsic, we will morph it into the simd intrinsic SIMDIntrinsicGetItem.
10742 *  Return:
10743 *   A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic,
10744 *   return the old tree.
10745 */
10746
10747 GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
10748 {
10749     unsigned   index          = 0;
10750     var_types  baseType       = TYP_UNKNOWN;
10751     unsigned   simdSize       = 0;
10752     GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
10753     if (simdStructNode != nullptr)
10754     {
10755         assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10756         GenTree* op2 = gtNewIconNode(index);
10757         tree         = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
10758 #ifdef DEBUG
10759         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10760 #endif
10761     }
10762     return tree;
10763 }
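// A sketch of the transformation (hypothetical source): a read of "v.Y" on a SIMD-typed
// local becomes gtNewSIMDNode(TYP_FLOAT, v, gtNewIconNode(1), SIMDIntrinsicGetItem,
// TYP_FLOAT, 12) -- an element extraction by index instead of a struct field load.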
10764
10765 /*****************************************************************************
10766 *  Transform an assignment of a SIMD struct field to SIMD intrinsic
10767 *  SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
10768 *  then return the old tree.
10769 *  Argument:
10770 *   tree - GenTreePtr. If this pointer points to a simd struct which is used in a simd
10771 *          intrinsic, we will morph it into a simd intrinsic set.
10772 *  Return:
10773 *   A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic,
10774 *   return the old tree.
10775 */
10776
10777 GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
10778 {
10779     assert(tree->OperGet() == GT_ASG);
10780     GenTreePtr op1 = tree->gtGetOp1();
10781     GenTreePtr op2 = tree->gtGetOp2();
10782
10783     unsigned   index         = 0;
10784     var_types  baseType      = TYP_UNKNOWN;
10785     unsigned   simdSize      = 0;
10786     GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
10787     if (simdOp1Struct != nullptr)
10788     {
10789         // Generate the simd set intrinsic
10790         assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10791
10792         SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
10793         switch (index)
10794         {
10795             case 0:
10796                 simdIntrinsicID = SIMDIntrinsicSetX;
10797                 break;
10798             case 1:
10799                 simdIntrinsicID = SIMDIntrinsicSetY;
10800                 break;
10801             case 2:
10802                 simdIntrinsicID = SIMDIntrinsicSetZ;
10803                 break;
10804             case 3:
10805                 simdIntrinsicID = SIMDIntrinsicSetW;
10806                 break;
10807             default:
10808                 noway_assert(!"There is no set intrinsic for index greater than 3");
10809         }
10810
10811         GenTreePtr target = gtClone(simdOp1Struct);
10812         assert(target != nullptr);
10813         GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
10814         tree->gtOp.gtOp1    = target;
10815         tree->gtOp.gtOp2    = simdTree;
10816 #ifdef DEBUG
10817         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10818 #endif
10819     }
10820
10821     return tree;
10822 }
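// A sketch of the transformation (hypothetical source): "v.Y = e" on a SIMD-typed local
// becomes "v = SIMDIntrinsicSetY(v, e)" -- the whole vector is rebuilt with element 1
// replaced, rather than storing through the field's address.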
10823
10824 #endif // FEATURE_SIMD
10825
10826 /*****************************************************************************
10827  *
10828  *  Transform the given GTK_SMPOP tree for code generation.
10829  */
10830
10831 #ifdef _PREFAST_
10832 #pragma warning(push)
10833 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
10834 #endif
10835 GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
10836 {
10837     // This extra scope is a workaround for a gcc bug: the inline
10838     // destructor for ALLOCA_CHECK confuses the control flow, and
10839     // gcc thinks that the function never returns.
10840     {
10841         ALLOCA_CHECK();
10842         noway_assert(tree->OperKind() & GTK_SMPOP);
10843
10844         /* The steps in this function are :
10845            o Perform required preorder processing
10846            o Process the first, then second operand, if any
10847            o Perform required postorder morphing
10848            o Perform optional postorder morphing if optimizing
10849          */
10850
10851         bool isQmarkColon = false;
10852
10853 #if LOCAL_ASSERTION_PROP
10854         AssertionIndex origAssertionCount = DUMMY_INIT(0);
10855         AssertionDsc*  origAssertionTab   = DUMMY_INIT(NULL);
10856
10857         AssertionIndex thenAssertionCount = DUMMY_INIT(0);
10858         AssertionDsc*  thenAssertionTab   = DUMMY_INIT(NULL);
10859 #endif
10860
10861         if (fgGlobalMorph)
10862         {
10863 #if !FEATURE_STACK_FP_X87
10864             tree = fgMorphForRegisterFP(tree);
10865 #endif
10866         }
10867
10868         genTreeOps oper = tree->OperGet();
10869         var_types  typ  = tree->TypeGet();
10870         GenTreePtr op1  = tree->gtOp.gtOp1;
10871         GenTreePtr op2  = tree->gtGetOp2IfPresent();
10872
10873         /*-------------------------------------------------------------------------
10874          * First do any PRE-ORDER processing
10875          */
10876
10877         switch (oper)
10878         {
10879             // Some arithmetic operators need to use a helper call to the EE
10880             int helper;
10881
10882             case GT_ASG:
10883                 tree = fgDoNormalizeOnStore(tree);
10884                 /* fgDoNormalizeOnStore can change op2 */
10885                 noway_assert(op1 == tree->gtOp.gtOp1);
10886                 op2 = tree->gtOp.gtOp2;
10887
10888 #ifdef FEATURE_SIMD
10889                 {
10890                     // We should check whether op2 is assigned to a SIMD field or not.
10891                     // If it is, we should translate the tree to a simd intrinsic.
10892                     assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
10893                     GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
10894                     typ                = tree->TypeGet();
10895                     op1                = tree->gtGetOp1();
10896                     op2                = tree->gtGetOp2();
10897 #ifdef DEBUG
10898                     assert((tree == newTree) && (tree->OperGet() == oper));
10899                     if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
10900                     {
10901                         tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
10902                     }
10903 #endif // DEBUG
10904                 }
10905 #endif
10906
10907                 __fallthrough;
10908
10909             case GT_ASG_ADD:
10910             case GT_ASG_SUB:
10911             case GT_ASG_MUL:
10912             case GT_ASG_DIV:
10913             case GT_ASG_MOD:
10914             case GT_ASG_UDIV:
10915             case GT_ASG_UMOD:
10916             case GT_ASG_OR:
10917             case GT_ASG_XOR:
10918             case GT_ASG_AND:
10919             case GT_ASG_LSH:
10920             case GT_ASG_RSH:
10921             case GT_ASG_RSZ:
10922             case GT_CHS:
10923
10924                 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
10925                 // Previously, the "lhs" (addr) of a block op was CSE'd.  So, to duplicate the former
10926                 // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type).
10927                 // TODO-1stClassStructs: improve this.
10928                 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
10929                 {
10930                     op1->gtFlags |= GTF_DONT_CSE;
10931                 }
10932                 break;
10933
10934             case GT_ADDR:
10935
10936                 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
10937                 op1->gtFlags |= GTF_DONT_CSE;
10938                 break;
10939
10940             case GT_QMARK:
10941             case GT_JTRUE:
10942
10943                 noway_assert(op1);
10944
10945                 if (op1->OperKind() & GTK_RELOP)
10946                 {
10947                     noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
10948                     /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
10949                        not need to materialize the result as a 0 or 1. */
10950
10951                     /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
10952                     op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
10953
10954                     // Request that the codegen for op1 sets the condition flags
10955                     // when it generates the code for op1.
10956                     //
10957                     // Codegen for op1 must set the condition flags if
10958                     // this method returns true.
10959                     //
10960                     op1->gtRequestSetFlags();
10961                 }
10962                 else
10963                 {
10964                     GenTreePtr effOp1 = op1->gtEffectiveVal();
10965                     noway_assert((effOp1->gtOper == GT_CNS_INT) &&
10966                                  (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
10967                 }
10968                 break;
10969
10970             case GT_COLON:
10971 #if LOCAL_ASSERTION_PROP
10972                 if (optLocalAssertionProp)
10973 #endif
10974                 {
10975                     isQmarkColon = true;
10976                 }
10977                 break;
10978
10979             case GT_INDEX:
10980                 return fgMorphArrayIndex(tree);
10981
10982             case GT_CAST:
10983                 return fgMorphCast(tree);
10984
10985             case GT_MUL:
10986
10987 #ifndef _TARGET_64BIT_
10988                 if (typ == TYP_LONG)
10989                 {
10990                     /* For (long)int1 * (long)int2, we don't actually do the
10991                        casts, and just multiply the 32-bit values, which will
10992                        give us the 64-bit result in edx:eax */
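                    /* For instance (a sketch, not from an actual dump):
                           long r = (long)x * (long)y;   // x, y are ints
                       is emitted as a single 32x32->64 multiply (GTF_MUL_64RSLT)
                       rather than widening both operands to 64 bits first. */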
10993
10994                     noway_assert(op2);
10995                     if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
10996                          genActualType(op1->CastFromType()) == TYP_INT &&
10997                          genActualType(op2->CastFromType()) == TYP_INT) &&
10998                         !op1->gtOverflow() && !op2->gtOverflow())
10999                     {
11000                         // The casts have to be of the same signedness.
11001                         if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11002                         {
11003                             // We see if we can force an int constant to change its signedness
11004                             GenTreePtr constOp;
11005                             if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11006                                 constOp = op1;
11007                             else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11008                                 constOp = op2;
11009                             else
11010                                 goto NO_MUL_64RSLT;
11011
11012                             if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11013                                 constOp->gtFlags ^= GTF_UNSIGNED;
11014                             else
11015                                 goto NO_MUL_64RSLT;
11016                         }
11017
11018                         // The only combination that can overflow: an overflow-checked unsigned multiply of signed casts.
11019                         if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11020                             goto NO_MUL_64RSLT;
11021
11022                         /* Remaining combinations can never overflow during long mul. */
11023
11024                         tree->gtFlags &= ~GTF_OVERFLOW;
11025
11026                         /* Do unsigned mul only if the casts were unsigned */
11027
11028                         tree->gtFlags &= ~GTF_UNSIGNED;
11029                         tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11030
11031                         /* Since we are committing to GTF_MUL_64RSLT, we don't want
11032                            the casts to be folded away. So morph the castees directly */
11033
11034                         op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11035                         op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11036
11037                         // Propagate side effect flags up the tree
11038                         op1->gtFlags &= ~GTF_ALL_EFFECT;
11039                         op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11040                         op2->gtFlags &= ~GTF_ALL_EFFECT;
11041                         op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11042
11043                         // If the GT_MUL can be altogether folded away, we should do that.
11044
11045                         if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11046                             opts.OptEnabled(CLFLG_CONSTANTFOLD))
11047                         {
11048                             tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11049                             tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11050                             noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11051                             tree = gtFoldExprConst(tree);
11052                             noway_assert(tree->OperIsConst());
11053                             return tree;
11054                         }
11055
11056                         tree->gtFlags |= GTF_MUL_64RSLT;
11057
11058                         // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11059                         tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
11060
11061                         // Insert GT_NOP nodes for the cast operands so that they do not get folded,
11062                         // and propagate the new flags. We don't want to CSE the casts because
11063                         // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11064
11065                         if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11066                         {
11067                             op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11068                             op1->gtFlags &= ~GTF_ALL_EFFECT;
11069                             op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11070                         }
11071
11072                         if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11073                         {
11074                             op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11075                             op2->gtFlags &= ~GTF_ALL_EFFECT;
11076                             op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11077                         }
11078
11079                         op1->gtFlags |= GTF_DONT_CSE;
11080                         op2->gtFlags |= GTF_DONT_CSE;
11081
11082                         tree->gtFlags &= ~GTF_ALL_EFFECT;
11083                         tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11084
11085                         goto DONE_MORPHING_CHILDREN;
11086                     }
11087                     else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11088                     {
11089                     NO_MUL_64RSLT:
11090                         if (tree->gtOverflow())
11091                             helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11092                         else
11093                             helper = CORINFO_HELP_LMUL;
11094
11095                         goto USE_HELPER_FOR_ARITH;
11096                     }
11097                     else
11098                     {
11099                         /* We are seeing this node again. We have decided to use
11100                            GTF_MUL_64RSLT, so leave it alone. */
11101
11102                         assert(tree->gtIsValid64RsltMul());
11103                     }
11104                 }
11105 #endif // !_TARGET_64BIT_
11106                 break;
11107
11108             case GT_DIV:
11109
11110 #ifndef _TARGET_64BIT_
11111                 if (typ == TYP_LONG)
11112                 {
11113                     helper = CORINFO_HELP_LDIV;
11114                     goto USE_HELPER_FOR_ARITH;
11115                 }
11116
11117 #if USE_HELPERS_FOR_INT_DIV
11118                 if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
11119                 {
11120                     helper = CORINFO_HELP_DIV;
11121                     goto USE_HELPER_FOR_ARITH;
11122                 }
11123 #endif
11124 #endif // !_TARGET_64BIT_
11125
11126 #ifndef LEGACY_BACKEND
11127                 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11128                 {
11129                     op2 = gtFoldExprConst(op2);
11130                 }
11131 #endif // !LEGACY_BACKEND
11132                 break;
11133
11134             case GT_UDIV:
11135
11136 #ifndef _TARGET_64BIT_
11137                 if (typ == TYP_LONG)
11138                 {
11139                     helper = CORINFO_HELP_ULDIV;
11140                     goto USE_HELPER_FOR_ARITH;
11141                 }
11142 #if USE_HELPERS_FOR_INT_DIV
11143                 if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
11144                 {
11145                     helper = CORINFO_HELP_UDIV;
11146                     goto USE_HELPER_FOR_ARITH;
11147                 }
11148 #endif
11149 #endif // !_TARGET_64BIT_
11150                 break;
11151
11152             case GT_MOD:
11153
11154                 if (varTypeIsFloating(typ))
11155                 {
11156                     helper = CORINFO_HELP_DBLREM;
11157                     noway_assert(op2);
11158                     if (op1->TypeGet() == TYP_FLOAT)
11159                     {
11160                         if (op2->TypeGet() == TYP_FLOAT)
11161                         {
11162                             helper = CORINFO_HELP_FLTREM;
11163                         }
11164                         else
11165                         {
11166                             tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
11167                         }
11168                     }
11169                     else if (op2->TypeGet() == TYP_FLOAT)
11170                     {
11171                         tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
11172                     }
11173                     goto USE_HELPER_FOR_ARITH;
11174                 }
11175
11176                 // Do not use optimizations for signed mod (unlike UMOD, whose idiv is optimized during codegen).
11177                 // A similar optimization for signed mod will not work for a negative, perfectly divisible
11178                 // HI-word. To make it correct, we would need to divide without the sign and then flip the
11179                 // result sign after the mod. This requires 18 opcodes plus control flow, making it not worth inlining.
11180                 goto ASSIGN_HELPER_FOR_MOD;
11181
11182             case GT_UMOD:
11183
11184 #ifdef _TARGET_ARMARCH_
11185 //
11186 // Note: for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11187 //
11188 #else  // _TARGET_XARCH
11189                 /* If this is an unsigned long mod with an op2 which is a cast to long from a
11190                    constant int, then don't morph to a call to the helper.  This can be done
11191                    faster inline using idiv.
11192                 */
11193
11194                 noway_assert(op2);
11195                 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11196                     ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11197                     ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11198                 {
11199                     if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
11200                         op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
11201                         op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
11202                         (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
11203                     {
11204                         tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
11205                         noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
11206                     }
11207
11208                     if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11209                         op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11210                     {
11211                         tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11212                         noway_assert(op1->TypeGet() == TYP_LONG);
11213
11214                         // Update flags for op1 morph
11215                         tree->gtFlags &= ~GTF_ALL_EFFECT;
11216
11217                         tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11218
11219                         // If op1 is a constant, then do constant folding of the division operator
11220                         if (op1->gtOper == GT_CNS_NATIVELONG)
11221                         {
11222                             tree = gtFoldExpr(tree);
11223                         }
11224                         return tree;
11225                     }
11226                 }
11227 #endif // _TARGET_XARCH
11228
11229             ASSIGN_HELPER_FOR_MOD:
11230
11231                 // For "val % 1", return 0 if op1 doesn't have any side effects
11232                 // and we are not in the CSE phase; during CSE we cannot discard 'tree'
11233                 // because it may contain CSE expressions that we haven't yet examined.
11234                 //
11235                 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11236                 {
11237                     if (op2->IsIntegralConst(1))
11238                     {
11239                         GenTreePtr zeroNode = gtNewZeroConNode(typ);
11240 #ifdef DEBUG
11241                         zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11242 #endif
11243                         DEBUG_DESTROY_NODE(tree);
11244                         return zeroNode;
11245                     }
11246                 }
11247
11248 #ifndef _TARGET_64BIT_
11249                 if (typ == TYP_LONG)
11250                 {
11251                     helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11252                     goto USE_HELPER_FOR_ARITH;
11253                 }
11254
11255 #if USE_HELPERS_FOR_INT_DIV
11256                 if (typ == TYP_INT)
11257                 {
11258                     if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
11259                     {
11260                         helper = CORINFO_HELP_UMOD;
11261                         goto USE_HELPER_FOR_ARITH;
11262                     }
11263                     else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
11264                     {
11265                         helper = CORINFO_HELP_MOD;
11266                         goto USE_HELPER_FOR_ARITH;
11267                     }
11268                 }
11269 #endif
11270 #endif // !_TARGET_64BIT_
11271
11272 #ifndef LEGACY_BACKEND
11273                 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11274                 {
11275                     op2 = gtFoldExprConst(op2);
11276                 }
11277
11278 #ifdef _TARGET_ARM64_
11279
11280                 // For ARM64 we don't have a remainder instruction,
11281                 // The architecture manual suggests the following transformation to
11282                 // generate code for such operator:
11283                 //
11284                 // a % b = a - (a / b) * b;
11285                 //
11286                 // NOTE: we should never need to perform this transformation when remorphing, since global morphing
11287                 //       should already have done so and we do not introduce new modulus nodes in later phases.
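                // For example: with a = 7, b = 3 this gives 7 - (7 / 3) * 3 = 7 - 2 * 3 = 1,
                // and the result keeps the sign of 'a' under truncating division:
                // -7 % 3 = -7 - (-2) * 3 = -1.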
11288                 assert(!optValnumCSE_phase);
11289                 tree = fgMorphModToSubMulDiv(tree->AsOp());
11290                 op1  = tree->gtOp.gtOp1;
11291                 op2  = tree->gtOp.gtOp2;
11292 #else  //_TARGET_ARM64_
11293                 // If b is not a power of 2 constant then lowering replaces a % b
11294                 // with a - (a / b) * b and applies magic division optimization to
11295                 // a / b. The code may already contain an a / b expression (e.g.
11296                 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11297                 // If we convert % to / here we give CSE the opportunity to eliminate
11298                 // the redundant division. If there's no redundant division then
11299                 // nothing is lost, lowering would have done this transform anyway.
11300
11301                 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11302                 {
11303                     ssize_t divisorValue    = op2->AsIntCon()->IconValue();
11304                     size_t  absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11305                                                                            : static_cast<size_t>(abs(divisorValue));
11306
11307                     if (!isPow2(absDivisorValue))
11308                     {
11309                         tree = fgMorphModToSubMulDiv(tree->AsOp());
11310                         op1  = tree->gtOp.gtOp1;
11311                         op2  = tree->gtOp.gtOp2;
11312                     }
11313                 }
11314 #endif //_TARGET_ARM64_
11315 #endif // !LEGACY_BACKEND
11316                 break;
11317
11318             USE_HELPER_FOR_ARITH:
11319             {
11320                 /* We have to morph these arithmetic operations into helper calls
11321                    before morphing the arguments (preorder), else the arguments
11322                    won't get correct values of fgPtrArgCntCur.
11323                    However, try to fold the tree first in case we end up with a
11324                    simple node which won't need a helper call at all */
11325
11326                 noway_assert(tree->OperIsBinary());
11327
11328                 GenTreePtr oldTree = tree;
11329
11330                 tree = gtFoldExpr(tree);
11331
11332                 // Were we able to fold it?
11333                 // Note that gtFoldExpr may return a non-leaf even if successful
11334                 // e.g. for something like "expr / 1" - see also bug #290853
11335                 if (tree->OperIsLeaf() || (oldTree != tree))
11336
11337                 {
11338                     return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11339                 }
11340
11341                 // Did we fold it into a comma node with throw?
11342                 if (tree->gtOper == GT_COMMA)
11343                 {
11344                     noway_assert(fgIsCommaThrow(tree));
11345                     return fgMorphTree(tree);
11346                 }
11347             }
11348                 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11349
11350             case GT_RETURN:
11351                 // normalize small integer return values
11352                 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
11353                     (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
11354                 {
11355                     // Small-typed return values are normalized by the callee
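                    // For example (a sketch): in a method returning 'byte', "return x;" gets a
                    // narrowing cast to the small return type inserted (as a TYP_INT-typed cast
                    // node), so the caller can rely on the upper bits of the return register
                    // being properly normalized.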
11356                     op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
11357
11358                     // Propagate GTF_COLON_COND
11359                     op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11360
11361                     tree->gtOp.gtOp1 = fgMorphCast(op1);
11362
11363                     // Propagate side effect flags
11364                     tree->gtFlags &= ~GTF_ALL_EFFECT;
11365                     tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11366
11367                     return tree;
11368                 }
11369                 break;
11370
11371             case GT_EQ:
11372             case GT_NE:
11373
11374                 // Check for typeof(...) == obj.GetType()
11375                 // Also check for typeof(...) == typeof(...)
11376                 // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
11377                 // type handles and instances of System.Type
11378                 // If this invariant is ever broken, the optimization will need updating
11379                 CLANG_FORMAT_COMMENT_ANCHOR;
11380
11381 #ifdef LEGACY_BACKEND
11382                 if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
11383                     ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11384                      (op1->gtCall.gtCallType == CT_HELPER)) &&
11385                     ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11386                      (op2->gtCall.gtCallType == CT_HELPER)))
11387 #else
11388                 if ((((op1->gtOper == GT_INTRINSIC) &&
11389                       (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11390                      ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
11391                     (((op2->gtOper == GT_INTRINSIC) &&
11392                       (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11393                      ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
11394 #endif
11395                 {
11396                     GenTreePtr pGetClassFromHandle;
11397                     GenTreePtr pGetType;
11398
11399 #ifdef LEGACY_BACKEND
11400                     bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
11401                     bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
11402 #else
11403                     bool bOp1ClassFromHandle =
11404                         op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
11405                     bool bOp2ClassFromHandle =
11406                         op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
11407 #endif
11408
11409                     // Optimize typeof(...) == typeof(...)
11410                     // Typically this occurs in generic code that attempts a type switch
11411                     // e.g. typeof(T) == typeof(int)
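                    // When both sides are such helper calls, the calls are discarded and the
                    // comparison is rebuilt directly over the two type-handle arguments,
                    // conceptually turning "GetTypeFromHandle(h1) == GetTypeFromHandle(h2)"
                    // into "h1 == h2".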
11412
11413                     if (bOp1ClassFromHandle && bOp2ClassFromHandle)
11414                     {
11415                         GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
11416                         GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
11417
11418                         GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
11419
11420                         compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11421
11422                         // Morph and return
11423                         return fgMorphTree(compare);
11424                     }
11425                     else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
11426                     {
11427                         //
11428                         // Now check for GetClassFromHandle(handle) == obj.GetType()
11429                         //
11430
11431                         if (bOp1ClassFromHandle)
11432                         {
11433                             pGetClassFromHandle = tree->gtOp.gtOp1;
11434                             pGetType            = op2;
11435                         }
11436                         else
11437                         {
11438                             pGetClassFromHandle = tree->gtOp.gtOp2;
11439                             pGetType            = op1;
11440                         }
11441
11442                         GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
11443                         GenTreePtr pConstLiteral               = pGetClassFromHandleArgument;
11444
11445                         // Unwrap GT_NOP node used to prevent constant folding
11446                         if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
11447                         {
11448                             pConstLiteral = pConstLiteral->gtOp.gtOp1;
11449                         }
11450
11451                         // In the ngen case, we have to go through an indirection to get the right handle.
11452                         if (pConstLiteral->gtOper == GT_IND)
11453                         {
11454                             pConstLiteral = pConstLiteral->gtOp.gtOp1;
11455                         }
11456 #ifdef LEGACY_BACKEND
11457
11458                         if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
11459                             info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
11460                                 CORINFO_INTRINSIC_Object_GetType &&
11461 #else
11462                         if ((pGetType->gtOper == GT_INTRINSIC) &&
11463                             (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
11464 #endif
11465                             pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
11466                         {
11467                             CORINFO_CLASS_HANDLE clsHnd =
11468                                 CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
11469
11470                             if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
11471                             {
11472                                 // Method Table tree
11473                                 CLANG_FORMAT_COMMENT_ANCHOR;
11474 #ifdef LEGACY_BACKEND
11475                                 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
11476 #else
11477                                 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
11478 #endif
11479                                 objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
11480                                 compCurBB->bbFlags |= BBF_HAS_VTABREF;
11481                                 optMethodFlags |= OMF_HAS_VTABLEREF;
11482
11483                                 // Method table constant
11484                                 GenTreePtr cnsMT = pGetClassFromHandleArgument;
11485
11486                                 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
11487
11488                                 compare->gtFlags |=
11489                                     tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11490
11491                                 // Morph and return
11492                                 return fgMorphTree(compare);
11493                             }
11494                         }
11495                     }
11496                 }
11497                 fgMorphRecognizeBoxNullable(tree);
11498                 op1 = tree->gtOp.gtOp1;
11499                 op2 = tree->gtGetOp2IfPresent();
11500
11501                 break;
11502
11503 #ifdef _TARGET_ARM_
11504             case GT_INTRINSIC:
11505                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11506                 {
11507                     switch (tree->TypeGet())
11508                     {
11509                         case TYP_DOUBLE:
11510                             return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11511                         case TYP_FLOAT:
11512                             return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11513                         default:
11514                             unreached();
11515                     }
11516                 }
11517                 break;
11518 #endif
11519
11520             default:
11521                 break;
11522         }
11523
11524 #if !CPU_HAS_FP_SUPPORT
11525         tree = fgMorphToEmulatedFP(tree);
11526 #endif
11527
11528         /* Could this operator throw an exception? */
11529         if (fgGlobalMorph && tree->OperMayThrow())
11530         {
11531             if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
11532             {
11533                 /* Mark the tree node as potentially throwing an exception */
11534                 tree->gtFlags |= GTF_EXCEPT;
11535             }
11536         }
11537
11538         /*-------------------------------------------------------------------------
11539          * Process the first operand, if any
11540          */
11541
11542         if (op1)
11543         {
11544
11545 #if LOCAL_ASSERTION_PROP
11546             // If we are entering the "then" part of a Qmark-Colon we must
11547             // save the state of the current copy assignment table
11548             // so that we can restore this state when entering the "else" part
11549             if (isQmarkColon)
11550             {
11551                 noway_assert(optLocalAssertionProp);
11552                 if (optAssertionCount)
11553                 {
11554                     noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11555                     unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
11556                     origAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
11557                     origAssertionCount = optAssertionCount;
11558                     memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11559                 }
11560                 else
11561                 {
11562                     origAssertionCount = 0;
11563                     origAssertionTab   = nullptr;
11564                 }
11565             }
11566 #endif // LOCAL_ASSERTION_PROP
11567
11568             // We might need a new MorphAddrContext.  (These are used to convey
11569             // parent context about how addresses being calculated will be used; see the
11570             // specification comment for MorphAddrContext for full details.)
11571             // Assume it's an Ind context to start.
11572             MorphAddrContext  subIndMac1(MACK_Ind);
11573             MorphAddrContext* subMac1 = mac;
11574             if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11575             {
11576                 switch (tree->gtOper)
11577                 {
11578                     case GT_ADDR:
11579                         if (subMac1 == nullptr)
11580                         {
11581                             subMac1         = &subIndMac1;
11582                             subMac1->m_kind = MACK_Addr;
11583                         }
11584                         break;
11585                     case GT_COMMA:
11586                         // In a comma, the incoming context only applies to the rightmost arg of the
11587                         // comma list.  The left arg (op1) gets a fresh context.
11588                         subMac1 = nullptr;
11589                         break;
11590                     case GT_OBJ:
11591                     case GT_BLK:
11592                     case GT_DYN_BLK:
11593                     case GT_IND:
11594                         subMac1 = &subIndMac1;
11595                         break;
11596                     default:
11597                         break;
11598                 }
11599             }
11600
11601             // For additions, if we're in an IND context keep track of whether
11602             // all offsets added to the address are constant, and their sum.
11603             if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11604             {
11605                 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11606                 GenTreePtr otherOp = tree->gtOp.gtOp2;
11607                 // Is the other operand a constant?
11608                 if (otherOp->IsCnsIntOrI())
11609                 {
11610                     ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11611                     totalOffset += otherOp->gtIntConCommon.IconValue();
11612                     if (totalOffset.IsOverflow())
11613                     {
11614                         // We will consider an offset so large as to overflow as "not a constant" --
11615                         // we will do a null check.
11616                         subMac1->m_allConstantOffsets = false;
11617                     }
11618                     else
11619                     {
11620                         subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11621                     }
11622                 }
11623                 else
11624                 {
11625                     subMac1->m_allConstantOffsets = false;
11626                 }
11627             }
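            // For intuition (illustrative; the field offsets are made up): morphing the
            // address for something like obj.f1.f2 yields ADD(ADD(obj, 8), 4). Both
            // offsets are constant, so m_totalOffset accumulates to 12 and
            // m_allConstantOffsets stays true; a single non-constant addend instead
            // forces m_allConstantOffsets to false and a null check downstream.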
11628
11629             // If gtOp1 is a GT_FIELD, we need to pass down the mac if
11630             // its parent is GT_ADDR, since the address of the field
11631             // is part of an ongoing address computation. Otherwise
11632             // op1 represents the value of the field and so any address
11633             // calculations it does are in a new context.
11634             if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
11635             {
11636                 subMac1 = nullptr;
11637
11638                 // The impact of this field's value on any ongoing
11639                 // address computation is handled below when looking
11640                 // at op2.
11641             }
11642
11643             tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11644
11645 #if LOCAL_ASSERTION_PROP
11646             // If we are exiting the "then" part of a Qmark-Colon we must
11647             // save the state of the current copy assignment table
11648             // so that we can merge this state with the "else" part exit
11649             if (isQmarkColon)
11650             {
11651                 noway_assert(optLocalAssertionProp);
11652                 if (optAssertionCount)
11653                 {
11654                     noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11655                     unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
11656                     thenAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
11657                     thenAssertionCount = optAssertionCount;
11658                     memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
11659                 }
11660                 else
11661                 {
11662                     thenAssertionCount = 0;
11663                     thenAssertionTab   = nullptr;
11664                 }
11665             }
11666 #endif // LOCAL_ASSERTION_PROP
11667
11668             /* Morphing along with folding and inlining may have changed the
11669              * side effect flags, so we have to reset them
11670              *
11671              * NOTE: Don't reset the exception flags on nodes that may throw */
11672
11673             noway_assert(tree->gtOper != GT_CALL);
11674
11675             if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
11676             {
11677                 tree->gtFlags &= ~GTF_CALL;
11678             }
11679
11680             if (!tree->OperMayThrow())
11681             {
11682                 tree->gtFlags &= ~GTF_EXCEPT;
11683             }
11684
11685             /* Propagate the new flags */
11686             tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
11687
11688             // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does.
11689             // Similarly for clsVar.
11690             if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
11691             {
11692                 tree->gtFlags &= ~GTF_GLOB_REF;
11693             }
11694         } // if (op1)
11695
11696         /*-------------------------------------------------------------------------
11697          * Process the second operand, if any
11698          */
11699
11700         if (op2)
11701         {
11702
11703 #if LOCAL_ASSERTION_PROP
11704             // If we are entering the "else" part of a Qmark-Colon we must
11705             // reset the state of the current copy assignment table
11706             if (isQmarkColon)
11707             {
11708                 noway_assert(optLocalAssertionProp);
11709                 optAssertionReset(0);
11710                 if (origAssertionCount)
11711                 {
11712                     size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11713                     memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11714                     optAssertionReset(origAssertionCount);
11715                 }
11716             }
11717 #endif // LOCAL_ASSERTION_PROP
11718
11719             // We might need a new MorphAddrContext to use in evaluating op2.
11720             // (These are used to convey parent context about how addresses being calculated
11721             // will be used; see the specification comment for MorphAddrContext for full details.)
11722             // Assume it's an Ind context to start.
11723             switch (tree->gtOper)
11724             {
11725                 case GT_ADD:
11726                     if (mac != nullptr && mac->m_kind == MACK_Ind)
11727                     {
11728                         GenTreePtr otherOp = tree->gtOp.gtOp1;
11729                         // Is the other operand a constant?
11730                         if (otherOp->IsCnsIntOrI())
11731                         {
11732                             mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11733                         }
11734                         else
11735                         {
11736                             mac->m_allConstantOffsets = false;
11737                         }
11738                     }
11739                     break;
11740                 default:
11741                     break;
11742             }
11743
11744             // If gtOp2 is a GT_FIELD, we must be taking its value,
11745             // so it should evaluate its address in a new context.
11746             if (op2->gtOper == GT_FIELD)
11747             {
11748                 // The impact of this field's value on any ongoing
11749                 // address computation is handled above when looking
11750                 // at op1.
11751                 mac = nullptr;
11752             }
11753
11754             tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11755
11756             /* Propagate the side effect flags from op2 */
11757
11758             tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11759
11760 #if LOCAL_ASSERTION_PROP
11761             // If we are exiting the "else" part of a Qmark-Colon we must
11762             // merge the state of the current copy assignment table with
11763             // that of the exit of the "then" part.
11764             if (isQmarkColon)
11765             {
11766                 noway_assert(optLocalAssertionProp);
11767                 // If either exit table has zero entries then
11768                 // the merged table also has zero entries
11769                 if (optAssertionCount == 0 || thenAssertionCount == 0)
11770                 {
11771                     optAssertionReset(0);
11772                 }
11773                 else
11774                 {
11775                     size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11776                     if ((optAssertionCount != thenAssertionCount) ||
11777                         (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11778                     {
11779                         // Yes, they are different, so we have to find the merged set.
11780                         // Iterate over the copy asgn table removing any entries
11781                         // that do not have an exact match in the thenAssertionTab
11782                         AssertionIndex index = 1;
11783                         while (index <= optAssertionCount)
11784                         {
11785                             AssertionDsc* curAssertion = optGetAssertion(index);
11786
11787                             for (unsigned j = 0; j < thenAssertionCount; j++)
11788                             {
11789                                 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11790
11791                                 // Do the left sides match?
11792                                 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11793                                     (curAssertion->assertionKind == thenAssertion->assertionKind))
11794                                 {
11795                                     // Do the right sides match?
11796                                     if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
11797                                         (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
11798                                     {
11799                                         goto KEEP;
11800                                     }
11801                                     else
11802                                     {
11803                                         goto REMOVE;
11804                                     }
11805                                 }
11806                             }
11807                         //
11808                         // If we fall out of the loop above then we didn't find
11809                         // any matching entry in the thenAssertionTab, so it must
11810                         // have been killed on that path; we remove it here
11811                         //
11812                         REMOVE:
11813                             // The data at optAssertionTabPrivate[index] is to be removed
11814                             CLANG_FORMAT_COMMENT_ANCHOR;
11815 #ifdef DEBUG
11816                             if (verbose)
11817                             {
11818                                 printf("The QMARK-COLON ");
11819                                 printTreeID(tree);
11820                                 printf(" removes assertion candidate #%d\n", index);
11821                             }
11822 #endif
11823                             optAssertionRemove(index);
11824                             continue;
11825                         KEEP:
11826                             // The data at optAssertionTabPrivate[index] is to be kept
11827                             index++;
11828                         }
11829                     }
11830                 }
11831             }
11832 #endif    // LOCAL_ASSERTION_PROP
11833         } // if (op2)
11834
11835     DONE_MORPHING_CHILDREN:
11836
11837 /*-------------------------------------------------------------------------
11838  * Now do POST-ORDER processing
11839  */
11840
11841 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
11842         // Variable shifts of a long end up being helper calls, so mark the tree as such. This
11843         // is potentially too conservative, since they'll get treated as having side effects.
11844         // It is important to mark them as calls so if they are part of an argument list,
11845         // they will get sorted and processed properly (for example, it is important to handle
11846         // all nested calls before putting struct arguments in the argument registers). We
11847         // could mark the trees just before argument processing, but it would require a full
11848         // tree walk of the argument tree, so we just do it here, instead, even though we'll
11849         // mark non-argument trees (that will still get converted to calls, anyway).
11850         if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
11851         {
11852             tree->gtFlags |= GTF_CALL;
11853         }
11854 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
11855
11856         if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
11857             (op2 && !varTypeIsGC(op2->TypeGet())))
11858         {
11859             // The tree is really not GC but was marked as such. Now that the
11860             // children have been unmarked, unmark the tree too.
11861
11862             // Remember that GT_COMMA inherits its type only from op2
11863             if (tree->gtOper == GT_COMMA)
11864             {
11865                 tree->gtType = genActualType(op2->TypeGet());
11866             }
11867             else
11868             {
11869                 tree->gtType = genActualType(op1->TypeGet());
11870             }
11871         }
11872
11873         GenTreePtr oldTree = tree;
11874
11875         GenTreePtr qmarkOp1 = nullptr;
11876         GenTreePtr qmarkOp2 = nullptr;
11877
11878         if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
11879         {
11880             qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
11881             qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
11882         }
11883
11884         // Try to fold it, maybe we get lucky.
11885         tree = gtFoldExpr(tree);
11886
11887         if (oldTree != tree)
11888         {
11889             /* if gtFoldExpr returned op1 or op2 then we are done */
11890             if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
11891             {
11892                 return tree;
11893             }
11894
11895             /* If we created a comma-throw tree then we need to morph op1 */
11896             if (fgIsCommaThrow(tree))
11897             {
11898                 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
11899                 fgMorphTreeDone(tree);
11900                 return tree;
11901             }
11902
11903             return tree;
11904         }
11905         else if (tree->OperKind() & GTK_CONST)
11906         {
11907             return tree;
11908         }
11909
11910         /* gtFoldExpr could have used setOper to change the oper */
11911         oper = tree->OperGet();
11912         typ  = tree->TypeGet();
11913
11914         /* gtFoldExpr could have changed op1 and op2 */
11915         op1 = tree->gtOp.gtOp1;
11916         op2 = tree->gtGetOp2IfPresent();
11917
11918         // Do we have an integer compare operation?
11919         //
11920         if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
11921         {
11922             // Are we comparing against zero?
11923             //
11924             if (op2->IsIntegralConst(0))
11925             {
11926                 // Request that the codegen for op1 set the condition flags
11927                 // when it generates the code for op1.
11928                 //
11929                 // Codegen for op1 must set the condition flags if
11930                 // this method returns true.
11931                 //
11932                 op1->gtRequestSetFlags();
11933             }
11934         }
11935         /*-------------------------------------------------------------------------
11936          * Perform the required oper-specific postorder morphing
11937          */
11938
11939         GenTreePtr           temp;
11940         GenTreePtr           cns1, cns2;
11941         GenTreePtr           thenNode;
11942         GenTreePtr           elseNode;
11943         size_t               ival1, ival2;
11944         GenTreePtr           lclVarTree;
11945         GenTreeLclVarCommon* lclVarCmnTree;
11946         FieldSeqNode*        fieldSeq = nullptr;
11947
11948         switch (oper)
11949         {
11950             case GT_ASG:
11951
11952                 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
11953                 if (lclVarTree != nullptr)
11954                 {
11955                     lclVarTree->gtFlags |= GTF_VAR_DEF;
11956                 }
11957
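                // If the target of the assignment is effectively a constant (e.g. a
                // constant address), wrap it in a GT_IND so the store goes through an
                // explicit indirection.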
11958                 if (op1->gtEffectiveVal()->OperIsConst())
11959                 {
11960                     op1              = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
11961                     tree->gtOp.gtOp1 = op1;
11962                 }
11963
11964                 /* If we are storing a small type, we might be able to omit a cast */
11965                 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
11966                 {
11967                     if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
11968                     {
11969                         var_types castType = op2->CastToType();
11970
11971                         // If we are performing a narrowing cast and
11972                         // castType is larger or the same as op1's type
11973                         // then we can discard the cast.
11974
11975                         if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
11976                         {
11977                             tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
11978                         }
11979                     }
11980                     else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
11981                     {
11982                         /* We don't need to zero extend the setcc instruction */
11983                         op2->gtType = TYP_BYTE;
11984                     }
11985                 }
11986                 // If we introduced a CSE we may need to undo the optimization above
11987                 // (i.e. "op2->gtType = TYP_BYTE;", which depends upon op1 being a GT_IND of a byte type).
11988                 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
11989                 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
11990                 {
11991                     unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
11992                     LclVarDsc* varDsc = &lvaTable[varNum];
11993
11994                     /* We again need to zero extend the setcc instruction */
11995                     op2->gtType = varDsc->TypeGet();
11996                 }
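                // For example (illustrative; "byteLcl" and "intExpr" are made-up names):
                // for a store such as "byteLcl = (byte)intExpr", the GT_CAST to byte can
                // be dropped because the 8-bit store itself performs the truncation, and
                // a compare result stored to a byte location can be produced directly as
                // TYP_BYTE without zero extension.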
11997                 fgAssignSetVarDef(tree);
11998
11999                 __fallthrough;
12000
12001             case GT_ASG_ADD:
12002             case GT_ASG_SUB:
12003             case GT_ASG_MUL:
12004             case GT_ASG_DIV:
12005             case GT_ASG_MOD:
12006             case GT_ASG_UDIV:
12007             case GT_ASG_UMOD:
12008             case GT_ASG_OR:
12009             case GT_ASG_XOR:
12010             case GT_ASG_AND:
12011             case GT_ASG_LSH:
12012             case GT_ASG_RSH:
12013             case GT_ASG_RSZ:
12014
12015                 /* We can't CSE the LHS of an assignment */
12016                 /* We also must set GTF_DONT_CSE in the pre-morphing phase, otherwise assertion prop doesn't see it */
12017                 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12018                 {
12019                     op1->gtFlags |= GTF_DONT_CSE;
12020                 }
12021                 break;
12022
12023             case GT_EQ:
12024             case GT_NE:
12025
12026                 /* Make sure we're allowed to do this */
12027
12028                 if (optValnumCSE_phase)
12029                 {
12030                     // It is not safe to reorder/delete CSEs
12031                     break;
12032                 }
12033
12034                 cns2 = op2;
12035
12036                 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12037
12038                 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12039                 {
12040                     op1 = tree->gtOp.gtOp1;
12041
12042                     /* Since this can occur repeatedly we use a while loop */
12043
12044                     while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
12045                            (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
12046                            (op1->gtOverflow() == false))
12047                     {
12048                         /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
12049
12050                         ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12051                         ival2 = cns2->gtIntCon.gtIconVal;
12052
12053                         if (op1->gtOper == GT_ADD)
12054                         {
12055                             ival2 -= ival1;
12056                         }
12057                         else
12058                         {
12059                             ival2 += ival1;
12060                         }
12061                         cns2->gtIntCon.gtIconVal = ival2;
12062
12063 #ifdef _TARGET_64BIT_
12064                         // We need to re-sign-extend or truncate as appropriate.
12065                         cns2->AsIntCon()->TruncateOrSignExtend32();
12066 #endif // _TARGET_64BIT_
12067
12068                         op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12069                     }
12070                 }
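                // For example (illustrative): "(x + 5) == 8" becomes "x == 3", and
                // "(x - 2) == 8" becomes "x == 10"; the loop keeps folding while the
                // pattern repeats, e.g. "((x + 1) + 2) == 9" -> "x == 6".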
12071
12072                 //
12073                 // Here we look for the following tree
12074                 //
12075                 //                        EQ/NE
12076                 //                        /  \
12077                 //                      op1   CNS 0/1
12078                 //
12079                 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12080
12081                 // cast to unsigned allows test for both 0 and 1
12082                 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12083                 {
12084                     ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12085                 }
12086                 // cast to UINT64 allows test for both 0 and 1
12087                 else if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12088                 {
12089                     ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12090                 }
12091
12092                 if (ival2 != INT_MAX)
12093                 {
12094                     // If we don't have a comma and relop, we can't do this optimization
12095                     //
12096                     if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
12097                     {
12098                         // Here we look for the following transformation
12099                         //
12100                         //                  EQ/NE                    Possible REVERSE(RELOP)
12101                         //                  /  \                           /      \
12102                         //               COMMA CNS 0/1             ->   COMMA   relop_op2
12103                         //              /   \                          /    \
12104                         //             x  RELOP                       x     relop_op1
12105                         //               /    \
12106                         //         relop_op1  relop_op2
12107                         //
12108                         //
12109                         //
12110                         GenTreePtr comma = op1;
12111                         GenTreePtr relop = comma->gtOp.gtOp2;
12112
12113                         GenTreePtr relop_op1 = relop->gtOp.gtOp1;
12114
12115                         bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12116
12117                         if (reverse)
12118                         {
12119                             gtReverseCond(relop);
12120                         }
12121
12122                         relop->gtOp.gtOp1 = comma;
12123                         comma->gtOp.gtOp2 = relop_op1;
12124
12125                         // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12126                         comma->gtFlags &= ~GTF_ALL_EFFECT;
12127                         comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12128                         comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12129
12130                         noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12131                         noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
12132                         relop->gtFlags |=
12133                             tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12134
12135                         return relop;
12136                     }
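                    // For example (illustrative; "sideEffect" is a made-up name):
                    //     ((sideEffect, (a < b)) == 0)  ==>  ((sideEffect, a) >= b)
                    // The comma keeps its side effect while the reversed relop is
                    // pulled up to replace the compare against 0/1.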
12137
12138                     if (op1->gtOper == GT_COMMA)
12139                     {
12140                         // Here we look for the following tree
12141                         // and when the LCL_VAR is a temp we can fold the tree:
12142                         //
12143                         //                        EQ/NE                  EQ/NE
12144                         //                        /  \                   /  \
12145                         //                     COMMA  CNS 0/1  ->     RELOP CNS 0/1
12146                         //                     /   \                   / \
12147                         //                   ASG  LCL_VAR
12148                         //                  /  \
12149                         //           LCL_VAR   RELOP
12150                         //                      / \
12151                         //
12152
12153                         GenTreePtr asg = op1->gtOp.gtOp1;
12154                         GenTreePtr lcl = op1->gtOp.gtOp2;
12155
12156                         /* Make sure that the left side of the comma is the assignment to the LCL_VAR */
12157                         if (asg->gtOper != GT_ASG)
12158                         {
12159                             goto SKIP;
12160                         }
12161
12162                         /* The right side of the comma must be a LCL_VAR temp */
12163                         if (lcl->gtOper != GT_LCL_VAR)
12164                         {
12165                             goto SKIP;
12166                         }
12167
12168                         unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12169                         noway_assert(lclNum < lvaCount);
12170
12171                         /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12172                         if (!lvaTable[lclNum].lvIsTemp)
12173                         {
12174                             goto SKIP;
12175                         }
12176
12177 #if FEATURE_ANYCSE
12178                         /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12179                         // Fix 383856 X86/ARM ILGEN
12180                         if (lclNumIsCSE(lclNum))
12181                         {
12182                             goto SKIP;
12183                         }
12184 #endif
12185
12186                         /* The left side of the asg must be a LCL_VAR (it receives the RELOP result) */
12187                         if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12188                         {
12189                             goto SKIP;
12190                         }
12191
12192                         /* Both of the LCL_VARs must match */
12193                         if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12194                         {
12195                             goto SKIP;
12196                         }
12197
12198                         /* If right side of asg is not a RELOP then skip */
12199                         if (!asg->gtOp.gtOp2->OperIsCompare())
12200                         {
12201                             goto SKIP;
12202                         }
12203
12204                         LclVarDsc* varDsc = lvaTable + lclNum;
12205
12206                         /* Set op1 to the right side of asg, (i.e. the RELOP) */
12207                         op1 = asg->gtOp.gtOp2;
12208
12209                         DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12210                         DEBUG_DESTROY_NODE(lcl);
12211
12212                         /* This local variable should never be used again */
12213                         // <BUGNUM>
12214                         // VSW 184221: Set lvRefCnt to zero to indicate that this local var
12215                         // is not used any more. (Keep the lvType as is.)
12216                         // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars,
12217                         // and then emitter::emitEndCodeGen will assert in the following line:
12218                         //        noway_assert( dsc->lvTracked);
12219                         // </BUGNUM>
12220                         noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
12221                                      varDsc->lvRefCnt == 2    // Or, we assume this tmp should only be used here,
12222                                                               // and it only shows up twice.
12223                                      );
12224                         lvaTable[lclNum].lvRefCnt = 0;
12225                         lvaTable[lclNum].lvaResetSortAgainFlag(this);
12226                     }
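                    // For example (illustrative): when "tmp" is a single-use compiler
                    // temp, "((tmp = (a < b)), tmp) != 0" reduces to "(a < b) != 0",
                    // which the OperIsCompare() case below folds to just "a < b".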
12227
12228                     if (op1->OperIsCompare())
12229                     {
12230                         // Here we look for the following tree
12231                         //
12232                         //                        EQ/NE           ->      RELOP/!RELOP
12233                         //                        /  \                       /    \
12234                         //                     RELOP  CNS 0/1
12235                         //                     /   \
12236                         //
12237                         // Note that we will remove/destroy the EQ/NE node and move
12238                         // the RELOP up into its place.
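                        // For example (illustrative): "(a < b) == 0" becomes "a >= b",
                        // while "(a < b) != 0" becomes just "a < b".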
12239
12240                         /* Here we reverse the RELOP if necessary */
12241
12242                         bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12243
12244                         if (reverse)
12245                         {
12246                             gtReverseCond(op1);
12247                         }
12248
12249                         /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12250                         op1->gtType = tree->gtType;
12251
12252                         noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12253                         op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12254
12255                         DEBUG_DESTROY_NODE(tree);
12256                         return op1;
12257                     }
12258
12259                     //
12260                     // Now we check for a compare with the result of an '&' operator
12261                     //
12262                     // Here we look for the following transformation:
12263                     //
12264                     //                        EQ/NE                  EQ/NE
12265                     //                        /  \                   /  \
12266                     //                      AND   CNS 0/1  ->      AND   CNS 0
12267                     //                     /   \                  /   \
12268                     //                RSZ/RSH   CNS 1            x     CNS (1 << y)
12269                     //                  /  \
12270                     //                 x   CNS_INT +y
12271
12272                     if (op1->gtOper == GT_AND)
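                    // For example (illustrative): "((x >> 5) & 1) == 1" becomes
                    // "(x & 0x20) != 0", and "((x >> 5) & 1) == 0" becomes
                    // "(x & 0x20) == 0"; the shift is folded into the mask constant.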
12273                     {
12274                         GenTreePtr andOp    = op1;
12275                         GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
12276
12277                         if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12278                         {
12279                             goto SKIP;
12280                         }
12281
12282                         if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12283                         {
12284                             goto SKIP;
12285                         }
12286
12287                         ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12288
12289                         if (shiftAmount < 0)
12290                         {
12291                             goto SKIP;
12292                         }
12293
12294                         if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12295                         {
12296                             goto SKIP;
12297                         }
12298
12299                         if (andOp->gtType == TYP_INT)
12300                         {
12301                             if (shiftAmount > 31)
12302                             {
12303                                 goto SKIP;
12304                             }
12305
12306                             UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12307
12308                             andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12309
12310                             // Reverse the cond if necessary
12311                             if (ival2 == 1)
12312                             {
12313                                 gtReverseCond(tree);
12314                                 cns2->gtIntCon.gtIconVal = 0;
12315                                 oper                     = tree->gtOper;
12316                             }
12317                         }
12318                         else if (andOp->gtType == TYP_LONG)
12319                         {
12320                             if (shiftAmount > 63)
12321                             {
12322                                 goto SKIP;
12323                             }
12324
12325                             UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12326
12327                             andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12328
12329                             // Reverse the cond if necessary
12330                             if (ival2 == 1)
12331                             {
12332                                 gtReverseCond(tree);
12333                                 cns2->gtIntConCommon.SetLngValue(0);
12334                                 oper = tree->gtOper;
12335                             }
12336                         }
12337
12338                         andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12339
12340                         DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12341                         DEBUG_DESTROY_NODE(rshiftOp);
12342                     }
12343                 } // END if (ival2 != INT_MAX)
12344
12345             SKIP:
12346                 /* Now check for compares with small constant longs that can be cast to int */
12347
12348                 if (!cns2->OperIsConst())
12349                 {
12350                     goto COMPARE;
12351                 }
12352
12353                 if (cns2->TypeGet() != TYP_LONG)
12354                 {
12355                     goto COMPARE;
12356                 }
12357
12358                 /* Is the constant 31 bits or smaller? */
12359
12360                 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12361                 {
12362                     goto COMPARE;
12363                 }
12364
12365                 /* Is the first comparand a mask operation of type long? */
12366
12367                 if (op1->gtOper != GT_AND)
12368                 {
12369                     /* Another interesting case: cast from int */
12370
12371                     if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12372                         !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12373                         !op1->gtOverflow())              // cannot be an overflow checking cast
12374                     {
12375                         /* Simply make this into an integer comparison */
12376
12377                         tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12378                         tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
12379                     }
12380
12381                     goto COMPARE;
12382                 }
12383
12384                 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12385
12386                 /* Is the result of the mask effectively an INT ? */
12387
12388                 GenTreePtr andMask;
12389                 andMask = op1->gtOp.gtOp2;
12390                 if (andMask->gtOper != GT_CNS_NATIVELONG)
12391                 {
12392                     goto COMPARE;
12393                 }
12394                 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12395                 {
12396                     goto COMPARE;
12397                 }
12398
12399                 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12400
12401                 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
12402
12403                 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12404
12405                 noway_assert(andMask == op1->gtOp.gtOp2);
12406
12407                 ival1 = (int)andMask->gtIntConCommon.LngValue();
12408                 andMask->SetOper(GT_CNS_INT);
12409                 andMask->gtType             = TYP_INT;
12410                 andMask->gtIntCon.gtIconVal = ival1;
12411
12412                 /* now change the type of the AND node */
12413
12414                 op1->gtType = TYP_INT;
12415
12416                 /* finally we replace the comparand */
12417
12418                 ival2 = (int)cns2->gtIntConCommon.LngValue();
12419                 cns2->SetOper(GT_CNS_INT);
12420                 cns2->gtType = TYP_INT;
12421
12422                 noway_assert(cns2 == op2);
12423                 cns2->gtIntCon.gtIconVal = ival2;
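                // Net effect (illustrative): for a long x, "(x & 0xFF) == 10L" now
                // compares as "(((int)x) & 0xFF) == 10", i.e. an int-sized compare.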
12424
12425                 goto COMPARE;
12426
12427             case GT_LT:
12428             case GT_LE:
12429             case GT_GE:
12430             case GT_GT:
12431
12432                 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12433                 {
12434                     if (op2->gtOper == GT_CNS_INT)
12435                     {
12436                         cns2 = op2;
12437                         /* Check for "expr relop 1" */
12438                         if (cns2->IsIntegralConst(1))
12439                         {
12440                             /* Check for "expr >= 1" */
12441                             if (oper == GT_GE)
12442                             {
12443                                 /* Change to "expr > 0" */
12444                                 oper = GT_GT;
12445                                 goto SET_OPER;
12446                             }
12447                             /* Check for "expr < 1" */
12448                             else if (oper == GT_LT)
12449                             {
12450                                 /* Change to "expr <= 0" */
12451                                 oper = GT_LE;
12452                                 goto SET_OPER;
12453                             }
12454                         }
12455                         /* Check for "expr relop -1" */
12456                         else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12457                         {
12458                             /* Check for "expr <= -1" */
12459                             if (oper == GT_LE)
12460                             {
12461                                 /* Change to "expr < 0" */
12462                                 oper = GT_LT;
12463                                 goto SET_OPER;
12464                             }
12465                             /* Check for "expr > -1" */
12466                             else if (oper == GT_GT)
12467                             {
12468                                 /* Change to "expr >= 0" */
12469                                 oper = GT_GE;
12470
12471                             SET_OPER:
12472                             // If we get here we should be changing 'oper'
12473                                 assert(tree->OperGet() != oper);
12474
12475                                 // Keep the old ValueNumber for 'tree' as the new expr
12476                                 // will still compute the same value as before
12477                                 tree->SetOper(oper, GenTree::PRESERVE_VN);
12478                                 cns2->gtIntCon.gtIconVal = 0;
12479
12480                                 // vnStore is null before the ValueNumber phase has run
12481                                 if (vnStore != nullptr)
12482                                 {
12483                                     // Update the ValueNumber for 'cns2', as we just changed it to 0
12484                                     fgValueNumberTreeConst(cns2);
12485                                 }
12486
12487                                 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12488                             }
12489                         }
12490                     }
12491                 }
12492                 else // we have an unsigned comparison
12493                 {
12494                     if (op2->IsIntegralConst(0))
12495                     {
12496                         if ((oper == GT_GT) || (oper == GT_LE))
12497                         {
12498                             // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12499                             // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails
12500                             // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12501                             // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12502                             // occurs as a result of branch inversion.
12503                             oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12504                             tree->SetOper(oper, GenTree::PRESERVE_VN);
12505                             tree->gtFlags &= ~GTF_UNSIGNED;
12506                         }
12507                     }
12508                 }
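                // For example (illustrative): an unsigned "x > 0" becomes "x != 0",
                // and an unsigned "x <= 0" becomes "x == 0".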
12509
12510             COMPARE:
12511
12512                 noway_assert(tree->OperKind() & GTK_RELOP);
12513
12514                 /* Check if the result of the comparison is used for a jump.
12515                  * If not then only the int (i.e. 32 bit) case is handled in
12516                  * the code generator through the (x86) "set" instructions.
12517                  * For the rest of the cases, the simplest way is to
12518                  * "simulate" the comparison with ?:
12519                  *
12520                  * On ARM, we previously used the IT instruction, but the IT instructions
12521                  * have mostly been declared obsolete and off-limits, so all cases on ARM
12522                  * get converted to ?: */
12523
12524                 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
12525                 {
12526                     /* We convert it to "(CMP_TRUE) ? (1):(0)" */
12527
12528                     op1 = tree;
12529                     op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12530                     op1->gtRequestSetFlags();
12531
12532                     op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
12533                     op2 = fgMorphTree(op2);
12534
12535                     tree = gtNewQmarkNode(TYP_INT, op1, op2);
12536
12537                     fgMorphTreeDone(tree);
12538
12539                     return tree;
12540                 }
12541                 break;
12542
12543             case GT_QMARK:
12544
12545                 /* If op1 is a comma throw node then we won't be keeping op2 */
12546                 if (fgIsCommaThrow(op1))
12547                 {
12548                     break;
12549                 }
12550
12551                 /* Get hold of the two branches */
12552
12553                 noway_assert(op2->OperGet() == GT_COLON);
12554                 elseNode = op2->AsColon()->ElseNode();
12555                 thenNode = op2->AsColon()->ThenNode();
12556
12557                 /* Try to hoist assignments out of qmark colon constructs.
12558                    i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
12559
12560                 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
12561                     thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
12562                     thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
12563                 {
12564                     noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
12565
12566                     GenTreePtr asg    = thenNode;
12567                     GenTreePtr colon  = op2;
12568                     colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
12569                     colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
12570                     tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
12571                     asg->gtOp.gtOp2              = tree;
12572
12573                     // Asg will have all the flags that the QMARK had
12574                     asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
12575
12576                     // The colon node no longer contains x, so recompute its flags from its new operands.
12577                     colon->gtFlags &= ~GTF_ALL_EFFECT;
12578                     colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12579
12580                     DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
12581                     DEBUG_DESTROY_NODE(elseNode);
12582
12583                     return asg;
12584                 }
12585
12586                 /* If the 'else' branch is empty swap the two branches and reverse the condition */
12587
12588                 if (elseNode->IsNothingNode())
12589                 {
12590                     /* This can only happen for VOID ?: */
12591                     noway_assert(op2->gtType == TYP_VOID);
12592
12593                     /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
12594                     if (thenNode->IsNothingNode())
12595                     {
12596                         // We may be able to throw away op1 (unless it has side-effects)
12597
12598                         if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12599                         {
12600                             /* Just return a Nop node */
12601                             return thenNode;
12602                         }
12603                         else
12604                         {
12605                             /* Just return the relop, but clear the special flags.  Note
12606                                that we can't do that for longs and floats (see code under
12607                                COMPARE label above) */
12608
12609                             if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
12610                             {
12611                                 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12612                                 return op1;
12613                             }
12614                         }
12615                     }
12616                     else
12617                     {
12618                         GenTreePtr tmp = elseNode;
12619
12620                         op2->AsColon()->ElseNode() = elseNode = thenNode;
12621                         op2->AsColon()->ThenNode() = thenNode = tmp;
12622                         gtReverseCond(op1);
12623                     }
12624                 }
12625
12626 #if !defined(_TARGET_ARM_)
12627                 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
12628                 //
12629                 // Don't do this optimization for ARM: we always require assignment
12630                 // to boolean to remain ?:, since we don't have any way to generate
12631                 // this with straight-line code, like x86 does using setcc (at least
12632                 // after the IT instruction is deprecated).
12633
12634                 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
12635                     thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
12636                 {
12637                     ival1 = thenNode->gtIntCon.gtIconVal;
12638                     ival2 = elseNode->gtIntCon.gtIconVal;
12639
12640                     // Is one constant 0 and the other 1?
12641                     if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
12642                     {
12643                         // If the constants are {1, 0}, reverse the condition
12644                         if (ival1 == 1)
12645                         {
12646                             gtReverseCond(op1);
12647                         }
12648
12649                         // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
12650                         // needs to materialize the result as a 0 or 1.
12651                         noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
12652                         op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12653
12654                         DEBUG_DESTROY_NODE(tree);
12655                         DEBUG_DESTROY_NODE(op2);
12656
12657                         return op1;
12658                     }
12659                 }
12660 #endif // !_TARGET_ARM_
12661
12662                 break; // end case GT_QMARK
12663
12664             case GT_MUL:
12665
12666 #ifndef _TARGET_64BIT_
12667                 if (typ == TYP_LONG)
12668                 {
12669                     // This must be GTF_MUL_64RSLT
12670                     assert(tree->gtIsValid64RsltMul());
12671                     return tree;
12672                 }
12673 #endif // _TARGET_64BIT_
12674                 goto CM_OVF_OP;
12675
12676             case GT_SUB:
12677
12678                 if (tree->gtOverflow())
12679                 {
12680                     goto CM_OVF_OP;
12681                 }
12682
12683                 // TODO #4104: there are a lot of other places where
12684                 // this condition is not checked before transformations.
12685                 if (fgGlobalMorph)
12686                 {
12687                     /* Check for "op1 - cns2"; we change it to "op1 + (-cns2)" */
12688
12689                     noway_assert(op2);
12690                     if (op2->IsCnsIntOrI())
12691                     {
12692                         /* Negate the constant and change the node to be "+" */
12693
12694                         op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12695                         oper = GT_ADD;
12696                         tree->ChangeOper(oper);
12697                         goto CM_ADD_OP;
12698                     }
12699
12700                     /* Check for "cns1 - op2"; we change it to "(cns1 + (-op2))" */
12701
12702                     noway_assert(op1);
12703                     if (op1->IsCnsIntOrI())
12704                     {
12705                         noway_assert(varTypeIsIntOrI(tree));
12706
12707                         // The type of the new GT_NEG node should be the same as the
12708                         // type of the tree, i.e. tree->gtType.
12709                         tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2);
12710                         fgMorphTreeDone(op2);
12711
12712                         oper = GT_ADD;
12713                         tree->ChangeOper(oper);
12714                         goto CM_ADD_OP;
12715                     }
12716
12717                     /* No match - exit */
12718                 }
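                // For example (illustrative): "x - 5" is rewritten as "x + (-5)" so that
                // the commutative GT_ADD morphing below applies, and "5 - x" becomes
                // "5 + (-x)" for the same reason.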
12719                 break;
12720
12721 #ifdef _TARGET_ARM64_
12722             case GT_DIV:
12723                 if (!varTypeIsFloating(tree->gtType))
12724                 {
12725                     // Codegen for this instruction needs to be able to throw two exceptions:
12726                     fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12727                     fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12728                 }
12729                 break;
12730             case GT_UDIV:
12731                 // Codegen for this instruction needs to be able to throw one exception:
12732                 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12733                 break;
12734 #endif
12735
12736             case GT_ADD:
12737
12738             CM_OVF_OP:
12739                 if (tree->gtOverflow())
12740                 {
12741                     tree->gtRequestSetFlags();
12742
12743                     // Add the exception-throwing basic block to jump to on overflow
12744
12745                     fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12746
12747                     // We can't do any commutative morphing for overflow instructions
12748
12749                     break;
12750                 }
12751
12752             CM_ADD_OP:
12753
12754             case GT_OR:
12755             case GT_XOR:
12756             case GT_AND:
12757
12758                 /* Commute any non-REF constants to the right */
12759
12760                 noway_assert(op1);
12761                 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12762                 {
12763                     // TODO-Review: We used to assert here that
12764                     // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12765                     // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12766                     // and would sometimes hit this assertion.  This may indicate a missed "remorph".
12767                     // Task is to re-enable this assertion and investigate.
12768
12769                     /* Swap the operands */
12770                     tree->gtOp.gtOp1 = op2;
12771                     tree->gtOp.gtOp2 = op1;
12772
12773                     op1 = op2;
12774                     op2 = tree->gtOp.gtOp2;
12775                 }
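
                // For example, "3 + x" becomes "x + 3"; with constants always in op2,
                // the folding below only has to inspect one operand shape.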

                /* See if we can fold GT_ADD nodes. */

                if (oper == GT_ADD)
                {
                    /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */

                    if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
                        op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
                        !op1->gtOverflow() && !op2->gtOverflow())
                    {
                        cns1 = op1->gtOp.gtOp2;
                        cns2 = op2->gtOp.gtOp2;
                        cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
#ifdef _TARGET_64BIT_
                        if (cns1->TypeGet() == TYP_INT)
                        {
                            // we need to properly re-sign-extend or truncate after adding two int constants above
                            cns1->AsIntCon()->TruncateOrSignExtend32();
                        }
#endif //_TARGET_64BIT_

                        tree->gtOp.gtOp2 = cns1;
                        DEBUG_DESTROY_NODE(cns2);

                        op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
                        op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
                        DEBUG_DESTROY_NODE(op2);
                        op2 = tree->gtOp.gtOp2;
                    }
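
                    // For example, "(x + 4) + (y + 8)" becomes "(x + y) + 12", with the
                    // summed constant re-truncated to 32 bits on 64-bit targets.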

                    if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
                    {
                        /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */

                        if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
                            !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
                        {
                            cns1 = op1->gtOp.gtOp2;
                            op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
                                                             op2->gtIntConCommon.IconValue());
#ifdef _TARGET_64BIT_
                            if (op2->TypeGet() == TYP_INT)
                            {
                                // we need to properly re-sign-extend or truncate after adding two int constants above
                                op2->AsIntCon()->TruncateOrSignExtend32();
                            }
#endif //_TARGET_64BIT_

                            if (cns1->OperGet() == GT_CNS_INT)
                            {
                                op2->gtIntCon.gtFieldSeq =
                                    GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
                            }
                            DEBUG_DESTROY_NODE(cns1);

                            tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
                            DEBUG_DESTROY_NODE(op1);
                            op1 = tree->gtOp.gtOp1;
                        }
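
                        // For example, "(x + 4) + 8" becomes "x + 12"; field sequence
                        // annotations on the two constants are appended when present.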

                        // Fold (x + 0).

                        if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
                        {

                            // If this addition is adding an offset to a null pointer,
                            // avoid the work and yield the null pointer immediately.
                            // Dereferencing the pointer in either case will have the
                            // same effect.

                            if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
                                ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
                            {
                                op2->gtType = tree->gtType;
                                DEBUG_DESTROY_NODE(op1);
                                DEBUG_DESTROY_NODE(tree);
                                return op2;
                            }

                            // Remove the addition iff it won't change the tree type
                            // to TYP_REF.

                            if (!gtIsActiveCSE_Candidate(op2) &&
                                ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
                            {
                                if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
                                    (op2->gtIntCon.gtFieldSeq != nullptr) &&
                                    (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
                                {
                                    fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
                                }

                                DEBUG_DESTROY_NODE(op2);
                                DEBUG_DESTROY_NODE(tree);

                                return op1;
                            }
                        }
                    }
                }
                /* See if we can fold GT_MUL by const nodes */
                else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
                {
#ifndef _TARGET_64BIT_
                    noway_assert(typ <= TYP_UINT);
#endif // !_TARGET_64BIT_
                    noway_assert(!tree->gtOverflow());

                    ssize_t mult            = op2->gtIntConCommon.IconValue();
                    bool    op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
                                           op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();

                    assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);

                    if (mult == 0)
                    {
                        // We may be able to throw away op1 (unless it has side-effects)

                        if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
                        {
                            DEBUG_DESTROY_NODE(op1);
                            DEBUG_DESTROY_NODE(tree);
                            return op2; // Just return the "0" node
                        }

                        // We need to keep op1 for the side-effects. Hang it off
                        // a GT_COMMA node

                        tree->ChangeOper(GT_COMMA);
                        return tree;
                    }
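
                    // For example, "call() * 0" becomes "COMMA(call(), 0)": the zero
                    // result is kept while op1's side effects still execute.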

                    size_t abs_mult      = (mult >= 0) ? mult : -mult;
                    size_t lowestBit     = genFindLowestBit(abs_mult);
                    bool   changeToShift = false;

                    // is it a power of two? (positive or negative)
                    if (abs_mult == lowestBit)
                    {
                        // if negative, negate (min-int does not need negation)
                        if (mult < 0 && mult != SSIZE_T_MIN)
                        {
                            tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
                            fgMorphTreeDone(op1);
                        }

                        // If "op2" is a constant array index, the other multiplicand must be a constant.
                        // Transfer the annotation to the other one.
                        if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
                            op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
                        {
                            assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
                            GenTreePtr otherOp = op1;
                            if (otherOp->OperGet() == GT_NEG)
                            {
                                otherOp = otherOp->gtOp.gtOp1;
                            }
                            assert(otherOp->OperGet() == GT_CNS_INT);
                            assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
                            otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
                        }

                        if (abs_mult == 1)
                        {
                            DEBUG_DESTROY_NODE(op2);
                            DEBUG_DESTROY_NODE(tree);
                            return op1;
                        }

                        /* Change the multiplication into a shift by log2(val) bits */
                        op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
                        changeToShift = true;
                    }
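
                    // For example, for "x * 8" op2 now holds the shift count 3, and the
                    // changeToShift handling below rewrites the multiply to "x << 3"
                    // (and "x * -8" to "(-x) << 3").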
#if LEA_AVAILABLE
                    else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
                    {
                        int     shift  = genLog2(lowestBit);
                        ssize_t factor = abs_mult >> shift;

                        if (factor == 3 || factor == 5 || factor == 9)
                        {
                            // if negative, negate (min-int does not need negation)
                            if (mult < 0 && mult != SSIZE_T_MIN)
                            {
                                tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
                                fgMorphTreeDone(op1);
                            }

                            GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
                            if (op2IsConstIndex)
                            {
                                factorIcon->AsIntCon()->gtFieldSeq =
                                    GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
                            }

                            // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
                            tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
                            fgMorphTreeDone(op1);

                            op2->gtIntConCommon.SetIconValue(shift);
                            changeToShift = true;
                        }
                    }
#endif // LEA_AVAILABLE
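                    // At this point, under the LEA_AVAILABLE path above, e.g. for
                    // "x * 24" op1 has been rewritten to "x * 3" and op2 now holds the
                    // shift count 3; the block below switches the oper itself to GT_LSH,
                    // assuming the by-3 multiply can map onto a scaled address mode.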
                    if (changeToShift)
                    {
                        // vnStore is null before the ValueNumber phase has run
                        if (vnStore != nullptr)
                        {
                            // Update the ValueNumber for 'op2', as we just changed the constant
                            fgValueNumberTreeConst(op2);
                        }
                        oper = GT_LSH;
                        // Keep the old ValueNumber for 'tree' as the new expr
                        // will still compute the same value as before
                        tree->ChangeOper(oper, GenTree::PRESERVE_VN);

                        goto DONE_MORPHING_CHILDREN;
                    }
                }
                else if (fgOperIsBitwiseRotationRoot(oper))
                {
                    tree = fgRecognizeAndMorphBitwiseRotation(tree);

                    // fgRecognizeAndMorphBitwiseRotation may return a new tree
                    oper = tree->OperGet();
                    typ  = tree->TypeGet();
                    op1  = tree->gtOp.gtOp1;
                    op2  = tree->gtOp.gtOp2;
                }
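
                // For example, an OR of "x << 3" and an unsigned right shift "x >> 29"
                // of the same 32-bit value can be recognized as a rotation and replaced
                // by a single rotate node.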

                break;

            case GT_CHS:
            case GT_NOT:
            case GT_NEG:

                /* Any constant cases should have been folded earlier */
                noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
                break;

            case GT_CKFINITE:

                noway_assert(varTypeIsFloating(op1->TypeGet()));

                fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
                break;

            case GT_OBJ:
                // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
                // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
                // is a local or clsVar, even if it has been address-exposed.
                if (op1->OperGet() == GT_ADDR)
                {
                    tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
                }
                break;

            case GT_IND:

                // Can not remove a GT_IND if it is currently a CSE candidate.
                if (gtIsActiveCSE_Candidate(tree))
                {
                    break;
                }

                bool foldAndReturnTemp;
                foldAndReturnTemp = false;
                temp              = nullptr;
                ival1             = 0;

                /* Try to fold *(&X) into X */
                if (op1->gtOper == GT_ADDR)
                {
                    // Can not remove a GT_ADDR if it is currently a CSE candidate.
                    if (gtIsActiveCSE_Candidate(op1))
                    {
                        break;
                    }

                    temp = op1->gtOp.gtOp1; // X

                    // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
                    // they are the *same* struct type.  In fact, they almost certainly aren't.  If the
                    // address has an associated field sequence, that identifies this case; go through
                    // the "lcl_fld" path rather than this one.
                    FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
                    if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
                    {
                        foldAndReturnTemp = true;
                    }
                    else if (temp->OperIsLocal())
                    {
                        unsigned   lclNum = temp->gtLclVarCommon.gtLclNum;
                        LclVarDsc* varDsc = &lvaTable[lclNum];

                        // We will try to optimize when we have a promoted struct with a zero lvFldOffset
                        if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
                        {
                            noway_assert(varTypeIsStruct(varDsc));

                            // We will try to optimize when we have a single-field struct that is being struct promoted
                            if (varDsc->lvFieldCnt == 1)
                            {
                                unsigned lclNumFld = varDsc->lvFieldLclStart;
                                // just grab the promoted field
                                LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];

                                // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
                                // is zero
                                if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
                                {
                                    // We can just use the existing promoted field LclNum
                                    temp->gtLclVarCommon.SetLclNum(lclNumFld);
                                    temp->gtType = fieldVarDsc->TypeGet();

                                    foldAndReturnTemp = true;
                                }
                            }
                        }
                        // If the type of the IND (typ) is a "small int", and the type of the local has the
                        // same width, then we can reduce to just the local variable -- it will be
                        // correctly normalized, and signed/unsigned differences won't matter.
                        //
                        // The below transformation cannot be applied if the local var needs to be normalized on load.
                        else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
                                 !lvaTable[lclNum].lvNormalizeOnLoad())
                        {
                            tree->gtType = typ = temp->TypeGet();
                            foldAndReturnTemp  = true;
                        }
                        else
                        {
                            // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
                            // nullptr)
                            assert(fieldSeq == nullptr);
                            bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
                            assert(b || fieldSeq == nullptr);

                            if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
                            {
                                // Append the field sequence, change the type.
                                temp->AsLclFld()->gtFieldSeq =
                                    GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
                                temp->gtType = typ;

                                foldAndReturnTemp = true;
                            }
                        }
                        // Otherwise we will fold this into a GT_LCL_FLD below
                        //   where we check (temp != nullptr)
                    }
                    else // !temp->OperIsLocal()
                    {
                        // We don't try to fold away the GT_IND/GT_ADDR for this case
                        temp = nullptr;
                    }
                }
                else if (op1->OperGet() == GT_ADD)
                {
                    /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */

                    if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
                        (!(opts.MinOpts() || opts.compDbgCode)))
                    {
                        // No overflow arithmetic with pointers
                        noway_assert(!op1->gtOverflow());

                        temp = op1->gtOp.gtOp1->gtOp.gtOp1;
                        if (!temp->OperIsLocal())
                        {
                            temp = nullptr;
                            break;
                        }

                        // Can not remove the GT_ADDR if it is currently a CSE candidate.
                        if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
                        {
                            break;
                        }

                        ival1    = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
                        fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;

                        // Does the address have an associated zero-offset field sequence?
                        FieldSeqNode* addrFieldSeq = nullptr;
                        if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
                        {
                            fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
                        }

                        if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
                        {
                            noway_assert(!varTypeIsGC(temp->TypeGet()));
                            foldAndReturnTemp = true;
                        }
                        else
                        {
                            // The emitter can't handle large offsets
                            if (ival1 != (unsigned short)ival1)
                            {
                                break;
                            }

                            // The emitter can get confused by invalid offsets
                            if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
                            {
                                break;
                            }

#ifdef _TARGET_ARM_
                            // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
                            //
                            if (varTypeIsFloating(typ))
                            {
                                if ((ival1 % emitTypeSize(typ)) != 0)
                                {
                                    tree->gtFlags |= GTF_IND_UNALIGNED;
                                    break;
                                }
                            }
#endif
                        }
                        // Now we can fold this into a GT_LCL_FLD below
                        //   where we check (temp != nullptr)
                    }
                }

                // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
                // - We may have a load of a local where the load has a different type than the local
                // - We may have a load of a local plus an offset
                //
                // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
                // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
                // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
                // out-of-bounds w.r.t. the local).
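                //
                // For example, a 4-byte load at offset 8 from a 16-byte struct local can
                // become "LCL_FLD int lcl [+8]", while the same load at offset 14 cannot,
                // since it would read past the end of the local.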
                if ((temp != nullptr) && !foldAndReturnTemp)
                {
                    assert(temp->OperIsLocal());

                    const unsigned   lclNum = temp->AsLclVarCommon()->gtLclNum;
                    LclVarDsc* const varDsc = &lvaTable[lclNum];

                    const var_types tempTyp = temp->TypeGet();
                    const bool      useExactSize =
                        varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
                    const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);

                    // Make sure we do not enregister this lclVar.
                    lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));

                    // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
                    // a lclFld: the access represented by an lclFld node must begin at or after the start of the
                    // lclVar and must not extend beyond the end of the lclVar.
                    if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
                    {
                        // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival'
                        // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
                        // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
                        //
                        if (temp->OperGet() == GT_LCL_FLD)
                        {
                            temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
                            temp->AsLclFld()->gtFieldSeq =
                                GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
                        }
                        else
                        {
                            temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
                            temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
                            if (fieldSeq != nullptr)
                            { // If it does represent a field, note that.
                                temp->AsLclFld()->gtFieldSeq = fieldSeq;
                            }
                        }
                        temp->gtType      = tree->gtType;
                        foldAndReturnTemp = true;
                    }
                }

                if (foldAndReturnTemp)
                {
                    assert(temp != nullptr);
                    assert(temp->TypeGet() == typ);
                    assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));

                    // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
                    // 'temp' because a GT_ADDR always marks it for its operand.
                    temp->gtFlags &= ~GTF_DONT_CSE;
                    temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);

                    if (op1->OperGet() == GT_ADD)
                    {
                        DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
                        DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
                    }
                    DEBUG_DESTROY_NODE(op1);  // GT_ADD or GT_ADDR
                    DEBUG_DESTROY_NODE(tree); // GT_IND

                    // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
                    // normalization.
                    if (temp->OperIs(GT_LCL_VAR))
                    {
#ifdef DEBUG
                        // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
                        // and the node in question must have this bit set (as it has already been morphed).
                        temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
                        const bool forceRemorph = true;
                        temp                    = fgMorphLocalVar(temp, forceRemorph);
#ifdef DEBUG
                        // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
                        // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
                        // returns.
                        temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
                    }

                    return temp;
                }

                // Only do this optimization when we are in the global optimizer. Doing this after value numbering
                // could result in an invalid value number for the newly generated GT_IND node.
                if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
                {
                    // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
                    // TBD: this transformation is currently necessary for correctness -- it might
                    // be good to analyze the failures that result if we don't do this, and fix them
                    // in other ways.  Ideally, this should be optional.
                    GenTreePtr commaNode = op1;
                    unsigned   treeFlags = tree->gtFlags;
                    commaNode->gtType    = typ;
                    commaNode->gtFlags   = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
                                                                           // dangerous, clear the GTF_REVERSE_OPS at
                                                                           // least.
#ifdef DEBUG
                    commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                    while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
                    {
                        commaNode          = commaNode->gtOp.gtOp2;
                        commaNode->gtType  = typ;
                        commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
                                                                             // dangerous, clear the GTF_REVERSE_OPS at
                                                                             // least.
#ifdef DEBUG
                        commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                    }
                    bool      wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
                    ArrayInfo arrInfo;
                    if (wasArrIndex)
                    {
                        bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
                        assert(b);
                        GetArrayInfoMap()->Remove(tree);
                    }
                    tree         = op1;
                    op1          = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
                    op1->gtFlags = treeFlags;
                    if (wasArrIndex)
                    {
                        GetArrayInfoMap()->Set(op1, arrInfo);
                    }
#ifdef DEBUG
                    op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                    commaNode->gtOp.gtOp2 = op1;
                    return tree;
                }

                break;

            case GT_ADDR:

                // Can not remove op1 if it is currently a CSE candidate.
                if (gtIsActiveCSE_Candidate(op1))
                {
                    break;
                }

                if (op1->OperGet() == GT_IND)
                {
                    if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
                    {
                        // Can not remove a GT_ADDR if it is currently a CSE candidate.
                        if (gtIsActiveCSE_Candidate(tree))
                        {
                            break;
                        }

                        // Perform the transform ADDR(IND(...)) == (...).
                        GenTreePtr addr = op1->gtOp.gtOp1;

                        noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);

                        DEBUG_DESTROY_NODE(op1);
                        DEBUG_DESTROY_NODE(tree);

                        return addr;
                    }
                }
                else if (op1->OperGet() == GT_OBJ)
                {
                    // Can not remove a GT_ADDR if it is currently a CSE candidate.
                    if (gtIsActiveCSE_Candidate(tree))
                    {
                        break;
                    }

                    // Perform the transform ADDR(OBJ(...)) == (...).
                    GenTreePtr addr = op1->AsObj()->Addr();

                    noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);

                    DEBUG_DESTROY_NODE(op1);
                    DEBUG_DESTROY_NODE(tree);

                    return addr;
                }
                else if (op1->gtOper == GT_CAST)
                {
                    GenTreePtr casting = op1->gtCast.CastOp();
                    if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
                    {
                        DEBUG_DESTROY_NODE(op1);
                        tree->gtOp.gtOp1 = op1 = casting;
                    }
                }
                else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
                {
                    // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
                    // (Be sure to mark "z" as an l-value...)
                    GenTreePtr commaNode = op1;
                    while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
                    {
                        commaNode = commaNode->gtOp.gtOp2;
                    }
                    // The top-level addr might be annotated with a zeroOffset field.
                    FieldSeqNode* zeroFieldSeq = nullptr;
                    bool          isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
                    tree                       = op1;
                    commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;

                    // If the node we're about to put under a GT_ADDR is an indirection, it
                    // doesn't need to be materialized, since we only want the addressing mode. Because
                    // of this, this GT_IND is not a faulting indirection and we don't have to extract it
                    // as a side effect.
                    GenTree* commaOp2 = commaNode->gtOp.gtOp2;
                    if (commaOp2->OperIsBlk())
                    {
                        commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
                    }
                    if (commaOp2->gtOper == GT_IND)
                    {
                        commaOp2->gtFlags |= GTF_IND_NONFAULTING;
                    }

                    op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);

                    if (isZeroOffset)
                    {
                        // Transfer the annotation to the new GT_ADDR node.
                        GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
                    }
                    commaNode->gtOp.gtOp2 = op1;
                    // Originally, I gave all the comma nodes type "byref".  But the ADDR(IND(x)) == x transform
                    // might give op1 a type different from byref (like, say, native int).  So now go back and give
                    // all the comma nodes the type of op1.
                    // TODO: the comma flag update below is conservative and can be improved.
                    // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
                    // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
                    commaNode = tree;
                    while (commaNode->gtOper == GT_COMMA)
                    {
                        commaNode->gtType = op1->gtType;
                        commaNode->gtFlags |= op1->gtFlags;
#ifdef DEBUG
                        commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                        commaNode = commaNode->gtOp.gtOp2;
                    }

                    return tree;
                }

                /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
                op1->gtFlags |= GTF_DONT_CSE;
                break;

            case GT_COLON:
                if (fgGlobalMorph)
                {
                    /* Mark the nodes that are conditionally executed */
                    fgWalkTreePre(&tree, gtMarkColonCond);
                }
                /* Since we're doing this postorder we clear this if it got set by a child */
                fgRemoveRestOfBlock = false;
                break;

            case GT_COMMA:

                /* Special case: trees that don't produce a value */
                if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
                    fgIsThrow(op2))
                {
                    typ = tree->gtType = TYP_VOID;
                }

                // If we are in the Valuenum CSE phase then don't morph away anything as these
                // nodes may have CSE defs/uses in them.
                //
                if (!optValnumCSE_phase)
                {
                    // Extract the side effects from the left side of the comma.  Since they don't "go" anywhere, this
                    // is all we need.

                    GenTreePtr op1SideEffects = nullptr;
                    // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
                    // hoisted expressions in loops.
                    gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
                    if (op1SideEffects)
                    {
                        // Replace the left hand side with the side effect list.
                        tree->gtOp.gtOp1 = op1SideEffects;
                        tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
                    }
                    else
                    {
                        /* The left operand is worthless, throw it away */
                        if (lvaLocalVarRefCounted)
                        {
                            lvaRecursiveDecRefCounts(op1);
                        }
                        op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
                        DEBUG_DESTROY_NODE(tree);
                        DEBUG_DESTROY_NODE(op1);
                        return op2;
                    }

                    /* If the right operand is just a void nop node, throw it away */
                    if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
                    {
                        op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
                        DEBUG_DESTROY_NODE(tree);
                        DEBUG_DESTROY_NODE(op2);
                        return op1;
                    }
                }
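
                // For example, "COMMA(x + 1, call())" becomes just "call()" since the
                // left operand has no side effects, and "COMMA(call(), NOP)" with a
                // void-typed left operand becomes just "call()".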

                break;

            case GT_JTRUE:

                /* Special case if fgRemoveRestOfBlock is set to true */
                if (fgRemoveRestOfBlock)
                {
                    if (fgIsCommaThrow(op1, true))
                    {
                        GenTreePtr throwNode = op1->gtOp.gtOp1;
                        noway_assert(throwNode->gtType == TYP_VOID);

                        return throwNode;
                    }

                    noway_assert(op1->OperKind() & GTK_RELOP);
                    noway_assert(op1->gtFlags & GTF_EXCEPT);

                    // We need to keep op1 for the side-effects. Hang it off
                    // a GT_COMMA node

                    tree->ChangeOper(GT_COMMA);
                    tree->gtOp.gtOp2 = op2 = gtNewNothingNode();

                    // Additionally since we're eliminating the JTRUE
                    // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
                    // So we change it into a GT_COMMA as well.
                    op1->ChangeOper(GT_COMMA);
                    op1->gtType = op1->gtOp.gtOp1->gtType;

                    return tree;
                }
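
                // For example, an unreachable "JTRUE(LT(a, b))" becomes
                // "COMMA(COMMA(a, b), NOP)": the branch disappears but any side effects
                // of the operands are kept.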

            default:
                break;
        }

        noway_assert(oper == tree->gtOper);

        // If we are in the Valuenum CSE phase then don't morph away anything as these
        // nodes may have CSE defs/uses in them.
        //
        if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
        {
            /* Check for op1 as a GT_COMMA with an unconditional throw node */
            if (op1 && fgIsCommaThrow(op1, true))
            {
                if ((op1->gtFlags & GTF_COLON_COND) == 0)
                {
                    /* We can safely throw out the rest of the statements */
                    fgRemoveRestOfBlock = true;
                }

                GenTreePtr throwNode = op1->gtOp.gtOp1;
                noway_assert(throwNode->gtType == TYP_VOID);

                if (oper == GT_COMMA)
                {
                    /* Both tree and op1 are GT_COMMA nodes */
                    /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
                    tree->gtOp.gtOp1 = throwNode;
                    return tree;
                }
                else if (oper != GT_NOP)
                {
                    if (genActualType(typ) == genActualType(op1->gtType))
                    {
                        /* The types match, so return the comma throw node as the new tree */
                        return op1;
                    }
                    else
                    {
                        if (typ == TYP_VOID)
                        {
                            // Return the throw node
                            return throwNode;
                        }
                        else
                        {
                            GenTreePtr commaOp2 = op1->gtOp.gtOp2;

                            // need type of oper to be same as tree
                            if (typ == TYP_LONG)
                            {
                                commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                                commaOp2->gtIntConCommon.SetLngValue(0);
                                /* Change the types of oper and commaOp2 to TYP_LONG */
                                op1->gtType = commaOp2->gtType = TYP_LONG;
                            }
                            else if (varTypeIsFloating(typ))
                            {
                                commaOp2->ChangeOperConst(GT_CNS_DBL);
                                commaOp2->gtDblCon.gtDconVal = 0.0;
                                /* Change the types of oper and commaOp2 to TYP_DOUBLE */
                                op1->gtType = commaOp2->gtType = TYP_DOUBLE;
                            }
                            else
                            {
                                commaOp2->ChangeOperConst(GT_CNS_INT);
                                commaOp2->gtIntConCommon.SetIconValue(0);
                                /* Change the types of oper and commaOp2 to TYP_INT */
                                op1->gtType = commaOp2->gtType = TYP_INT;
                            }

                            /* Return the GT_COMMA node as the new tree */
                            return op1;
                        }
                    }
                }
            }

            /* Check for op2 as a GT_COMMA with an unconditional throw */

            if (op2 && fgIsCommaThrow(op2, true))
            {
                if ((op2->gtFlags & GTF_COLON_COND) == 0)
                {
                    /* We can safely throw out the rest of the statements */
                    fgRemoveRestOfBlock = true;
                }

                // If op1 has no side-effects
                if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
                {
                    // If tree is an asg node
                    if (tree->OperIsAssignment())
                    {
                        /* Return the throw node as the new tree */
                        return op2->gtOp.gtOp1;
                    }

                    if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
                    {
                        /* Return the throw node as the new tree */
                        return op2->gtOp.gtOp1;
                    }

                    // If tree is a comma node
                    if (tree->OperGet() == GT_COMMA)
                    {
                        /* Return the throw node as the new tree */
                        return op2->gtOp.gtOp1;
                    }

                    /* for the shift nodes the type of op2 can differ from the tree type */
                    if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
                    {
                        noway_assert(GenTree::OperIsShiftOrRotate(oper));

                        GenTreePtr commaOp2 = op2->gtOp.gtOp2;

                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);

                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        op2->gtType = commaOp2->gtType = TYP_LONG;
                    }

                    if ((genActualType(typ) == TYP_INT) &&
                        (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
                    {
                        // An example case is comparison (say GT_GT) of two longs or floating point values.

                        GenTreePtr commaOp2 = op2->gtOp.gtOp2;

                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        op2->gtType = commaOp2->gtType = TYP_INT;
                    }

                    if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
                    {
                        noway_assert(tree->OperGet() == GT_ADD);

                        GenTreePtr commaOp2 = op2->gtOp.gtOp2;

                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_BYREF */
                        op2->gtType = commaOp2->gtType = TYP_BYREF;
                    }

                    /* types should now match */
                    noway_assert((genActualType(typ) == genActualType(op2->gtType)));

                    /* Return the GT_COMMA node as the new tree */
                    return op2;
                }
            }
        }

        /*-------------------------------------------------------------------------
         * Optional morphing is done if tree transformations are permitted
         */

        if ((opts.compFlags & CLFLG_TREETRANS) == 0)
        {
            return tree;
        }

        tree = fgMorphSmpOpOptional(tree->AsOp());

    } // extra scope for gcc workaround
    return tree;
}
13733 #ifdef _PREFAST_
13734 #pragma warning(pop)
13735 #endif
13736
13737 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13738 {
13739     genTreeOps oper = tree->gtOper;
13740     GenTree*   op1  = tree->gtOp1;
13741     GenTree*   op2  = tree->gtOp2;
13742     var_types  typ  = tree->TypeGet();
13743
13744     if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
13745     {
13746         /* Swap the operands so that the more expensive one is 'op1' */
13747
13748         if (tree->gtFlags & GTF_REVERSE_OPS)
13749         {
13750             tree->gtOp1 = op2;
13751             tree->gtOp2 = op1;
13752
13753             op2 = op1;
13754             op1 = tree->gtOp1;
13755
13756             tree->gtFlags &= ~GTF_REVERSE_OPS;
13757         }
13758
13759         if (oper == op2->gtOper)
13760         {
13761             /*  Reorder nested operators at the same precedence level to be
13762                 left-recursive. For example, change "(a+(b+c))" to the
13763                 equivalent expression "((a+b)+c)".
13764              */
13765
13766             /* Things are handled differently for floating-point operators */
13767
13768             if (!varTypeIsFloating(tree->TypeGet()))
13769             {
13770                 fgMoveOpsLeft(tree);
13771                 op1 = tree->gtOp1;
13772                 op2 = tree->gtOp2;
13773             }
13774         }
13775     }
13776
13777 #if REARRANGE_ADDS
13778
13779     /* Change "((x+icon)+y)" to "((x+y)+icon)"
13780        Don't reorder floating-point operations */
13781
13782     if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13783         varTypeIsIntegralOrI(typ))
13784     {
13785         GenTreePtr ad2 = op1->gtOp.gtOp2;
13786
13787         if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
13788         {
13789             // This takes
13790             //       + (tree)
13791             //      / \
13792             //     /   \
13793             //    /     \
13794             //   + (op1) op2
13795             //  / \
13796             //     \
13797             //     ad2
13798             //
13799             // And it swaps ad2 and op2.  If (op2) is varTypeIsGC, then this implies that (tree) is
13800             // varTypeIsGC.  If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
13801             // (op1) with a child that is varTypeIsGC.  If we encounter that situation, make (op1) the same
13802             // type as (tree).
13803             //
13804             // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
13805             // necessary
13806
13807             if (varTypeIsGC(op2->TypeGet()))
13808             {
13809                 noway_assert(varTypeIsGC(typ));
13810                 op1->gtType = typ;
13811             }
13812             tree->gtOp2 = ad2;
13813
13814             op1->gtOp.gtOp2 = op2;
13815             op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13816
13817             op2 = tree->gtOp2;
13818         }
13819     }
13820
13821 #endif
13822
13823     /*-------------------------------------------------------------------------
13824      * Perform optional oper-specific postorder morphing
13825      */
13826
13827     switch (oper)
13828     {
13829         genTreeOps cmop;
13830         bool       dstIsSafeLclVar;
13831
13832         case GT_ASG:
13833             /* We'll convert "a = a <op> x" into "a <op>= x"                     */
13834             /*     and also  "a = x <op> a" into "a <op>= x" for communative ops */
13835             CLANG_FORMAT_COMMENT_ANCHOR;
13836
13837             if (typ == TYP_LONG)
13838             {
13839                 break;
13840             }
13841
13842             if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13843             {
13844                 if (tree->OperIsCopyBlkOp())
13845                 {
13846                     return fgMorphCopyBlock(tree);
13847                 }
13848                 else
13849                 {
13850                     return fgMorphInitBlock(tree);
13851                 }
13852             }
13853
13854             /* Make sure we're allowed to do this */
13855
13856             if (optValnumCSE_phase)
13857             {
13858                 // It is not safe to reorder/delete CSE's
13859                 break;
13860             }
13861
13862             /* Are we assigning to a GT_LCL_VAR ? */
13863
13864             dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
13865
13866             /* If we have a GT_LCL_VAR, then is the address taken? */
13867             if (dstIsSafeLclVar)
13868             {
13869                 unsigned   lclNum = op1->gtLclVarCommon.gtLclNum;
13870                 LclVarDsc* varDsc = lvaTable + lclNum;
13871
13872                 noway_assert(lclNum < lvaCount);
13873
13874                 /* Is the address taken? */
13875                 if (varDsc->lvAddrExposed)
13876                 {
13877                     dstIsSafeLclVar = false;
13878                 }
13879                 else if (op2->gtFlags & GTF_ASG)
13880                 {
13881                     break;
13882                 }
13883             }
13884
13885             if (!dstIsSafeLclVar)
13886             {
13887                 if (op2->gtFlags & GTF_ASG)
13888                 {
13889                     break;
13890                 }
13891
13892                 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13893                 {
13894                     break;
13895                 }
13896             }
13897
13898             /* Special case: a cast that can be thrown away */
13899
13900             if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13901             {
13902                 var_types srct;
13903                 var_types cast;
13904                 var_types dstt;
13905
13906                 srct = op2->gtCast.CastOp()->TypeGet();
13907                 cast = (var_types)op2->CastToType();
13908                 dstt = op1->TypeGet();
13909
13910                 /* Make sure these are all ints and precision is not lost */
13911
13912                 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
13913                 {
13914                     op2 = tree->gtOp2 = op2->gtCast.CastOp();
13915                 }
13916             }
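
                  // e.g. (illustrative names): in "*pByte = (byte)intVal", the store through
                  // the TYP_BYTE GT_IND truncates anyway, so the narrowing cast on the RHS is
                  // redundant; "cast >= dstt" ensures it never narrows more than the store does.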
13917
13918             /* Make sure we have the operator range right */
13919
13920             static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
13921             static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
13922             static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
13923             static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
13924             static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
13925             static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");
13926
13927             static_assert(GT_OR == GT_ADD + 7, "bad oper value");
13928             static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
13929             static_assert(GT_AND == GT_ADD + 9, "bad oper value");
13930
13931             static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
13932             static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
13933             static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");
13934
13935             /* Check for a suitable operator on the RHS */
13936
13937             cmop = op2->OperGet();
13938
13939             switch (cmop)
13940             {
13941                 case GT_NEG:
13942                     // GT_CHS only supported for integer types
13943                     if (varTypeIsFloating(tree->TypeGet()))
13944                     {
13945                         break;
13946                     }
13947
13948                     goto ASG_OP;
13949
13950                 case GT_MUL:
13951                     // GT_ASG_MUL only supported for floating point types
13952                     if (!varTypeIsFloating(tree->TypeGet()))
13953                     {
13954                         break;
13955                     }
13956
13957                     __fallthrough;
13958
13959                 case GT_ADD:
13960                 case GT_SUB:
13961                     if (op2->gtOverflow())
13962                     {
13963                         /* Disable folding into "<op>=" if the result can be
13964                            visible to anyone, as <op> may throw an exception and
13965                            the assignment should not proceed.
13966                            We are safe with an assignment to a local variable.
13967                          */
13968                         if (ehBlockHasExnFlowDsc(compCurBB))
13969                         {
13970                             break;
13971                         }
13972                         if (!dstIsSafeLclVar)
13973                         {
13974                             break;
13975                         }
13976                     }
13977 #ifndef _TARGET_AMD64_
13978                     // This is hard for byte-operations as we need to make
13979                     // sure both operands are in RBM_BYTE_REGS.
13980                     if (varTypeIsByte(op2->TypeGet()))
13981                         break;
13982 #endif // _TARGET_AMD64_
13983                     goto ASG_OP;
13984
13985                 case GT_DIV:
13986                 case GT_UDIV:
13987                     // GT_ASG_DIV only supported for floating point types
13988                     if (!varTypeIsFloating(tree->TypeGet()))
13989                     {
13990                         break;
13991                     }
13992
                          __fallthrough;

13993                 case GT_LSH:
13994                 case GT_RSH:
13995                 case GT_RSZ:
13996                 case GT_OR:
13997                 case GT_XOR:
13998                 case GT_AND:
13999                 ASG_OP:
14000                 {
14001                     bool bReverse       = false;
14002                     bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
14003                     if (bAsgOpFoldable)
14004                     {
14005                         if (bReverse)
14006                         {
14007                             // We will transform this from "a = x <op> a" to "a <op>= x"
14008                             // so we can now destroy the duplicate "a"
14009                             DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
14010                             op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
14011                         }
14012
14013                         /* Special case: "x |= -1" and "x &= 0" */
14014                         if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
14015                             ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
14016                         {
14017                             /* Simply change to an assignment */
14018                             tree->gtOp2 = op2->gtOp.gtOp2;
14019                             break;
14020                         }
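
                              // e.g. "x = x & 0" becomes "x = 0" and "x = x | -1" becomes "x = -1";
                              // the constant operand simply becomes the RHS of a plain assignment.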
14021
14022                         if (cmop == GT_NEG)
14023                         {
14024                             /* This is "x = -x;", use the flipsign operator */
14025
14026                             tree->ChangeOper(GT_CHS);
14027
14028                             if (op1->gtOper == GT_LCL_VAR)
14029                             {
14030                                 op1->gtFlags |= GTF_VAR_USEASG;
14031                             }
14032
14033                             tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
14034
14035                             break;
14036                         }
14037
14038                         if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
14039                         {
14040                             // Changing from x = x op y to x op= y when x is a small integer type
14041                             // makes the op size smaller (originally the op size was 32 bits, after
14042                             // sign or zero extension of x, and there is an implicit truncation in the
14043                             // assignment).
14044                             // This is ok in most cases because the upper bits were
14045                             // lost when assigning the op result to a small type var,
14046                             // but it may not be ok for the right shift operation where the higher bits
14047                             // could be shifted into the lower bits and preserved.
14048                             // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
14049                             // (sbyte)x >>signed y), as does unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
14050                             // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
14051                             // wrong result:
14053                             // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
14054                             // but  (ubyte)0xf0 >>signed 4 == 0xff, which is incorrect.
14055                             // The result becomes correct if we use >>unsigned instead of >>signed.
14056                             noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
14057                             cmop = GT_RSZ;
14058                         }
14059
14060                         /* Replace with an assignment operator */
14061                         noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
14062                         noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
14063                         noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
14064                         noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
14065                         noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
14066                         noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
14067                         noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
14068                         noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
14069
14070                         tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
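                              // e.g. cmop == GT_XOR yields GT_XOR - GT_ADD + GT_ASG_ADD == GT_ASG_XOR,
                              // relying on the parallel enum layout asserted above.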
14071                         tree->gtOp2 = op2->gtOp.gtOp2;
14072
14073                         /* Propagate GTF_OVERFLOW */
14074
14075                         if (op2->gtOverflowEx())
14076                         {
14077                             tree->gtType = op2->gtType;
14078                             tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
14079                         }
14080
14081 #if FEATURE_SET_FLAGS
14082
14083                         /* Propagate GTF_SET_FLAGS */
14084                         if (op2->gtSetFlags())
14085                         {
14086                             tree->gtRequestSetFlags();
14087                         }
14088
14089 #endif // FEATURE_SET_FLAGS
14090
14091                         DEBUG_DESTROY_NODE(op2);
14092                         op2 = tree->gtOp2;
14093
14094                         /* The target is used as well as being defined */
14095                         if (op1->OperIsLocal())
14096                         {
14097                             op1->gtFlags &= ~GTF_VAR_USEDEF;
14098                             op1->gtFlags |= GTF_VAR_USEASG;
14099                         }
14100
14101 #if CPU_HAS_FP_SUPPORT
14102                         /* Check for the special case "x += y * x;" */
14103
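                              // For example, with x = 2.0 and y = 3.0:
                              //     x += x * y  gives 2 + 2*3 = 8,  as does  x *= (y + 1):  2 * 4 = 8
                              //     x -= x * y  gives 2 - 2*3 = -4, as does  x *= (1 - y):  2 * -2 = -4
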
14104                         // Only "+=" and "-=" can absorb a multiply this way (GT_ASG_MUL itself is FP-only, checked below)
14105                         if (cmop != GT_ADD && cmop != GT_SUB)
14106                         {
14107                             break;
14108                         }
14109
14110                         if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
14111                         {
14112                             if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14113                             {
14114                                 /* Change "x += x * y" into "x *= (y + 1)" */
14115
14116                                 op2 = op2->gtOp.gtOp2;
14117                             }
14118                             else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
14119                             {
14120                                 /* Change "x += y * x" into "x *= (y + 1)" */
14121
14122                                 op2 = op2->gtOp.gtOp1;
14123                             }
14124                             else
14125                             {
14126                                 break;
14127                             }
14128
14129                             op1 = gtNewDconNode(1.0);
14130
14131                             /* Now make the "*=" node */
14132
14133                             if (cmop == GT_ADD)
14134                             {
14135                                 /* Change "x += x * y" into "x *= (y + 1)" */
14136
14137                                 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
14138                             }
14139                             else
14140                             {
14141                                 /* Change "x -= x * y" into "x *= (1 - y)" */
14142
14143                                 noway_assert(cmop == GT_SUB);
14144                                 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
14145                             }
14146                             tree->ChangeOper(GT_ASG_MUL);
14147                         }
14148 #endif // CPU_HAS_FP_SUPPORT
14149                     }
14150                 }
14151
14152                 break;
14153
14154                 case GT_NOT:
14155
14156                     /* Is the destination identical to the first RHS sub-operand? */
14157
14158                     if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14159                     {
14160                         /* This is "x = ~x" which is the same as "x ^= -1"
14161                          * Transform the node into a GT_ASG_XOR */
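
                              // e.g. for a 32-bit x, ~x == x ^ 0xFFFFFFFF, so rewriting the RHS as
                              // "x ^ -1" lets the ASG_OP handling above produce the GT_ASG_XOR form.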
14162
14163                         noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
14164
14165                         op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
14166
14167                         cmop = GT_XOR;
14168                         goto ASG_OP;
14169                     }
14170
14171                     break;
14172                 default:
14173                     break;
14174             }
14175
14176             break;
14177
14178         case GT_MUL:
14179
14180             /* Check for the case "(val + icon) * icon" */
14181
14182             if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14183             {
14184                 GenTreePtr add = op1->gtOp.gtOp2;
14185
14186                 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14187                 {
14188                     if (tree->gtOverflow() || op1->gtOverflow())
14189                     {
14190                         break;
14191                     }
14192
14193                     ssize_t imul = op2->gtIntCon.gtIconVal;
14194                     ssize_t iadd = add->gtIntCon.gtIconVal;
14195
14196                     /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
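
                          // e.g. "(val + 3) * 4" becomes "(val * 4) + 12"; GetScaleIndexMul() above
                          // limits imul to a constant usable as an addressing-mode scale multiplier.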
14197
14198                     oper = GT_ADD;
14199                     tree->ChangeOper(oper);
14200
14201                     op2->gtIntCon.gtIconVal = iadd * imul;
14202
14203                     op1->ChangeOper(GT_MUL);
14204
14205                     add->gtIntCon.gtIconVal = imul;
14206 #ifdef _TARGET_64BIT_
14207                     if (add->gtType == TYP_INT)
14208                     {
14209                         // we need to properly re-sign-extend or truncate after multiplying two int constants above
14210                         add->AsIntCon()->TruncateOrSignExtend32();
14211                     }
14212 #endif //_TARGET_64BIT_
14213                 }
14214             }
14215
14216             break;
14217
14218         case GT_DIV:
14219
14220             /* For "val / 1", just return "val" */
14221
14222             if (op2->IsIntegralConst(1))
14223             {
14224                 DEBUG_DESTROY_NODE(tree);
14225                 return op1;
14226             }
14227
14228             break;
14229
14230         case GT_LSH:
14231
14232             /* Check for the case "(val + icon) << icon" */
14233
14234             if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
14235             {
14236                 GenTreePtr cns = op1->gtOp.gtOp2;
14237
14238                 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14239                 {
14240                     ssize_t ishf = op2->gtIntConCommon.IconValue();
14241                     ssize_t iadd = cns->gtIntConCommon.IconValue();
14242
14243                     // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14244
14245                     /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
14246
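                          // e.g. "(val + 3) << 2" becomes "(val << 2) + 12"
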
14247                     tree->ChangeOper(GT_ADD);
14248                     ssize_t result = iadd << ishf;
14249                     op2->gtIntConCommon.SetIconValue(result);
14250 #ifdef _TARGET_64BIT_
14251                     if (op1->gtType == TYP_INT)
14252                     {
14253                         op2->AsIntCon()->TruncateOrSignExtend32();
14254                     }
14255 #endif // _TARGET_64BIT_
14256
14257                     // we are reusing the shift amount node here, but the type we want is that of the shift result
14258                     op2->gtType = op1->gtType;
14259
14260                     if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14261                         cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14262                     {
14263                         assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14264                         op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14265                     }
14266
14267                     op1->ChangeOper(GT_LSH);
14268
14269                     cns->gtIntConCommon.SetIconValue(ishf);
14270                 }
14271             }
14272
14273             break;
14274
14275         case GT_XOR:
14276
14277             if (!optValnumCSE_phase)
14278             {
14279                 /* "x ^ -1" is "~x" */
14280
14281                 if (op2->IsIntegralConst(-1))
14282                 {
14283                     tree->ChangeOper(GT_NOT);
14284                     tree->gtOp2 = nullptr;
14285                     DEBUG_DESTROY_NODE(op2);
14286                 }
14287                 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14288                 {
14289                     /* "binaryVal ^ 1" is "!binaryVal" */
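                          // e.g. "(a < b) ^ 1" becomes "(a >= b)" by reversing the relop's condition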
14290                     gtReverseCond(op1);
14291                     DEBUG_DESTROY_NODE(op2);
14292                     DEBUG_DESTROY_NODE(tree);
14293                     return op1;
14294                 }
14295             }
14296
14297             break;
14298
14299         case GT_INIT_VAL:
14300             // Initialization values for initBlk have special semantics - their lower
14301             // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14302             // which enables them to get a VNForZero, and be propagated.
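                  // e.g. an init value of 0x3F fills every byte, so a 4-byte block becomes
                  // 0x3F3F3F3F, while a bare 0 replicates to 0 and needs no wrapping.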
14303             if (op1->IsIntegralConst(0))
14304             {
14305                 return op1;
14306             }
14307             break;
14308
14309         default:
14310             break;
14311     }
14312     return tree;
14313 }
14314
14315 //------------------------------------------------------------------------
14316 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14317 // (see ECMA III 3.55 and III.3.56).
14318 //
14319 // Arguments:
14320 //    tree - The GT_MOD/GT_UMOD tree to morph
14321 //
14322 // Returns:
14323 //    The morphed tree
14324 //
14325 // Notes:
14326 //    For ARM64 we don't have a remainder instruction so this transform is
14327 //    always done. For XARCH this transform is done if we know that magic
14328 //    division will be used, in that case this transform allows CSE to
14329 //    eliminate the redundant div from code like "x = a / 3; y = a % 3;".
14330 //
14331 //    This method will produce the above expression if 'a' and 'b' are
14332 //    leaf nodes; otherwise, if either of them is not a leaf, it will spill
14333 //    its value into a temporary variable, for example:
14334 //    (x * 2 - 1) % (y + 1) ->  t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
14335 //
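      //    As a quick sanity check of the identity, take a = 7, b = 3 (signed case):
      //        a - (a / b) * b  ==  7 - (7 / 3) * 3  ==  7 - 6  ==  1  ==  7 % 3
      //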
14336 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14337 {
14338     if (tree->OperGet() == GT_MOD)
14339     {
14340         tree->SetOper(GT_DIV);
14341     }
14342     else if (tree->OperGet() == GT_UMOD)
14343     {
14344         tree->SetOper(GT_UDIV);
14345     }
14346     else
14347     {
14348         noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14349     }
14350
14351     var_types type        = tree->gtType;
14352     GenTree*  denominator = tree->gtOp2;
14353     GenTree*  numerator   = tree->gtOp1;
14354
14355     if (!numerator->OperIsLeaf())
14356     {
14357         numerator = fgMakeMultiUse(&tree->gtOp1);
14358     }
14359     else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
14360     {
14361         // Morphing introduces new lclVar references. Increase ref counts
14362         lvaIncRefCnts(numerator);
14363     }
14364
14365     if (!denominator->OperIsLeaf())
14366     {
14367         denominator = fgMakeMultiUse(&tree->gtOp2);
14368     }
14369     else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
14370     {
14371         // Morphing introduces new lclVar references. Increase ref counts
14372         lvaIncRefCnts(denominator);
14373     }
14374
14375     // The numerator and denominator may have been assigned to temps, in which case
14376     // their defining assignments are in the current tree. Therefore, we need to
14377     // set the execution order accordingly on the nodes we create.
14378     // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14379     // be set to be evaluated in reverse order.
14380     //
14381     GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14382     assert(!mul->IsReverseOp());
14383     GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14384     sub->gtFlags |= GTF_REVERSE_OPS;
14385
14386 #ifdef DEBUG
14387     sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14388 #endif
14389
14390     return sub;
14391 }
14392
14393 //------------------------------------------------------------------------------
14394 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14395 //
14396 //
14397 // Arguments:
14398 //    oper  - Operation to check
14399 //
14400 // Return Value:
14401 //    True if the operation can be a root of a bitwise rotation tree; false otherwise.
14402
14403 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14404 {
14405     return (oper == GT_OR) || (oper == GT_XOR);
14406 }
14407
14408 //------------------------------------------------------------------------------
14409 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14410 //                                      an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14411 //
14412 // Arguments:
14413 //    tree  - tree to check for a rotation pattern
14414 //
14415 // Return Value:
14416 //    An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14417 //
14418 // Assumption:
14419 //    The input is a GT_OR or a GT_XOR tree.
14420
14421 GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
14422 {
14423 #ifndef LEGACY_BACKEND
14424     //
14425     // Check for a rotation pattern, e.g.,
14426     //
14427     //                         OR                      ROL
14428     //                      /      \                   / \
14429     //                    LSH      RSZ      ->        x   y
14430     //                    / \      / \
14431     //                   x  AND   x  AND
14432     //                      / \      / \
14433     //                     y  31   ADD  31
14434     //                             / \
14435     //                            NEG 32
14436     //                             |
14437     //                             y
14438     // The patterns recognized:
14439     // (x << (y & M)) op (x >>> ((-y + N) & M))
14440     // (x >>> ((-y + N) & M)) op (x << (y & M))
14441     //
14442     // (x << y) op (x >>> (-y + N))
14443     // (x >>> (-y + N)) op (x << y)
14444     //
14445     // (x >>> (y & M)) op (x << ((-y + N) & M))
14446     // (x << ((-y + N) & M)) op (x >>> (y & M))
14447     //
14448     // (x >>> y) op (x << (-y + N))
14449     // (x << (-y + N)) op (x >>> y)
14450     //
14451     // (x << c1) op (x >>> c2)
14452     // (x >>> c1) op (x << c2)
14453     //
14454     // where
14455     // c1 and c2 are const
14456     // c1 + c2 == bitsize(x)
14457     // N == bitsize(x)
14458     // M is const
14459     // M & (N - 1) == N - 1
14460     // op is either | or ^
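      //
      // For illustration, the canonical C# rotate-left idiom for a 32-bit value
      //     (x << y) | (x >> (32 - y))       // with uint x, so >> is a logical shift
      // is an instance of "(x << y) op (x >>> (-y + N))" above (32 - y == -y + 32, N == 32)
      // and morphs into ROL(x, y).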
14461
14462     if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14463     {
14464         // We can't do anything if the tree has assignments, calls, or volatile
14465         // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14466         // thrown by the original tree will be thrown by the transformed tree as well.
14467         return tree;
14468     }
14469
14470     genTreeOps oper = tree->OperGet();
14471     assert(fgOperIsBitwiseRotationRoot(oper));
14472
14473     // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14474     GenTreePtr op1            = tree->gtGetOp1();
14475     GenTreePtr op2            = tree->gtGetOp2();
14476     GenTreePtr leftShiftTree  = nullptr;
14477     GenTreePtr rightShiftTree = nullptr;
14478     if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14479     {
14480         leftShiftTree  = op1;
14481         rightShiftTree = op2;
14482     }
14483     else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14484     {
14485         leftShiftTree  = op2;
14486         rightShiftTree = op1;
14487     }
14488     else
14489     {
14490         return tree;
14491     }
14492
14493     // Check if the trees representing the value to shift are identical.
14494     // We already checked that there are no side effects above.
14495     if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14496     {
14497         GenTreePtr rotatedValue           = leftShiftTree->gtGetOp1();
14498         var_types  rotatedValueActualType = genActualType(rotatedValue->gtType);
14499         ssize_t    rotatedValueBitSize    = genTypeSize(rotatedValueActualType) * 8;
14500         noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14501         GenTreePtr leftShiftIndex  = leftShiftTree->gtGetOp2();
14502         GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
14503
14504         // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
14505         // shouldn't be masked for the transformation to be valid. If additional
14506         // higher bits are not masked, the transformation is still valid since the result
14507         // of MSIL shift instructions is unspecified if the shift amount is greater than
14508         // or equal to the width of the value being shifted.
14509         ssize_t minimalMask    = rotatedValueBitSize - 1;
14510         ssize_t leftShiftMask  = -1;
14511         ssize_t rightShiftMask = -1;
14512
14513         if ((leftShiftIndex->OperGet() == GT_AND))
14514         {
14515             if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14516             {
14517                 leftShiftMask  = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14518                 leftShiftIndex = leftShiftIndex->gtGetOp1();
14519             }
14520             else
14521             {
14522                 return tree;
14523             }
14524         }
14525
14526         if ((rightShiftIndex->OperGet() == GT_AND))
14527         {
14528             if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14529             {
14530                 rightShiftMask  = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14531                 rightShiftIndex = rightShiftIndex->gtGetOp1();
14532             }
14533             else
14534             {
14535                 return tree;
14536             }
14537         }
14538
14539         if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14540         {
14541             // The shift index is overmasked, e.g., we have
14542             // something like (x << (y & 15)) or
14543             // (x >>> ((32 - y) & 15)) with 32-bit x.
14544             // The transformation is not valid.
14545             return tree;
14546         }
14547
14548         GenTreePtr shiftIndexWithAdd    = nullptr;
14549         GenTreePtr shiftIndexWithoutAdd = nullptr;
14550         genTreeOps rotateOp             = GT_NONE;
14551         GenTreePtr rotateIndex          = nullptr;
14552
14553         if (leftShiftIndex->OperGet() == GT_ADD)
14554         {
14555             shiftIndexWithAdd    = leftShiftIndex;
14556             shiftIndexWithoutAdd = rightShiftIndex;
14557             rotateOp             = GT_ROR;
14558         }
14559         else if (rightShiftIndex->OperGet() == GT_ADD)
14560         {
14561             shiftIndexWithAdd    = rightShiftIndex;
14562             shiftIndexWithoutAdd = leftShiftIndex;
14563             rotateOp             = GT_ROL;
14564         }
14565
14566         if (shiftIndexWithAdd != nullptr)
14567         {
14568             if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14569             {
14570                 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14571                 {
14572                     if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14573                     {
14574                         if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14575                         {
14576                             // We found one of these patterns:
14577                             // (x << (y & M)) | (x >>> ((-y + N) & M))
14578                             // (x << y) | (x >>> (-y + N))
14579                             // (x >>> (y & M)) | (x << ((-y + N) & M))
14580                             // (x >>> y) | (x << (-y + N))
14581                             // where N == bitsize(x), M is const, and
14582                             // M & (N - 1) == N - 1
14583                             CLANG_FORMAT_COMMENT_ANCHOR;
14584
14585 #ifndef _TARGET_64BIT_
14586                             if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14587                             {
14588                                 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14589                                 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14590                                 // to add helpers for GT_ROL and GT_ROR.
14591                                 return tree;
14592                             }
14593 #endif
14594
14595                             rotateIndex = shiftIndexWithoutAdd;
14596                         }
14597                     }
14598                 }
14599             }
14600         }
14601         else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
14602         {
14603             if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14604             {
14605                 // We found this pattern:
14606                 // (x << c1) | (x >>> c2)
14607                 // where c1 and c2 are const and c1 + c2 == bitsize(x)
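                      // e.g. with 32-bit x, "(x << 24) | (x >>> 8)" becomes "ROL(x, 24)"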
14608                 rotateOp    = GT_ROL;
14609                 rotateIndex = leftShiftIndex;
14610             }
14611         }
14612
14613         if (rotateIndex != nullptr)
14614         {
14615             noway_assert(GenTree::OperIsRotate(rotateOp));
14616
14617             unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14618
14619             // We can use the same tree only during global morph; reusing the tree in a later morph
14620             // may invalidate value numbers.
14621             if (fgGlobalMorph)
14622             {
14623                 tree->gtOp.gtOp1 = rotatedValue;
14624                 tree->gtOp.gtOp2 = rotateIndex;
14625                 tree->ChangeOper(rotateOp);
14626
14627                 unsigned childFlags = 0;
14628                 for (GenTree* op : tree->Operands())
14629                 {
14630                     childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14631                 }
14632
14633                 // The parent's flags should be a superset of its operands' flags
14634                 noway_assert((inputTreeEffects & childFlags) == childFlags);
14635             }
14636             else
14637             {
14638                 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14639                 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14640             }
14641
14642             return tree;
14643         }
14644     }
14645 #endif // LEGACY_BACKEND
14646     return tree;
14647 }
14648
14649 #if !CPU_HAS_FP_SUPPORT
14650 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
14651 {
14652
14653     genTreeOps oper = tree->OperGet();
14654     var_types  typ  = tree->TypeGet();
14655     GenTreePtr op1  = tree->gtOp.gtOp1;
14656     GenTreePtr op2  = tree->gtGetOp2IfPresent();
14657
14658     /*
14659         We have to use helper calls for all FP operations:
14660
14661             FP operators that operate on FP values
14662             casts to and from FP
14663             comparisons of FP values
14664      */
14665
14666     if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
14667     {
14668         int        helper;
14669         GenTreePtr args;
14670         size_t     argc = genTypeStSz(typ);
14671
14672         /* Not all FP operations need helper calls */
14673
14674         switch (oper)
14675         {
14676             case GT_ASG:
14677             case GT_IND:
14678             case GT_LIST:
14679             case GT_ADDR:
14680             case GT_COMMA:
14681                 return tree;
14682         }
14683
14684 #ifdef DEBUG
14685
14686         /* If the result isn't FP, it better be a compare or cast */
14687
14688         if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14689             gtDispTree(tree);
14690
14691         noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14692 #endif
14693
14694         /* Keep track of how many arguments we're passing */
14695
14696         fgPtrArgCntCur += argc;
14697
14698         /* Is this a binary operator? */
14699
14700         if (op2)
14701         {
14702             /* Add the second operand to the argument count */
14703
14704             fgPtrArgCntCur += argc;
14705             argc *= 2;
14706
14707             /* What kind of an operator do we have? */
14708
14709             switch (oper)
14710             {
14711                 case GT_ADD:
14712                     helper = CPX_R4_ADD;
14713                     break;
14714                 case GT_SUB:
14715                     helper = CPX_R4_SUB;
14716                     break;
14717                 case GT_MUL:
14718                     helper = CPX_R4_MUL;
14719                     break;
14720                 case GT_DIV:
14721                     helper = CPX_R4_DIV;
14722                     break;
14723                 // case GT_MOD: helper = CPX_R4_REM; break;
14724
14725                 case GT_EQ:
14726                     helper = CPX_R4_EQ;
14727                     break;
14728                 case GT_NE:
14729                     helper = CPX_R4_NE;
14730                     break;
14731                 case GT_LT:
14732                     helper = CPX_R4_LT;
14733                     break;
14734                 case GT_LE:
14735                     helper = CPX_R4_LE;
14736                     break;
14737                 case GT_GE:
14738                     helper = CPX_R4_GE;
14739                     break;
14740                 case GT_GT:
14741                     helper = CPX_R4_GT;
14742                     break;
14743
14744                 default:
14745 #ifdef DEBUG
14746                     gtDispTree(tree);
14747 #endif
14748                     noway_assert(!"unexpected FP binary op");
14749                     break;
14750             }
14751
14752             args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14753         }
14754         else
14755         {
14756             switch (oper)
14757             {
14758                 case GT_RETURN:
14759                     return tree;
14760
14761                 case GT_CAST:
14762                     noway_assert(!"FP cast");
14763
14764                 case GT_NEG:
14765                     helper = CPX_R4_NEG;
14766                     break;
14767
14768                 default:
14769 #ifdef DEBUG
14770                     gtDispTree(tree);
14771 #endif
14772                     noway_assert(!"unexpected FP unary op");
14773                     break;
14774             }
14775
14776             args = gtNewArgList(tree->gtOp.gtOp1);
14777         }
14778
14779         /* If we have double result/operands, modify the helper */
14780
14781         if (typ == TYP_DOUBLE)
14782         {
14783             noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
14784             noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
14785             noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
14786             noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
14787             noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
14788
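                  // e.g. a TYP_DOUBLE GT_ADD chose CPX_R4_ADD above; incrementing selects
                  // CPX_R8_ADD thanks to the adjacent helper numbering just asserted.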
14789             helper++;
14790         }
14791         else
14792         {
14793             noway_assert(tree->OperIsCompare());
14794
14795             noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
14796             noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
14797             noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
14798             noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
14799             noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
14800             noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
14801         }
14802
14803         tree = fgMorphIntoHelperCall(tree, helper, args);
14804
14805         if (fgPtrArgCntMax < fgPtrArgCntCur)
14806         {
14807             JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
14808             fgPtrArgCntMax = fgPtrArgCntCur;
14809         }
14810
14811         fgPtrArgCntCur -= argc;
14812         return tree;
14813
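              // NOTE: the "case GT_RETURN" below has no enclosing switch at this nesting
              // level; it appears to be remnant code that is only ever compiled under
              // !CPU_HAS_FP_SUPPORT, which current targets do not use.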
14814         case GT_RETURN:
14815
14816             if (op1)
14817             {
14818
14819                 if (compCurBB == genReturnBB)
14820                 {
14821                     /* This is the 'exitCrit' call at the exit label */
14822
14823                     noway_assert(op1->gtType == TYP_VOID);
14824                     noway_assert(op2 == 0);
14825
14826                     tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14827
14828                     return tree;
14829                 }
14830
14831                 /* This is a (real) return value -- check its type */
14832                 CLANG_FORMAT_COMMENT_ANCHOR;
14833
14834 #ifdef DEBUG
14835                 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14836                 {
14837                     bool allowMismatch = false;
14838
14839                     // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14840                     if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14841                         (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14842                         allowMismatch = true;
14843
14844                     if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14845                         allowMismatch = true;
14846
14847                     if (!allowMismatch)
14848                         NO_WAY("Return type mismatch");
14849                 }
14850 #endif
14851             }
14852             break;
14853     }
14854     return tree;
14855 }
14856 #endif
14857
14858 /*****************************************************************************
14859  *
14860  *  Transform the given tree for code generation and return an equivalent tree.
14861  */
14862
14863 GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
14864 {
14865     noway_assert(tree);
14866     noway_assert(tree->gtOper != GT_STMT);
14867
14868 #ifdef DEBUG
14869     if (verbose)
14870     {
14871         if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14872         {
14873             noway_assert(!"JitBreakMorphTree hit");
14874         }
14875     }
14876 #endif
14877
14878 #ifdef DEBUG
14879     int thisMorphNum = 0;
14880     if (verbose && treesBeforeAfterMorph)
14881     {
14882         thisMorphNum = morphNum++;
14883         printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14884         gtDispTree(tree);
14885     }
14886 #endif
14887
14888     if (fgGlobalMorph)
14889     {
14890         // Apply any rewrites for implicit byref arguments before morphing the
14891         // tree.
14892
14893         if (fgMorphImplicitByRefArgs(tree))
14894         {
14895 #ifdef DEBUG
14896             if (verbose && treesBeforeAfterMorph)
14897             {
14898                 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
14899                 gtDispTree(tree);
14900             }
14901 #endif
14902         }
14903     }
14904
14905 /*-------------------------------------------------------------------------
14906  * fgMorphTree() can potentially replace a tree with another, and the
14907  * caller has to store the return value correctly.
14908  * Turn this on to always make a copy of "tree" here to shake out
14909  * hidden/unupdated references.
14910  */
14911
14912 #ifdef DEBUG
14913
14914     if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14915     {
14916         GenTreePtr copy;
14917
14918 #ifdef SMALL_TREE_NODES
14919         if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14920         {
14921             copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14922         }
14923         else
14924 #endif
14925         {
14926             copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
14927         }
14928
14929         copy->CopyFrom(tree, this);
14930
14931 #if defined(LATE_DISASM)
14932         // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
14933         if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
14934         {
14935             copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
14936             copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
14937         }
14938 #endif
14939
14940         DEBUG_DESTROY_NODE(tree);
14941         tree = copy;
14942     }
14943 #endif // DEBUG
14944
14945     if (fgGlobalMorph)
14946     {
14947         /* Ensure that we haven't morphed this node already */
14948         assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
14949
14950 #if LOCAL_ASSERTION_PROP
14951         /* Before morphing the tree, we try to propagate any active assertions */
14952         if (optLocalAssertionProp)
14953         {
14954             /* Do we have any active assertions? */
14955
14956             if (optAssertionCount > 0)
14957             {
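                      // e.g. with an active assertion "V03 == 7", a use of V03 in the tree can
                      // be rewritten to the constant 7; we re-apply until no rewrite occurs.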
14958                 GenTreePtr newTree = tree;
14959                 while (newTree != nullptr)
14960                 {
14961                     tree = newTree;
14962                     /* newTree is non-Null if we propagated an assertion */
14963                     newTree = optAssertionProp(apFull, tree, nullptr);
14964                 }
14965                 noway_assert(tree != nullptr);
14966             }
14967         }
14968         PREFAST_ASSUME(tree != nullptr);
14969 #endif
14970     }
14971
14972     /* Save the original un-morphed tree for fgMorphTreeDone */
14973
14974     GenTreePtr oldTree = tree;
14975
14976     /* Figure out what kind of a node we have */
14977
14978     unsigned kind = tree->OperKind();
14979
14980     /* Is this a constant node? */
14981
14982     if (kind & GTK_CONST)
14983     {
14984         tree = fgMorphConst(tree);
14985         goto DONE;
14986     }
14987
14988     /* Is this a leaf node? */
14989
14990     if (kind & GTK_LEAF)
14991     {
14992         tree = fgMorphLeaf(tree);
14993         goto DONE;
14994     }
14995
14996     /* Is it a 'simple' unary/binary operator? */
14997
14998     if (kind & GTK_SMPOP)
14999     {
15000         tree = fgMorphSmpOp(tree, mac);
15001         goto DONE;
15002     }
15003
15004     /* See what kind of a special operator we have here */
15005
15006     switch (tree->OperGet())
15007     {
15008         case GT_FIELD:
15009             tree = fgMorphField(tree, mac);
15010             break;
15011
15012         case GT_CALL:
15013             tree = fgMorphCall(tree->AsCall());
15014             break;
15015
15016         case GT_ARR_BOUNDS_CHECK:
15017 #ifdef FEATURE_SIMD
15018         case GT_SIMD_CHK:
15019 #endif // FEATURE_SIMD
15020         {
15021             fgSetRngChkTarget(tree);
15022
15023             GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
15024             bndsChk->gtIndex          = fgMorphTree(bndsChk->gtIndex);
15025             bndsChk->gtArrLen         = fgMorphTree(bndsChk->gtArrLen);
15026             // If the index is a comma(throw, x), just return that.
15027             if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
15028             {
15029                 tree = bndsChk->gtIndex;
15030             }
15031
15032             // Propagate effects flags upwards
15033             bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
15034             bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
15035
15036             // Otherwise, we don't change the tree.
15037         }
15038         break;
15039
15040         case GT_ARR_ELEM:
15041             tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
15042             tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15043
15044             unsigned dim;
15045             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15046             {
15047                 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
15048                 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
15049             }
15050             if (fgGlobalMorph)
15051             {
15052                 fgSetRngChkTarget(tree, false);
15053             }
15054             break;
15055
15056         case GT_ARR_OFFSET:
15057             tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15058             tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15059             tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15060             tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15061             tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15062             tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15063             if (fgGlobalMorph)
15064             {
15065                 fgSetRngChkTarget(tree, false);
15066             }
15067             break;
15068
15069         case GT_CMPXCHG:
15070             tree->gtCmpXchg.gtOpLocation  = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15071             tree->gtCmpXchg.gtOpValue     = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15072             tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15073             break;
15074
15075         case GT_STORE_DYN_BLK:
15076             tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
15077             __fallthrough;
15078         case GT_DYN_BLK:
15079             tree->gtDynBlk.Addr()        = fgMorphTree(tree->gtDynBlk.Addr());
15080             tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15081             break;
15082
15083         default:
15084 #ifdef DEBUG
15085             gtDispTree(tree);
15086 #endif
15087             noway_assert(!"unexpected operator");
15088     }
15089 DONE:
15090
15091     fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
15092
15093     return tree;
15094 }
15095
15096 #if LOCAL_ASSERTION_PROP
15097 //------------------------------------------------------------------------
15098 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
15099 //
15100 // Arguments:
15101 //    lclNum - The varNum of the lclVar for which we're killing assertions.
15102 //    tree   - (DEBUG only) the tree responsible for killing its assertions.
15103 //
15104 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
15105 {
15106     /* All dependent assertions are killed here */
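
      // For example, if assertion #3 involves V02 and lclNum is 2, the loop below
      // finds bit (3 - 1) set in the dependency mask and calls optAssertionRemove(3).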
15107
15108     ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15109
15110     if (killed)
15111     {
15112         AssertionIndex index = optAssertionCount;
15113         while (killed && (index > 0))
15114         {
15115             if (BitVecOps::IsMember(apTraits, killed, index - 1))
15116             {
15117 #ifdef DEBUG
15118                 AssertionDsc* curAssertion = optGetAssertion(index);
15119                 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15120                              ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15121                 if (verbose)
15122                 {
15123                     printf("\nThe assignment ");
15124                     printTreeID(tree);
15125                     printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15126                     optPrintAssertion(curAssertion);
15127                 }
15128 #endif
15129                 // Remove this bit from the killed mask
15130                 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15131
15132                 optAssertionRemove(index);
15133             }
15134
15135             index--;
15136         }
15137
15138         // killed mask should now be zero
15139         noway_assert(BitVecOps::IsEmpty(apTraits, killed));
15140     }
15141 }
15142 //------------------------------------------------------------------------
15143 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
15144 //
15145 // Arguments:
15146 //    lclNum - The varNum of the lclVar for which we're killing assertions.
15147 //    tree   - (DEBUG only) the tree responsible for killing its assertions.
15148 //
15149 // Notes:
15150 //    For structs and struct fields, it will invalidate the children and parent
15151 //    respectively.
15152 //    Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
15153 //
15154 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
15155 {
15156     LclVarDsc* varDsc = &lvaTable[lclNum];
15157
15158     if (varDsc->lvPromoted)
15159     {
15160         noway_assert(varTypeIsStruct(varDsc));
15161
15162         // Kill the field locals.
15163         for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
15164         {
15165             fgKillDependentAssertionsSingle(i DEBUGARG(tree));
15166         }
15167
15168         // Kill the struct local itself.
15169         fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15170     }
15171     else if (varDsc->lvIsStructField)
15172     {
15173         // Kill the field local.
15174         fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15175
15176         // Kill the parent struct.
15177         fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
15178     }
15179     else
15180     {
15181         fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15182     }
15183 }
15184 #endif // LOCAL_ASSERTION_PROP
15185
15186 /*****************************************************************************
15187  *
15188  *  This function is called to complete the morphing of a tree node
15189  *  It should only be called once for each node.
15190  *  If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
15191  *  to enforce the invariant that each node is only morphed once.
15192  *  If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
15193  *  by an equivalent tree.
15194  *
15195  */
15196
15197 void Compiler::fgMorphTreeDone(GenTreePtr tree,
15198                                GenTreePtr oldTree /* == NULL */
15199                                DEBUGARG(int morphNum))
15200 {
15201 #ifdef DEBUG
15202     if (verbose && treesBeforeAfterMorph)
15203     {
15204         printf("\nfgMorphTree (after %d):\n", morphNum);
15205         gtDispTree(tree);
15206         printf(""); // in our logic this causes a flush
15207     }
15208 #endif
15209
15210     if (!fgGlobalMorph)
15211     {
15212         return;
15213     }
15214
15215     if ((oldTree != nullptr) && (oldTree != tree))
15216     {
15217         /* Ensure that we have morphed this node */
15218         assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
15219
15220 #ifdef DEBUG
15221         TransferTestDataToNode(oldTree, tree);
15222 #endif
15223     }
15224     else
15225     {
15226         // Ensure that we haven't morphed this node already
15227         assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15228     }
15229
15230     if (tree->OperKind() & GTK_CONST)
15231     {
15232         goto DONE;
15233     }
15234
15235 #if LOCAL_ASSERTION_PROP
15236
15237     if (!optLocalAssertionProp)
15238     {
15239         goto DONE;
15240     }
15241
15242     /* Do we have any active assertions? */
15243
15244     if (optAssertionCount > 0)
15245     {
15246         /* Is this an assignment to a local variable */
15247         GenTreeLclVarCommon* lclVarTree = nullptr;
15248         if (tree->DefinesLocal(this, &lclVarTree))
15249         {
15250             unsigned lclNum = lclVarTree->gtLclNum;
15251             noway_assert(lclNum < lvaCount);
15252             fgKillDependentAssertions(lclNum DEBUGARG(tree));
15253         }
15254     }
15255
15256     /* If this tree makes a new assertion - make it available */
15257     optAssertionGen(tree);
15258
15259 #endif // LOCAL_ASSERTION_PROP
15260
15261 DONE:;
15262
15263 #ifdef DEBUG
15264     /* Mark this node as being morphed */
15265     tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15266 #endif
15267 }
15268
15269 /*****************************************************************************
15270  *
15271  *  Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15272  *  Returns true if we modified the flow graph
15273  */
15274
15275 bool Compiler::fgFoldConditional(BasicBlock* block)
15276 {
15277     bool result = false;
15278
15279     // We don't want to make any code unreachable
15280     if (opts.compDbgCode || opts.MinOpts())
15281     {
15282         return false;
15283     }
15284
15285     if (block->bbJumpKind == BBJ_COND)
15286     {
15287         noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15288
15289         GenTreePtr stmt = block->bbTreeList->gtPrev;
15290
15291         noway_assert(stmt->gtNext == nullptr);
15292
15293         if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15294         {
15295             noway_assert(fgRemoveRestOfBlock);
15296
15297             /* Unconditional throw - transform the basic block into a BBJ_THROW */
15298             fgConvertBBToThrowBB(block);
15299
15300             /* Remove 'block' from the predecessor list of 'block->bbNext' */
15301             fgRemoveRefPred(block->bbNext, block);
15302
15303             /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15304             fgRemoveRefPred(block->bbJumpDest, block);
15305
15306 #ifdef DEBUG
15307             if (verbose)
15308             {
15309                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15310                 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15311             }
15312 #endif
15313             goto DONE_COND;
15314         }
15315
15316         noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
15317
15318         /* Did we fold the conditional */
15319
15320         noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15321         GenTreePtr cond;
15322         cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15323
15324         if (cond->OperKind() & GTK_CONST)
15325         {
15326             /* Yupee - we folded the conditional!
15327              * Remove the conditional statement */
15328
15329             noway_assert(cond->gtOper == GT_CNS_INT);
15330             noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
15331
15332             /* remove the statement from bbTreelist - No need to update
15333              * the reference counts since there are no lcl vars */
15334             fgRemoveStmt(block, stmt);
15335
15336             // block is a BBJ_COND that we are folding the conditional for
15337             // bTaken is the path that will always be taken from block
15338             // bNotTaken is the path that will never be taken from block
15339             //
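                  // e.g. folding "JTRUE(1)": the branch is always taken, so block becomes
                  // BBJ_ALWAYS with bTaken = bbJumpDest and bNotTaken = the fall-through bbNext.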
15340             BasicBlock* bTaken;
15341             BasicBlock* bNotTaken;
15342
15343             if (cond->gtIntCon.gtIconVal != 0)
15344             {
15345                 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15346                 block->bbJumpKind = BBJ_ALWAYS;
15347                 bTaken            = block->bbJumpDest;
15348                 bNotTaken         = block->bbNext;
15349             }
15350             else
15351             {
15352                 /* Unmark the loop if we are removing a backwards branch: */
15353                 /* the dest block must also be marked as a loop head, and */
15354                 /* 'block' must be reachable from that loop head          */
15355                 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15356                     fgReachable(block->bbJumpDest, block))
15357                 {
15358                     optUnmarkLoopBlocks(block->bbJumpDest, block);
15359                 }
15360
15361                 /* JTRUE 0 - transform the basic block into a BBJ_NONE   */
15362                 block->bbJumpKind = BBJ_NONE;
15363                 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15364                 bTaken    = block->bbNext;
15365                 bNotTaken = block->bbJumpDest;
15366             }
15367
15368             if (fgHaveValidEdgeWeights)
15369             {
15370                 // We are removing an edge from block to bNotTaken
15371                 // and we have already computed the edge weights, so
15372                 // we will try to adjust some of the weights
15373                 //
15374                 flowList*   edgeTaken = fgGetPredForBlock(bTaken, block);
15375                 BasicBlock* bUpdated  = nullptr; // non-NULL if we updated the weight of an internal block
15376
15377                 // We examine the taken edge (block -> bTaken):
15378                 // if block has a valid profile weight and bTaken does not, we try to adjust bTaken's weight;
15379                 // else if bTaken has a valid profile weight and block does not, we try to adjust block's weight.
15380                 // We can only adjust the block weights when the edge (block -> bTaken) is the only edge into bTaken.
15381                 //
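                // For instance (hypothetical numbers): if block has profile weight
                // 100 and bTaken does not, the surviving edge (block -> bTaken) gets
                // flEdgeWeightMin == flEdgeWeightMax == 100; and if that edge is
                // bTaken's only in-edge, bTaken inherits the weight 100 as well.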
15382                 if (block->hasProfileWeight())
15383                 {
15384                     // The edge weights for (block -> bTaken) are 100% of block's weight
15385                     edgeTaken->flEdgeWeightMin = block->bbWeight;
15386                     edgeTaken->flEdgeWeightMax = block->bbWeight;
15387
15388                     if (!bTaken->hasProfileWeight())
15389                     {
15390                         if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15391                         {
15392                             // Update the weight of bTaken
15393                             bTaken->inheritWeight(block);
15394                             bUpdated = bTaken;
15395                         }
15396                     }
15397                 }
15398                 else if (bTaken->hasProfileWeight())
15399                 {
15400                     if (bTaken->countOfInEdges() == 1)
15401                     {
15402                         // There is only one in-edge to bTaken
15403                         edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15404                         edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15405
15406                         // Update the weight of block
15407                         block->inheritWeight(bTaken);
15408                         bUpdated = block;
15409                     }
15410                 }
15411
15412                 if (bUpdated != nullptr)
15413                 {
15414                     flowList* edge;
15415                     // Now fix the weights of the edges out of 'bUpdated'
15416                     switch (bUpdated->bbJumpKind)
15417                     {
15418                         case BBJ_NONE:
15419                             edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15420                             edge->flEdgeWeightMax = bUpdated->bbWeight;
15421                             break;
15422                         case BBJ_COND:
15423                             edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15424                             edge->flEdgeWeightMax = bUpdated->bbWeight;
15425                             __fallthrough;
15426                         case BBJ_ALWAYS:
15427                             edge                  = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15428                             edge->flEdgeWeightMax = bUpdated->bbWeight;
15429                             break;
15430                         default:
15431                             // We don't handle BBJ_SWITCH
15432                             break;
15433                     }
15434                 }
15435             }
15436
15437             /* modify the flow graph */
15438
15439             /* Remove 'block' from the predecessor list of 'bNotTaken' */
15440             fgRemoveRefPred(bNotTaken, block);
15441
15442 #ifdef DEBUG
15443             if (verbose)
15444             {
15445                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15446                 printf("BB%02u becomes a %s", block->bbNum,
15447                        block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15448                 if (block->bbJumpKind == BBJ_ALWAYS)
15449                 {
15450                     printf(" to BB%02u", block->bbJumpDest->bbNum);
15451                 }
15452                 printf("\n");
15453             }
15454 #endif
15455
15456             /* If the block was a loop condition, we may have to modify
15457              * the loop table */
15458
15459             for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15460             {
15461                 /* Some loops may have already been removed by
15462                  * loop unrolling or conditional folding */
15463
15464                 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15465                 {
15466                     continue;
15467                 }
15468
15469                 /* We are only interested in the loop bottom */
15470
15471                 if (optLoopTable[loopNum].lpBottom == block)
15472                 {
15473                     if (cond->gtIntCon.gtIconVal == 0)
15474                     {
15475                         /* This was a bogus loop (condition always false)
15476                          * Remove the loop from the table */
15477
15478                         optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15479 #ifdef DEBUG
15480                         if (verbose)
15481                         {
15482                             printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
15483                                    optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
15484                         }
15485 #endif
15486                     }
15487                 }
15488             }
15489         DONE_COND:
15490             result = true;
15491         }
15492     }
15493     else if (block->bbJumpKind == BBJ_SWITCH)
15494     {
15495         noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15496
15497         GenTreePtr stmt = block->bbTreeList->gtPrev;
15498
15499         noway_assert(stmt->gtNext == nullptr);
15500
15501         if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15502         {
15503             noway_assert(fgRemoveRestOfBlock);
15504
15505             /* Unconditional throw - transform the basic block into a BBJ_THROW */
15506             fgConvertBBToThrowBB(block);
15507
15508             /* update the flow graph */
15509
15510             unsigned     jumpCnt = block->bbJumpSwt->bbsCount;
15511             BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15512
15513             for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15514             {
15515                 BasicBlock* curJump = *jumpTab;
15516
15517                 /* Remove 'block' from the predecessor list of 'curJump' */
15518                 fgRemoveRefPred(curJump, block);
15519             }
15520
15521 #ifdef DEBUG
15522             if (verbose)
15523             {
15524                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15525                 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15526             }
15527 #endif
15528             goto DONE_SWITCH;
15529         }
15530
15531         noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
15532
15533         /* Did we fold the conditional? */
15534
15535         noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15536         GenTreePtr cond;
15537         cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15538
15539         if (cond->OperKind() & GTK_CONST)
15540         {
15541             /* Yippee - we folded the conditional!
15542              * Remove the conditional statement */
15543
15544             noway_assert(cond->gtOper == GT_CNS_INT);
15545
15546             /* Remove the statement from bbTreeList - No need to update
15547              * the reference counts since there are no lcl vars */
15548             fgRemoveStmt(block, stmt);
15549
15550             /* modify the flow graph */
15551
15552             /* Find the actual jump target */
15553             unsigned switchVal;
15554             switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15555             unsigned jumpCnt;
15556             jumpCnt = block->bbJumpSwt->bbsCount;
15557             BasicBlock** jumpTab;
15558             jumpTab = block->bbJumpSwt->bbsDstTab;
15559             bool foundVal;
15560             foundVal = false;
15561
15562             for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15563             {
15564                 BasicBlock* curJump = *jumpTab;
15565
15566                 assert(curJump->countOfInEdges() > 0);
15567
15568                 // If val matches switchVal, or we are at the last entry and
15569                 // never found the switch value, then set the new jump dest
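                // For example (sketch): for "switch (2)" over a 4-entry jump table
                // { BB10, BB20, BB30, BB40 }, the entry with val == 2 (BB30) becomes
                // the new bbJumpDest, and the pred edges from block to BB10, BB20 and
                // BB40 are removed; an out-of-range value such as "switch (9)" matches
                // no entry and falls to the last one, the switch's default target.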
15570
15571                 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15572                 {
15573                     if (curJump != block->bbNext)
15574                     {
15575                         /* transform the basic block into a BBJ_ALWAYS */
15576                         block->bbJumpKind = BBJ_ALWAYS;
15577                         block->bbJumpDest = curJump;
15578
15579                         // Only a backward jump needs a GC poll; clear the flag for a forward jump.
15580                         if (curJump->bbNum > block->bbNum)
15581                         {
15582                             block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15583                         }
15584                     }
15585                     else
15586                     {
15587                         /* transform the basic block into a BBJ_NONE */
15588                         block->bbJumpKind = BBJ_NONE;
15589                         block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15590                     }
15591                     foundVal = true;
15592                 }
15593                 else
15594                 {
15595                     /* Remove 'block' from the predecessor list of 'curJump' */
15596                     fgRemoveRefPred(curJump, block);
15597                 }
15598             }
15599 #ifdef DEBUG
15600             if (verbose)
15601             {
15602                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15603                 printf("BB%02u becomes a %s", block->bbNum,
15604                        block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15605                 if (block->bbJumpKind == BBJ_ALWAYS)
15606                 {
15607                     printf(" to BB%02u", block->bbJumpDest->bbNum);
15608                 }
15609                 printf("\n");
15610             }
15611 #endif
15612         DONE_SWITCH:
15613             result = true;
15614         }
15615     }
15616     return result;
15617 }
15618
15619 //*****************************************************************************
15620 //
15621 // Morphs a single statement in a block.
15622 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
15623 //
15624 // Returns true  if 'stmt' was removed from the block.
15625 // Returns false if 'stmt' is still in the block (even if other statements were removed).
15626 //
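// A sketch of a typical reentrant use (the phase name here is hypothetical):
//
//     if (fgMorphBlockStmt(block, stmt DEBUGARG("myPhase")))
//     {
//         // 'stmt' was removed from 'block'; do not touch it again.
//     }
//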
15627
15628 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15629 {
15630     assert(block != nullptr);
15631     assert(stmt != nullptr);
15632
15633     compCurBB   = block;
15634     compCurStmt = stmt;
15635
15636     GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15637
15638     // Bug 1106830 - During the CSE phase we can't just remove
15639     // morph->gtOp.gtOp2 as it could contain CSE expressions.
15640     // This leads to a noway_assert in OptCSE.cpp when
15641     // searching for the removed CSE ref. (using gtFindLink)
15642     //
15643     if (!optValnumCSE_phase)
15644     {
15645         // Check for morph as a GT_COMMA with an unconditional throw
15646         if (fgIsCommaThrow(morph, true))
15647         {
15648 #ifdef DEBUG
15649             if (verbose)
15650             {
15651                 printf("Folding a top-level fgIsCommaThrow stmt\n");
15652                 printf("Removing op2 as unreachable:\n");
15653                 gtDispTree(morph->gtOp.gtOp2);
15654                 printf("\n");
15655             }
15656 #endif
15657             // Use the call as the new stmt
15658             morph = morph->gtOp.gtOp1;
15659             noway_assert(morph->gtOper == GT_CALL);
15660         }
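        // (Sketch of the shape involved: a comma-throw is GT_COMMA(GT_CALL <throw
        // helper>, op2); since the helper call never returns, op2 is unreachable
        // and only the call survives as the statement.)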
15661
15662         // we can get a throw as a statement root
15663         if (fgIsThrow(morph))
15664         {
15665 #ifdef DEBUG
15666             if (verbose)
15667             {
15668                 printf("We have a top-level fgIsThrow stmt\n");
15669                 printf("Removing the rest of block as unreachable:\n");
15670             }
15671 #endif
15672             noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15673             fgRemoveRestOfBlock = true;
15674         }
15675     }
15676
15677     stmt->gtStmtExpr = morph;
15678
15679     if (lvaLocalVarRefCounted)
15680     {
15681         // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
15682         lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
15683     }
15684
15685     // Can the entire tree be removed?
15686     bool removedStmt = fgCheckRemoveStmt(block, stmt);
15687
15688     // Or is this the last statement of a conditional branch that can be folded?
15689     if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15690     {
15691         if (fgFoldConditional(block))
15692         {
15693             if (block->bbJumpKind != BBJ_THROW)
15694             {
15695                 removedStmt = true;
15696             }
15697         }
15698     }
15699
15700     if (!removedStmt)
15701     {
15702         // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
15703         gtSetStmtInfo(stmt);
15704
15705         // Have to re-link the nodes for this statement
15706         fgSetStmtSeq(stmt);
15707     }
15708
15709 #ifdef DEBUG
15710     if (verbose)
15711     {
15712         printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
15713         gtDispTree(morph);
15714         printf("\n");
15715     }
15716 #endif
15717
15718     if (fgRemoveRestOfBlock)
15719     {
15720         // Remove the rest of the stmts in the block
15721         for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15722         {
15723             fgRemoveStmt(block, stmt);
15724         }
15725
15726         // The rest of the block has been removed, and we will always throw an exception.
15727
15728         // Update the successors of block
15729         fgRemoveBlockAsPred(block);
15730
15731         // For compDbgCode, we prepend an empty BBJ_NONE block as fgFirstBB;
15732         // we should not convert it to a ThrowBB.
15733         if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15734         {
15735             // Convert block to a throw bb
15736             fgConvertBBToThrowBB(block);
15737         }
15738
15739 #ifdef DEBUG
15740         if (verbose)
15741         {
15742             printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
15743         }
15744 #endif
15745         fgRemoveRestOfBlock = false;
15746     }
15747
15748     return removedStmt;
15749 }
15750
15751 /*****************************************************************************
15752  *
15753  *  Morph the statements of the given block.
15754  *  This function should be called just once for a block. Use fgMorphBlockStmt()
15755  *  for reentrant calls.
15756  */
15757
15758 void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
15759 {
15760     fgRemoveRestOfBlock = false;
15761
15762     noway_assert(fgExpandInline == false);
15763
15764     /* Make the current basic block address available globally */
15765
15766     compCurBB = block;
15767
15768     *mult = *lnot = *loadw = false;
15769
15770     fgCurrentlyInUseArgTemps = hashBv::Create(this);
15771
15772     GenTreeStmt* stmt = block->firstStmt();
15773     GenTreePtr   prev = nullptr;
15774     for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15775     {
15776         noway_assert(stmt->gtOper == GT_STMT);
15777
15778         if (fgRemoveRestOfBlock)
15779         {
15780             fgRemoveStmt(block, stmt);
15781             continue;
15782         }
15783 #ifdef FEATURE_SIMD
15784         if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
15785         {
15786             fgMorphCombineSIMDFieldAssignments(block, stmt);
15787         }
15788 #endif
15789
15790         fgMorphStmt     = stmt;
15791         compCurStmt     = stmt;
15792         GenTreePtr tree = stmt->gtStmtExpr;
15793
15794 #ifdef DEBUG
15795         compCurStmtNum++;
15796         if (stmt == block->bbTreeList)
15797         {
15798             block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
15799         }
15800
15801         unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15802
15803         if (verbose)
15804         {
15805             printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
15806             gtDispTree(tree);
15807         }
15808 #endif
15809
15810         /* Morph this statement tree */
15811
15812         GenTreePtr morph = fgMorphTree(tree);
15813
15814         // mark any outgoing arg temps as free so we can reuse them in the next statement.
15815
15816         fgCurrentlyInUseArgTemps->ZeroAll();
15817
15818         // Has fgMorphStmt been sneakily changed?
15819
15820         if (stmt->gtStmtExpr != tree)
15821         {
15822             /* This must be a tail call. Ignore 'morph' and carry on with
15823                the tail-call node */
15824
15825             morph = stmt->gtStmtExpr;
15826             noway_assert(compTailCallUsed);
15827             noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15828             noway_assert(stmt->gtNextStmt == nullptr);
15829
15830             GenTreeCall* call = morph->AsCall();
15831             // Could either be
15832             //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15833             //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15834             //     a jmp.
15835             noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15836                          (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15837                           (compCurBB->bbFlags & BBF_HAS_JMP)));
15838         }
15839         else if (block != compCurBB)
15840         {
15841             /* This must be a tail call that caused a GCPoll to get
15842                injected. We haven't actually morphed the call yet,
15843                but the flag still got set; clear it here... */
15844             CLANG_FORMAT_COMMENT_ANCHOR;
15845
15846 #ifdef DEBUG
15847             tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15848 #endif
15849
15850             noway_assert(compTailCallUsed);
15851             noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15852             noway_assert(stmt->gtNextStmt == nullptr);
15853
15854             GenTreeCall* call = morph->AsCall();
15855
15856             // Could either be
15857             //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15858             //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15859             //     a jmp.
15860             noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15861                          (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15862                           (compCurBB->bbFlags & BBF_HAS_JMP)));
15863         }
15864
15865 #ifdef DEBUG
15866         if (compStressCompile(STRESS_CLONE_EXPR, 30))
15867         {
15868             // Clone all the trees to stress gtCloneExpr()
15869
15870             if (verbose)
15871             {
15872                 printf("\nfgMorphTree (stressClone from):\n");
15873                 gtDispTree(morph);
15874             }
15875
15876             morph = gtCloneExpr(morph);
15877             noway_assert(morph);
15878
15879             if (verbose)
15880             {
15881                 printf("\nfgMorphTree (stressClone to):\n");
15882                 gtDispTree(morph);
15883             }
15884         }
15885
15886         /* If the hash value changed, we modified the tree during morphing */
15887         if (verbose)
15888         {
15889             unsigned newHash = gtHashValue(morph);
15890             if (newHash != oldHash)
15891             {
15892                 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
15893                 gtDispTree(morph);
15894             }
15895         }
15896 #endif
15897
15898         /* Check for morph as a GT_COMMA with an unconditional throw */
15899         if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15900         {
15901             /* Use the call as the new stmt */
15902             morph = morph->gtOp.gtOp1;
15903             noway_assert(morph->gtOper == GT_CALL);
15904             noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15905
15906             fgRemoveRestOfBlock = true;
15907         }
15908
15909         stmt->gtStmtExpr = tree = morph;
15910
15911         noway_assert(fgPtrArgCntCur == 0);
15912
15913         if (fgRemoveRestOfBlock)
15914         {
15915             continue;
15916         }
15917
15918         /* Has the statement been optimized away? */
15919
15920         if (fgCheckRemoveStmt(block, stmt))
15921         {
15922             continue;
15923         }
15924
15925         /* Check if this block ends with a conditional branch that can be folded */
15926
15927         if (fgFoldConditional(block))
15928         {
15929             continue;
15930         }
15931
15932         if (ehBlockHasExnFlowDsc(block))
15933         {
15934             continue;
15935         }
15936
15937 #if OPT_MULT_ADDSUB
15938
15939         /* Note whether we have two or more +=/-= operators in a row */
15940
15941         if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
15942         {
15943             if (prev && prev->gtOper == tree->gtOper)
15944             {
15945                 *mult = true;
15946             }
15947         }
15948
15949 #endif
15950
15951         /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
15952
15953         if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
15954         {
15955             *loadw = true;
15956         }
15957     }
15958
15959     if (fgRemoveRestOfBlock)
15960     {
15961         if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
15962         {
15963             GenTreePtr first = block->bbTreeList;
15964             noway_assert(first);
15965             GenTreePtr last = first->gtPrev;
15966             noway_assert(last && last->gtNext == nullptr);
15967             GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
15968
15969             if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
15970                 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
15971             {
15972                 GenTreePtr op1 = lastStmt->gtOp.gtOp1;
15973
15974                 if (op1->OperKind() & GTK_RELOP)
15975                 {
15976                     /* Unmark the comparison node with GTF_RELOP_JMP_USED */
15977                     op1->gtFlags &= ~GTF_RELOP_JMP_USED;
15978                 }
15979
15980                 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
15981             }
15982         }
15983
15984         /* Mark block as a BBJ_THROW block */
15985         fgConvertBBToThrowBB(block);
15986     }
15987
15988     noway_assert(fgExpandInline == false);
15989
15990 #if FEATURE_FASTTAILCALL
15991     GenTreePtr recursiveTailCall = nullptr;
15992     if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
15993     {
15994         fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
15995     }
15996 #endif
15997
15998 #ifdef DEBUG
15999     compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
16000 #endif
16001
16002     // Reset this back so that it doesn't leak out and impact other blocks
16003     fgRemoveRestOfBlock = false;
16004 }
16005
16006 /*****************************************************************************
16007  *
16008  *  Morph the blocks of the method.
16009  *  Returns true if the basic block list is modified.
16010  *  This function should be called just once.
16011  */
16012
16013 void Compiler::fgMorphBlocks()
16014 {
16015 #ifdef DEBUG
16016     if (verbose)
16017     {
16018         printf("\n*************** In fgMorphBlocks()\n");
16019     }
16020 #endif
16021
16022     /* Since fgMorphTree can be called after various optimizations to re-arrange
16023      * the nodes, we need a global flag to signal whether we are in the one-pass
16024      * global morphing phase */
16025
16026     fgGlobalMorph = true;
16027
16028 #if LOCAL_ASSERTION_PROP
16029     //
16030     // Local assertion prop is enabled if we are optimizing
16031     //
16032     optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
16033
16034     if (optLocalAssertionProp)
16035     {
16036         //
16037         // Initialize for local assertion prop
16038         //
16039         optAssertionInit(true);
16040     }
16041 #elif ASSERTION_PROP
16042     //
16043     // If LOCAL_ASSERTION_PROP is not set
16044     // and we have global assertion prop
16045     // then local assertion prop is always off
16046     //
16047     optLocalAssertionProp = false;
16048
16049 #endif
16050
16051     /*-------------------------------------------------------------------------
16052      * Process all basic blocks in the function
16053      */
16054
16055     BasicBlock* block = fgFirstBB;
16056     noway_assert(block);
16057
16058 #ifdef DEBUG
16059     compCurStmtNum = 0;
16060 #endif
16061
16062     do
16063     {
16064 #if OPT_MULT_ADDSUB
16065         bool mult = false;
16066 #endif
16067
16068 #if OPT_BOOL_OPS
16069         bool lnot = false;
16070 #endif
16071
16072         bool loadw = false;
16073
16074 #ifdef DEBUG
16075         if (verbose)
16076         {
16077             printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
16078         }
16079 #endif
16080
16081 #if LOCAL_ASSERTION_PROP
16082         if (optLocalAssertionProp)
16083         {
16084             //
16085             // Clear out any currently recorded assertion candidates
16086             // before processing each basic block;
16087             // we must also handle QMARK-COLON specially
16088             //
16089             optAssertionReset(0);
16090         }
16091 #endif
16092
16093         /* Process all statement trees in the basic block */
16094
16095         GenTreePtr tree;
16096
16097         fgMorphStmts(block, &mult, &lnot, &loadw);
16098
16099 #if OPT_MULT_ADDSUB
16100
16101         if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
16102         {
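            // Sketch of the transformation performed below: two adjacent statements
            //
            //     lcl += 3;    // GT_ASG_ADD(lcl, 3)
            //     lcl += 5;    // GT_ASG_ADD(lcl, 5)
            //
            // are folded (when no checked-arithmetic overflow is possible) into a
            // single "lcl += 8;" and the second statement is unlinked from the block.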
16103             for (tree = block->bbTreeList; tree; tree = tree->gtNext)
16104             {
16105                 noway_assert(tree->gtOper == GT_STMT);
16106                 GenTreePtr last = tree->gtStmt.gtStmtExpr;
16107
16108                 if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
16109                 {
16110                     GenTreePtr temp;
16111                     GenTreePtr next;
16112
16113                     GenTreePtr dst1 = last->gtOp.gtOp1;
16114                     GenTreePtr src1 = last->gtOp.gtOp2;
16115
16121                     if (dst1->gtOper != GT_LCL_VAR)
16122                     {
16123                         goto NOT_CAFFE;
16124                     }
16125                     if (!src1->IsCnsIntOrI())
16126                     {
16127                         goto NOT_CAFFE;
16128                     }
16129
16130                     for (;;)
16131                     {
16132                         GenTreePtr dst2;
16133                         GenTreePtr src2;
16134
16135                         /* Look at the next statement */
16136
16137                         temp = tree->gtNext;
16138                         if (!temp)
16139                         {
16140                             goto NOT_CAFFE;
16141                         }
16142
16143                         noway_assert(temp->gtOper == GT_STMT);
16144                         next = temp->gtStmt.gtStmtExpr;
16145
16146                         if (next->gtOper != last->gtOper)
16147                         {
16148                             goto NOT_CAFFE;
16149                         }
16150                         if (next->gtType != last->gtType)
16151                         {
16152                             goto NOT_CAFFE;
16153                         }
16154
16155                         dst2 = next->gtOp.gtOp1;
16156                         src2 = next->gtOp.gtOp2;
16157
16158                         if (dst2->gtOper != GT_LCL_VAR)
16159                         {
16160                             goto NOT_CAFFE;
16161                         }
16162                         if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
16163                         {
16164                             goto NOT_CAFFE;
16165                         }
16166
16167                         if (!src2->IsCnsIntOrI())
16168                         {
16169                             goto NOT_CAFFE;
16170                         }
16171
16172                         if (last->gtOverflow() != next->gtOverflow())
16173                         {
16174                             goto NOT_CAFFE;
16175                         }
16176
16177                         const ssize_t i1    = src1->gtIntCon.gtIconVal;
16178                         const ssize_t i2    = src2->gtIntCon.gtIconVal;
16179                         const ssize_t itemp = i1 + i2;
16180
16181                         /* if the operators are checking for overflow, check for overflow of the operands */
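                        // E.g. (sketch): folding "i += INT32_MAX; i += 1;" under
                        // checked arithmetic must bail out here, because the combined
                        // constant INT32_MAX + 1 itself overflows INT32.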
16182
16183                         if (next->gtOverflow())
16184                         {
16185                             if (next->TypeGet() == TYP_LONG)
16186                             {
16187                                 if (next->gtFlags & GTF_UNSIGNED)
16188                                 {
16189                                     ClrSafeInt<UINT64> si1(i1);
16190                                     if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
16191                                     {
16192                                         goto NOT_CAFFE;
16193                                     }
16194                                 }
16195                                 else
16196                                 {
16197                                     ClrSafeInt<INT64> si1(i1);
16198                                     if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
16199                                     {
16200                                         goto NOT_CAFFE;
16201                                     }
16202                                 }
16203                             }
16204                             else if (next->gtFlags & GTF_UNSIGNED)
16205                             {
16206                                 ClrSafeInt<UINT32> si1(i1);
16207                                 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
16208                                 {
16209                                     goto NOT_CAFFE;
16210                                 }
16211                             }
16212                             else
16213                             {
16214                                 ClrSafeInt<INT32> si1(i1);
16215                                 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
16216                                 {
16217                                     goto NOT_CAFFE;
16218                                 }
16219                             }
16220                         }
16221
16222                         /* Fold the two increments/decrements into one */
16223
16224                         src1->gtIntCon.gtIconVal = itemp;
16225 #ifdef _TARGET_64BIT_
16226                         if (src1->gtType == TYP_INT)
16227                         {
16228                             src1->AsIntCon()->TruncateOrSignExtend32();
16229                         }
16230 #endif //_TARGET_64BIT_
16231
16232                         /* Remove the second statement completely */
16233
16234                         noway_assert(tree->gtNext == temp);
16235                         noway_assert(temp->gtPrev == tree);
16236
16237                         if (temp->gtNext)
16238                         {
16239                             noway_assert(temp->gtNext->gtPrev == temp);
16240
16241                             temp->gtNext->gtPrev = tree;
16242                             tree->gtNext         = temp->gtNext;
16243                         }
16244                         else
16245                         {
16246                             tree->gtNext = nullptr;
16247
16248                             noway_assert(block->bbTreeList->gtPrev == temp);
16249
16250                             block->bbTreeList->gtPrev = tree;
16251                         }
16252                     }
16253                 }
16254
16255             NOT_CAFFE:;
16256             }
16257         }
16258
16259 #endif
16260
16261         /* Are we using a single return block? */
16262
16263         if (block->bbJumpKind == BBJ_RETURN)
16264         {
16265             if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
16266             {
16267                 /* We'll jump to the genReturnBB */
16268                 CLANG_FORMAT_COMMENT_ANCHOR;
16269
16270 #if !defined(_TARGET_X86_)
16271                 if (info.compFlags & CORINFO_FLG_SYNCH)
16272                 {
16273                     fgConvertSyncReturnToLeave(block);
16274                 }
16275                 else
16276 #endif // !_TARGET_X86_
16277                 {
16278                     block->bbJumpKind = BBJ_ALWAYS;
16279                     block->bbJumpDest = genReturnBB;
16280                     fgReturnCount--;
16281                 }
16282
16283                 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
16284                 // For example, a method returning void could have an empty block with jump kind BBJ_RETURN.
16285                 // Such blocks do materialize as part of inlining.
16286                 //
16287                 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
16288                 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
16289                 // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
16290                 // is BAD_VAR_NUM.
16291                 //
16292                 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
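                //
                // As a sketch, with a hypothetical genReturnLocal V05, a block
                // ending in
                //
                //     GT_RETURN(expr)
                //
                // is rewritten below into
                //
                //     GT_ASG(V05, expr)    // marked GTF_DONT_CSE
                //
                // and the block jumps to genReturnBB, which holds the method's
                // lone GT_RETURN(V05).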
16293
16294                 GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
16295                 GenTreePtr ret  = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
16296
16297                 // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
16298                 if (genReturnLocal != BAD_VAR_NUM)
16299                 {
16300                     // Method must be returning a value other than TYP_VOID.
16301                     noway_assert(compMethodHasRetVal());
16302
16303                     // This block must be ending with a GT_RETURN
16304                     noway_assert(last != nullptr);
16305                     noway_assert(last->gtOper == GT_STMT);
16306                     noway_assert(last->gtNext == nullptr);
16307                     noway_assert(ret != nullptr);
16308
16309                     // GT_RETURN must have non-null operand as the method is returning the value assigned to
16310                     // genReturnLocal
16311                     noway_assert(ret->OperGet() == GT_RETURN);
16312                     noway_assert(ret->gtGetOp1() != nullptr);
16313
16314                     GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
16315
16316                     last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
16317
16318                     // make sure that copy-prop ignores this assignment.
16319                     last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
16320                 }
16321                 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
16322                 {
16323                     // This block ends with a GT_RETURN
16324                     noway_assert(last != nullptr);
16325                     noway_assert(last->gtOper == GT_STMT);
16326                     noway_assert(last->gtNext == nullptr);
16327
16328                     // Must be a void GT_RETURN with null operand; delete it as this block branches to the oneReturn block
16329                     noway_assert(ret->TypeGet() == TYP_VOID);
16330                     noway_assert(ret->gtGetOp1() == nullptr);
16331
16332                     fgRemoveStmt(block, last);
16333                 }
16334
16335 #ifdef DEBUG
16336                 if (verbose)
16337                 {
16338                     printf("morph BB%02u to point at onereturn.  New block is\n", block->bbNum);
16339                     fgTableDispBasicBlock(block);
16340                 }
16341 #endif
16342             }
16343         }
16344
16345         block = block->bbNext;
16346     } while (block);
16347
16348     /* We are done with the global morphing phase */
16349
16350     fgGlobalMorph = false;
16351
16352 #ifdef DEBUG
16353     if (verboseTrees)
16354     {
16355         fgDispBasicBlocks(true);
16356     }
16357 #endif
16358 }
16359
16360 //------------------------------------------------------------------------
16361 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
16362 //
16363 // Notes:
16364 //    fgPtrArgCntMax records the maximum number of pushed arguments.
16365 //    Depending upon the value of this maximum,
16366 //    we may need to use an EBP frame or be partially interruptible.
16367 //    This functionality has been factored out of fgSetOptions() because
16368 //    the Rationalizer can create new calls.
16369 //
16370 // Assumptions:
16371 //    This must be called before isFramePointerRequired() is called, because it is a
16372 //    phased variable (can only be written before it has been read).
16373 //
16374 void Compiler::fgCheckArgCnt()
16375 {
16376     if (!compCanEncodePtrArgCntMax())
16377     {
16378 #ifdef DEBUG
16379         if (verbose)
16380         {
16381             printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
16382                    "interruptible\n");
16383         }
16384 #endif
16385         genInterruptible = false;
16386     }
16387     if (fgPtrArgCntMax >= sizeof(unsigned))
16388     {
16389 #ifdef DEBUG
16390         if (verbose)
16391         {
16392             printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
16393         }
16394 #endif
16395         codeGen->setFramePointerRequired(true);
16396     }
16397 }
16398
16399 /*****************************************************************************
16400  *
16401  *  Make some decisions about the kind of code to generate.
16402  */
16403
16404 void Compiler::fgSetOptions()
16405 {
16406 #ifdef DEBUG
16407     /* Should we force fully interruptible code? */
16408     if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16409     {
16410         noway_assert(!codeGen->isGCTypeFixed());
16411         genInterruptible = true;
16412     }
16413 #endif
16414
16415     if (opts.compDbgCode)
16416     {
16417         assert(!codeGen->isGCTypeFixed());
16418         genInterruptible = true; // debugging is easier this way ...
16419     }
16420
16421     /* Assume we won't need an explicit stack frame if this is allowed */
16422
16423     // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16424     // the callee-saved registers.
16425     noway_assert(!compTailCallUsed || !compLocallocUsed);
16426
16427     if (compLocallocUsed)
16428     {
16429         codeGen->setFramePointerRequired(true);
16430     }
16431
16432 #ifdef _TARGET_X86_
16433
16434     if (compTailCallUsed)
16435         codeGen->setFramePointerRequired(true);
16436
16437 #endif // _TARGET_X86_
16438
16439     if (!opts.genFPopt)
16440     {
16441         codeGen->setFramePointerRequired(true);
16442     }
16443
16444     // Assert that the EH table has been initialized by now. Note that
16445     // compHndBBtabAllocCount never decreases; it is a high-water mark
16446     // of table allocation. In contrast, compHndBBtabCount does shrink
16447     // if we delete a dead EH region, and if it shrinks to zero, the
16448     // table pointer compHndBBtab is unreliable.
16449     assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16450
16451 #ifdef _TARGET_X86_
16452
16453     // Note: this case, and the !X86 case below, should both use the
16454     // !X86 path. This would require a few more changes for X86 to use
16455     // compHndBBtabCount (the current number of EH clauses) instead of
16456     // info.compXcptnsCount (the number of EH clauses in IL), such as
16457     // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16458     // an EH clause that we delete as statically dead code before we
16459     // get here, leaving no EH clauses left, and thus no requirement
16460     // to use a frame pointer because of EH. But until all the code uses
16461     // the same test, leave info.compXcptnsCount here.
16462     if (info.compXcptnsCount > 0)
16463     {
16464         codeGen->setFramePointerRequiredEH(true);
16465     }
16466
16467 #else // !_TARGET_X86_
16468
16469     if (compHndBBtabCount > 0)
16470     {
16471         codeGen->setFramePointerRequiredEH(true);
16472     }
16473
16474 #endif // _TARGET_X86_
16475
16476 #ifdef UNIX_X86_ABI
16477     if (info.compXcptnsCount > 0)
16478     {
16479         assert(!codeGen->isGCTypeFixed());
16480         // Enforce fully interruptible codegen for funclet unwinding
16481         genInterruptible = true;
16482     }
16483 #endif // UNIX_X86_ABI
16484
16485     fgCheckArgCnt();
16486
16487     if (info.compCallUnmanaged)
16488     {
16489         codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16490     }
16491
16492     if (info.compPublishStubParam)
16493     {
16494         codeGen->setFramePointerRequiredGCInfo(true);
16495     }
16496
16497     if (opts.compNeedSecurityCheck)
16498     {
16499         codeGen->setFramePointerRequiredGCInfo(true);
16500
16501 #ifndef JIT32_GCENCODER
16502
16503         // The decoder only reports objects in frames with exceptions if the frame
16504         // is fully interruptible.
16505         // Even if there is no catch or other way to resume execution in this frame
16506         // the VM requires the security object to remain alive until later, so
16507         // Frames with security objects must be fully interruptible.
16508         genInterruptible = true;
16509
16510 #endif // JIT32_GCENCODER
16511     }
16512
16513     if (compIsProfilerHookNeeded())
16514     {
16515         codeGen->setFramePointerRequired(true);
16516     }
16517
16518     if (info.compIsVarArgs)
16519     {
16520         // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16521         codeGen->setFramePointerRequiredGCInfo(true);
16522     }
16523
16524     if (lvaReportParamTypeArg())
16525     {
16526         codeGen->setFramePointerRequiredGCInfo(true);
16527     }
16528
16529     // printf("method will %s be fully interruptible\n", genInterruptible ? "   " : "not");
16530 }
16531
16532 /*****************************************************************************/
16533
16534 GenTreePtr Compiler::fgInitThisClass()
16535 {
16536     noway_assert(!compIsForInlining());
16537
16538     CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16539
16540     if (!kind.needsRuntimeLookup)
16541     {
16542         return fgGetSharedCCtor(info.compClassHnd);
16543     }
16544     else
16545     {
16546 #ifdef FEATURE_READYTORUN_COMPILER
16547         // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16548         if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16549         {
16550             CORINFO_RESOLVED_TOKEN resolvedToken;
16551             memset(&resolvedToken, 0, sizeof(resolvedToken));
16552
16553             // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16554             // This covers the case of a generic method on a non-generic type.
16555             if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16556             {
16557                 resolvedToken.hClass = info.compClassHnd;
16558                 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16559             }
16560
16561             // We need a runtime lookup.
16562             GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16563
16564             // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16565             // base of the class that owns the method being compiled". If we're in this method, it means we're not
16566             // inlining and there's no ambiguity.
16567             return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16568                                              gtNewArgList(ctxTree), &kind);
16569         }
16570 #endif
16571
16572         // Collectible types require that, for shared generic code, if we use the generic context parameter,
16573         // we report it. (This is a conservative approach; we could detect some cases, particularly when the
16574         // context parameter is 'this', where we don't need the eager reporting logic.)
16575         lvaGenericsContextUseCount++;
16576
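        // E.g. (sketch) for CORINFO_LOOKUP_THISOBJ, the tree built below is
        //
        //     CALL CORINFO_HELP_INITINSTCLASS(GT_IND('this'), methodHnd)
        //
        // i.e. the class to initialize is identified from the object's vtable
        // pointer together with the method desc of the method being compiled.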
16577         switch (kind.runtimeLookupKind)
16578         {
16579             case CORINFO_LOOKUP_THISOBJ:
16580                 // This code takes a 'this' pointer, but we need to pass the static method desc to get the right point in
16581                 // the hierarchy
16582                 {
16583                     GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16584                     // Vtable pointer of this object
16585                     vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16586                     vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16587                     GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16588
16589                     return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16590                                                gtNewArgList(vtTree, methodHnd));
16591                 }
16592
16593             case CORINFO_LOOKUP_CLASSPARAM:
16594             {
16595                 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16596                 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
16597             }
16598
16599             case CORINFO_LOOKUP_METHODPARAM:
16600             {
16601                 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16602                 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16603                                            gtNewArgList(gtNewIconNode(0), methHndTree));
16604             }
16605         }
16606     }
16607
16608     noway_assert(!"Unknown LOOKUP_KIND");
16609     UNREACHABLE();
16610 }
16611
16612 #ifdef DEBUG
16613 /*****************************************************************************
16614  *
16615  *  Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
16616  *  except for the allowed ? 1 : 0; pattern.
16617  */
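//
// E.g. (sketch): under LEGACY_BACKEND, "(x > y) ? 1 : 0" may survive as a
// GT_QMARK whose colon has op1 == 0 and op2 == 1 (see the asserts in
// fgCheckQmarkAllowedForm below); any other post-morph qmark shape asserts.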
16618 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
16619 {
16620     if ((*tree)->OperGet() == GT_QMARK)
16621     {
16622         fgCheckQmarkAllowedForm(*tree);
16623     }
16624     return WALK_CONTINUE;
16625 }
16626
16627 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
16628 {
16629     assert(tree->OperGet() == GT_QMARK);
16630 #ifndef LEGACY_BACKEND
16631     assert(!"Qmarks beyond morph disallowed.");
16632 #else  // LEGACY_BACKEND
16633     GenTreePtr colon = tree->gtOp.gtOp2;
16634
16635     assert(colon->gtOp.gtOp1->IsIntegralConst(0));
16636     assert(colon->gtOp.gtOp2->IsIntegralConst(1));
16637 #endif // LEGACY_BACKEND
16638 }
16639
16640 /*****************************************************************************
16641  *
16642  *  Verify that the importer has created GT_QMARK nodes in a way we can
16643  *  process them. The following is allowed:
16644  *
16645  *  1. A top level qmark. Top level qmark is of the form:
16646  *      a) (bool) ? (void) : (void) OR
16647  *      b) V0N = (bool) ? (type) : (type)
16648  *
16649  *  2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
16650  *     of either op1 of colon or op2 of colon but not a child of any other
16651  *     operator.
16652  */
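/*
 *  E.g. (sketch): "V05 = (a > b) ? ((c > d) ? x : y) : z" is an allowed form,
 *  because the nested qmark is a direct child of the top level qmark's colon;
 *  a qmark buried under some other operator, say a GT_ADD operand, is not.
 */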
16653 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
16654 {
16655     GenTreePtr topQmark = fgGetTopLevelQmark(expr);
16656
16657     // If the top level Qmark is null, then scan the tree to make sure
16658     // there are no qmarks within it.
16659     if (topQmark == nullptr)
16660     {
16661         fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16662     }
16663     else
16664     {
16665         // We could probably expand the cond node also, but we don't think the extra effort is necessary,
16666         // so let's just assert that the cond node of a top level qmark doesn't have further top level qmarks.
16667         fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
16668
16669         fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16670         fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
16671     }
16672 }
16673 #endif // DEBUG
16674
16675 /*****************************************************************************
16676  *
16677  *  Get the top level GT_QMARK node in a given "expr", return NULL if such a
16678  *  node is not present. If the top level GT_QMARK node is assigned to a
16679  *  GT_LCL_VAR, then return the lcl node in ppDst.
16680  *
16681  */
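/*
 *  E.g. (sketch): for "V03 = (cond) ? x : y", imported as
 *  GT_ASG(GT_LCL_VAR V03, GT_QMARK(cond, colon)), this returns the GT_QMARK
 *  node and sets *ppDst to the GT_LCL_VAR node for V03.
 */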
16682 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
16683 {
16684     if (ppDst != nullptr)
16685     {
16686         *ppDst = nullptr;
16687     }
16688
16689     GenTreePtr topQmark = nullptr;
16690     if (expr->gtOper == GT_QMARK)
16691     {
16692         topQmark = expr;
16693     }
16694     else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16695     {
16696         topQmark = expr->gtOp.gtOp2;
16697         if (ppDst != nullptr)
16698         {
16699             *ppDst = expr->gtOp.gtOp1;
16700         }
16701     }
16702     return topQmark;
16703 }
16704
16705 /*********************************************************************************
16706  *
16707  *  For a castclass helper call,
16708  *  Importer creates the following tree:
16709  *  the importer creates the following tree:
16710  *
16711  *  This method splits the qmark expression created by the importer into the
16712  *  following blocks: (block, asg, cond1, cond2, helper, remainder)
16713  *  Notice that op1 is the result for both of the conditions. So we coalesce these
16714  *  assignments into a single block instead of two blocks, which would result in a nested diamond.
16715  *
16716  *                       +---------->-----------+
16717  *                       |          |           |
16718  *                       ^          ^           v
16719  *                       |          |           |
16720  *  block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16721  *
16722  *  We expect to achieve the following codegen:
16723  *     mov      rsi, rdx                           tmp = op1                  // asgBlock
16724  *     test     rsi, rsi                           goto skip if tmp == null ? // cond1Block
16725  *     je       SKIP
16726  *     mov      rcx, 0x76543210                    cns = op2                  // cond2Block
16727  *     cmp      qword ptr [rsi], rcx               goto skip if *tmp == op2
16728  *     je       SKIP
16729  *     call     CORINFO_HELP_CHKCASTCLASS_SPECIAL  tmp = helper(cns, tmp)     // helperBlock
16730  *     mov      rsi, rax
16731  *  SKIP:                                                                     // remainderBlock
16732  *     tmp has the result.
16733  *
16734  */
void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
        fgDispBasicBlocks(block, block, true);
    }
#endif // DEBUG

    GenTreePtr expr = stmt->gtStmt.gtStmtExpr;

    GenTreePtr dst   = nullptr;
    GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
    noway_assert(dst != nullptr);

    assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);

    // Get cond, true, false exprs for the qmark.
    GenTreePtr condExpr  = qmark->gtGetOp1();
    GenTreePtr trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
    GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();

    // Get cond, true, false exprs for the nested qmark.
    GenTreePtr nestedQmark = falseExpr;
    GenTreePtr cond2Expr;
    GenTreePtr true2Expr;
    GenTreePtr false2Expr;

    if (nestedQmark->gtOper == GT_QMARK)
    {
        cond2Expr  = nestedQmark->gtGetOp1();
        true2Expr  = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
        false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();

        assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
        cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
    }
    else
    {
        // This is a rare case that arises when we are doing minopts and encounter isinst of null:
        // gtFoldExpr was still able to optimize away part of the tree (but not all of it),
        // so the tree does not match our expected pattern.

        // Rather than write code to handle this case, just fake up some nodes to make it match the common
        // case.  Synthesize a comparison that is always true, and for the result-on-true, use the
        // entire subtree we expected to be the nested question op.

        cond2Expr  = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
        true2Expr  = nestedQmark;
        false2Expr = gtNewIconNode(0, TYP_I_IMPL);
    }
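    // In tree form, the synthesized replacement for the nested qmark is
    // (illustrative sketch):
    //     (0 == 0) ? <original subtree> : 0
    // which lets the block-splitting below proceed exactly as in the common case.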
    assert(false2Expr->OperGet() == trueExpr->OperGet());

    // Clear flags as they are now going to be part of JTRUE.
    assert(condExpr->gtFlags & GTF_RELOP_QMARK);
    condExpr->gtFlags &= ~GTF_RELOP_QMARK;

    // Create the chain of blocks. See method header comment.
    // The order of blocks after this is the following:
    //     block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
    //
    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
    // remainderBlock will still be GC safe.
    unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
    BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.

    BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
    BasicBlock* cond2Block  = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* cond1Block  = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* asgBlock    = fgNewBBafter(BBJ_NONE, block, true);

    remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;

    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
    if ((block->bbFlags & BBF_INTERNAL) == 0)
    {
        helperBlock->bbFlags &= ~BBF_INTERNAL;
        cond2Block->bbFlags &= ~BBF_INTERNAL;
        cond1Block->bbFlags &= ~BBF_INTERNAL;
        asgBlock->bbFlags &= ~BBF_INTERNAL;
        helperBlock->bbFlags |= BBF_IMPORTED;
        cond2Block->bbFlags |= BBF_IMPORTED;
        cond1Block->bbFlags |= BBF_IMPORTED;
        asgBlock->bbFlags |= BBF_IMPORTED;
    }

    // Chain the flow correctly.
    fgAddRefPred(asgBlock, block);
    fgAddRefPred(cond1Block, asgBlock);
    fgAddRefPred(cond2Block, cond1Block);
    fgAddRefPred(helperBlock, cond2Block);
    fgAddRefPred(remainderBlock, helperBlock);
    fgAddRefPred(remainderBlock, cond1Block);
    fgAddRefPred(remainderBlock, cond2Block);

    cond1Block->bbJumpDest = remainderBlock;
    cond2Block->bbJumpDest = remainderBlock;

    // Set the weights; some are guesses.
    asgBlock->inheritWeight(block);
    cond1Block->inheritWeight(block);
    cond2Block->inheritWeightPercentage(cond1Block, 50);
    helperBlock->inheritWeightPercentage(cond2Block, 50);
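    // (The 50% figures are unprofiled guesses: each conditional test is assumed
    // to branch to the remainder half the time.)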

    // Append cond1 as JTRUE to cond1Block
    GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
    GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(cond1Block, jmpStmt);

    // Append cond2 as JTRUE to cond2Block
    jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
    jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(cond2Block, jmpStmt);

    // AsgBlock should get the tmp = op1 assignment.
    trueExpr            = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
    GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(asgBlock, trueStmt);

    // Since the helper call is on the JTRUE's false path, reverse cond2 before adding the helper.
    gtReverseCond(cond2Expr);
    GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
    GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(helperBlock, helperStmt);

    // Finally, remove the nested qmark stmt.
    fgRemoveStmt(block, stmt);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
        fgDispBasicBlocks(block, remainderBlock, true);
    }
#endif // DEBUG
}

/*****************************************************************************
 *
 *  Expand a statement with a top level qmark node. There are three cases, based
 *  on whether the qmark has both "true" and "false" arms, or just one of them.
 *
 *     S0;
 *     C ? T : F;
 *     S1;
 *
 *     Generates ===>
 *
 *                       bbj_always
 *                       +---->------+
 *                 false |           |
 *     S0 -->-- ~C -->-- T   F -->-- S1
 *              |            |
 *              +--->--------+
 *              bbj_cond(true)
 *
 *     -----------------------------------------
 *
 *     S0;
 *     C ? T : NOP;
 *     S1;
 *
 *     Generates ===>
 *
 *                 false
 *     S0 -->-- ~C -->-- T -->-- S1
 *              |                |
 *              +-->-------------+
 *              bbj_cond(true)
 *
 *     -----------------------------------------
 *
 *     S0;
 *     C ? NOP : F;
 *     S1;
 *
 *     Generates ===>
 *
 *                false
 *     S0 -->-- C -->-- F -->-- S1
 *              |               |
 *              +-->------------+
 *              bbj_cond(true)
 *
 *  If the qmark assigns to a variable, then create tmps for "then"
 *  and "else" results and assign the temp to the variable as a writeback step.
 */
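// For illustration (hypothetical source): "x = c ? t : f;" produces the first
// shape above, while a qmark whose unused arm was folded to a GT_NOP produces
// the second or third shape.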
void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
{
    GenTreePtr expr = stmt->gtStmt.gtStmtExpr;

    // Retrieve the Qmark node to be expanded.
    GenTreePtr dst   = nullptr;
    GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
    if (qmark == nullptr)
    {
        return;
    }

    if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
    {
        fgExpandQmarkForCastInstOf(block, stmt);
        return;
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
        fgDispBasicBlocks(block, block, true);
    }
#endif // DEBUG

    // Retrieve the operands.
    GenTreePtr condExpr  = qmark->gtGetOp1();
    GenTreePtr trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
    GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();

    assert(condExpr->gtFlags & GTF_RELOP_QMARK);
    condExpr->gtFlags &= ~GTF_RELOP_QMARK;

    assert(!varTypeIsFloating(condExpr->TypeGet()));

    bool hasTrueExpr  = (trueExpr->OperGet() != GT_NOP);
    bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
    assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!

    // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
    //     block ... condBlock ... elseBlock ... remainderBlock
    //
    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
    // remainderBlock will still be GC safe.
    unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
    BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.

    BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);

    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
    if ((block->bbFlags & BBF_INTERNAL) == 0)
    {
        condBlock->bbFlags &= ~BBF_INTERNAL;
        elseBlock->bbFlags &= ~BBF_INTERNAL;
        condBlock->bbFlags |= BBF_IMPORTED;
        elseBlock->bbFlags |= BBF_IMPORTED;
    }

    remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;

    condBlock->inheritWeight(block);

    fgAddRefPred(condBlock, block);
    fgAddRefPred(elseBlock, condBlock);
    fgAddRefPred(remainderBlock, elseBlock);

    BasicBlock* thenBlock = nullptr;
    if (hasTrueExpr && hasFalseExpr)
    {
        //                       bbj_always
        //                       +---->------+
        //                 false |           |
        //     S0 -->-- ~C -->-- T   F -->-- S1
        //              |            |
        //              +--->--------+
        //              bbj_cond(true)
        //
        gtReverseCond(condExpr);
        condBlock->bbJumpDest = elseBlock;

        thenBlock             = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
        thenBlock->bbJumpDest = remainderBlock;
        if ((block->bbFlags & BBF_INTERNAL) == 0)
        {
            thenBlock->bbFlags &= ~BBF_INTERNAL;
            thenBlock->bbFlags |= BBF_IMPORTED;
        }

        elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);

        fgAddRefPred(thenBlock, condBlock);
        fgAddRefPred(remainderBlock, thenBlock);

        thenBlock->inheritWeightPercentage(condBlock, 50);
        elseBlock->inheritWeightPercentage(condBlock, 50);
    }
    else if (hasTrueExpr)
    {
        //                 false
        //     S0 -->-- ~C -->-- T -->-- S1
        //              |                |
        //              +-->-------------+
        //              bbj_cond(true)
        //
        gtReverseCond(condExpr);
        condBlock->bbJumpDest = remainderBlock;
        fgAddRefPred(remainderBlock, condBlock);
        // Since we have no false expr, use the one we'd already created.
        thenBlock = elseBlock;
        elseBlock = nullptr;

        thenBlock->inheritWeightPercentage(condBlock, 50);
    }
    else if (hasFalseExpr)
    {
        //                false
        //     S0 -->-- C -->-- F -->-- S1
        //              |               |
        //              +-->------------+
        //              bbj_cond(true)
        //
        condBlock->bbJumpDest = remainderBlock;
        fgAddRefPred(remainderBlock, condBlock);

        elseBlock->inheritWeightPercentage(condBlock, 50);
    }

    GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
    GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(condBlock, jmpStmt);

    // Remove the original qmark statement.
    fgRemoveStmt(block, stmt);

    // Since this is a top-level qmark, either it has a dst, in which case we
    // assign the "true" and "false" results to that local, or it is void and
    // there is nothing to assign.
    unsigned lclNum = BAD_VAR_NUM;
    if (dst != nullptr)
    {
        assert(dst->gtOper == GT_LCL_VAR);
        lclNum = dst->gtLclVar.gtLclNum;
    }
    else
    {
        assert(qmark->TypeGet() == TYP_VOID);
    }

    if (hasTrueExpr)
    {
        if (dst != nullptr)
        {
            trueExpr = gtNewTempAssign(lclNum, trueExpr);
        }
        GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
        fgInsertStmtAtEnd(thenBlock, trueStmt);
    }

    // Assign the falseExpr into the dst or tmp, insert in elseBlock
    if (hasFalseExpr)
    {
        if (dst != nullptr)
        {
            falseExpr = gtNewTempAssign(lclNum, falseExpr);
        }
        GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
        fgInsertStmtAtEnd(elseBlock, falseStmt);
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
        fgDispBasicBlocks(block, remainderBlock, true);
    }
#endif // DEBUG
}

/*****************************************************************************
 *
 *  Expand GT_QMARK nodes from the flow graph into basic blocks.
 *
 */

void Compiler::fgExpandQmarkNodes()
{
    if (compQmarkUsed)
    {
        for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
        {
            for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
            {
                GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
#ifdef DEBUG
                fgPreExpandQmarkChecks(expr);
#endif
                fgExpandQmarkStmt(block, stmt);
            }
        }
#ifdef DEBUG
        fgPostExpandQmarkChecks();
#endif
    }
    compQmarkRationalized = true;
}

#ifdef DEBUG
/*****************************************************************************
 *
 *  Make sure we don't have any more GT_QMARK nodes.
 *
 */
void Compiler::fgPostExpandQmarkChecks()
{
    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
    {
        for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
            fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
        }
    }
}
#endif

/*****************************************************************************
 *
 *  Transform all basic blocks for codegen.
 */

void Compiler::fgMorph()
{
    noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.

    fgOutgoingArgTemps = nullptr;

#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In fgMorph()\n");
    }
    if (verboseTrees)
    {
        fgDispBasicBlocks(true);
    }
#endif // DEBUG

    // Insert call to class constructor as the first basic block if
    // we were asked to do so.
    if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
                                    impTokenLookupContextHandle /* context */) &
        CORINFO_INITCLASS_USE_HELPER)
    {
        fgEnsureFirstBBisScratch();
        fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
    }

#ifdef DEBUG
    if (opts.compGcChecks)
    {
        for (unsigned i = 0; i < info.compArgsCount; i++)
        {
            if (lvaTable[i].TypeGet() == TYP_REF)
            {
                // Confirm that the argument is a GC pointer (for GC stress debugging).
                GenTreePtr      op   = gtNewLclvNode(i, TYP_REF);
                GenTreeArgList* args = gtNewArgList(op);
                op                   = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);

                fgEnsureFirstBBisScratch();
                fgInsertStmtAtEnd(fgFirstBB, op);
            }
        }
    }

    if (opts.compStackCheckOnRet)
    {
        lvaReturnEspCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
        lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
    }

    if (opts.compStackCheckOnCall)
    {
        lvaCallEspCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
        lvaTable[lvaCallEspCheck].lvType = TYP_INT;
    }
#endif // DEBUG

    /* Filter out unimported BBs */

    fgRemoveEmptyBlocks();

#ifdef DEBUG
    /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
    fgDebugCheckBBlist(false, false);
#endif // DEBUG

    EndPhase(PHASE_MORPH_INIT);

    /* Inline */
    fgInline();
#if 0
    JITDUMP("trees after inlining\n");
    DBEXEC(VERBOSE, fgDispBasicBlocks(true));
#endif

    RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.

    EndPhase(PHASE_MORPH_INLINE);

    /* Add any internal blocks/trees we may need */

    fgAddInternal();

#if OPT_BOOL_OPS
    fgMultipleNots = false;
#endif

#ifdef DEBUG
    /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
    fgDebugCheckBBlist(false, false);
#endif // DEBUG

    fgRemoveEmptyTry();

    EndPhase(PHASE_EMPTY_TRY);

    fgRemoveEmptyFinally();

    EndPhase(PHASE_EMPTY_FINALLY);

    fgMergeFinallyChains();

    EndPhase(PHASE_MERGE_FINALLY_CHAINS);

    fgCloneFinally();

    EndPhase(PHASE_CLONE_FINALLY);

    fgUpdateFinallyTargetFlags();

    /* For x64 and ARM64 we need to mark irregular parameters */
    fgMarkImplicitByRefArgs();

    /* Promote struct locals if necessary */
    fgPromoteStructs();

    /* Now is the time to figure out which locals are address-taken. */
    fgMarkAddressExposedLocals();

    EndPhase(PHASE_STR_ADRLCL);

    /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
       analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
    fgRetypeImplicitByRefArgs();

#ifdef DEBUG
    /* Now that address-taken locals and implicit byrefs have been marked, we can safely apply stress. */
    lvaStressLclFld();
    fgStress64RsltMul();
#endif // DEBUG

    EndPhase(PHASE_MORPH_IMPBYREF);

    /* Morph the trees in all the blocks of the method */

    fgMorphBlocks();

    /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
    fgMarkDemotedImplicitByRefArgs();

    EndPhase(PHASE_MORPH_GLOBAL);

#if 0
    JITDUMP("trees after fgMorphBlocks\n");
    DBEXEC(VERBOSE, fgDispBasicBlocks(true));
#endif

    /* Decide the kind of code we want to generate */

    fgSetOptions();

    fgExpandQmarkNodes();

#ifdef DEBUG
    compCurBB = nullptr;
#endif // DEBUG
}

/*****************************************************************************
 *
 *  Promote struct locals: replace promotable struct locals with independent
 *  field locals where profitable.
 */
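// For example (illustrative only): given
//     struct Pair { int a; int b; };
// promoting a local `Pair p` creates separate int locals for `p.a` and `p.b`,
// so later phases can track and enregister each field independently.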
void Compiler::fgPromoteStructs()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In fgPromoteStructs()\n");
    }
#endif // DEBUG

    if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
    {
        return;
    }

    if (fgNoStructPromotion)
    {
        return;
    }

#if 0
    // The code in this #if has been useful in debugging struct promotion issues, by
    // allowing selective enablement of the struct promotion optimization according to
    // method hash.
#ifdef DEBUG
    unsigned methHash = info.compMethodHash();
    char* lostr = getenv("structpromohashlo");
    unsigned methHashLo = 0;
    if (lostr != NULL)
    {
        sscanf_s(lostr, "%x", &methHashLo);
    }
    char* histr = getenv("structpromohashhi");
    unsigned methHashHi = UINT32_MAX;
    if (histr != NULL)
    {
        sscanf_s(histr, "%x", &methHashHi);
    }
    if (methHash < methHashLo || methHash > methHashHi)
    {
        return;
    }
    else
    {
        printf("Promoting structs for method %s, hash = 0x%x.\n",
               info.compFullName, info.compMethodHash());
        printf("");         // in our logic this causes a flush
    }
#endif // DEBUG
#endif // 0

    if (info.compIsVarArgs)
    {
        return;
    }

    if (getNeedsGSSecurityCookie())
    {
        return;
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nlvaTable before fgPromoteStructs\n");
        lvaTableDump();
    }
#endif // DEBUG

    // The lvaTable might grow as we grab temps. Make a local copy here.
    unsigned startLvaCount = lvaCount;

    //
    // Loop through the original lvaTable, looking for struct locals to promote.
    //
    lvaStructPromotionInfo structPromotionInfo;
    bool                   tooManyLocals = false;

    for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
    {
        // Whether this var got promoted
        bool       promotedVar = false;
        LclVarDsc* varDsc      = &lvaTable[lclNum];

        // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
        // its fields.  Instead, we will attempt to enregister the entire struct.
        if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
        {
            varDsc->lvRegStruct = true;
        }
        // Don't promote if we have reached the tracking limit.
        else if (lvaHaveManyLocals())
        {
            // Print the message the first time we detect this condition
            if (!tooManyLocals)
            {
                JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
            }
            tooManyLocals = true;
        }
        else if (varTypeIsStruct(varDsc))
        {
            bool shouldPromote;

            lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
            if (structPromotionInfo.canPromote)
            {
                shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
            }
            else
            {
                shouldPromote = false;
            }

#if 0
            // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
            // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
            static int structPromoVarNum = 0;
            structPromoVarNum++;
            if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
#endif // 0

            if (shouldPromote)
            {
                // Promote this struct local var.
                lvaPromoteStructVar(lclNum, &structPromotionInfo);
                promotedVar = true;

#ifdef _TARGET_ARM_
                if (structPromotionInfo.requiresScratchVar)
                {
                    // Ensure that the scratch variable is allocated, in case we
                    // pass a promoted struct as an argument.
                    if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
                    {
                        lvaPromotedStructAssemblyScratchVar =
                            lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
                        lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
                    }
                }
#endif // _TARGET_ARM_
            }
        }

        if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
        {
            // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
            // we will treat it as a reg struct.
            varDsc->lvRegStruct = true;
        }
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nlvaTable after fgPromoteStructs\n");
        lvaTableDump();
    }
#endif // DEBUG
}

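// fgMorphStructField: Rewrite a GT_FIELD reference off the address of a struct
// local. If the struct is promoted, replace the field reference with a
// GT_LCL_VAR of the corresponding promoted field local; for a "normed" struct
// (see the comment inside), replace it with a reference to the local itself.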
Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
    noway_assert(tree->OperGet() == GT_FIELD);

    GenTreePtr objRef = tree->gtField.gtFldObj;
    GenTreePtr obj    = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
    noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));

    /* Is this an instance data member? */

    if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
    {
        unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (varTypeIsStruct(obj))
        {
            if (varDsc->lvPromoted)
            {
                // Promoted struct
                unsigned fldOffset     = tree->gtField.gtFldOffset;
                unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
                noway_assert(fieldLclIndex != BAD_VAR_NUM);

                if (lvaIsImplicitByRefLocal(lclNum))
                {
                    // Keep track of the number of appearances of each promoted implicit
                    // byref (here during struct promotion, which happens during address-exposed
                    // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
                    // byref params when deciding if it's legal to elide certain copies of them.
                    // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
                    // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
                    // chance, so have to check now.
                    JITDUMP(
                        "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
                        varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
                    varDsc->lvRefCnt++;
                }

                tree->SetOper(GT_LCL_VAR);
                tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
                tree->gtType = lvaTable[fieldLclIndex].TypeGet();
                tree->gtFlags &= GTF_NODE_MASK;
                tree->gtFlags &= ~GTF_GLOB_REF;

                GenTreePtr parent = fgWalkPre->parentStack->Index(1);
                if (parent->gtOper == GT_ASG)
                {
                    if (parent->gtOp.gtOp1 == tree)
                    {
                        tree->gtFlags |= GTF_VAR_DEF;
                        tree->gtFlags |= GTF_DONT_CSE;
                    }

                    // Promotion of a struct containing struct fields where the field
                    // is a struct with a single pointer-sized scalar type field: in
                    // this case struct promotion uses the type of the underlying
                    // scalar field as the type of the struct field instead of recursively
                    // promoting. This can lead to a case where we have a block-asgn
                    // with its RHS replaced with a scalar type.  Mark the RHS value as
                    // DONT_CSE so that assertion prop will not do const propagation.
                    // The reason this is required is that if the RHS of a block-asg is a
                    // constant, then it is incorrectly interpreted as an init-block.
                    //
                    // TODO - This can also be avoided if we implement recursive struct
                    // promotion.
                    if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
                    {
                        tree->gtFlags |= GTF_DONT_CSE;
                    }
                }
#ifdef DEBUG
                if (verbose)
                {
                    printf("Replacing the field in promoted struct with a local var:\n");
                    fgWalkPre->printModified = true;
                }
#endif // DEBUG
                return WALK_SKIP_SUBTREES;
            }
        }
        else
        {
            // Normed struct
            // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
            // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
            // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
            // there is one extremely rare case where that won't be true. An enum type is a special value type
            // that contains exactly one element of a primitive integer type (that, for CLS programs, is named
            // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
            // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
            // ldfld. For example:
            //
            //  .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
            //  {
            //    .field public specialname rtspecialname int16 value__
            //    .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
            //  }
            //  .method public hidebysig static void  Main() cil managed
            //  {
            //     .locals init (valuetype mynamespace.e_t V_0)
            //     ...
            //     ldloca.s   V_0
            //     ldflda     int16 mynamespace.e_t::value__
            //     ...
            //  }
            //
            // Normally, compilers will not generate the ldflda, since it is superfluous.
            //
            // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
            // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
            // mismatch like this, don't do this morphing. The local var may end up getting marked as
            // address taken, and the appropriate SHORT load will be done from memory in that case.

            if (tree->TypeGet() == obj->TypeGet())
            {
                if (lvaIsImplicitByRefLocal(lclNum))
                {
                    // Keep track of the number of appearances of each promoted implicit
                    // byref (here during struct promotion, which happens during address-exposed
                    // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
                    // byref params when deciding if it's legal to elide certain copies of them.
                    // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
                    // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
                    // chance, so have to check now.
                    JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
                            varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
                    varDsc->lvRefCnt++;
                }

                tree->ChangeOper(GT_LCL_VAR);
                tree->gtLclVarCommon.SetLclNum(lclNum);
                tree->gtFlags &= GTF_NODE_MASK;

                GenTreePtr parent = fgWalkPre->parentStack->Index(1);
                if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
                {
                    tree->gtFlags |= GTF_VAR_DEF;
                    tree->gtFlags |= GTF_DONT_CSE;
                }
#ifdef DEBUG
                if (verbose)
                {
                    printf("Replacing the field in normed struct with the local var:\n");
                    fgWalkPre->printModified = true;
                }
#endif // DEBUG
                return WALK_SKIP_SUBTREES;
            }
        }
    }

    return WALK_CONTINUE;
}

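// fgMorphLocalField: For a GT_LCL_FLD on a promoted struct local, substitute
// the promoted field local that exactly covers the accessed offset and size,
// if one exists; otherwise mark the struct as do-not-enregister so the field
// access can be performed from memory.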
Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
    noway_assert(tree->OperGet() == GT_LCL_FLD);

    unsigned   lclNum = tree->gtLclFld.gtLclNum;
    LclVarDsc* varDsc = &lvaTable[lclNum];

    if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
    {
        // Promoted struct
        unsigned   fldOffset     = tree->gtLclFld.gtLclOffs;
        unsigned   fieldLclIndex = 0;
        LclVarDsc* fldVarDsc     = nullptr;

        if (fldOffset != BAD_VAR_NUM)
        {
            fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
            noway_assert(fieldLclIndex != BAD_VAR_NUM);
            fldVarDsc = &lvaTable[fieldLclIndex];
        }

        if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
#ifdef _TARGET_X86_
            && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
#endif
                )
        {
            // There is an existing sub-field we can use.
            tree->gtLclFld.SetLclNum(fieldLclIndex);

            // We need to keep the types 'compatible'. If we can, switch back to a GT_LCL_VAR.
            CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_
            assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
#else
            assert(varTypeIsIntegralOrI(tree->TypeGet()));
#endif
            if (varTypeCanReg(fldVarDsc->TypeGet()))
            {
                // If the type is integer-ish, then we can use it as-is.
                tree->ChangeOper(GT_LCL_VAR);
                assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
                tree->gtType = fldVarDsc->TypeGet();
#ifdef DEBUG
                if (verbose)
                {
                    printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
                    fgWalkPre->printModified = true;
                }
#endif // DEBUG
            }

            GenTreePtr parent = fgWalkPre->parentStack->Index(1);
            if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
            {
                tree->gtFlags |= GTF_VAR_DEF;
                tree->gtFlags |= GTF_DONT_CSE;
            }
        }
        else
        {
            // There is no existing field that has all the parts that we need,
            // so we must ensure that the struct lives in memory.
            lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));

#ifdef DEBUG
            // We can't convert this local to a float because it really does have its
            // address taken.
            varDsc->lvKeepType = 1;
#endif // DEBUG
        }

        return WALK_SKIP_SUBTREES;
    }

    return WALK_CONTINUE;
}

//------------------------------------------------------------------------
// fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
//                          i.e. which the ABI requires to be passed by making a copy in the caller and
//                          passing its address to the callee.  Mark their `LclVarDsc`s such that
//                          `lvaIsImplicitByRefLocal` will return true for them.

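// For example (illustrative; the exact classification is the ABI's): on Windows
// x64 a 12-byte struct parameter is passed as a pointer to a caller-made copy,
// while an 8-byte struct is passed by value in a register, so only the former
// is marked here.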
void Compiler::fgMarkImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In fgMarkImplicitByRefArgs()\n");
    }
#endif // DEBUG

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
        {
            size_t size;

            if (varDsc->lvSize() > REGSIZE_BYTES)
            {
                size = varDsc->lvSize();
            }
            else
            {
                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                size                         = info.compCompHnd->getClassSize(typeHnd);
            }

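            // On AMD64, a struct is passed by value only if its size is a power
            // of two no larger than a register (1, 2, 4, or 8 bytes); e.g. a 3-,
            // 6-, or 12-byte struct fails the test below and becomes an implicit
            // byref. On ARM64 the test is instead whether the struct exceeds a
            // pointer in size without qualifying as a multireg struct.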
#if defined(_TARGET_AMD64_)
            if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
#elif defined(_TARGET_ARM64_)
            if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
#endif
            {
                // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local,
                // so we now use that combination to indicate that this is one of the weird
                // implicit by-ref locals.
                // The address-taken cleanup will look for references to locals marked like
                // this, and transform them appropriately.
                varDsc->lvIsTemp = 1;

                // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
                // appearance of implicit-by-ref param so that call arg morphing can do an
                // optimization for single-use implicit-by-ref params whose single use is as
                // an outgoing call argument.
                varDsc->lvRefCnt = 0;
            }
        }
    }

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

//------------------------------------------------------------------------
// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
//                            struct to pointer).  Also choose (based on address-exposed analysis)
//                            which struct promotions of implicit byrefs to keep or discard.
//                            For those which are kept, insert the appropriate initialization code.
//                            For those which are to be discarded, annotate the promoted field locals
//                            so that fgMorphImplicitByRefArgs will know to rewrite their appearances
//                            using indirections off the pointer parameters.

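// Illustrative sketch of the effect (names hypothetical): a parameter declared
// as `struct S s` (TYP_STRUCT) becomes a TYP_BYREF pointer, and if its
// promotion is kept, a new struct temp is created and initialized at method
// entry from `*s`, with the promoted field locals re-parented to that temp.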
void Compiler::fgRetypeImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
    }
#endif // DEBUG

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            size_t size;

            if (varDsc->lvSize() > REGSIZE_BYTES)
            {
                size = varDsc->lvSize();
            }
            else
            {
                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                size                         = info.compCompHnd->getClassSize(typeHnd);
            }

            if (varDsc->lvPromoted)
            {
                // This implicit-by-ref was promoted; create a new temp to represent the
                // promoted struct before rewriting this parameter as a pointer.
                unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
                lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
                // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
                varDsc = &lvaTable[lclNum];

                // Copy the struct promotion annotations to the new temp.
                LclVarDsc* newVarDsc       = &lvaTable[newLclNum];
                newVarDsc->lvPromoted      = true;
                newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
                newVarDsc->lvFieldCnt      = varDsc->lvFieldCnt;
                newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
                newVarDsc->lvCustomLayout  = varDsc->lvCustomLayout;
#ifdef DEBUG
                newVarDsc->lvKeepType = true;
#endif // DEBUG

                // Propagate address-taken-ness and do-not-enregister-ness.
                newVarDsc->lvAddrExposed     = varDsc->lvAddrExposed;
                newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
#ifdef DEBUG
                newVarDsc->lvLclBlockOpAddr   = varDsc->lvLclBlockOpAddr;
                newVarDsc->lvLclFieldExpr     = varDsc->lvLclFieldExpr;
                newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
                newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
                newVarDsc->lvLiveAcrossUCall  = varDsc->lvLiveAcrossUCall;
#endif // DEBUG

                // If the promotion is dependent, the promoted temp would just be committed
                // to memory anyway, so we'll rewrite its appearances to be indirections
                // through the pointer parameter, the same as we'd do for this
                // parameter if it weren't promoted at all (otherwise the initialization
                // of the new temp would just be a needless memcpy at method entry).
                bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
                                     (varDsc->lvRefCnt <= varDsc->lvFieldCnt);
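                // (The lvRefCnt <= lvFieldCnt test is a profitability guess: if the
                // struct has no more appearances than it has fields, the entry-block
                // copy into the promoted temp likely costs more than promotion saves.)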

                if (!undoPromotion)
                {
                    // Insert IR that initializes the temp from the parameter.
                    // LHS is a simple reference to the temp.
                    fgEnsureFirstBBisScratch();
                    GenTreePtr lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
                    // RHS is an indirection (using GT_OBJ) off the parameter.
                    GenTreePtr addr   = gtNewLclvNode(lclNum, TYP_BYREF);
                    GenTreePtr rhs    = gtNewBlockVal(addr, (unsigned)size);
                    GenTreePtr assign = gtNewAssignNode(lhs, rhs);
                    fgInsertStmtAtBeg(fgFirstBB, assign);
                }

                // Update the locals corresponding to the promoted fields.
                unsigned fieldLclStart = varDsc->lvFieldLclStart;
                unsigned fieldCount    = varDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];

                    if (undoPromotion)
                    {
                        // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
                        // will know to rewrite appearances of this local.
                        assert(fieldVarDsc->lvParentLcl == lclNum);
                    }
                    else
                    {
                        // Set the new parent.
                        fieldVarDsc->lvParentLcl = newLclNum;
                        // Clear the ref count field; it is used to communicate the number of references
                        // to the implicit byref parameter when morphing calls that pass the implicit byref
                        // out as an outgoing argument value, but that doesn't pertain to this field local
                        // which is now a field of a non-arg local.
                        fieldVarDsc->lvRefCnt = 0;
                    }

                    fieldVarDsc->lvIsParam = false;
                    // The fields shouldn't inherit any register preferences from
                    // the parameter which is really a pointer to the struct.
                    fieldVarDsc->lvIsRegArg      = false;
                    fieldVarDsc->lvIsMultiRegArg = false;
                    fieldVarDsc->lvSetIsHfaRegArg(false);
                    fieldVarDsc->lvArgReg = REG_NA;
#if FEATURE_MULTIREG_ARGS
                    fieldVarDsc->lvOtherArgReg = REG_NA;
#endif
                    fieldVarDsc->lvPrefReg = 0;
                }

                // Hijack lvFieldLclStart to record the new temp number.
                // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
                varDsc->lvFieldLclStart = newLclNum;
                // Go ahead and clear lvFieldCnt -- either we're promoting
                // a replacement temp or we're not promoting this arg, and
                // in either case the parameter is now a pointer that doesn't
                // have these fields.
                varDsc->lvFieldCnt = 0;

                // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
                // whether references to the struct should be rewritten as
                // indirections off the pointer (not promoted) or references
                // to the new struct local (promoted).
                varDsc->lvPromoted = !undoPromotion;
            }
            else
            {
                // The "undo promotion" path above clears lvPromoted for args that struct
                // promotion wanted to promote but that aren't considered profitable to
                // rewrite.  It hijacks lvFieldLclStart to communicate to
                // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
                // on such args for fgMorphImplicitByRefArgs to consult in the interim.
                // Here we have an arg that was simply never promoted, so make sure it doesn't
                // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
                // and fgMarkDemotedImplicitByRefArgs.
                assert(varDsc->lvFieldLclStart == 0);
            }

            // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
            varDsc->lvType = TYP_BYREF;

            // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
            // make sure that the following flag is not set, as it would force SSA to
            // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
            //
            varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.

#ifdef DEBUG
            // This should not be converted to a double in stress mode,
            // because it is really a pointer.
            varDsc->lvKeepType = 1;

            // The struct parameter may have had its address taken, but the pointer parameter
            // cannot -- any uses of the struct parameter's address are uses of the pointer
            // parameter's value, and there's no way for the MSIL to reference the pointer
            // parameter's address.  So clear the address-taken bit for the parameter.
            varDsc->lvAddrExposed     = 0;
            varDsc->lvDoNotEnregister = 0;

            if (verbose)
            {
                printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
            }
#endif // DEBUG
        }
    }

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

//------------------------------------------------------------------------
// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
//                                 asked to promote.  Appearances of these have now been rewritten
//                                 (by fgMorphImplicitByRefArgs) using indirections from the pointer
//                                 parameter or references to the promotion temp, as appropriate.

void Compiler::fgMarkDemotedImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            if (varDsc->lvPromoted)
            {
                // The parameter is simply a pointer now, so clear lvPromoted.  It was left set
                // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
                // appearances of this arg needed to be rewritten to a new promoted struct local.
                varDsc->lvPromoted = false;

                // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
                // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
                varDsc->lvFieldLclStart = 0;
            }
            else if (varDsc->lvFieldLclStart != 0)
            {
                // We created new temps to represent a promoted struct corresponding to this
                // parameter, but decided not to go through with the promotion and have
                // rewritten all uses as indirections off the pointer parameter.
                // We stashed the pointer to the new struct temp in lvFieldLclStart; make
                // note of that and clear the annotation.
                unsigned structLclNum   = varDsc->lvFieldLclStart;
                varDsc->lvFieldLclStart = 0;

                // Clear the arg's ref count; this was set during address-taken analysis so that
                // call morphing could identify single-use implicit byrefs; we're done with
                // that, and want it to be in its default state of zero when we go to set
                // real ref counts for all variables.
                varDsc->lvRefCnt = 0;

                // The temp struct is now unused; set flags appropriately so that we
                // won't allocate space for it on the stack.
                LclVarDsc* structVarDsc     = &lvaTable[structLclNum];
                structVarDsc->lvRefCnt      = 0;
                structVarDsc->lvAddrExposed = false;
#ifdef DEBUG
                structVarDsc->lvUnusedStruct = true;
#endif // DEBUG

                unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
                unsigned fieldCount    = structVarDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    // Fix the pointer to the parent local.
                    LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
                    assert(fieldVarDsc->lvParentLcl == lclNum);
                    fieldVarDsc->lvParentLcl = structLclNum;

                    // The field local is now unused; set flags appropriately so that
                    // we won't allocate stack space for it.
                    fieldVarDsc->lvRefCnt      = 0;
                    fieldVarDsc->lvAddrExposed = false;
                }
            }
        }
    }

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

/*****************************************************************************
 *
 *  Morph irregular parameters:
 *    for x64 and ARM64 this means turning them into byrefs and adding extra indirections.
 */
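// An illustrative sketch (hypothetical C# source, names invented): on Windows x64 a
// struct argument whose size is not 1, 2, 4, or 8 bytes is passed by reference, e.g.
//
//     struct Payload { public long a, b, c; }          // 24 bytes
//     static long Sum(Payload p) => p.a + p.b + p.c;   // 'p' arrives as a pointer
//
// Inside Sum, references to 'p' and its fields were imported against a struct-typed
// local; the morphing below rewrites them into indirections and field references off
// a TYP_BYREF parameter.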
bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree)
{
#if (!defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) && !defined(_TARGET_ARM64_)

    return false;

#else  // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_

    bool changed = false;

    // Implicit byref morphing needs to know if the reference to the parameter is a
    // child of GT_ADDR or not, so this method looks one level down and does the
    // rewrite whenever a child is a reference to an implicit byref parameter.
    if (tree->gtOper == GT_ADDR)
    {
        if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
        {
            GenTreePtr morphedTree = fgMorphImplicitByRefArgs(tree, true);
            changed                = (morphedTree != nullptr);
            assert(!changed || (morphedTree == tree));
        }
    }
    else
    {
        for (GenTreePtr* pTree : tree->UseEdges())
        {
            GenTreePtr childTree = *pTree;
            if (childTree->gtOper == GT_LCL_VAR)
            {
                GenTreePtr newChildTree = fgMorphImplicitByRefArgs(childTree, false);
                if (newChildTree != nullptr)
                {
                    changed = true;
                    *pTree  = newChildTree;
                }
            }
        }
    }

    return changed;
#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

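// fgMorphImplicitByRefArgs: morph a single reference to an implicit byref parameter
// (or to a field of one), rewriting it in terms of the TYP_BYREF pointer that is
// actually passed.
//
// Arguments:
//    tree   - a GT_LCL_VAR, or a GT_ADDR whose operand is a GT_LCL_VAR
//    isAddr - true iff 'tree' is the GT_ADDR form
//
// Return Value:
//    The morphed tree if a rewrite was performed, or nullptr if no change was needed.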
GenTreePtr Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, bool isAddr)
{
    assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
    assert(isAddr == (tree->gtOper == GT_ADDR));

    GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
    unsigned   lclNum     = lclVarTree->gtLclVarCommon.gtLclNum;
    LclVarDsc* lclVarDsc  = &lvaTable[lclNum];

    CORINFO_FIELD_HANDLE fieldHnd;
    unsigned             fieldOffset  = 0;
    var_types            fieldRefType = TYP_UNKNOWN;

    if (lvaIsImplicitByRefLocal(lclNum))
    {
        // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
        // re-invoke the traversal to mark address-taken locals.
        // So, we may encounter a tree that has already been transformed to TYP_BYREF.
        // If we do, leave it as-is.
        if (!varTypeIsStruct(lclVarTree))
        {
            assert(lclVarTree->TypeGet() == TYP_BYREF);

            return nullptr;
        }
        else if (lclVarDsc->lvPromoted)
        {
            // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
            // arg.  Rewrite this to refer to the new local.
            assert(lclVarDsc->lvFieldLclStart != 0);
            lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
            return tree;
        }

        fieldHnd = nullptr;
    }
    else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
    {
        // This was a field reference to an implicit-by-reference struct parameter that was
        // dependently promoted; update it to a field reference off the pointer.
        // Grab the field handle from the struct field lclVar.
        fieldHnd    = lclVarDsc->lvFieldHnd;
        fieldOffset = lclVarDsc->lvFldOffset;
        assert(fieldHnd != nullptr);
        // Update lclNum/lclVarDsc to refer to the parameter
        lclNum       = lclVarDsc->lvParentLcl;
        lclVarDsc    = &lvaTable[lclNum];
        fieldRefType = lclVarTree->TypeGet();
    }
    else
    {
        // We only need to transform the 'marked' implicit byref parameters.
        return nullptr;
    }

    // This is no longer a def of the lclVar, even if it WAS a def of the struct.
    lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);

    if (isAddr)
    {
        if (fieldHnd == nullptr)
        {
            // change &X into just plain X
            tree->CopyFrom(lclVarTree, this);
            tree->gtType = TYP_BYREF;
        }
        else
        {
            // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
            // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
            lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
            lclVarTree->gtType = TYP_BYREF;
            tree->gtOp.gtOp1   = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
        }

#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing address of implicit by ref struct parameter with byref:\n");
        }
#endif // DEBUG
    }
    else
    {
        // Change X into OBJ(X) or FIELD(X, f)
        var_types structType = tree->gtType;
        tree->gtType         = TYP_BYREF;

        if (fieldHnd)
        {
            tree->gtLclVarCommon.SetLclNum(lclNum);
            tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
        }
        else
        {
            tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
        }

        if (structType == TYP_STRUCT)
        {
            gtSetObjGcInfo(tree->AsObj());
        }

        // TODO-CQ: If the VM ever stops violating the ABI by passing heap references
        // this way, we could remove TGTANYWHERE.
        tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);

#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
        }
#endif // DEBUG
    }

#ifdef DEBUG
    if (verbose)
    {
        gtDispTree(tree);
    }
#endif // DEBUG

    return tree;
}

// An "AddrExposedContext" expresses the calling context in which an address expression occurs.
enum AddrExposedContext
{
    AXC_None,     // None of the below seen yet.
    AXC_Ind,      // The address being computed is to be dereferenced.
    AXC_Addr,     // We're computing a raw address (not dereferenced, at least not immediately).
    AXC_IndWide,  // A block operation dereferenced an address referencing more bytes than the address
                  // addresses -- if the address addresses a field of a struct local, we need to consider
                  // the entire local address taken (not just the field).
    AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
                  // on more bytes than the width of the storage location addressed.  If this is a
                  // field of a promoted struct local, declare the entire struct local address-taken.
    AXC_IndAdd,   // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
                  // If one arg is a constant int, evaluate the other in an IND context.  Otherwise, none.
};
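// An illustrative sketch (hypothetical tree): given a 4-byte promoted field local V03
// appearing as
//
//     ASG(BLK<16>(ADDR(LCL_VAR V03)), src)
//
// the 16-byte block store covers more than V03's storage, so the BLK visits its address
// in an AXC_IndWide context, the ADDR converts that to AXC_AddrWide, and the LCL_VAR is
// then marked address-exposed along with its parent struct local.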

typedef ArrayStack<AddrExposedContext> AXCStack;

// We use the pre- and post-order callbacks to simulate passing an argument through the recursion, via a stack.
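// Each pre-order visit pushes the context in which the node's children should be
// evaluated, and the matching post-order visit pops it again, so axcStack->Top() in the
// pre-order callback always reflects the context established by the nearest ancestor.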
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
{
    AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
    (void)axcStack->Pop();
    return WALK_CONTINUE;
}

Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
{
    GenTreePtr         tree     = *pTree;
    Compiler*          comp     = fgWalkPre->compiler;
    AXCStack*          axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
    AddrExposedContext axc      = axcStack->Top();

    // In some situations, we have to figure out what the effective context is in which to
    // evaluate the current tree, depending on which argument position it is in its parent.

    switch (axc)
    {

        case AXC_IndAdd:
        {
            GenTreePtr parent = fgWalkPre->parentStack->Index(1);
            assert(parent->OperGet() == GT_ADD);
            // Is one of the args a constant representing a field offset,
            // and is this the other?  If so, Ind context.
            if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
            {
                axc = AXC_Ind;
            }
            else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
            {
                axc = AXC_Ind;
            }
            else
            {
                axc = AXC_None;
            }
        }
        break;

        default:
            break;
    }

    // Now recurse properly for the tree.
    switch (tree->gtOper)
    {
        case GT_IND:
            if (axc != AXC_Addr)
            {
                axcStack->Push(AXC_Ind);
            }
            else
            {
                axcStack->Push(AXC_None);
            }
            return WALK_CONTINUE;

        case GT_BLK:
        case GT_OBJ:
            if (axc == AXC_Addr)
            {
                axcStack->Push(AXC_None);
            }
            else if (tree->TypeGet() == TYP_STRUCT)
            {
                // The block operation will dereference its argument(s) -- usually.  If the size of the initblk
                // or copyblk exceeds the size of a storage location whose address is used as one of the
                // arguments, then we have to consider that storage location (indeed, its underlying containing
                // location) to be address taken.  So get the width of the initblk or copyblk.

                GenTreePtr  parent = fgWalkPre->parentStack->Index(1);
                GenTreeBlk* blk    = tree->AsBlk();
                unsigned    width  = blk->gtBlkSize;
                noway_assert(width != 0);
                axc           = AXC_Ind;
                GenTree* addr = blk->Addr();
                if (addr->OperGet() == GT_ADDR)
                {
                    if (parent->gtOper == GT_ASG)
                    {
                        if ((tree == parent->gtOp.gtOp1) &&
                            ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
                        {
                            axc = AXC_IndWide;
                        }
                    }
                    else
                    {
                        assert(parent->gtOper == GT_CALL);
                    }
                }
                axcStack->Push(axc);
            }
            else
            {
                // This is like a regular GT_IND.
                axcStack->Push(AXC_Ind);
            }
            return WALK_CONTINUE;

        case GT_DYN_BLK:
            // Assume maximal width.
            axcStack->Push(AXC_IndWide);
            return WALK_CONTINUE;

        case GT_LIST:
        case GT_FIELD_LIST:
            axcStack->Push(AXC_None);
            return WALK_CONTINUE;

        case GT_INDEX:
            // Taking the address of an array element never takes the address of a local.
            axcStack->Push(AXC_None);
            return WALK_CONTINUE;

        case GT_ADDR:
#ifdef FEATURE_SIMD
            if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
            {
                axcStack->Push(AXC_None);
            }
            else
#endif // FEATURE_SIMD
                if (axc == AXC_Ind)
            {
                axcStack->Push(AXC_None);
            }
            else if (axc == AXC_IndWide)
            {
                axcStack->Push(AXC_AddrWide);
            }
            else
            {
                assert(axc == AXC_None);
                axcStack->Push(AXC_Addr);
            }
            return WALK_CONTINUE;

        case GT_FIELD:
            // First, handle a couple of special cases: field of promoted struct local, field
            // of "normed" struct.
            if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
            {
                // It (may have) replaced the field with a local var or local field.  If we're in an addr context,
                // label it addr-taken.
                if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
                {
                    unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
                    comp->lvaSetVarAddrExposed(lclNum);
                    if (axc == AXC_AddrWide)
                    {
                        LclVarDsc* varDsc = &comp->lvaTable[lclNum];
                        if (varDsc->lvIsStructField)
                        {
                            comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
                        }
                    }
                }
                // Push something to keep the PostCB, which will pop it, happy.
                axcStack->Push(AXC_None);
                return WALK_SKIP_SUBTREES;
            }
            else
            {
                // GT_FIELD is an implicit deref.
                if (axc == AXC_Addr)
                {
                    axcStack->Push(AXC_None);
                }
                else if (axc == AXC_AddrWide)
                {
                    axcStack->Push(AXC_IndWide);
                }
                else
                {
                    axcStack->Push(AXC_Ind);
                }
                return WALK_CONTINUE;
            }

        case GT_LCL_FLD:
        {
            assert(axc != AXC_Addr);
            unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
            if (comp->lvaIsImplicitByRefLocal(lclNum))
            {
                // Keep track of the number of appearances of each promoted implicit
                // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
                // checks the ref counts for implicit byref params when deciding if it's legal
                // to elide certain copies of them.
                LclVarDsc* varDsc = &comp->lvaTable[lclNum];
                JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n",
                        varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);

                varDsc->lvRefCnt++;
            }
            // fgMorphLocalField recognizes certain forms and does all the work, returning
            // WALK_SKIP_SUBTREES in that case and WALK_CONTINUE otherwise.  We do the same here.
            fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
            if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
            {
                comp->lvaSetVarAddrExposed(lclNum);
                if (axc == AXC_AddrWide)
                {
                    LclVarDsc* varDsc = &comp->lvaTable[lclNum];
                    if (varDsc->lvIsStructField)
                    {
                        comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
                    }
                }
            }
            // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
            // what, but something to be popped by the post callback.  If we're going
            // to analyze children, the LCL_FLD creates an Ind context, so use that.
            axcStack->Push(AXC_Ind);
            return res;
        }

        case GT_LCL_VAR:
        {
            unsigned   lclNum = tree->gtLclVarCommon.gtLclNum;
            LclVarDsc* varDsc = &comp->lvaTable[lclNum];

            if (comp->lvaIsImplicitByRefLocal(lclNum))
            {
                // Keep track of the number of appearances of each promoted implicit
                // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
                // checks the ref counts for implicit byref params when deciding if it's legal
                // to elide certain copies of them.
                JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n",
                        varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);

                varDsc->lvRefCnt++;
            }

            if (axc == AXC_Addr || axc == AXC_AddrWide)
            {
                comp->lvaSetVarAddrExposed(lclNum);
                if (axc == AXC_AddrWide)
                {
                    if (varDsc->lvIsStructField)
                    {
                        comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
                    }
                }

                // We may need to quirk the storage size for this LCL_VAR:
                // some PInvoke signatures incorrectly specify a ByRef to an INT32
                // when they actually write a SIZE_T or INT64.
                if (axc == AXC_Addr)
                {
                    comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
                }
            }
            // Push something to keep the PostCB, which will pop it, happy.
            axcStack->Push(AXC_None);
            // The tree is a leaf.
            return WALK_SKIP_SUBTREES;
        }

        case GT_ADD:
            assert(axc != AXC_Addr);
            // See below about treating pointer operations as wider indirection.
            if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
            {
                axcStack->Push(AXC_IndWide);
            }
            else if (axc == AXC_Ind)
            {
                // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
                // If it's an add of a constant and an address, and the constant represents a field,
                // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
                axcStack->Push(AXC_IndAdd);
            }
            else
            {
                axcStack->Push(axc);
            }
            return WALK_CONTINUE;

        // !!! Treat Pointer Operations as Wider Indirection
        //
        // If we are performing pointer operations, make sure we treat that as equivalent to a wider
        // indirection. This is because the pointers could be pointing to the address of struct fields
        // and could be used to perform operations on the whole struct or passed to another method.
        //
        // When visiting a node in this pre-order walk, we do not know if we would in the future
        // encounter a GT_ADDR of a GT_FIELD below.
        //
        // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
        // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
        // wider indirection context down the expr tree.
        //
        // Example, in unsafe code,
        //
        //   IL_000e  12 00             ldloca.s     0x0
        //   IL_0010  7c 02 00 00 04    ldflda       0x4000002
        //   IL_0015  12 00             ldloca.s     0x0
        //   IL_0017  7c 01 00 00 04    ldflda       0x4000001
        //   IL_001c  59                sub
        //
        // When visiting the GT_SUB node, if the type of either of GT_SUB's operands is BYREF, then
        // consider the GT_SUB to be equivalent to an AXC_IndWide.
        //
        // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
        // them as AXC_IndWide.
        //

        // BINOP
        case GT_SUB:
        case GT_MUL:
        case GT_DIV:
        case GT_UDIV:
        case GT_OR:
        case GT_XOR:
        case GT_AND:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROL:
        case GT_ROR:
        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GT:
        case GT_GE:
        // UNOP
        case GT_CAST:
            if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
                (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
            {
                axcStack->Push(AXC_IndWide);
                return WALK_CONTINUE;
            }
            __fallthrough;

        default:
            // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None".  We must
            // handle the "Ind" propagation explicitly above.
            if (axc == AXC_Addr || axc == AXC_AddrWide)
            {
                axcStack->Push(axc);
            }
            else
            {
                axcStack->Push(AXC_None);
            }
            return WALK_CONTINUE;
    }
}

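// fgFitsInOrNotLoc: return true when a 'width'-byte access to the storage location
// denoted by 'tree' is known to fit within that location (callers use this to decide
// whether a block operation needs to be treated as a wider indirection); return false
// when the width is too large or the location's size cannot be determined.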
bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
{
    if (tree->TypeGet() != TYP_STRUCT)
    {
        return width <= genTypeSize(tree->TypeGet());
    }
    else if (tree->OperGet() == GT_LCL_VAR)
    {
        assert(tree->TypeGet() == TYP_STRUCT);
        unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
        return width <= lvaTable[lclNum].lvExactSize;
    }
    else if (tree->OperGet() == GT_FIELD)
    {
        CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
        return width <= info.compCompHnd->getClassSize(fldClass);
    }
    else if (tree->OperGet() == GT_INDEX)
    {
        return width <= tree->gtIndex.gtIndElemSize;
    }
    else
    {
        return false;
    }
}

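// fgAddFieldSeqForZeroOffset: record that 'fieldSeq' applies at offset zero from the
// address computed by 'op1', either by appending it to a field sequence already carried
// by a constituent node (a GT_LCL_FLD under GT_ADDR, or a GT_CNS_INT, possibly an
// operand of a GT_ADD) or, by default, by entering the pair in the general zero-offset
// field map.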
void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
{
    assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);

    switch (op1->OperGet())
    {
        case GT_ADDR:
            if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
            {
                GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
                lclFld->gtFieldSeq    = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
            }
            break;

        case GT_ADD:
            if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
            {
                FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
                if (op1Fs != nullptr)
                {
                    op1Fs                                = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
                    op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
                }
            }
            else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
            {
                FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
                if (op2Fs != nullptr)
                {
                    op2Fs                                = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
                    op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
                }
            }
            break;

        case GT_CNS_INT:
        {
            FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
            if (op1Fs != nullptr)
            {
                op1Fs                    = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
                op1->gtIntCon.gtFieldSeq = op1Fs;
            }
        }
        break;

        default:
            // Record in the general zero-offset map.
            GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
            break;
    }
}

/*****************************************************************************
 *
 *  Mark address-taken locals.
 */

void Compiler::fgMarkAddressExposedLocals()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In fgMarkAddressExposedLocals()\n");
    }
#endif // DEBUG

    BasicBlock* block = fgFirstBB;
    noway_assert(block);

    do
    {
        /* Make the current basic block address available globally */

        compCurBB = block;

        GenTreePtr stmt;

        for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            // Walk the statement's tree, marking address-taken locals via the pre/post callbacks.
            AXCStack stk(this);
            stk.Push(AXC_None); // We start in neither an addr nor an ind context.
            fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
        }

        block = block->bbNext;

    } while (block);
}

// fgNodesMayInterfere:
//   return true if moving nodes relative to each other can change the result of a computation
//
// args:
//   write: a node which writes
//   read:  a node which reads
//
bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
{
    LclVarDsc* srcVar = nullptr;

    bool readIsIndir  = read->OperIsIndir() || read->OperIsImplicitIndir();
    bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();

    if (read->OperIsLocal())
    {
        srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
    }

    if (writeIsIndir)
    {
        if (srcVar && srcVar->lvAddrExposed)
        {
            return true;
        }
        else if (readIsIndir)
        {
            return true;
        }
        return false;
    }
    else if (write->OperIsLocal())
    {
        LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
        if (readIsIndir)
        {
            return dstVar->lvAddrExposed;
        }
        else if (read->OperIsLocal())
        {
            if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
            {
                return true;
            }
            return false;
        }
        else
        {
            return false;
        }
    }
    else
    {
        return false;
    }
}

/** This predicate decides whether we will fold a tree with the structure
 *  "x = x <op> y", where x could be any arbitrary expression, into "x <op>= y".
 *
 *  This modification is only performed when the target architecture supports
 *  complex addressing modes.  In the case of ARM, for example, this transformation
 *  yields no benefit.
 *
 *  In case this function decides we can proceed to fold into an assignment operator,
 *  we need to inspect whether the operator is commutative in order to tell fgMorph
 *  whether it needs to reverse the tree: when we see x = y <op> x, commutativity is
 *  what lets us still fold it into x <op>= y.
 */
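// For example (illustrative only): "x = x + 3" can be folded directly into "x += 3"
// (*bReverse = false), while "x = 3 + x" matches only because '+' is commutative, so it
// folds into "x += 3" with *bReverse = true to tell fgMorph the operands were swapped.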
bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
{
#if CPU_LOAD_STORE_ARCH
    /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
    return false;
#elif !defined(LEGACY_BACKEND)
    return false;
#else  // defined(LEGACY_BACKEND)

    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtGetOp2();
    genTreeOps cmop = op2->OperGet();

    /* Is the destination identical to the first RHS sub-operand? */
    if (GenTree::Compare(op1, op2->gtOp.gtOp1))
    {
        /*
        Do not transform the following tree

        [0024CFA4] -----------               const     int    1
        [0024CFDC] ----G------               |         int
        [0024CF5C] -----------               lclVar    ubyte  V01 tmp0
        [0024D05C] -A--G------               =         ubyte
        [0024D014] D------N---               lclVar    ubyte  V01 tmp0

        to

        [0024CFA4] -----------               const     int    1
        [0024D05C] -A--G------               |=        ubyte
        [0024D014] U------N---               lclVar    ubyte  V01 tmp0

        , when V01 is a struct field local.
        */

        if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
        {
            unsigned   lclNum = op1->gtLclVarCommon.gtLclNum;
            LclVarDsc* varDsc = lvaTable + lclNum;

            if (varDsc->lvIsStructField)
            {
                return false;
            }
        }

        *bReverse = false;
        return true;
    }
    else if (GenTree::OperIsCommutative(cmop))
    {
        /* For commutative ops only, check for "a = x <op> a" */

        /* Should we be doing this at all? */
        if ((opts.compFlags & CLFLG_TREETRANS) == 0)
        {
            return false;
        }

        /* Can we swap the operands to cmop ... */
        if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
        {
            // Both sides have side effects, which prevents the swap; bail.
            return false;
        }

        /* Is the destination identical to the second RHS sub-operand? */
        if (GenTree::Compare(op1, op2->gtOp.gtOp2))
        {
            *bReverse = true;
            return true;
        }
    }
    return false;
#endif // defined(LEGACY_BACKEND)
}

#ifdef FEATURE_SIMD

//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
//  If the RHS of the input stmt is a read of the SIMD vector's X field, then this
//  function keeps examining the following stmts, based on the vector size (2, 3, or 4
//  elements).  If the successive stmts' LHS locations are contiguous and their RHS
//  locations are contiguous as well, then we replace those statements with a copyblk.
//
// Argument:
//  block - BasicBlock*. block which stmt belongs to
//  stmt  - GenTreeStmt*. the stmt node we want to check
//
// return value:
//  if this function successfully optimized the stmts, then return true. Otherwise
//  return false;
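//
// For example (an illustrative C# sketch; 'v' and 'a' are hypothetical names):
//
//     a[i]     = v.X;
//     a[i + 1] = v.Y;
//     a[i + 2] = v.Z;
//     a[i + 3] = v.W;
//
// Since both the destinations and the sources are contiguous, the four float stores can
// be replaced with one 16-byte block copy from 'v' to '&a[i]'.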

bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
{

    noway_assert(stmt->gtOper == GT_STMT);
    GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
    assert(tree->OperGet() == GT_ASG);

    GenTreePtr originalLHS    = tree->gtOp.gtOp1;
    GenTreePtr prevLHS        = tree->gtOp.gtOp1;
    GenTreePtr prevRHS        = tree->gtOp.gtOp2;
    unsigned   index          = 0;
    var_types  baseType       = TYP_UNKNOWN;
    unsigned   simdSize       = 0;
    GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);

    if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
    {
        // If the RHS is not a read of a SIMD vector's field X, there is no need to check further.
        return false;
    }

    var_types  simdType             = getSIMDTypeForSize(simdSize);
    int        assignmentsCount     = simdSize / genTypeSize(baseType) - 1;
    int        remainingAssignments = assignmentsCount;
    GenTreePtr curStmt              = stmt->gtNext;
    GenTreePtr lastStmt             = stmt;

    while (curStmt != nullptr && remainingAssignments > 0)
    {
        GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
        if (exp->OperGet() != GT_ASG)
        {
            break;
        }
        GenTreePtr curLHS = exp->gtGetOp1();
        GenTreePtr curRHS = exp->gtGetOp2();

        if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
        {
            break;
        }

        remainingAssignments--;
        prevLHS = curLHS;
        prevRHS = curRHS;

        lastStmt = curStmt;
        curStmt  = curStmt->gtNext;
    }

    if (remainingAssignments > 0)
    {
        // If any assignments remain, the statements are not assigning to contiguous
        // memory locations from the same vector.
        return false;
    }
#ifdef DEBUG
    if (verbose)
    {
        printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
        printf("From BB%02u, stmt", block->bbNum);
        printTreeID(stmt);
        printf(" to stmt");
        printTreeID(lastStmt);
        printf("\n");
    }
#endif

    for (int i = 0; i < assignmentsCount; i++)
    {
        fgRemoveStmt(block, stmt->gtNext);
    }

    GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
    if (simdStructNode->OperIsLocal())
    {
        setLclRelatedToSIMDIntrinsic(simdStructNode);
    }
    GenTree* copyBlkAddr = copyBlkDst;
    if (copyBlkAddr->gtOper == GT_LEA)
    {
        copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
    }
    GenTreeLclVarCommon* localDst = nullptr;
    if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
    {
        setLclRelatedToSIMDIntrinsic(localDst);
    }

    GenTree* simdStructAddr;
    if (simdStructNode->TypeGet() == TYP_BYREF)
    {
        assert(simdStructNode->OperIsLocal());
        assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
        simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
    }
    else
    {
        assert(varTypeIsSIMD(simdStructNode));
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nBB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(before)\n");
        gtDispTree(stmt);
    }
#endif

    // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
    GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
    blkNode->gtType  = simdType;
    tree             = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
                          false, // not volatile
                          true); // copyBlock

    stmt->gtStmt.gtStmtExpr = tree;

    // Since we generated a new address node which didn't exist before,
    // we should expose this address manually here.
    AXCStack stk(this);
    stk.Push(AXC_None);
    fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nReplaced BB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(after)\n");
        gtDispTree(stmt);
    }
#endif
    return true;
}

#endif // FEATURE_SIMD