// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                          Morph                                            XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "allocacheck.h" // for alloca

// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for the overflow exception;
// returns the morphed tree.
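//
// For example (illustrative): GT_CAST(GT_CNS_DBL 1.0 -> int) is folded by
// gtFoldExprConst into a GT_CNS_INT, so no helper call is needed in that case.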
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
{
    GenTree* result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}

/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */
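//
// For example (illustrative), fgMorphCast uses this to rewrite
//     GT_CAST(double -> ulong)  ==>  GT_CALL CORINFO_HELP_DBL2ULNG(oper)
// mutating the node in place, so callers keep their existing tree pointer.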

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtFlags |= GTF_CALL;
    if (args)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }
    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

#if DEBUG
    // Helper calls are never inline candidates.

    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->Reset();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // _TARGET_XXX_

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}

/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else  // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}

/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());
    unsigned  srcSize;

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);

    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types; everybody else can get straight there,
        // except when using helpers.
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
                )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }

        // Do we need to do it in two steps R -> I -> smallType?
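        // (Illustrative: a cast like (sbyte)someDouble is performed as the
        // two-step (sbyte)(int)someDouble, with the truncating R -> I cast
        // inserted below and the I -> small-type cast left in 'tree'.)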
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we can not morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                    case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
#ifdef LEGACY_BACKEND
                        // the RyuJIT backend does not use the x87 FPU and therefore
                        // does not support folding the cast conv.i4(round.d(d))
                        if ((oper->gtOper == GT_INTRINSIC) &&
                            (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                        {
                            /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                            oper->gtType = dstType;
                            return fgMorphTree(oper);
                        }
                        // if SSE2 is not enabled, we need the helper
                        else
#endif // LEGACY_BACKEND
                            if (!opts.compCanUseSSE2)
                        {
                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                        }
                        else
#endif // _TARGET_X86_
                        {
                            goto OPTIMIZECAST;
                        }
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                    case TYP_UINT:
                        goto OPTIMIZECAST;
#else  // !(_TARGET_ARM_ || _TARGET_AMD64_)
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_ || _TARGET_AMD64_

#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                    case TYP_LONG:
                        goto OPTIMIZECAST;
#else
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                switch (dstType)
                {
                    case TYP_INT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                    default:
                        break;
                }
            }
            noway_assert(!"Unexpected dstType");
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // Convert long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversion as one-step operation
    // a) Long -> R4/R8
    // b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using above.
    // U4 -> R4/8 = U4 -> Long -> R4/8
    // U8 -> R4   = U8 -> R8 -> R4
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 =  U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif
#endif //_TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information. We would like to just
        // change the type to int, but this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group, and it is not turned non-GC by the code generator.
        // We fix this by copying the GC pointer to a non-GC pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the GC problem and we allow casts to bytes, longs, etc...
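        //
        // The resulting shape is (illustrative):
        //
        //     GT_COMMA(GT_ASG(tmp, oper), GT_CAST(GT_LCL_VAR tmp -> dstType))
        //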
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTreePtr asg  = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

        return fgMorphTree(oper);
    }

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
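    //
    // For example (illustrative): (int)(lngA + lngB) can be rewritten as
    // (int)lngA + (int)lngB, since the low 32 bits of the sum depend only
    // on the low 32 bits of the operands.
    //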
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
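        //
        // E.g. (illustrative): checked((uint)(lng & 0xFF)) can never overflow,
        // because 0xFF fits in 32 bits, so the overflow check can be dropped.
        //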
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depends
            // upon the lower 32 bits of the operands
            //
            if (oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG, GT_LSH))
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
                }

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* If GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same-sized casts, when the signs match or the cast cannot
            // overflow, we discard them as well.
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                    else
                    {
                        goto REMOVE_CAST;
                    }
                }
            }

            if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Casts from signed->unsigned can never overflow while widening

                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                }
            }
            else
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE
                // And do not narrow if the oper is marked as a CSE either
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }

        switch (oper->gtOper)
        {
            /* If the operand is a constant, we'll fold it */
            case GT_CNS_INT:
            case GT_CNS_LNG:
            case GT_CNS_DBL:
            case GT_CNS_STR:
            {
                GenTreePtr oldTree = tree;

                tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

                // Did we get a comma throw as a result of gtFoldExprConst?
                if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
                {
                    noway_assert(fgIsCommaThrow(tree));
                    tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                    fgMorphTreeDone(tree);
                    return tree;
                }
                else if (tree->gtOper != GT_CAST)
                {
                    return tree;
                }

                noway_assert(tree->gtCast.CastOp() == oper); // unchanged
            }
            break;

            case GT_CAST:
                /* Check for two consecutive casts into the same dstType */
                if (!tree->gtOverflow())
                {
                    var_types dstType2 = oper->CastToType();
                    if (dstType == dstType2)
                    {
                        goto REMOVE_CAST;
                    }
                }
                break;

#ifdef LEGACY_BACKEND

            /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
               so that the code generator will know not to convert the result
               of the idiv to a regpair */
            case GT_MOD:
                if (dstType == TYP_INT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }

                break;
            case GT_UMOD:
                if (dstType == TYP_UINT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }
                break;

#endif // LEGACY_BACKEND

            case GT_COMMA:
                // Check for cast of a GT_COMMA with a throw overflow.
                // Bug 110829: Since this optimization will bash the types,
                // neither oper nor commaOp2 can be CSE candidates.
                if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
                {
                    GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                    if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
                    {
                        // need type of oper to be same as tree
                        if (tree->gtType == TYP_LONG)
                        {
                            commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                            commaOp2->gtIntConCommon.SetLngValue(0);
                            /* Change the types of oper and commaOp2 to TYP_LONG */
                            oper->gtType = commaOp2->gtType = TYP_LONG;
                        }
                        else if (varTypeIsFloating(tree->gtType))
                        {
                            commaOp2->ChangeOperConst(GT_CNS_DBL);
                            commaOp2->gtDblCon.gtDconVal = 0.0;
                            // Change the types of oper and commaOp2.
                            // X87 promotes everything to TYP_DOUBLE,
                            // but others are a little more precise.
                            const var_types newTyp
#if FEATURE_X87_DOUBLES
                                = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                                = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                            oper->gtType = commaOp2->gtType = newTyp;
                        }
                        else
                        {
                            commaOp2->ChangeOperConst(GT_CNS_INT);
                            commaOp2->gtIntCon.gtIconVal = 0;
                            /* Change the types of oper and commaOp2 to TYP_INT */
                            oper->gtType = commaOp2->gtType = TYP_INT;
                        }
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }

                    /* Return the GT_COMMA node as the new tree */
                    return oper;
                }
                break;

            default:
                break;
        } /* end switch (oper->gtOper) */
    }

    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:

    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTreePtr       addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //  Note that each dereference is a GC pointer

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
    return objRef;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph altering modifications such as copy / constant propagation
 */

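// UpdateGT_LISTFlags (brief summary, illustrative): recursively recompute the
// GTF_ALL_EFFECT flags on a GT_LIST chain by OR-ing together the effect flags
// of every list element, returning the combined flags of the whole chain.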
unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}

#ifdef DEBUG
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isSplit)
    {
        printf(", isSplit");
    }
    if (needTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (isTmp)
    {
        printf(", isTmp");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isHfaRegArg)
    {
        printf(", isHfa");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif

fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;
    argsSorted   = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
    }
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  newCall is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = nullptr;
    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first
    // argument, so we can iterate over these argument lists more uniformly.
    // We need to provide temporary non-null first arguments to these constructors; if we use them, we'll
    // replace them below.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallArgs;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = nullptr;
    GenTreeArgList*   oldParent   = nullptr;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntryPtr oldArgTabEntry = nullptr;
        fgArgTabEntryPtr newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                //  to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }
        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }

    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = nullptr;
            fgArgTabEntryPtr newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}

void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}

fgArgTabEntryPtr fgArgInfo::AddRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned                                                         argNum,
                                      GenTreePtr                                                       node,
                                      GenTreePtr                                                       parent,
                                      regNumber                                                        regNum,
                                      unsigned                                                         numRegs,
                                      unsigned                                                         alignment,
                                      const bool                                                       isStruct,
                                      const regNumber                                                  otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of a needed late argument, for example.)
    // This requires the use of an extra flag: at creation time the state is correct,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment
                                          FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
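    // (Illustrative: 'alignment' is measured in slots, so e.g. alignment == 2
    // bumps nextSlotNum up to the next even slot index before this argument
    // is assigned its slotNum.)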

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of a needed late argument, for example.)
    // This requires the use of an extra flag: at creation time the state is correct,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif                                   // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}

void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

fgArgTabEntry* fgArgInfo::RemorphRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    unsigned         regArgInx      = 0;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        bool       isRegArg;
        GenTreePtr argx;
        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }
    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
    {
        regNum = curArgTabEntry->regNum;
    }

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->regNum == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);

    if (curArgTabEntry->node != node)
    {
        GenTreePtr argx     = nullptr;
        unsigned   regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
            if (regIndex == regArgInx)
            {
                break;
            }
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}

void fgArgInfo::RemorphStkArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->slotNum == nextSlotNum);
    assert(curArgTabEntry->numSlots == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);
    assert(parent->OperIsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTreePtr argx     = nullptr;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                {
                    break;
                }
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif

    nextSlotNum += numSlots;
}

void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    if (argsComplete)
    {
        assert(curArgTabEntry->isSplit == true);
        assert(curArgTabEntry->numRegs == numRegs);
        assert(curArgTabEntry->numSlots == numSlots);
    }
    else
    {
        curArgTabEntry->isSplit  = true;
        curArgTabEntry->numRegs  = numRegs;
        curArgTabEntry->numSlots = numSlots;
    }
    nextSlotNum += numSlots;
}

void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}
1419
1420 void fgArgInfo::ArgsComplete()
1421 {
1422     bool hasStackArgs    = false;
1423     bool hasStructRegArg = false;
1424
1425     for (unsigned curInx = 0; curInx < argCount; curInx++)
1426     {
1427         fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
1428         assert(curArgTabEntry != nullptr);
1429         GenTreePtr argx = curArgTabEntry->node;
1430
1431         if (curArgTabEntry->regNum == REG_STK)
1432         {
1433             hasStackArgs = true;
1434 #if !FEATURE_FIXED_OUT_ARGS
1435             // On x86 we use push instructions to pass arguments:
1436             //   The non-register arguments are evaluated and pushed in order
1437             //   and they are never evaluated into temps
1438             //
1439             continue;
1440 #endif
1441         }
1442 #if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
1443         else if (curArgTabEntry->isSplit)
1444         {
1445             hasStructRegArg = true;
1446             hasStackArgs    = true;
1447         }
1448 #endif
1449         else // we have a register argument, next we look for a struct type.
1450         {
1451             if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
1452             {
1453                 hasStructRegArg = true;
1454             }
1455         }
1456
        /* If the argument tree contains an assignment (GTF_ASG) then the argument
           and every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists an assignment somewhere
           in the tree.  We don't know what is being assigned, so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif                                   // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw
        // (a call to a JIT helper), then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If the argument contains a call (GTF_CALL) then it and everything before the call
           with a GLOB_EFFECT must be evaluated into temps (this is because everything with a
           side effect has to be kept in the right order, since we will move the call to the
           first position).

           For calls we don't have to be quite as conservative as we are with an assignment,
           since the call won't be modifying any non-address-taken LclVars.
         */

        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
                else if (prevArgTabEntry->isSplit)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif
#endif
            }
        }

#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //

        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);

        if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
#ifndef _TARGET_ARM_
            // TODO-Arm: This optimization is not implemented for ARM32
            // so we skip this for ARM32 until it is ported to use RyuJIT backend
            //
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes.
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;
                        case 11:
                        case 13:
                        case 14:
                        case 15:
                            // Spill any GT_OBJ multireg structs that are difficult to extract
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
            }
#endif // !_TARGET_ARM_
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }

    // We only care because we can't spill structs, and QMARKs involve a lot of spilling; but
    // if we don't have QMARKs, then it doesn't matter.
    // So check for QMARKs globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args, we have to force the evaluation
    // of any arguments passed in registers that might throw an exception.
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTreePtr argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
                    // Thus we cannot reorder the argument after any stack based argument.
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    // check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns true if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
                }
            }
        }
    }

    argsComplete = true;
}

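// SortArgs: Reorder the arg table so that the most complex arguments (calls, temps,
// expensive trees) are evaluated first and the simplest (local vars, constants) last;
// the block comment below shows the resulting table layout.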
void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */

    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    int      regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields,
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }

    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table

        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}

//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    tmpVarNum  - the var num which we clone into the newly created temp var.
//
// Return Value:
//    the newly created temp var tree.
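//
// Notes:
//    For struct temps the shape of the returned tree is target-dependent: the temp may
//    be retyped as a primitive and read via GT_LCL_FLD when it can be passed in a
//    register, or wrapped in GT_ADDR/GT_OBJ when it is passed by value in multiple
//    registers or by reference (see the #ifdef'ed cases in the body).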

GenTreePtr Compiler::fgMakeTmpArgNode(
    unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTreePtr arg      = gtNewLclvNode(tmpVarNum, type);
    GenTreePtr addrNode = nullptr;

    if (varTypeIsStruct(type))
    {

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

        arg->gtFlags |= GTF_DONT_CSE;

#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // Can this type be passed in a single register?
        // If so, the following call will return the corresponding primitive type.
        // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.

        bool                 passedInRegisters = false;
        structPassingKind    kind;
        CORINFO_CLASS_HANDLE clsHnd         = varDsc->lvVerTypeInfo.GetClassHandle();
        var_types            structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);

        if (structBaseType != TYP_UNKNOWN)
        {
            passedInRegisters = true;
            type              = structBaseType;
        }
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING

        // If it is passed in registers, don't get the address of the var. Make it a
        // field instead. It will be loaded in registers with putarg_reg tree in lower.
        if (passedInRegisters)
        {
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
        else
        {
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
            // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
            var_types addrType = type;
#else
            var_types addrType = TYP_BYREF;
#endif
            arg      = gtNewOperNode(GT_ADDR, addrType, arg);
            addrNode = arg;

#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
            assert(varTypeIsStruct(type));
            if (lvaIsMultiregStruct(varDsc))
            {
                // ToDo-ARM64: Consider using:  arg->ChangeOper(GT_LCL_FLD);
                // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
                // We will create a GT_OBJ for the argument below.
                // This will be passed by value in two registers.
                assert(addrNode != nullptr);

                // Create an Obj of the temp to use it as a call argument.
                arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);

                // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
                // this is only to preserve former behavior (though some CSE'ing of struct
                // values can be pessimizing, so enabling this may require some additional tuning).
                arg->gtFlags |= GTF_DONT_CSE;
            }
#endif // _TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
        }

#else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))

        // On other targets, we pass the struct by value
        assert(varTypeIsStruct(type));

        addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);

        // Get a new Obj node temp to use it as a call argument.
        // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
        arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);

#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))

    } // (varTypeIsStruct(type))

    if (addrNode != nullptr)
    {
        assert(addrNode->gtOper == GT_ADDR);

        // This will prevent this LclVar from being optimized away
        lvaSetVarAddrExposed(tmpVarNum);

        // the child of a GT_ADDR is required to have this flag set
        addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
    }

    return arg;
}

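// EvalArgsToTemps: Using the sorted arg table, build the gtCallLateArgs list. Arguments
// marked needTmp are assigned to new temps here (the assignment stays in the early list
// and a use of the temp goes in the late list); arguments marked needPlace are moved to
// the late list and a placeholder node is left in the early (gtCallArgs) list.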
void fgArgInfo::EvalArgsToTemps()
{
    assert(argsSorted == true);

    unsigned regArgInx = 0;
    // Now go through the argument table and perform the necessary evaluation into temps
    GenTreeArgList* tmpRegArgNext = nullptr;
    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        GenTreePtr argx     = curArgTabEntry->node;
        GenTreePtr setupArg = nullptr;
        GenTreePtr defArg;

#if !FEATURE_FIXED_OUT_ARGS
        // Only ever set for FEATURE_FIXED_OUT_ARGS
        assert(curArgTabEntry->needPlace == false);

        // On x86 and other archs that use push instructions to pass arguments:
        //   Only the register arguments need to be replaced with placeholder nodes.
        //   Stacked arguments are evaluated and pushed (or stored into the stack) in order.
        //
        if (curArgTabEntry->regNum == REG_STK)
            continue;
#endif

        if (curArgTabEntry->needTmp)
        {
            unsigned tmpVarNum;

            if (curArgTabEntry->isTmp == true)
            {
                // Create a copy of the temp to go into the late argument list
                tmpVarNum = curArgTabEntry->tmpNum;
                defArg    = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
                    argTable[curInx]->structDesc.passedInRegisters));

                // mark the original node as a late argument
                argx->gtFlags |= GTF_LATE_ARG;
            }
            else
            {
                // Create a temp assignment for the argument
                // Put the temp in the gtCallLateArgs list
                CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("Argument with 'side effect'...\n");
                    compiler->gtDispTree(argx);
                }
#endif

#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                noway_assert(argx->gtType != TYP_STRUCT);
#endif

                tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
                if (argx->gtOper == GT_MKREFANY)
                {
                    // For GT_MKREFANY, typically the actual struct copying does
                    // not have any side-effects and can be delayed. So instead
                    // of using a temp for the whole struct, we can just use a temp
                    // for the operand that has a side-effect.
                    GenTreePtr operand;
                    if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
                    {
                        operand = argx->gtOp.gtOp1;

                        // In the early argument evaluation, place an assignment to the temp
                        // from the source operand of the mkrefany
                        setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);

                        // Replace the operand for the mkrefany with the new temp.
                        argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
                    }
                    else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
                    {
                        operand = argx->gtOp.gtOp2;

                        // In the early argument evaluation, place an assignment to the temp
                        // from the source operand of the mkrefany
                        setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);

                        // Replace the operand for the mkrefany with the new temp.
                        argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
                    }
                }

                if (setupArg != nullptr)
                {
                    // Now keep the mkrefany for the late argument list
                    defArg = argx;

                    // Clear the side-effect flags because now both op1 and op2 have no side-effects
                    defArg->gtFlags &= ~GTF_ALL_EFFECT;
                }
                else
                {
                    setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);

                    LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;

#ifndef LEGACY_BACKEND
                    if (compiler->fgOrder == Compiler::FGOrderLinear)
                    {
                        // We'll reference this temporary variable just once
                        // when we perform the function call after
                        // setting up this argument.
                        varDsc->lvRefCnt = 1;
                    }
#endif // !LEGACY_BACKEND

                    var_types lclVarType = genActualType(argx->gtType);
                    var_types scalarType = TYP_UNKNOWN;

                    if (setupArg->OperIsCopyBlkOp())
                    {
                        setupArg = compiler->fgMorphCopyBlock(setupArg);
#if defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))
                        // This scalar LclVar widening step is only performed for ARM architectures.
                        //
                        CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
                        unsigned             structSize = varDsc->lvExactSize;

                        scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
#endif // _TARGET_ARM*_
                    }

                    // scalarType can be set to a wider type for ARM architectures: (3 => 4)  or (5,6,7 => 8)
                    if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
                    {
                        // Create a GT_LCL_FLD using the wider type to go to the late argument list
                        defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
                    }
                    else
                    {
                        // Create a copy of the temp to go to the late argument list
                        defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
                    }

                    curArgTabEntry->isTmp  = true;
                    curArgTabEntry->tmpNum = tmpVarNum;

#ifdef _TARGET_ARM_
                    // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
                    // might have left holes in the used registers (see
                    // fgAddSkippedRegsInPromotedStructArg).
                    // Too bad we're not that smart for these intermediate temps...
                    if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
                    {
                        regNumber argReg      = curArgTabEntry->regNum;
                        regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
                        for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
                        {
                            argReg = genRegArgNext(argReg);
                            allUsedRegs |= genRegMask(argReg);
                        }
#ifdef LEGACY_BACKEND
                        callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
#endif // LEGACY_BACKEND
                    }
#endif // _TARGET_ARM_
                }

                /* mark the assignment as a late argument */
                setupArg->gtFlags |= GTF_LATE_ARG;

#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("\n  Evaluate to a temp:\n");
                    compiler->gtDispTree(setupArg);
                }
#endif
            }
        }
        else // curArgTabEntry->needTmp == false
        {
            //   On x86 -
            //      Only register args are replaced with placeholder nodes
            //      and the stack based arguments are evaluated and pushed in order.
            //
            //   On Arm/x64 - When needTmp is false and needPlace is false,
            //      the non-register arguments are evaluated and stored in order.
            //      When needPlace is true we have a nested call that comes after
            //      this argument so we have to replace it in the gtCallArgs list
            //      (the initial argument evaluation list) with a placeholder.
            //
            if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
            {
                continue;
            }

            /* No temp needed - move the whole node to the gtCallLateArgs list */

            /* The argument is deferred and put in the late argument list */

            defArg = argx;

            // Create a placeholder node to put in its place in gtCallLateArgs.

            // For a struct type we also need to record the class handle of the arg.
            CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;

#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

            // All structs are either passed (and retyped) as integral types, OR they
            // are passed by reference.
            noway_assert(argx->gtType != TYP_STRUCT);

#else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

            if (varTypeIsStruct(defArg))
            {
                // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
                GenTreePtr defArgTmp = defArg;

                // The GT_OBJ may be a child of a GT_COMMA.
                while (defArgTmp->gtOper == GT_COMMA)
                {
                    defArgTmp = defArgTmp->gtOp.gtOp2;
                }
                assert(varTypeIsStruct(defArgTmp));

                // We handle two opcodes: GT_MKREFANY and GT_OBJ.
                if (defArgTmp->gtOper == GT_MKREFANY)
                {
                    clsHnd = compiler->impGetRefAnyClass();
                }
                else if (defArgTmp->gtOper == GT_OBJ)
                {
                    clsHnd = defArgTmp->AsObj()->gtClass;
                }
                else
                {
                    BADCODE("Unhandled struct argument tree in fgMorphArgs");
                }
            }

#endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))

            setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);

            /* mark the placeholder node as a late argument */
            setupArg->gtFlags |= GTF_LATE_ARG;

#ifdef DEBUG
            if (compiler->verbose)
            {
                if (curArgTabEntry->regNum == REG_STK)
                {
                    printf("Deferred stack argument :\n");
                }
                else
                {
                    printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
                }

                compiler->gtDispTree(argx);
                printf("Replaced with placeholder node:\n");
                compiler->gtDispTree(setupArg);
            }
#endif
        }

        if (setupArg != nullptr)
        {
            if (curArgTabEntry->parent)
            {
                GenTreePtr parent = curArgTabEntry->parent;
                /* a normal argument from the list */
                noway_assert(parent->OperIsList());
                noway_assert(parent->gtOp.gtOp1 == argx);

                parent->gtOp.gtOp1 = setupArg;
            }
            else
            {
                /* must be the gtCallObjp */
                noway_assert(callTree->gtCall.gtCallObjp == argx);

                callTree->gtCall.gtCallObjp = setupArg;
            }
        }

        /* deferred arg goes into the late argument list */

        if (tmpRegArgNext == nullptr)
        {
            tmpRegArgNext                   = compiler->gtNewArgList(defArg);
            callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
        }
        else
        {
            noway_assert(tmpRegArgNext->OperIsList());
            noway_assert(tmpRegArgNext->Current());
            tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
            tmpRegArgNext             = tmpRegArgNext->Rest();
        }

        curArgTabEntry->node       = defArg;
        curArgTabEntry->lateArgInx = regArgInx++;
    }

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nShuffled argument table:    ");
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            if (curArgTabEntry->regNum != REG_STK)
            {
                printf("%s ", getRegName(curArgTabEntry->regNum));
            }
        }
        printf("\n");
    }
#endif
}

// Get the late arg for arg at position argIndex.
// argIndex - 0-based position to get late arg for.
//            Caller must ensure this position has a late arg.
GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
{
    for (unsigned j = 0; j < this->ArgCount(); j++)
    {
        if (this->ArgTable()[j]->argNum == argIndex)
        {
            return this->ArgTable()[j]->node;
        }
    }
    // Caller must ensure late arg exists.
    unreached();
}

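// RecordStkLevel / RetrieveStkLevel: Remember the stack depth at the point this call's
// arguments were first morphed, so that a later re-morph of the late args can restore
// the same depth (see the reMorphing path in fgMorphArgs below).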
void fgArgInfo::RecordStkLevel(unsigned stkLvl)
{
    assert(!IsUninitialized(stkLvl));
    this->stkLevel = stkLvl;
}

unsigned fgArgInfo::RetrieveStkLevel()
{
    assert(!IsUninitialized(stkLevel));
    return stkLevel;
}

// Return a conservative estimate of the stack size in bytes.
// It will be used only on the intercepted-for-host code path to copy the arguments.
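// For example (illustrative only): on a target where MAX_REG_ARG is 4 and REGSIZE_BYTES
// is 8, a call with six arguments would be estimated at (6 - 4) * 8 = 16 bytes.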
int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
{

    int numArgs = 0;
    for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
    {
        numArgs++;
    }

    int numStkArgs;
    if (numArgs > MAX_REG_ARG)
    {
        numStkArgs = numArgs - MAX_REG_ARG;
    }
    else
    {
        numStkArgs = 0;
    }

    return numStkArgs * REGSIZE_BYTES;
}

//------------------------------------------------------------------------------
// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
//                  otherwise, insert a comma form temp.
//
// Arguments:
//    pOp     - a pointer to the child node we will be replacing with the comma expression that
//              evaluates *pOp to a temp and returns the result
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//
// Assumption:
//    The result tree MUST be added to the tree structure since the ref counts are
//    already incremented.
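//
// Example (illustrative; 'mul' and 'op' are hypothetical nodes):
//    Given a GT_MUL node whose two operands must both evaluate 'op', a caller can write
//        GenTree* opCopy = fgMakeMultiUse(&mul->gtOp.gtOp1);
//    and use 'opCopy' as the second operand. If 'op' was not a local, gtOp1 becomes
//    GT_COMMA(GT_ASG(tmp, op), tmp) and 'opCopy' is a fresh use of 'tmp'.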

GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
{
    GenTree* tree = *pOp;
    if (tree->IsLocal())
    {
        auto result = gtClone(tree);
        if (lvaLocalVarRefCounted)
        {
            lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
        }
        return result;
    }
    else
    {
        GenTree* result = fgInsertCommaFormTemp(pOp);

        // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
        // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
        // be added by the caller.
        if (lvaLocalVarRefCounted)
        {
            lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
            lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
            lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
        }

        return result;
    }
}

//------------------------------------------------------------------------------
// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
//                        and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
//
// Arguments:
//    ppTree     - a pointer to the child node we will be replacing with the comma expression that
//                 evaluates ppTree to a temp and returns the result
//
//    structType - value type handle if the temp created is of TYP_STRUCT.
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//
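// Example (illustrative):
//    Given *ppTree == GT_ADD(a, b), after this call *ppTree is
//    GT_COMMA(GT_ASG(tmpLcl, GT_ADD(a, b)), tmpLcl), and the return value is a separate,
//    as-yet-unused GT_LCL_VAR node for the same temp.
//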

GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
{
    GenTree* subTree = *ppTree;

    unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));

    if (varTypeIsStruct(subTree))
    {
        assert(structType != nullptr);
        lvaSetStruct(lclNum, structType, false);
    }

    // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
    // The type of GT_COPYBLK is TYP_VOID.  Therefore, we should use subTree->TypeGet() for
    // setting the type of the lcl var nodes created.
    GenTree* asg = gtNewTempAssign(lclNum, subTree);

    GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);

    GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);

    *ppTree = comma;

    return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
}

//------------------------------------------------------------------------
// fgMorphArgs: Walk and transform (morph) the arguments of a call
//
// Arguments:
//    callNode - the call for which we are doing the argument morphing
//
// Return Value:
//    Like most morph methods, this method returns the morphed node,
//    though in this case there are currently no scenarios where the
//    node itself is re-created.
//
// Notes:
//    This method is even less idempotent than most morph methods.
//    That is, it makes changes that should not be redone. It uses the existence
//    of gtCallLateArgs (the late arguments list) to determine if it has
//    already done that work.
//
//    The first time it is called (i.e. during global morphing), this method
//    computes the "late arguments". This is when it determines which arguments
//    need to be evaluated to temps prior to the main argument setup, and which
//    can be directly evaluated into the argument location. It also creates a
//    second argument list (gtCallLateArgs) that does the final placement of the
//    arguments, e.g. into registers or onto the stack.
//
//    The "non-late arguments", aka the gtCallArgs, do the in-order
//    evaluation of the arguments that might have side-effects, such as embedded
//    assignments, calls, or possible throws. In these cases, the argument and any
//    earlier arguments must be evaluated to temps.
//
//    On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
//    if we have any nested calls, we need to defer the copying of the argument
//    into the fixed argument area until after the call. If the argument did not
//    otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
//    replaced in the "early" arg list (gtCallArgs) with a placeholder node.

2655 #ifdef _PREFAST_
2656 #pragma warning(push)
2657 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2658 #endif
2659 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2660 {
2661     GenTreePtr args;
2662     GenTreePtr argx;
2663
2664     unsigned flagsSummary    = 0;
2665     unsigned genPtrArgCntSav = fgPtrArgCntCur;
2666
2667     unsigned argIndex = 0;
2668
2669     unsigned intArgRegNum = 0;
2670     unsigned fltArgRegNum = 0;
2671
2672 #ifdef _TARGET_ARM_
2673     regMaskTP argSkippedRegMask    = RBM_NONE;
2674     regMaskTP fltArgSkippedRegMask = RBM_NONE;
2675 #endif //  _TARGET_ARM_
2676
2677 #if defined(_TARGET_X86_)
2678     unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2679 #else
2680     const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2681 #endif
2682
2683     unsigned argSlots                = 0;
2684     unsigned nonRegPassedStructSlots = 0;
2685     bool     reMorphing              = call->AreArgsComplete();
2686     bool     callHasRetBuffArg       = call->HasRetBufArg();
2687
2688 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2689     bool callIsVararg = call->IsVarargs();
2690 #endif
2691
2692 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2693     // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2694     // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
2695     // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
2696     bool hasStackArgCopy = false;
2697 #endif
2698
2699 #ifndef LEGACY_BACKEND
2700     // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2701     // following the normal calling convention or in the normal argument registers. We either mark existing
2702     // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2703     // non-standard arguments into the argument list, below.
2704     class NonStandardArgs
2705     {
2706         struct NonStandardArg
2707         {
2708             regNumber reg;  // The register to be assigned to this non-standard argument.
2709             GenTree*  node; // The tree node representing this non-standard argument.
2710                             //   Note that this must be updated if the tree node changes due to morphing!
2711         };
2712
2713         ArrayStack<NonStandardArg> args;
2714
2715     public:
2716         NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2717         {
2718         }
2719
2720         //-----------------------------------------------------------------------------
2721         // Add: add a non-standard argument to the table of non-standard arguments
2722         //
2723         // Arguments:
2724         //    node - a GenTree node that has a non-standard argument.
2725         //    reg - the register to assign to this node.
2726         //
2727         // Return Value:
2728         //    None.
2729         //
2730         void Add(GenTree* node, regNumber reg)
2731         {
2732             NonStandardArg nsa = {reg, node};
2733             args.Push(nsa);
2734         }
2735
2736         //-----------------------------------------------------------------------------
2737         // Find: Look for a GenTree* in the set of non-standard args.
2738         //
2739         // Arguments:
2740         //    node - a GenTree node to look for
2741         //
2742         // Return Value:
2743         //    The index of the non-standard argument (a non-negative, unique, stable number).
2744         //    If the node is not a non-standard argument, return -1.
2745         //
2746         int Find(GenTree* node)
2747         {
2748             for (int i = 0; i < args.Height(); i++)
2749             {
2750                 if (node == args.Index(i).node)
2751                 {
2752                     return i;
2753                 }
2754             }
2755             return -1;
2756         }
2757
2758         //-----------------------------------------------------------------------------
2759         // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2760         // set the register to use for the node.
2761         //
2762         // Arguments:
2763         //    node - a GenTree node to look for
2764         //    pReg - an OUT argument. *pReg is set to the non-standard register to use if
2765         //           'node' is found in the non-standard argument set.
2766         //
2767         // Return Value:
2768         //    'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2769         //          register to use.
2770         //    'false' otherwise (in this case, *pReg is unmodified).
2771         //
2772         bool FindReg(GenTree* node, regNumber* pReg)
2773         {
2774             for (int i = 0; i < args.Height(); i++)
2775             {
2776                 NonStandardArg& nsa = args.IndexRef(i);
2777                 if (node == nsa.node)
2778                 {
2779                     *pReg = nsa.reg;
2780                     return true;
2781                 }
2782             }
2783             return false;
2784         }
2785
2786         //-----------------------------------------------------------------------------
2787         // Replace: Replace the non-standard argument node at a given index. This is done when
2788         // the original node was replaced via morphing, but we need to continue to assign a
2789         // particular non-standard arg to it.
2790         //
2791         // Arguments:
2792         //    index - the index of the non-standard arg. It must exist.
2793         //    node - the new GenTree node.
2794         //
2795         // Return Value:
2796         //    None.
2797         //
2798         void Replace(int index, GenTree* node)
2799         {
2800             args.IndexRef(index).node = node;
2801         }
2802
2803     } nonStandardArgs(this);
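
    // A minimal usage sketch for the NonStandardArgs table above (illustrative only; 'argNode'
    // and 'morphedNode' are hypothetical names, not locals of this function):
    //
    //   nonStandardArgs.Add(argNode, REG_PINVOKE_COOKIE_PARAM); // record the required register
    //   int index = nonStandardArgs.Find(argNode);              // -1 if argNode is standard
    //   if (index != -1)
    //   {
    //       nonStandardArgs.Replace(index, morphedNode); // keep the table in sync after morphing
    //   }
    //   regNumber reg;
    //   if (nonStandardArgs.FindReg(argNode, &reg))
    //   {
    //       // pass argNode in 'reg' instead of the next conventional argument register
    //   }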
2804 #endif // !LEGACY_BACKEND
2805
2806     // Count of args. On first morph, this is counted before we've filled in the arg table.
2807     // On remorph, we grab it from the arg table.
2808     unsigned numArgs = 0;
2809
2810     // Process the late arguments (which were determined by a previous caller).
2811     // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2812     // may need to refer to it.
2813     if (reMorphing)
2814     {
2815         // We need to reMorph the gtCallLateArgs early since that is what triggers
2816         // the expression folding and we need to have the final folded gtCallLateArgs
2817         // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2818         // with the folded tree that represents the final optimized argument nodes.
2819         //
2820         // However if a range-check needs to be generated for any of these late
2821         // arguments we also need to "know" what the stack depth will be when we generate
2822         // code to branch to the throw range check failure block as that is part of the
2823         // GC information contract for that block.
2824         //
2825         // Since the late arguments are evaluated last we have pushed all of the
2826         // other arguments on the stack before we evaluate these late arguments,
2827         // so we record the stack depth on the first morph call when reMorphing
2828         // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2829         //
2830         if (call->gtCallLateArgs != nullptr)
2831         {
2832             unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2833             fgPtrArgCntCur += callStkLevel;
2834             call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2835             flagsSummary |= call->gtCallLateArgs->gtFlags;
2836             fgPtrArgCntCur -= callStkLevel;
2837         }
2838         assert(call->fgArgInfo != nullptr);
2839         call->fgArgInfo->RemorphReset();
2840
2841         numArgs = call->fgArgInfo->ArgCount();
2842     }
2843     else
2844     {
2845         // First we need to count the args
2846         if (call->gtCallObjp)
2847         {
2848             numArgs++;
2849         }
2850         for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2851         {
2852             numArgs++;
2853         }
2854
2855         // Insert or mark non-standard args. These are either passed outside the normal calling convention, or
2856         // passed in argument registers that don't follow the normal progression of argument registers in the
2857         // calling convention (such as the ARM64 fixed return buffer argument x8).
2858         //
2859         // *********** NOTE *************
2860         // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2861         // in the implementation of fast tail call.
2862         // *********** END NOTE *********
2863         CLANG_FORMAT_COMMENT_ANCHOR;
2864
2865 #if !defined(LEGACY_BACKEND)
2866 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2867         // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have custom calling conventions.
2868         // Set the argument registers correctly here.
2869         if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2870         {
2871             GenTreeArgList* args = call->gtCallArgs;
2872             GenTree*        arg1 = args->Current();
2873             assert(arg1 != nullptr);
2874             nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2875         }
2876 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2877 #if defined(_TARGET_X86_)
2878         // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2879         // hi part to be in EDX. This sets the argument registers up correctly.
2880         else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2881                  call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2882         {
2883             GenTreeArgList* args = call->gtCallArgs;
2884             GenTree*        arg1 = args->Current();
2885             assert(arg1 != nullptr);
2886             nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2887
2888             args          = args->Rest();
2889             GenTree* arg2 = args->Current();
2890             assert(arg2 != nullptr);
2891             nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2892         }
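
        // For illustration: a call to CORINFO_HELP_LLSH for "x << n" (x a 64-bit long) receives
        // the lo half of x in EAX (REG_LNGARG_LO) and the hi half in EDX (REG_LNGARG_HI), rather
        // than in whatever registers the default convention would assign next.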
2893 #else  // !defined(_TARGET_X86_)
2894         // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2895         // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2896         // convention for x86/SSE.
2897
2898         // If we have a Fixed Return Buffer argument register then we set up a non-standard argument for it.
2899         //
2900         if (hasFixedRetBuffReg() && call->HasRetBufArg())
2901         {
2902             args = call->gtCallArgs;
2903             assert(args != nullptr);
2904             assert(args->OperIsList());
2905
2906             argx = call->gtCallArgs->Current();
2907
2908             // We don't increment numArgs here, since we already counted this argument above.
2909
2910             nonStandardArgs.Add(argx, theFixedRetBuffReg());
2911         }
2912
2913         // We are allowed to have a Fixed Return Buffer argument combined
2914         // with any of the remaining non-standard arguments
2915         //
2916         if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2917         {
2918             assert(!call->gtCallCookie);
2919             // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2920             // It will be used only on the intercepted-for-host code path to copy the arguments.
2921
2922             GenTree* cns     = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2923             call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2924             numArgs++;
2925
2926             nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2927         }
2928         else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
2929         {
2930             // indirect VSD stubs need the base of the indirection cell to be
2931             // passed in addition.  At this point that is the value in gtCallAddr.
2932             // The actual call target will be derived from gtCallAddr in call
2933             // lowering.
2934
2935             // If it is a VSD call getting dispatched via the tail call helper,
2936             // fgMorphTailCall() will have materialized the stub address as an
2937             // additional parameter added to the original arg list, so there is
2938             // no need to add it as a non-standard arg here.
2939
2940             GenTree* arg = call->gtCallAddr;
2941             if (arg->OperIsLocal())
2942             {
2943                 arg = gtClone(arg, true);
2944             }
2945             else
2946             {
2947                 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2948                 call->gtFlags |= GTF_ASG;
2949             }
2950             noway_assert(arg != nullptr);
2951
2952             // And push the stub address onto the list of arguments
2953             call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2954             numArgs++;
2955
2956             nonStandardArgs.Add(arg, virtualStubParamInfo->GetReg());
2957         }
2958         else
2959 #endif // defined(_TARGET_X86_)
2960         if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2961         {
2962             assert(!call->IsUnmanaged());
2963
2964             GenTree* arg = call->gtCallCookie;
2965             noway_assert(arg != nullptr);
2966             call->gtCallCookie = nullptr;
2967
2968 #if defined(_TARGET_X86_)
2969             // x86 passes the cookie on the stack as the final argument to the call.
2970             GenTreeArgList** insertionPoint = &call->gtCallArgs;
2971             for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2972             {
2973             }
2974             *insertionPoint = gtNewListNode(arg, nullptr);
2975 #else  // !defined(_TARGET_X86_)
2976             // All other architectures pass the cookie in a register.
2977             call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2978 #endif // defined(_TARGET_X86_)
2979
2980             nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2981             numArgs++;
2982
2983             // Put the call target (the 'destination') into REG_PINVOKE_TARGET_PARAM (R10/EAX).
2984             arg              = gtClone(call->gtCallAddr, true);
2985             call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2986             numArgs++;
2987
2988             nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2989
2990             // finally change this call to a helper call
2991             call->gtCallType    = CT_HELPER;
2992             call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2993         }
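
        // Sketch of the transformation above (hypothetical shapes, for illustration): an indirect
        // call with a cookie, CALLI(cookie, target)(a, b), becomes the helper call
        // CORINFO_HELP_PINVOKE_CALLI(target, cookie, a, b) on non-x86 targets, with 'cookie' pinned
        // to REG_PINVOKE_COOKIE_PARAM and 'target' to REG_PINVOKE_TARGET_PARAM; on x86 the cookie
        // is instead appended as the final stack argument.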
2994 #endif // !defined(LEGACY_BACKEND)
2995
2996         // Allocate the fgArgInfo for the call node.
2997         //
2998         call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2999     }
3000
3001     if (varTypeIsStruct(call))
3002     {
3003         fgFixupStructReturn(call);
3004     }
3005
3006     /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
3007      * During the first call to fgMorphArgs we also record the
3008      * information about late arguments we have in 'fgArgInfo'.
3009      * This information is used later to construct the gtCallLateArgs */
3010
3011     /* Process the 'this' argument value, if present */
3012
3013     argx = call->gtCallObjp;
3014
3015     if (argx)
3016     {
3017         argx             = fgMorphTree(argx);
3018         call->gtCallObjp = argx;
3019         flagsSummary |= argx->gtFlags;
3020
3021         assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
3022
3023         assert(argIndex == 0);
3024
3025         /* We must fill in or update the argInfo table */
3026
3027         if (reMorphing)
3028         {
3029             /* this is a register argument - possibly update it in the table */
3030             call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
3031         }
3032         else
3033         {
3034             assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
3035
3036             /* this is a register argument - put it in the table */
3037             call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3038 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3039                                        ,
3040                                        false, REG_STK, nullptr
3041 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3042                                        );
3043         }
3044         // 'this' can't be a struct.
3045         assert(argx->gtType != TYP_STRUCT);
3046
3047         /* Increment the argument register count and argument index */
3048         if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3049         {
3050             intArgRegNum++;
3051 #ifdef WINDOWS_AMD64_ABI
3052             // Whenever we pass an integer register argument
3053             // we skip the corresponding floating point register argument
3054             fltArgRegNum++;
3055 #endif // WINDOWS_AMD64_ABI
3056         }
3057         else
3058         {
3059             noway_assert(!"the 'this' pointer cannot be a floating point type");
3060         }
3061         argIndex++;
3062         argSlots++;
3063     }
3064
3065 #ifdef _TARGET_X86_
3066     // Compute the maximum number of arguments that can be passed in registers.
3067     // For x86, we handle the varargs and unmanaged calling conventions here.
3068
3069     if (call->gtFlags & GTF_CALL_POP_ARGS)
3070     {
3071         noway_assert(intArgRegNum < MAX_REG_ARG);
3072         // No more register arguments for varargs (CALL_POP_ARGS)
3073         maxRegArgs = intArgRegNum;
3074
3075         // Add in the ret buff arg
3076         if (callHasRetBuffArg)
3077             maxRegArgs++;
3078     }
3079
3080     if (call->IsUnmanaged())
3081     {
3082         noway_assert(intArgRegNum == 0);
3083
3084         if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3085         {
3086             noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3087                          call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3088                          call->gtCallArgs->gtOp.gtOp1->gtOper ==
3089                              GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3090             maxRegArgs = 1;
3091         }
3092         else
3093         {
3094             maxRegArgs = 0;
3095         }
3096
3097         // Add in the ret buff arg
3098         if (callHasRetBuffArg)
3099             maxRegArgs++;
3100     }
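
    // For example, an unmanaged x86 "thiscall" passes only 'this' in a register (ECX), so
    // maxRegArgs is 1; the other unmanaged conventions handled here pass all of their
    // arguments on the stack, so maxRegArgs is 0 (plus one for the ret buff arg, if any).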
3101 #endif // _TARGET_X86_
3102
3103     /* Morph the user arguments */
3104     CLANG_FORMAT_COMMENT_ANCHOR;
3105
3106 #if defined(_TARGET_ARM_)
3107
3108     // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3109     // to the "Procedure Call Standard for the ARM Architecture" document, especially
3110     // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3111     // appear in a lower-numbered register than floating point argument N. That is, argument
3112     // register allocation is not strictly increasing. To support this, we need to keep track of unused
3113     // floating-point argument registers that we can back-fill. We only support 4-byte float and
3114     // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3115     // only back-fill single registers, since there is no way with these types to create
3116     // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3117     // available (with 16 FP argument registers). Consider this code:
3118     //
3119     // struct HFA { float x, y, z; }; // a three element HFA
3120     // void bar(float a1,   // passed in f0
3121     //          double a2,  // passed in f2/f3; skip f1 for alignment
3122     //          HFA a3,     // passed in f4/f5/f6
3123     //          double a4,  // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3124     //          HFA a5,     // passed in f10/f11/f12
3125     //          double a6,  // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
3126     //                      // slots
3127     //          float a7,   // passed in f1 (back-filled)
3128     //          float a8,   // passed in f7 (back-filled)
3129     //          float a9,   // passed in f13 (back-filled)
3130     //          float a10)  // passed on the stack in [OutArg+0]
3131     //
3132     // Note that if we ever support FP types with larger alignment requirements, then there could
3133     // be more than single register back-fills.
3134     //
3135     // Once we assign a floating-point argument to the stack, all subsequent ones must be on the stack.
3136     // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3137     // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3138     // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3139     // and prevent any additional floating-point arguments from going in registers.
3140
3141     bool anyFloatStackArgs = false;
3142
3143 #endif // _TARGET_ARM_
3144
3145 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3146     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3147 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3148
3149     bool hasStructArgument     = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3150     bool hasMultiregStructArgs = false;
3151     for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3152     {
3153         GenTreePtr* parentArgx = &args->gtOp.gtOp1;
3154
3155 #if FEATURE_MULTIREG_ARGS
3156         if (!hasStructArgument)
3157         {
3158             hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3159         }
3160 #endif // FEATURE_MULTIREG_ARGS
3161
3162 #ifndef LEGACY_BACKEND
3163         // Record the index of any nonStandard arg that we may be processing here, as we are
3164         // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3165         GenTreePtr orig_argx         = *parentArgx;
3166         int        nonStandard_index = nonStandardArgs.Find(orig_argx);
3167 #endif // !LEGACY_BACKEND
3168
3169         argx        = fgMorphTree(*parentArgx);
3170         *parentArgx = argx;
3171         flagsSummary |= argx->gtFlags;
3172
3173         assert(args->OperIsList());
3174         assert(argx == args->Current());
3175
3176 #ifndef LEGACY_BACKEND
3177         if ((nonStandard_index != -1) && (argx != orig_argx))
3178         {
3179             // We need to update the node field for this nonStandard arg here
3180             // as it was changed by the call to fgMorphTree
3181             nonStandardArgs.Replace(nonStandard_index, argx);
3182         }
3183 #endif // !LEGACY_BACKEND
3184
3185         /* Change the node to TYP_I_IMPL so we don't report GC info
3186          * NOTE: We deferred this from the importer because of the inliner */
3187
3188         if (argx->IsVarAddr())
3189         {
3190             argx->gtType = TYP_I_IMPL;
3191         }
3192
3193         bool     passUsingFloatRegs;
3194         unsigned argAlign = 1;
3195         // Set up any HFA information about 'argx'
3196         var_types hfaType  = GetHfaType(argx);
3197         bool      isHfaArg = varTypeIsFloating(hfaType);
3198         unsigned  hfaSlots = 0;
3199
3200         if (isHfaArg)
3201         {
3202             hfaSlots = GetHfaCount(argx);
3203
3204             // If we have an HFA struct, it's possible we transition from a method that originally
3205             // only had integer types to one that now has FP types. We have to communicate this
3206             // through this flag, since LSRA will later use it to determine whether
3207             // or not to track the FP register set.
3208             //
3209             compFloatingPointUsed = true;
3210         }
3211
3212         unsigned             size          = 0;
3213         CORINFO_CLASS_HANDLE copyBlkClass  = nullptr;
3214         bool                 isRegArg      = false;
3215         bool                 isNonStandard = false;
3216         regNumber            nonStdRegNum  = REG_NA;
3217
3218         fgArgTabEntryPtr argEntry = nullptr;
3219
3220         if (reMorphing)
3221         {
3222             argEntry = gtArgEntryByArgNum(call, argIndex);
3223         }
3224
3225 #ifdef _TARGET_ARM_
3226
3227         bool passUsingIntRegs;
3228         if (reMorphing)
3229         {
3230             passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3231             passUsingIntRegs   = isValidIntArgReg(argEntry->regNum);
3232         }
3233         else
3234         {
3235             passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3236             passUsingIntRegs   = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3237         }
3238
3239         GenTreePtr curArg = argx;
3240         // If late args have already been computed, use the node in the argument table.
3241         if (argEntry != nullptr && argEntry->isTmp)
3242         {
3243             curArg = argEntry->node;
3244         }
3245
3246         if (reMorphing)
3247         {
3248             argAlign = argEntry->alignment;
3249         }
3250         else
3251         {
3252             // We don't use the "size" return value from InferOpSizeAlign().
3253             codeGen->InferOpSizeAlign(curArg, &argAlign);
3254
3255             argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3256             argAlign /= TARGET_POINTER_SIZE;
3257         }
3258
3259         if (argAlign == 2)
3260         {
3261             if (passUsingFloatRegs)
3262             {
3263                 if (fltArgRegNum % 2 == 1)
3264                 {
3265                     fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3266                     fltArgRegNum++;
3267                 }
3268             }
3269             else if (passUsingIntRegs)
3270             {
3271                 if (intArgRegNum % 2 == 1)
3272                 {
3273                     argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3274                     intArgRegNum++;
3275                 }
3276             }
3277
3278             if (argSlots % 2 == 1)
3279             {
3280                 argSlots++;
3281             }
3282         }
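
        // For example (illustrative, integer-register case such as soft FP): for f(int a, double b),
        // 'a' goes in r0; 'b' has argAlign == 2 and must start at an even register, so r1 is
        // skipped (recorded in argSkippedRegMask) and 'b' is passed in r2/r3.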
3283
3284 #elif defined(_TARGET_ARM64_)
3285
3286         if (reMorphing)
3287         {
3288             passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3289         }
3290         else
3291         {
3292             passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3293         }
3294
3295 #elif defined(_TARGET_AMD64_)
3296         if (reMorphing)
3297         {
3298             passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3299         }
3300         else
3301         {
3302             passUsingFloatRegs = varTypeIsFloating(argx);
3303         }
3304 #elif defined(_TARGET_X86_)
3305
3306         passUsingFloatRegs = false;
3307
3308 #else
3309 #error Unsupported or unset target architecture
3310 #endif // _TARGET_*
3311
3312         bool      isBackFilled     = false;
3313         unsigned  nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3314         var_types structBaseType   = TYP_STRUCT;
3315         unsigned  structSize       = 0;
3316
3317         bool isStructArg = varTypeIsStruct(argx);
3318
3319         if (reMorphing)
3320         {
3321 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3322             // Get the struct description for the already completed struct argument.
3323             fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3324             assert(fgEntryPtr != nullptr);
3325
3326             // As described in a few other places, this can happen when argx was morphed
3327             // into an arg setup node - a COPYBLK, which always has a type of void.
3328             // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3329             // was a struct, along with the struct classification.
3330             isStructArg = fgEntryPtr->isStruct;
3331
3332             if (isStructArg)
3333             {
3334                 structDesc.CopyFrom(fgEntryPtr->structDesc);
3335             }
3336 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3337
3338             assert(argEntry != nullptr);
3339             if (argEntry->IsBackFilled())
3340             {
3341                 isRegArg         = true;
3342                 size             = argEntry->numRegs;
3343                 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3344                 assert(size == 1);
3345                 isBackFilled = true;
3346             }
3347             else if (argEntry->regNum == REG_STK)
3348             {
3349                 isRegArg = false;
3350                 assert(argEntry->numRegs == 0);
3351                 size = argEntry->numSlots;
3352             }
3353             else
3354             {
3355                 isRegArg = true;
3356                 assert(argEntry->numRegs > 0);
3357                 size = argEntry->numRegs + argEntry->numSlots;
3358             }
3359
3360             // This size has now been computed
3361             assert(size != 0);
3362         }
3363         else // !reMorphing
3364         {
3365             //
3366             // Figure out the size of the argument. This is either in number of registers, or number of
3367             // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
3368             // the stack.
3369             //
3370             if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3371             {
3372 #if defined(_TARGET_AMD64_)
3373 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3374                 if (!isStructArg)
3375                 {
3376                     size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3377                 }
3378                 else
3379                 {
3380                     size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3381                                               TARGET_POINTER_SIZE)) /
3382                            TARGET_POINTER_SIZE;
3383                     eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3384                     if (size > 1)
3385                     {
3386                         hasMultiregStructArgs = true;
3387                     }
3388                 }
3389 #else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3390                 size         = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3391 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3392 #elif defined(_TARGET_ARM64_)
3393                 if (isStructArg)
3394                 {
3395                     if (isHfaArg)
3396                     {
3397                         size = GetHfaCount(argx);
3398                         // HFA structs are passed by value in multiple registers
3399                         hasMultiregStructArgs = true;
3400                     }
3401                     else
3402                     {
3403                         // Structs are either passed in 1 or 2 (64-bit) slots
3404                         size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3405                                                   TARGET_POINTER_SIZE)) /
3406                                TARGET_POINTER_SIZE;
3407
3408                         if (size == 2)
3409                         {
3410                             // Structs that are the size of 2 pointers are passed by value in multiple registers
3411                             hasMultiregStructArgs = true;
3412                         }
3413                         else if (size > 2)
3414                         {
3415                             size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3416                                       // reference (to a copy)
3417                         }
3418                     }
3419                     // Note that there are some additional rules for multireg structs.
3420                     // (i.e. they cannot be split between registers and the stack)
3421                 }
3422                 else
3423                 {
3424                     size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3425                 }
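
                // For illustration (ARM64, hypothetical types): struct { long a, b; } has size == 2
                // and is passed by value in two registers (e.g. x0/x1), while struct { long a, b, c; }
                // has its size reset to 1 and is passed by reference to a local copy.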
3426 #elif defined(_TARGET_ARM_)
3427                 if (isStructArg)
3428                 {
3429                     size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3430                                               TARGET_POINTER_SIZE)) /
3431                            TARGET_POINTER_SIZE;
3432                     if (isHfaArg || size > 1)
3433                     {
3434                         hasMultiregStructArgs = true;
3435                     }
3436                 }
3437                 else
3438                 {
3439                     // The typical case
3440                     // long/double type argument(s) will be changed to GT_FIELD_LIST in lower phase
3441                     size = genTypeStSz(argx->gtType);
3442                 }
3443 #elif defined(_TARGET_X86_)
3444                 size       = genTypeStSz(argx->gtType);
3445 #else
3446 #error Unsupported or unset target architecture
3447 #endif // _TARGET_XXX_
3448             }
3449 #ifdef _TARGET_ARM_
3450             else if (isHfaArg)
3451             {
3452                 size                  = GetHfaCount(argx);
3453                 hasMultiregStructArgs = true;
3454             }
3455 #endif           // _TARGET_ARM_
3456             else // struct type
3457             {
3458                 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3459                 if (argx->gtOper == GT_MKREFANY)
3460                 {
3461                     if (varTypeIsStruct(argx))
3462                     {
3463                         isStructArg = true;
3464                     }
3465 #ifdef _TARGET_AMD64_
3466 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3467                     if (varTypeIsStruct(argx))
3468                     {
3469                         size                 = info.compCompHnd->getClassSize(impGetRefAnyClass());
3470                         unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3471                         size                 = roundupSize / TARGET_POINTER_SIZE;
3472                         eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3473                     }
3474                     else
3475 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3476                     {
3477                         size = 1;
3478                     }
3479 #else
3480                     size                 = 2;
3481 #endif
3482                 }
3483                 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3484                 {
3485                     GenTreePtr  argObj         = argx;
3486                     GenTreePtr* parentOfArgObj = parentArgx;
3487
3488                     assert(args->OperIsList());
3489                     assert(argx == args->Current());
3490
3491                     /* The GT_OBJ may be a child of a GT_COMMA */
3492                     while (argObj->gtOper == GT_COMMA)
3493                     {
3494                         parentOfArgObj = &argObj->gtOp.gtOp2;
3495                         argObj         = argObj->gtOp.gtOp2;
3496                     }
3497
3498                     // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3499                     if (argObj->gtOper != GT_OBJ)
3500                     {
3501                         BADCODE("illegal argument tree in fgMorphArgs");
3502                     }
3503
3504                     CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3505 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3506                     eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3507 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3508
3509                     unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3510                     originalSize          = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3511                     unsigned roundupSize  = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3512
3513                     structSize = originalSize;
3514
3515                     structPassingKind howToPassStruct;
3516                     structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3517
3518 #ifdef _TARGET_ARM64_
3519                     if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3520                         !isPow2(originalSize))                    // size is 3,5,6 or 7 bytes
3521                     {
3522                         if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3523                         {
3524                             // On ARM64, for structs that are 3, 5, 6, or 7 bytes in size
3525                             // we can read 4 or 8 bytes from the LclVar to pass this arg.
3526                             originalSize = genTypeSize(structBaseType);
3527                         }
3528                     }
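                    // For example, a 3-byte struct (three uint8_t fields, say) whose source is a
                    // local variable can be read with a single 4-byte load; the extra byte comes
                    // from the local's own pointer-size-aligned stack slot, so the read is safe.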
3529 #endif //  _TARGET_ARM64_
3530
3531 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3532                     // On System V OS-es a struct is never passed by reference.
3533                     // It is either passed by value on the stack or in registers.
3534                     bool passStructInRegisters = false;
3535 #else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3536                     bool passStructByRef = false;
3537 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3538
3539                     // The following if-then-else needs to be carefully refactored.
3540                     // Basically the else portion wants to turn a struct load (a GT_OBJ)
3541                     // into a GT_IND of the appropriate size.
3542                     // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
3543                     // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined. (Why?)
3544                     // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3545                     // It also can't do this if we have an HFA arg,
3546                     // unless we have a 1-elem HFA in which case we want to do the optimization.
3547                     CLANG_FORMAT_COMMENT_ANCHOR;
3548
3549 #ifndef _TARGET_X86_
3550 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3551                     // Check for struct argument with size 1, 2, 4 or 8 bytes
3552                     // As we can optimize these by turning them into a GT_IND of the correct type
3553                     //
3554                     // Check for cases that we cannot optimize:
3555                     //
3556                     if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
3557                         !isPow2(originalSize) ||                // it is not a power of two (1, 2, 4 or 8)
3558                         (isHfaArg && (hfaSlots != 1)))          // it is an HFA with more than one element
3559 #endif                                                          // FEATURE_UNIX_AMD64_STRUCT_PASSING
3560                     {
3561                         // Normalize 'size' to the number of pointer sized items
3562                         // 'size' is the number of register slots that we will use to pass the argument
3563                         size = roundupSize / TARGET_POINTER_SIZE;
3564 #if defined(_TARGET_AMD64_)
3565 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3566                         size            = 1; // This must be copied to a temp and passed by address
3567                         passStructByRef = true;
3568                         copyBlkClass    = objClass;
3569 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3570                         if (!structDesc.passedInRegisters)
3571                         {
3572                             GenTreePtr lclVar     = fgIsIndirOfAddrOfLocal(argObj);
3573                             bool       needCpyBlk = false;
3574                             if (lclVar != nullptr)
3575                             {
3576                                 // If the struct is promoted to registers, it has to be materialized
3577                                 // on the stack. We may want to support promoted structures when
3578                                 // generating code for putarg_stk instead of creating a copy here.
3579                                 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3580                                 needCpyBlk        = varDsc->lvPromoted;
3581                             }
3582                             else
3583                             {
3584                                 // If simd16 comes from Vector<T>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3585                                 // sets structDesc.passedInRegisters to be false.
3586                                 //
3587                                 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3588                                 // by rationalizer. For now we will let SIMD struct arg to be copied to
3589                                 // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
3590                                 //
3591                                 // +--*  obj       simd16
3592                                 // |  \--*  addr      byref
3593                                 // |     |  /--*  lclVar    simd16 V05 loc4
3594                                 // |     \--*  simd      simd16 int -
3595                                 // |        \--*  lclVar    simd16 V08 tmp1
3596                                 //
3597                                 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3598                                 // so that we don't need to generate a copy here.
3599                                 GenTree* addr = argObj->gtOp.gtOp1;
3600                                 if (addr->OperGet() == GT_ADDR)
3601                                 {
3602                                     GenTree* addrChild = addr->gtOp.gtOp1;
3603                                     if (addrChild->OperGet() == GT_SIMD)
3604                                     {
3605                                         needCpyBlk = true;
3606                                     }
3607                                 }
3608                             }
3609                             passStructInRegisters = false;
3610                             if (needCpyBlk)
3611                             {
3612                                 copyBlkClass = objClass;
3613                             }
3614                             else
3615                             {
3616                                 copyBlkClass = NO_CLASS_HANDLE;
3617                             }
3618                         }
3619                         else
3620                         {
3621                             // The objClass is used to materialize the struct on the stack.
3622                             // For SystemV, the code below generates copies for struct arguments classified
3623                             // as register arguments.
3624                             // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3625                             // can be passed on registers or can be copied directly to outgoing area.
3626                             passStructInRegisters = true;
3627                             copyBlkClass          = objClass;
3628                         }
3629
3630 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3631 #elif defined(_TARGET_ARM64_)
3632                         if ((size > 2) && !isHfaArg)
3633                         {
3634                             size            = 1; // This must be copied to a temp and passed by address
3635                             passStructByRef = true;
3636                             copyBlkClass    = objClass;
3637                         }
3638 #endif
3639
3640 #ifdef _TARGET_ARM_
3641                         // If we're passing a promoted struct local var,
3642                         // we may need to skip some registers due to alignment; record those.
3643                         GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3644                         if (lclVar != nullptr)
3645                         {
3646                             LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3647                             if (varDsc->lvPromoted)
3648                             {
3649                                 assert(argObj->OperGet() == GT_OBJ);
3650                                 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3651                                 {
3652                                     fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3653                                 }
3654                             }
3655                         }
3656 #endif // _TARGET_ARM_
3657                     }
3658 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3659                     // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3660                     // generated for structs of size 1, 2, 4, or 8 bytes.
3661                     else // We have a struct argument with size 1, 2, 4 or 8 bytes
3662                     {
3663                         // change our GT_OBJ into a GT_IND of the correct type.
3664                         // We've already ensured above that size is a power of 2, and less than or equal to pointer
3665                         // size.
3666
3667                         assert(howToPassStruct == SPK_PrimitiveType);
3668
3669                         // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
3670                         // primitives
3671                         if (isHfaArg)
3672                         {
3673                             // If we reach here with an HFA arg it has to be a one element HFA
3674                             assert(hfaSlots == 1);
3675                             structBaseType = hfaType; // change the indirection type to a floating point type
3676                         }
3677
3678                         noway_assert(structBaseType != TYP_UNKNOWN);
3679
3680                         argObj->ChangeOper(GT_IND);
3681
3682                         // Now see if we can fold *(&X) into X
3683                         if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3684                         {
3685                             GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3686
3687                             // Keep the DONT_CSE flag in sync
3688                             // (as the addr always marks it for its op1)
3689                             temp->gtFlags &= ~GTF_DONT_CSE;
3690                             temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3691                             DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3692                             DEBUG_DESTROY_NODE(argObj);             // GT_IND
3693
3694                             argObj          = temp;
3695                             *parentOfArgObj = temp;
3696
3697                             // If the OBJ had been the top level node, we've now changed argx.
3698                             if (parentOfArgObj == parentArgx)
3699                             {
3700                                 argx = temp;
3701                             }
3702                         }
3703                         if (argObj->gtOper == GT_LCL_VAR)
3704                         {
3705                             unsigned   lclNum = argObj->gtLclVarCommon.gtLclNum;
3706                             LclVarDsc* varDsc = &lvaTable[lclNum];
3707
3708                             if (varDsc->lvPromoted)
3709                             {
3710                                 if (varDsc->lvFieldCnt == 1)
3711                                 {
3712                                     // get the first and only promoted field
3713                                     LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3714                                     if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3715                                     {
3716                                         // we will use the first and only promoted field
3717                                         argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3718
3719                                         if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3720                                             (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3721                                         {
3722                                             // Just use the existing field's type
3723                                             argObj->gtType = fieldVarDsc->TypeGet();
3724                                         }
3725                                         else
3726                                         {
3727                                             // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3728                                             // to a new type
3729                                             argObj->ChangeOper(GT_LCL_FLD);
3730                                             argObj->gtType = structBaseType;
3731                                         }
3732                                         assert(varTypeCanReg(argObj->TypeGet()));
3733                                         assert(copyBlkClass == NO_CLASS_HANDLE);
3734                                     }
3735                                     else
3736                                     {
3737                                         // use GT_LCL_FLD to swizzle the single field struct to a new type
3738                                         lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3739                                         argObj->ChangeOper(GT_LCL_FLD);
3740                                         argObj->gtType = structBaseType;
3741                                     }
3742                                 }
3743                                 else
3744                                 {
3745                                     // The struct fits into a single register, but it has been promoted into its
3746                                     // constituent fields, and so we have to re-assemble it
3747                                     copyBlkClass = objClass;
3748 #ifdef _TARGET_ARM_
3749                                     // Alignment constraints may cause us not to use (to "skip") some argument
3750                                     // registers. Add those, if any, to the skipped (int) arg reg mask.
3751                                     fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3752 #endif // _TARGET_ARM_
3753                                 }
3754                             }
3755                             else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3756                             {
3757                                 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3758                                 argObj->ChangeOper(GT_LCL_FLD);
3759                                 argObj->gtType = structBaseType;
3760                             }
3761                         }
3762                         else
3763                         {
3764                             // Not a GT_LCL_VAR, so we can just change the type on the node
3765                             argObj->gtType = structBaseType;
3766                         }
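                        // Illustrative (hypothetical) outcome of the rewrites above: for
                        // struct W { double d; } promoted to a single double field, the arg becomes
                        // a direct GT_LCL_VAR of the field; when the field's type or size doesn't
                        // match what the register needs, it becomes a GT_LCL_FLD retyped to
                        // structBaseType instead.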
3767                         assert(varTypeCanReg(argObj->TypeGet()) ||
3768                                ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3769
3770                         size = 1;
3771                     }
3772 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3773
3774 #endif // not _TARGET_X86_
3775                     // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3776                     if ((structBaseType == TYP_STRUCT) &&
3777 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3778                         !passStructInRegisters
3779 #else  // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3780                         !passStructByRef
3781 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3782                         )
3783                     {
3784                         if (isHfaArg && passUsingFloatRegs)
3785                         {
3786                             size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3787                         }
3788                         else
3789                         {
3790                             // If the valuetype size is not a multiple of sizeof(void*),
3791                             // we must copyblk to a temp before doing the obj to avoid
3792                             // the obj reading memory past the end of the valuetype
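                            // For example, a 6-byte struct rounds up to 8 bytes; reading it with an
                            // 8-byte load could touch 2 bytes past the end of the value, so we copy
                            // it to a temp of the rounded-up size first.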
3793                             CLANG_FORMAT_COMMENT_ANCHOR;
3794
3795                             if (roundupSize > originalSize)
3796                             {
3797                                 copyBlkClass = objClass;
3798
3799                                 // There are a few special cases where we can omit using a CopyBlk
3800                                 // where we normally would need to use one.
3801
3802                                 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3803                                 {
3804                                     copyBlkClass = NO_CLASS_HANDLE;
3805                                 }
3806                             }
3807
3808                             size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3809                         }
3810                     }
3811                 }
3812
3813 #ifdef _TARGET_64BIT_
3814                 if (size > 1)
3815                 {
3816                     hasMultiregStructArgs = true;
3817                 }
3818 #elif defined(_TARGET_ARM_)
3819                 // Build the mkrefany as a GT_FIELD_LIST in this function
3820                 if (size > 1 && argx->gtOper != GT_MKREFANY)
3821                 {
3822                     hasMultiregStructArgs = true;
3823                 }
3824 #endif // _TARGET_ARM_
3825             }
3826
3827             // The 'size' value must now have been set. (The original value of zero is invalid.)
3828             assert(size != 0);
3829
3830             //
3831             // Figure out if the argument will be passed in a register.
3832             //
3833
3834             if (isRegParamType(genActualType(argx->TypeGet()))
3835 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3836                 && (!isStructArg || structDesc.passedInRegisters)
3837 #endif
3838                     )
3839             {
3840 #ifdef _TARGET_ARM_
3841                 if (passUsingFloatRegs)
3842                 {
3843                     // First, see if it can be back-filled
3844                     if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3845                         (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3846                         (size == 1))                          // The size to back-fill is one float register
3847                     {
3848                         // Back-fill the register.
3849                         isBackFilled              = true;
3850                         regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3851                         fltArgSkippedRegMask &=
3852                             ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3853                         nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3854                         assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3855                     }
3856
3857                     // Does the entire float, double, or HFA fit in the FP arg registers?
3858                     // Check if the last register needed is still in the argument register range.
3859                     isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3860
3861                     if (!isRegArg)
3862                     {
3863                         anyFloatStackArgs = true;
3864                     }
3865                 }
3866                 else
3867                 {
3868                     isRegArg = intArgRegNum < MAX_REG_ARG;
3869                 }
3870 #elif defined(_TARGET_ARM64_)
3871                 if (passUsingFloatRegs)
3872                 {
3873                     // Check if the last register needed is still in the fp argument register range.
3874                     isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3875
3876                     // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3877                     if (isHfaArg && !isRegArg)
3878                     {
3879                         // Recompute the 'size' so that it represents the number of stack slots rather than the
3880                         // number of registers.
3881                         //
3882                         unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3883                         size                 = roundupSize / TARGET_POINTER_SIZE;
3884
3885                         // We also must update fltArgRegNum so that we no longer try to
3886                         // allocate any new floating point registers for args
3887                         // This prevents us from backfilling a subsequent arg into d7
3888                         //
3889                         fltArgRegNum = MAX_FLOAT_REG_ARG;
3890                     }
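                    // For example, a 3-float HFA (structSize == 12) that no longer fits in the FP
                    // registers becomes roundUp(12, 8) / 8 == 2 stack slots.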
3891                 }
3892                 else
3893                 {
3894                     // Check if the last register needed is still in the int argument register range.
3895                     isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3896
3897                     // Did we run out of registers when we had a 16-byte struct (size == 2)?
3898                     // (i.e. we only have one register remaining but we needed two registers to pass this arg)
3899                     // This prevents us from backfilling a subsequent arg into x7
3900                     //
3901                     if (!isRegArg && (size > 1))
3902                     {
3903                         // We also must update intArgRegNum so that we no longer try to
3904                         // allocate any new general purpose registers for args
3905                         //
3906                         intArgRegNum = maxRegArgs;
3907                     }
3908                 }
3909 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3910
3911 #if defined(UNIX_AMD64_ABI)
3912
3913 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3914                 // Here a struct can be passed in registers, following the classification of its members and size.
3915                 // Now make sure there are actually enough registers to do so.
3916                 if (isStructArg)
3917                 {
3918                     unsigned int structFloatRegs = 0;
3919                     unsigned int structIntRegs   = 0;
3920                     for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3921                     {
3922                         if (structDesc.IsIntegralSlot(i))
3923                         {
3924                             structIntRegs++;
3925                         }
3926                         else if (structDesc.IsSseSlot(i))
3927                         {
3928                             structFloatRegs++;
3929                         }
3930                     }
3931
3932                     isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3933                                ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
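                    // Illustrative (hypothetical) classification: struct { double d; long l; } is one
                    // SSE eightbyte plus one INTEGER eightbyte, so it is a register arg only if both a
                    // float register and an integer register are still available.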
3934                 }
3935                 else
3936 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3937                 {
3938                     if (passUsingFloatRegs)
3939                     {
3940                         isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3941                     }
3942                     else
3943                     {
3944                         isRegArg = intArgRegNum < MAX_REG_ARG;
3945                     }
3946                 }
3947 #else  // !defined(UNIX_AMD64_ABI)
3948                 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3949 #endif // !defined(UNIX_AMD64_ABI)
3950 #endif // _TARGET_ARM_
3951             }
3952             else
3953             {
3954                 isRegArg = false;
3955             }
3956
3957 #ifndef LEGACY_BACKEND
3958             // If there are nonstandard args (outside the calling convention) they were inserted above
3959             // and noted in a table so that we can recognize them here and build their argInfo.
3960             //
3961             // They should not affect the placement of any other args or stack space required.
3962             // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3963             isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3964             if (isNonStandard && (nonStdRegNum == REG_STK))
3965             {
3966                 isRegArg = false;
3967             }
3968 #if defined(_TARGET_X86_)
3969             else if (call->IsTailCallViaHelper())
3970             {
3971                 // We have already (before calling fgMorphArgs()) appended the 4 special args
3972                 // required by the x86 tailcall helper. These args are required to go on the
3973                 // stack. Force them to the stack here.
3974                 assert(numArgs >= 4);
3975                 if (argIndex >= numArgs - 4)
3976                 {
3977                     isRegArg = false;
3978                 }
3979             }
3980 #endif    // defined(_TARGET_X86_)
3981 #endif    // !LEGACY_BACKEND
3982         } // end !reMorphing
3983
3984         //
3985         // Now we know if the argument goes in registers or not and how big it is,
3986         // whether we just computed it above or this is a re-morph call and we looked it up.
3987         //
3988         CLANG_FORMAT_COMMENT_ANCHOR;
3989
3990 #ifdef _TARGET_ARM_
3991         // If we ever allocate a floating point argument to the stack, then all
3992         // subsequent HFA/float/double arguments go on the stack.
3993         if (!isRegArg && passUsingFloatRegs)
3994         {
3995             for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3996             {
3997                 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3998             }
3999         }
4000
4001         // If we think we're going to split a struct between integer registers and the stack, check to
4002         // see if we've already assigned a floating-point arg to the stack.
4003         if (isRegArg &&                            // We decided above to use a register for the argument
4004             !passUsingFloatRegs &&                 // We're using integer registers
4005             (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
4006             anyFloatStackArgs)                     // We've already used the stack for a floating-point argument
4007         {
4008             isRegArg = false; // Change our mind; don't pass this struct partially in registers
4009
4010             // Skip the rest of the integer argument registers
4011             for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
4012             {
4013                 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
4014             }
4015         }
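        // For example (illustrative): if a double was already passed on the stack, a
        // 16-byte struct (size == 4) arriving with only r3 free is passed entirely on
        // the stack (and r3 is skipped) rather than being split across r3 and the stack.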
4016
4017 #endif // _TARGET_ARM_
4018
4019         if (isRegArg)
4020         {
4021             regNumber nextRegNum = REG_STK;
4022 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4023             regNumber    nextOtherRegNum = REG_STK;
4024             unsigned int structFloatRegs = 0;
4025             unsigned int structIntRegs   = 0;
4026
4027             if (isStructArg && structDesc.passedInRegisters)
4028             {
4029                 // It is a struct passed in registers. Assign the next available register.
4030                 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
4031                 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
4032                 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4033                 {
4034                     if (structDesc.IsIntegralSlot(i))
4035                     {
4036                         *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
4037                         structIntRegs++;
4038                     }
4039                     else if (structDesc.IsSseSlot(i))
4040                     {
4041                         *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
4042                         structFloatRegs++;
4043                     }
4044                 }
4045             }
4046             else
4047 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4048             {
4049                 // fill in or update the argInfo table
4050                 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
4051                                                 : genMapIntRegArgNumToRegNum(intArgRegNum);
4052             }
4053
4054 #ifdef _TARGET_AMD64_
4055 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4056             assert(size == 1);
4057 #endif
4058 #endif
4059
4060             fgArgTabEntryPtr newArgEntry;
4061             if (reMorphing)
4062             {
4063                 // This is a register argument - possibly update it in the table
4064                 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4065             }
4066             else
4067             {
4068                 if (isNonStandard)
4069                 {
4070                     nextRegNum = nonStdRegNum;
4071                 }
4072
4073                 // This is a register argument - put it in the table
4074                 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4075 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4076                                                          ,
4077                                                          isStructArg, nextOtherRegNum, &structDesc
4078 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4079                                                          );
4080
4081                 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
4082                                             isHfaArg); // Note that on Arm32 an HFA is passed in int regs for varargs
4083                 newArgEntry->SetIsBackFilled(isBackFilled);
4084                 newArgEntry->isNonStandard = isNonStandard;
4085             }
4086
4087             if (newArgEntry->isNonStandard)
4088             {
4089                 continue;
4090             }
4091
4092             // Set up the next intArgRegNum and fltArgRegNum values.
4093             if (!isBackFilled)
4094             {
4095 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4096                 if (isStructArg)
4097                 {
4098                     intArgRegNum += structIntRegs;
4099                     fltArgRegNum += structFloatRegs;
4100                 }
4101                 else
4102 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4103                 {
4104                     if (passUsingFloatRegs)
4105                     {
4106                         fltArgRegNum += size;
4107
4108 #ifdef WINDOWS_AMD64_ABI
4109                         // Whenever we pass a floating point register argument
4110                         // we skip the corresponding integer register argument
4111                         intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4112 #endif // WINDOWS_AMD64_ABI
4113 #ifdef _TARGET_ARM_
4114                         if (fltArgRegNum > MAX_FLOAT_REG_ARG)
4115                         {
4116                             // This indicates a partial enregistration of a struct type
4117                             assert(varTypeIsStruct(argx));
4118                             unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
4119                             assert((unsigned char)numRegsPartial == numRegsPartial);
4120                             call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4121                             fltArgRegNum = MAX_FLOAT_REG_ARG;
4122                         }
4123 #endif // _TARGET_ARM_
4124                     }
4125                     else
4126                     {
4127                         if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4128                         {
4129                             // we are setting up the fixed return buffer register argument
4130                             // so don't increment intArgRegNum
4131                             assert(size == 1);
4132                         }
4133                         else
4134                         {
4135                             // Increment intArgRegNum by 'size' registers
4136                             intArgRegNum += size;
4137                         }
4138
4139 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
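                        // Conversely, on the Windows x64 ABI, consuming an integer register
                        // argument also skips the corresponding floating point register.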
4140                         fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4141 #endif // _TARGET_AMD64_ && !UNIX_AMD64_ABI
4142 #ifdef _TARGET_ARM_
4143                         if (intArgRegNum > MAX_REG_ARG)
4144                         {
4145                             // This indicates a partial enregistration of a struct type
4146                             assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
4147                                    (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4148                             unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4149                             assert((unsigned char)numRegsPartial == numRegsPartial);
4150                             call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4151                             intArgRegNum = MAX_REG_ARG;
4152                             fgPtrArgCntCur += size - numRegsPartial;
4153                         }
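                        // Worked example (illustrative): a 12-byte struct (size == 3) starting at
                        // intArgRegNum == 2 yields intArgRegNum == 5 > MAX_REG_ARG == 4, so
                        // numRegsPartial = 3 - (5 - 4) = 2 and the arg is split as two registers
                        // (r2, r3) plus one stack slot.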
4154 #endif // _TARGET_ARM_
4155                     }
4156                 }
4157             }
4158         }
4159         else // We have an argument that is not passed in a register
4160         {
4161             fgPtrArgCntCur += size;
4162
4163             // If the register arguments have not been determined then we must fill in the argInfo
4164
4165             if (reMorphing)
4166             {
4167                 // This is a stack argument - possibly update it in the table
4168                 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4169             }
4170             else
4171             {
4172                 // This is a stack argument - put it in the table
4173                 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4174                                            argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4175             }
4176         }
4177
4178         if (copyBlkClass != NO_CLASS_HANDLE)
4179         {
4180             noway_assert(!reMorphing);
4181             fgMakeOutgoingStructArgCopy(call, args, argIndex,
4182                                         copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4183
4184             // This can cause a GTF_EXCEPT flag to be set.
4185             // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
4186             // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
4187             // there are no register arguments. Then reMorphing is never true, so we keep re-copying
4188             // any struct arguments.
4189             // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0));
4190             flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
4191
4192 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4193             hasStackArgCopy = true;
4194 #endif
4195         }
4196
4197 #ifndef LEGACY_BACKEND
4198         if (argx->gtOper == GT_MKREFANY)
4199         {
4200             // 'Lower' the MKREFANY tree and insert it.
4201             noway_assert(!reMorphing);
4202
4203 #ifndef _TARGET_64BIT_
4204
4205             // Build the mkrefany as a GT_FIELD_LIST
4206             GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4207                 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4208             (void)new (this, GT_FIELD_LIST)
4209                 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4210             fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4211             fp->node            = fieldList;
4212             args->gtOp.gtOp1    = fieldList;
4213
4214 #else  // _TARGET_64BIT_
4215
4216             // Get a new temp
4217             // Here we don't need the unsafe value class check, since the address of the temp is used only in mkrefany
4218             unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4219             lvaSetStruct(tmp, impGetRefAnyClass(), false);
4220
4221             // Build the mkrefany as a comma node:
4222             // (tmp.ptr=argx),(tmp.type=handle)
4223             GenTreeLclFld* destPtrSlot  = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4224             GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4225             destPtrSlot->gtFieldSeq     = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4226             destPtrSlot->gtFlags |= GTF_VAR_DEF;
4227             destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4228             destTypeSlot->gtFlags |= GTF_VAR_DEF;
4229
4230             GenTreePtr asgPtrSlot  = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4231             GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4232             GenTreePtr asg         = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4233
4234             // Change the expression to "(tmp=val)"
4235             args->gtOp.gtOp1 = asg;
4236
4237             // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4238             call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4239             lvaSetVarAddrExposed(tmp);
4240 #endif // _TARGET_64BIT_
4241         }
4242 #endif // !LEGACY_BACKEND
4243
4244 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4245         if (isStructArg)
4246         {
4247             GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4248             if ((lclNode != nullptr) &&
4249                 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4250             {
4251                 // Make a GT_FIELD_LIST of the field lclVars.
4252                 GenTreeLclVarCommon* lcl       = lclNode->AsLclVarCommon();
4253                 LclVarDsc*           varDsc    = &(lvaTable[lcl->gtLclNum]);
4254                 GenTreeFieldList*    fieldList = nullptr;
4255                 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4256                      fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4257                 {
4258                     LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4259                     if (fieldList == nullptr)
4260                     {
4261                         lcl->SetLclNum(fieldLclNum);
4262                         lcl->ChangeOper(GT_LCL_VAR);
4263                         lcl->gtType = fieldVarDsc->lvType;
4264                         fieldList   = new (this, GT_FIELD_LIST)
4265                             GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4266                         fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4267                         fp->node            = fieldList;
4268                         args->gtOp.gtOp1    = fieldList;
4269                     }
4270                     else
4271                     {
4272                         GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4273                         fieldList         = new (this, GT_FIELD_LIST)
4274                             GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4275                     }
4276                 }
4277             }
4278         }
4279 #endif // _TARGET_X86_ && !LEGACY_BACKEND
4280
4281 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4282         if (isStructArg && !isRegArg)
4283         {
4284             nonRegPassedStructSlots += size;
4285         }
4286         else
4287 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4288         {
4289             argSlots += size;
4290         }
4291     } // end foreach argument loop
4292
4293     if (!reMorphing)
4294     {
4295         call->fgArgInfo->ArgsComplete();
4296
4297 #ifdef LEGACY_BACKEND
4298         call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4299 #if defined(_TARGET_ARM_)
4300         call->gtCallRegUsedMask &= ~argSkippedRegMask;
4301 #endif
4302         if (fltArgRegNum > 0)
4303         {
4304 #if defined(_TARGET_ARM_)
4305             call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4306 #endif
4307         }
4308 #endif // LEGACY_BACKEND
4309     }
4310
4311     if (call->gtCallArgs)
4312     {
4313         UpdateGT_LISTFlags(call->gtCallArgs);
4314     }
4315
4316     /* Process the function address, if indirect call */
4317
4318     if (call->gtCallType == CT_INDIRECT)
4319     {
4320         call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4321     }
4322
4323     call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4324
4325     if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4326     {
4327         fgPtrArgCntCur++;
4328     }
4329
4330     /* Remember the maximum value we ever see */
4331
4332     if (fgPtrArgCntMax < fgPtrArgCntCur)
4333     {
4334         JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4335         fgPtrArgCntMax = fgPtrArgCntCur;
4336     }
4337
4338     assert(fgPtrArgCntCur >= genPtrArgCntSav);
4339     call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4340
4341     /* The call will pop all the arguments we pushed */
4342
4343     fgPtrArgCntCur = genPtrArgCntSav;
4344
4345 #if FEATURE_FIXED_OUT_ARGS
4346
4347     // Record the outgoing argument size.  If the call is a fast tail
4348     // call, it will set up its arguments in the incoming arg area instead
4349     // of the out-going arg area, so we don't need to track the
4350     // outgoing arg size.
4351     if (!call->IsFastTailCall())
4352     {
4353         unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4354
4355 #if defined(UNIX_AMD64_ABI)
4356         opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4357
4358         // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4359
4360         // First slots go in registers only, no stack needed.
4361         // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4362         // and ignores floating point args (it is overly conservative in that case).
4363         preallocatedArgCount = nonRegPassedStructSlots;
4364         if (argSlots > MAX_REG_ARG)
4365         {
4366             preallocatedArgCount += argSlots - MAX_REG_ARG;
4367         }
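        // For example (illustrative): with argSlots == 8 and MAX_REG_ARG == 6, two slots
        // spill past the registers, so preallocatedArgCount = nonRegPassedStructSlots + 2.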
4368 #endif // UNIX_AMD64_ABI
4369
4370         const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4371         call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
4372
4373 #ifdef DEBUG
4374         if (verbose)
4375         {
4376             printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4377                    preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4378         }
4379 #endif
4380     }
4381 #endif // FEATURE_FIXED_OUT_ARGS
4382
4383     /* Update the 'side effect' flags value for the call */
4384
4385     call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4386
4387     // If the register arguments have already been determined
4388     // or we have no register arguments then we don't need to
4389     // call SortArgs() and EvalArgsToTemps()
4390     //
4391     // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4392     // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4393     // is added to make sure we call EvalArgsToTemps.
4394     if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4395 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4396                         || hasStackArgCopy
4397 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4398                         ))
4399     {
4400         // This is the first time that we morph this call AND it has register arguments.
4401         // Fall through to the code below and do the 'defer or eval to temp' analysis.
4402
4403         call->fgArgInfo->SortArgs();
4404
4405         call->fgArgInfo->EvalArgsToTemps();
4406
4407         // We may have updated the arguments
4408         if (call->gtCallArgs)
4409         {
4410             UpdateGT_LISTFlags(call->gtCallArgs);
4411         }
4412     }
4413
4414 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4415
4416     // Rewrite the struct args to be passed by value on stack or in registers.
4417     fgMorphSystemVStructArgs(call, hasStructArgument);
4418
4419 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4420
4421 #ifndef LEGACY_BACKEND
4422     // In the future we can migrate UNIX_AMD64 to use this
4423     // method instead of fgMorphSystemVStructArgs
4424
4425     // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
4426     if (hasMultiregStructArgs)
4427     {
4428         fgMorphMultiregStructArgs(call);
4429     }
4430 #endif // !LEGACY_BACKEND
4431
4432 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4433
4434 #ifdef DEBUG
4435     if (verbose)
4436     {
4437         fgArgInfoPtr argInfo = call->fgArgInfo;
4438         for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4439         {
4440             fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4441             curArgEntry->Dump();
4442         }
4443     }
4444 #endif
4445
4446     return call;
4447 }
4448 #ifdef _PREFAST_
4449 #pragma warning(pop)
4450 #endif
4451
4452 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4453 // fgMorphSystemVStructArgs:
4454 //   Rewrite the struct args to be passed by value on stack or in registers.
4455 //
4456 // Arguments:
4457 //   call: The call whose arguments need to be morphed.
4458 //   hasStructArgument: Whether this call has struct arguments.
4459 //
4460 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4461 {
4462     unsigned   flagsSummary = 0;
4463     GenTreePtr args;
4464     GenTreePtr argx;
4465
4466     if (hasStructArgument)
4467     {
4468         fgArgInfoPtr allArgInfo = call->fgArgInfo;
4469
4470         for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4471         {
4472             // For late arguments the arg tree that is passed to the callee is in the gtCallLateArgs list,
4473             // while the gtCallArgList contains the setup node that evaluates the arg.
4474             // The fgArgEntry node contains the mapping between the nodes in both lists:
4475             // if the arg is not a late arg, fgArgEntry->node points to the node in gtCallArgs itself,
4476             // otherwise it points to the node in the gtCallLateArgs list.
4477             bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4478             fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4479             assert(fgEntryPtr != nullptr);
4480             GenTreePtr argx     = fgEntryPtr->node;
4481             GenTreePtr lateList = nullptr;
4482             GenTreePtr lateNode = nullptr;
4483
4484             if (isLateArg)
4485             {
4486                 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4487                 {
4488                     assert(list->OperIsList());
4489
4490                     GenTreePtr argNode = list->Current();
4491                     if (argx == argNode)
4492                     {
4493                         lateList = list;
4494                         lateNode = argNode;
4495                         break;
4496                     }
4497                 }
4498                 assert(lateList != nullptr && lateNode != nullptr);
4499             }
4500             GenTreePtr arg            = argx;
4501             bool       argListCreated = false;
4502
4503             var_types type = arg->TypeGet();
4504
4505             if (varTypeIsStruct(type))
4506             {
4507                 var_types originalType = type;
4508                 // If we have already processed the arg...
4509                 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
4510                 {
4511                     continue;
4512                 }
4513
4514                 // If it is already a GT_OBJ, it has been set up properly already.
4515                 if (arg->OperGet() == GT_OBJ)
4516                 {
4517                     assert(!fgEntryPtr->structDesc.passedInRegisters);
4518                     continue;
4519                 }
4520
4521                 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4522                        (arg->OperGet() == GT_ADDR &&
4523                         (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4524
4525                 GenTreeLclVarCommon* lclCommon =
4526                     arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4527                 if (fgEntryPtr->structDesc.passedInRegisters)
4528                 {
4529                     if (fgEntryPtr->structDesc.eightByteCount == 1)
4530                     {
4531                         // Change the type; the code below will change the LclVar to a LCL_FLD
4532                         type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4533                                                                  fgEntryPtr->structDesc.eightByteSizes[0]);
4534                     }
4535                     else if (fgEntryPtr->structDesc.eightByteCount == 2)
4536                     {
4537                         // Create LCL_FLD for each eightbyte.
4538                         argListCreated = true;
4539
4540                         // First eightbyte.
4541                         arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4542                         arg->gtType =
4543                             GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4544                                                               fgEntryPtr->structDesc.eightByteSizes[0]);
4545                         GenTreeFieldList* fieldList =
4546                             new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4547                         fieldList->gtType = originalType; // Preserve the type. It is a special case.
4548                         arg               = fieldList;
4549
4550                         // Second eightbyte.
4551                         GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4552                             GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4553                                                                                 .eightByteClassifications[1],
4554                                                                             fgEntryPtr->structDesc.eightByteSizes[1]),
4555                                           lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4556
4557                         fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4558                         fieldList->gtType       = originalType; // Preserve the type. It is a special case.
4559                         newLclField->gtFieldSeq = FieldSeqStore::NotAField();
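                        // The result (illustrative), for e.g. struct { long l; double d; }, is a
                        // two-entry GT_FIELD_LIST of LCL_FLDs typed from the eightbyte
                        // classifications, reading the local at offsets 0 and 8.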
4560                     }
4561                     else
4562                     {
4563                         assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
4564                                                                                        // for the CLR.
4565                     }
4566                 }
4567
4568                 // If we didn't change the type of the struct, it means its
4569                 // classification doesn't allow it to be passed directly in
4570                 // registers, so we need to pass a pointer to the destination
4571                 // where we copied the struct to.
4572                 if (!argListCreated)
4573                 {
4574                     if (fgEntryPtr->structDesc.passedInRegisters)
4575                     {
4576                         arg->gtType = type;
4577                     }
4578                     else
4579                     {
4580                         // Make sure this is an addr node.
4581                         if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4582                         {
4583                             arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4584                         }
4585
4586                         assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4587
4588                         // Create an Obj of the temp to use it as a call argument.
4589                         arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4590                     }
4591                 }
4592             }
4593
4594             if (argx != arg)
4595             {
4596                 bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4597                 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4598                 assert(fgEntryPtr != nullptr);
4599                 GenTreePtr argx     = fgEntryPtr->node;
4600                 GenTreePtr lateList = nullptr;
4601                 GenTreePtr lateNode = nullptr;
4602                 if (isLateArg)
4603                 {
4604                     for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4605                     {
4606                         assert(list->OperIsList());
4607
4608                         GenTreePtr argNode = list->Current();
4609                         if (argx == argNode)
4610                         {
4611                             lateList = list;
4612                             lateNode = argNode;
4613                             break;
4614                         }
4615                     }
4616                     assert(lateList != nullptr && lateNode != nullptr);
4617                 }
4618
4619                 fgEntryPtr->node = arg;
4620                 if (isLateArg)
4621                 {
4622                     lateList->gtOp.gtOp1 = arg;
4623                 }
4624                 else
4625                 {
4626                     args->gtOp.gtOp1 = arg;
4627                 }
4628             }
4629         }
4630     }
4631
4632     // Update the flags
4633     call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4634 }
4635 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4636
4637 //-----------------------------------------------------------------------------
4638 // fgMorphMultiregStructArgs:  Locate the TYP_STRUCT arguments and
4639 //                             call fgMorphMultiregStructArg on each of them.
4640 //
4641 // Arguments:
4642 //    call:    a GenTreeCall node that has one or more TYP_STRUCT arguments
4643 //
4644 // Notes:
4645 //    We only call fgMorphMultiregStructArg for the register-passed TYP_STRUCT arguments.
4646 //    The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
4647 //    which is only used for struct arguments.
4648 //    If this method fails to find any TYP_STRUCT arguments it will assert.
4649 //
4650 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4651 {
4652     GenTreePtr   args;
4653     GenTreePtr   argx;
4654     bool         foundStructArg = false;
4655     unsigned     initialFlags   = call->gtFlags;
4656     unsigned     flagsSummary   = 0;
4657     fgArgInfoPtr allArgInfo     = call->fgArgInfo;
4658
4659     // Currently ARM64/ARM use this method to morph the MultiReg struct args;
4660     //  in the future AMD64_UNIX will also use this method
4661     CLANG_FORMAT_COMMENT_ANCHOR;
4662
4663 #ifdef _TARGET_X86_
4664     assert(!"Logic error: no MultiregStructArgs for X86");
4665 #endif
4666 #ifdef _TARGET_AMD64_
4667 #if defined(UNIX_AMD64_ABI)
4668     NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4669 #else  // WINDOWS_AMD64_ABI
4670     assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4671 #endif // !UNIX_AMD64_ABI
4672 #endif
4673
4674     for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4675     {
4676         // For late arguments the arg tree that is passed to the callee is in the gtCallLateArgs list,
4677         // while the gtCallArgList contains the setup node that evaluates the arg.
4678         // The fgArgEntry node contains the mapping between the nodes in both lists:
4679         // if the arg is not a late arg, fgArgEntry->node points to the node in gtCallArgs itself,
4680         // otherwise it points to the node in the gtCallLateArgs list.
4681         bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4682         fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4683         assert(fgEntryPtr != nullptr);
4684         GenTreePtr argx     = fgEntryPtr->node;
4685         GenTreePtr lateList = nullptr;
4686         GenTreePtr lateNode = nullptr;
4687
4688         if (isLateArg)
4689         {
4690             for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4691             {
4692                 assert(list->OperIsList());
4693
4694                 GenTreePtr argNode = list->Current();
4695                 if (argx == argNode)
4696                 {
4697                     lateList = list;
4698                     lateNode = argNode;
4699                     break;
4700                 }
4701             }
4702             assert(lateList != nullptr && lateNode != nullptr);
4703         }
4704
4705         GenTreePtr arg = argx;
4706
4707         if (arg->TypeGet() == TYP_STRUCT)
4708         {
4709             foundStructArg = true;
4710
4711             arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4712
4713             // Did we replace 'argx' with a new tree?
4714             if (arg != argx)
4715             {
4716                 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4717
4718                 // link the new arg node into either the late arg list or the gtCallArgs list
4719                 if (isLateArg)
4720                 {
4721                     lateList->gtOp.gtOp1 = arg;
4722                 }
4723                 else
4724                 {
4725                     args->gtOp.gtOp1 = arg;
4726                 }
4727             }
4728         }
4729     }
4730
4731     // We should only call this method when we actually have one or more multireg struct args
4732     assert(foundStructArg);
4733
4734     // Update the flags
4735     call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4736 }
4737
4738 //-----------------------------------------------------------------------------
4739 // fgMorphMultiregStructArg:  Given a multireg TYP_STRUCT arg from a call argument list
4740 //   Morph the argument into a set of GT_FIELD_LIST nodes.
4741 //
4742 // Arguments:
4743 //     arg        - A GenTree node containing a TYP_STRUCT arg that
4744 //                  is to be passed in multiple registers
4745 //     fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4746 //
4747 // Notes:
4748 //    arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4749 //    for passing in multiple registers.
4750 //    If arg is a LclVar we check whether it is struct promoted with the right number of fields
4751 //    at the appropriate offsets; if so, we will use the struct promoted fields
4752 //    in the GT_FIELD_LIST nodes that we create.
4753 //    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4754 //    we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4755 //    this also forces the struct to be stack allocated into the local frame.
4756 //    For the GT_OBJ case we will clone the address expression and generate two (or more)
4757 //    indirections.
4758 //    Currently the implementation handles ARM64/ARM and will NYI for other architectures.
4759 //
4760 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4761 {
4762     assert(arg->TypeGet() == TYP_STRUCT);
4763
4764 #ifndef _TARGET_ARMARCH_
4765     NYI("fgMorphMultiregStructArg requires implementation for this target");
4766 #endif
4767
4768 #ifdef _TARGET_ARM_
4769     if (fgEntryPtr->isSplit)
4770     {
4771         if (fgEntryPtr->isHfaRegArg)
4772         {
4773             // We cannot handle HFA split struct morphed to GT_FIELD_LIST yet
4774             NYI_ARM("Struct split between float registers and stack");
4775         }
4776         else if (fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4)
4777         {
4778             return arg;
4779         }
4780     }
4781     else if (!fgEntryPtr->isHfaRegArg && fgEntryPtr->numSlots > 4)
4782     {
4783         return arg;
4784     }
4785 #endif
4786
4787 #if FEATURE_MULTIREG_ARGS
4788     // Examine 'arg' and setup argValue objClass and structSize
4789     //
4790     CORINFO_CLASS_HANDLE objClass   = NO_CLASS_HANDLE;
4791     GenTreePtr           argValue   = arg; // normally argValue will be arg, but see right below
4792     unsigned             structSize = 0;
4793
4794     if (arg->OperGet() == GT_OBJ)
4795     {
4796         GenTreeObj* argObj = arg->AsObj();
4797         objClass           = argObj->gtClass;
4798         structSize         = info.compCompHnd->getClassSize(objClass);
4799
4800         // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4801         //
4802         if (argObj->gtOp1->OperGet() == GT_ADDR)
4803         {
4804             argValue = argObj->gtOp1->gtOp.gtOp1;
4805         }
4806     }
4807     else if (arg->OperGet() == GT_LCL_VAR)
4808     {
4809         GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4810         unsigned             varNum  = varNode->gtLclNum;
4811         assert(varNum < lvaCount);
4812         LclVarDsc* varDsc = &lvaTable[varNum];
4813
4814         objClass   = lvaGetStruct(varNum);
4815         structSize = varDsc->lvExactSize;
4816     }
4817     noway_assert(objClass != nullptr);
4818
4819     var_types hfaType                 = TYP_UNDEF;
4820     var_types elemType                = TYP_UNDEF;
4821     unsigned  elemCount               = 0;
4822     unsigned  elemSize                = 0;
4823     var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4824
4825     hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4826     if (varTypeIsFloating(hfaType))
4827     {
4828         elemType  = hfaType;
4829         elemSize  = genTypeSize(elemType);
4830         elemCount = structSize / elemSize;
4831         assert(elemSize * elemCount == structSize);
4832         for (unsigned inx = 0; inx < elemCount; inx++)
4833         {
4834             type[inx] = elemType;
4835         }
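        // For example (illustrative): an HFA of three floats gives elemType == TYP_FLOAT,
        // elemCount == 3, and type[] == { TYP_FLOAT, TYP_FLOAT, TYP_FLOAT }.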
4836     }
4837     else
4838     {
4839 #ifdef _TARGET_ARM64_
4840         assert(structSize <= 2 * TARGET_POINTER_SIZE);
4841 #elif defined(_TARGET_ARM_)
4842         assert(structSize <= 4 * TARGET_POINTER_SIZE);
4843 #endif
4844
4845 #ifdef _TARGET_ARM64_
4846         BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
4847         info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4848         elemCount = 2;
4849         type[0]   = getJitGCType(gcPtrs[0]);
4850         type[1]   = getJitGCType(gcPtrs[1]);
4851 #elif defined(_TARGET_ARM_)
4852         BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
4853         elemCount      = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
4854         info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4855         for (unsigned inx = 0; inx < elemCount; inx++)
4856         {
4857             type[inx] = getJitGCType(gcPtrs[inx]);
4858         }
4859 #endif // _TARGET_ARM_
4860
4861         if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4862         {
4863             elemSize = TARGET_POINTER_SIZE;
4864             // We can safely widen this to aligned bytes since we are loading from
4865             // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4866             // lives in the stack frame or will be a promoted field.
4867             //
4868             structSize = elemCount * TARGET_POINTER_SIZE;
4869         }
4870         else // we must have a GT_OBJ
4871         {
4872             assert(argValue->OperGet() == GT_OBJ);
4873
4874             // We need to load the struct from an arbitrary address
4875             // and we can't read past the end of the structSize
4876             // We adjust the last load type here
4877             //
4878             unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
4879             unsigned lastElem       = elemCount - 1;
4880             if (remainingBytes != 0)
4881             {
4882                 switch (remainingBytes)
4883                 {
4884                     case 1:
4885                         type[lastElem] = TYP_BYTE;
4886                         break;
4887                     case 2:
4888                         type[lastElem] = TYP_SHORT;
4889                         break;
4890 #ifdef _TARGET_ARM64_
4891                     case 4:
4892                         type[lastElem] = TYP_INT;
4893                         break;
4894 #endif // _TARGET_ARM64_
4895                     default:
4896                         noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
4897                         break;
4898                 }
4899             }
4900         }
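            // For example (illustrative): a 12-byte struct on ARM64 has elemCount == 2 and
            // remainingBytes == 4, so the second load is narrowed to TYP_INT.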
4901     }
4902     // We should still have a TYP_STRUCT
4903     assert(argValue->TypeGet() == TYP_STRUCT);
4904
4905     GenTreeFieldList* newArg = nullptr;
4906
4907     // Are we passing a struct LclVar?
4908     //
4909     if (argValue->OperGet() == GT_LCL_VAR)
4910     {
4911         GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4912         unsigned             varNum  = varNode->gtLclNum;
4913         assert(varNum < lvaCount);
4914         LclVarDsc* varDsc = &lvaTable[varNum];
4915
4916         // At this point any TYP_STRUCT LclVar must be an aligned struct
4917         // or an HFA struct, both of which are passed by value.
4918         //
4919         assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4920
4921         varDsc->lvIsMultiRegArg = true;
4922
4923 #ifdef DEBUG
4924         if (verbose)
4925         {
4926             JITDUMP("Multireg struct argument V%02u : ", varNum);
4927             fgEntryPtr->Dump();
4928         }
4929 #endif // DEBUG
4930
4931         // This local variable must match the layout of the 'objClass' type exactly
4932         if (varDsc->lvIsHfa())
4933         {
4934             // We have a HFA struct
4935             noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4936             noway_assert(elemSize == genTypeSize(elemType));
4937             noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4938             noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4939
4940             for (unsigned inx = 0; (inx < elemCount); inx++)
4941             {
4942                 noway_assert(type[inx] == elemType);
4943             }
4944         }
4945         else
4946         {
4947 #ifdef _TARGET_ARM64_
4948             // We must have a 16-byte struct (non-HFA)
4949             noway_assert(elemCount == 2);
4950 #elif defined(_TARGET_ARM_)
4951             noway_assert(elemCount <= 4);
4952 #endif
4953
4954             for (unsigned inx = 0; inx < elemCount; inx++)
4955             {
4956                 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4957
4958                 // We set up the type[inx] value above using the GC info from 'objClass'
4959                 // This GT_LCL_VAR must have the same GC layout info
4960                 //
4961                 if (currentGcLayoutType != TYPE_GC_NONE)
4962                 {
4963                     noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4964                 }
4965                 else
4966                 {
4967                     // We may have used a small type when we set up the type[inx] values above
4968                     // We can safely widen this to TYP_I_IMPL
4969                     type[inx] = TYP_I_IMPL;
4970                 }
4971             }
4972         }
4973
4974 #ifdef _TARGET_ARM64_
4975         // Is this LclVar a promoted struct with exactly 2 fields?
4976         // TODO-ARM64-CQ: Support struct promoted HFA types here
4977         if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
4978         {
4979             // See if we have two promoted fields that start at offsets 0 and 8
4980             unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4981             unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4982
4983             // Did we find the promoted fields at the necessary offsets?
4984             if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4985             {
4986                 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4987                 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4988
4989                 var_types loType = loVarDsc->lvType;
4990                 var_types hiType = hiVarDsc->lvType;
4991
4992                 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4993                 {
4994                     // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4995                     // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4996                     //
4997                     JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4998                             varNum);
4999                     //
5000                     // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5001                     //
5002                 }
5003                 else
5004                 {
5005                     // We can use the struct promoted field as the two arguments
5006
5007                     GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
5008                     GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
5009
5010                     // Create a new tree for 'arg'
5011                     //    replace the existing LDOBJ(ADDR(LCLVAR))
5012                     //    with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
5013                     //
5014                     newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
5015                     (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
5016                 }
5017             }
5018         }
5019         else
5020         {
5021             //
5022             // We will create a list of GT_LCL_FLDs nodes to pass this struct
5023             //
5024             lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5025         }
5026 #elif defined(_TARGET_ARM_)
5027         // Is this LclVar a promoted struct whose field count matches the slot count?
5028         if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
5029         {
5030             // See if we can find all of the promoted fields at the expected offsets
5031             unsigned varNums[4];
5032             bool     hasBadVarNum = false;
5033             for (unsigned inx = 0; inx < elemCount; inx++)
5034             {
5035                 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
5036                 if (varNums[inx] == BAD_VAR_NUM)
5037                 {
5038                     hasBadVarNum = true;
5039                     break;
5040                 }
5041             }
5042
5043             // Did we find the promoted fields at the necessary offsets?
5044             if (!hasBadVarNum)
5045             {
5046                 LclVarDsc* varDscs[4];
5047                 var_types  varType[4];
5048                 bool       varIsFloat = false;
5049
5050                 for (unsigned inx = 0; inx < elemCount; inx++)
5051                 {
5052                     varDscs[inx] = &lvaTable[varNums[inx]];
5053                     varType[inx] = varDscs[inx]->lvType;
5054                     if (varTypeIsFloating(varType[inx]))
5055                     {
5056                         // TODO-LSRA - It currently doesn't support the passing of floating point
5057                         // LCL_VARS in the integer registers. So for now we will use GT_LCLFLD's
5058                         // to pass this struct (it won't be enregistered)
5059                         //
5060                         JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5061                                 varNum);
5062                         //
5063                         // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5064                         //
5065                         varIsFloat = true;
5066                         break;
5067                     }
5068                 }
5069
5070                 if (!varIsFloat)
5071                 {
5072                     unsigned          offset    = 0;
5073                     GenTreeFieldList* listEntry = nullptr;
5074                     // We can use the struct promoted field as arguments
5075                     for (unsigned inx = 0; inx < elemCount; inx++)
5076                     {
5077                         GenTreePtr lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
5078                         // Create a new tree for 'arg'
5079                         //    replace the existing LDOBJ(ADDR(LCLVAR))
5080                         listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
5081                         if (newArg == nullptr)
5082                         {
5083                             newArg = listEntry;
5084                         }
5085                         offset += TARGET_POINTER_SIZE;
5086                     }
5087                 }
5088             }
5089         }
5090         else
5091         {
5092             //
5093             // We will create a list of GT_LCL_FLDs nodes to pass this struct
5094             //
5095             lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5096         }
5097 #endif // _TARGET_ARM_
5098     }
5099
5100     // If we didn't set newArg to a new GT_FIELD_LIST tree
5101     //
5102     if (newArg == nullptr)
5103     {
5104         if (fgEntryPtr->regNum == REG_STK)
5105         {
5106             // We leave this stack passed argument alone
5107             return arg;
5108         }
5109
5110         // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
5111         // Note that a GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
5112         //
5113         if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5114         {
5115             GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5116             unsigned             varNum  = varNode->gtLclNum;
5117             assert(varNum < lvaCount);
5118             LclVarDsc* varDsc = &lvaTable[varNum];
5119
5120             unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
5121             unsigned lastOffset = baseOffset + (elemCount * elemSize);
5122
5123             // The allocated size of our LocalVar must be at least as big as lastOffset
5124             assert(varDsc->lvSize() >= lastOffset);
5125
5126             if (varDsc->lvStructGcCount > 0)
5127             {
5128                 // alignment of the baseOffset is required
5129                 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
5130                 noway_assert(elemSize == TARGET_POINTER_SIZE);
5131                 unsigned    baseIndex = baseOffset / TARGET_POINTER_SIZE;
5132                 const BYTE* gcPtrs    = varDsc->lvGcLayout; // Get the GC layout for the local variable
5133                 for (unsigned inx = 0; (inx < elemCount); inx++)
5134                 {
5135                     // The GC information must match what we set up using 'objClass'
5136                     noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
5137                 }
5138             }
5139             else //  this varDsc contains no GC pointers
5140             {
5141                 for (unsigned inx = 0; inx < elemCount; inx++)
5142                 {
5143                     // The GC information must match what we set up using 'objClass'
5144                     noway_assert(!varTypeIsGC(type[inx]));
5145                 }
5146             }
5147
5148             //
5149             // We create a list of GT_LCL_FLDs nodes to pass this struct
5150             //
5151             lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5152
5153             // Create a new tree for 'arg'
5154             //    replace the existing LDOBJ(ADDR(LCLVAR))
5155             //    with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
5156             //
5157             unsigned          offset    = baseOffset;
5158             GenTreeFieldList* listEntry = nullptr;
5159             for (unsigned inx = 0; inx < elemCount; inx++)
5160             {
5161                 elemSize              = genTypeSize(type[inx]);
5162                 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
5163                 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
5164                 if (newArg == nullptr)
5165                 {
5166                     newArg = listEntry;
5167                 }
5168                 offset += elemSize;
5169             }
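            // For example (illustrative): a 16-byte struct with a GC reference in its first
            // slot becomes FIELD_LIST(LCL_FLD ref @0, FIELD_LIST(LCL_FLD long @8, nullptr)).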
5170         }
5171         // Are we passing a GT_OBJ struct?
5172         //
5173         else if (argValue->OperGet() == GT_OBJ)
5174         {
5175             GenTreeObj* argObj   = argValue->AsObj();
5176             GenTreePtr  baseAddr = argObj->gtOp1;
5177             var_types   addrType = baseAddr->TypeGet();
5178
5179             // Create a new tree for 'arg'
5180             //    replace the existing LDOBJ(EXPR)
5181             //    with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5182             //
5183
5184             unsigned          offset    = 0;
5185             GenTreeFieldList* listEntry = nullptr;
5186             for (unsigned inx = 0; inx < elemCount; inx++)
5187             {
5188                 elemSize           = genTypeSize(type[inx]);
5189                 GenTreePtr curAddr = baseAddr;
5190                 if (offset != 0)
5191                 {
5192                     GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
5193                     noway_assert(baseAddrDup != nullptr);
5194                     curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5195                 }
5196                 else
5197                 {
5198                     curAddr = baseAddr;
5199                 }
5200                 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
5201
5202                 // For safety all GT_INDs should have at least GTF_GLOB_REF set.
5203                 curItem->gtFlags |= GTF_GLOB_REF;
5204                 if (fgAddrCouldBeNull(curItem))
5205                 {
5206                     // This indirection can cause a GPF if the address could be null.
5207                     curItem->gtFlags |= GTF_EXCEPT;
5208                 }
5209
5210                 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5211                 if (newArg == nullptr)
5212                 {
5213                     newArg = listEntry;
5214                 }
5215                 offset += elemSize;
5216             }
5217         }
5218     }
5219
5220 #ifdef DEBUG
5221     // If we reach here we should have set newArg to something
5222     if (newArg == nullptr)
5223     {
5224         gtDispTree(argValue);
5225         assert(!"Missing case in fgMorphMultiregStructArg");
5226     }
5227
5228     if (verbose)
5229     {
5230         printf("fgMorphMultiregStructArg created tree:\n");
5231         gtDispTree(newArg);
5232     }
5233 #endif
5234
5235     arg = newArg; // consider calling fgMorphTree(newArg);
5236
5237 #endif // FEATURE_MULTIREG_ARGS
5238
5239     return arg;
5240 }
5241
5242 // Make a copy of a struct variable if necessary, to pass to a callee.
5243 // Updates 'args' and the call's fgArgInfo in place; returns nothing.
5244 void Compiler::fgMakeOutgoingStructArgCopy(
5245     GenTreeCall*         call,
5246     GenTree*             args,
5247     unsigned             argIndex,
5248     CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5249         const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5250 {
5251     GenTree* argx = args->Current();
5252     noway_assert(argx->gtOper != GT_MKREFANY);
5253     // See if we need to insert a copy at all
5254     // Case 1: don't need a copy if it is the last use of a local.  We can't always determine
5255     // that, but if there is only one use and no loops, the use must be the last one.
5256     GenTreeLclVarCommon* lcl = nullptr;
5257     if (argx->OperIsLocal())
5258     {
5259         lcl = argx->AsLclVarCommon();
5260     }
5261     else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5262     {
5263         lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5264     }
5265     if (lcl != nullptr)
5266     {
5267         unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5268         if (lvaIsImplicitByRefLocal(varNum))
5269         {
5270             LclVarDsc* varDsc = &lvaTable[varNum];
5271             // The JIT_TailCall helper implicitly assumes that all tail call arguments live
5272             // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5273             // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5274             // struct parameters if they are passed as arguments to a tail call.
5275             if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5276             {
5277                 varDsc->lvRefCnt    = 0;
5278                 args->gtOp.gtOp1    = lcl;
5279                 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
5280                 fp->node            = lcl;
5281
5282                 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5283                 return;
5284             }
5285         }
5286     }
5287
5288     if (fgOutgoingArgTemps == nullptr)
5289     {
5290         fgOutgoingArgTemps = hashBv::Create(this);
5291     }
5292
5293     unsigned tmp   = 0;
5294     bool     found = false;
5295
5296     // Attempt to find a local we have already used for an outgoing struct and reuse it.
5297     // We do not reuse within a statement.
5298     if (!opts.MinOpts())
5299     {
5300         indexType lclNum;
5301         FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5302         {
5303             LclVarDsc* varDsc = &lvaTable[lclNum];
5304             if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5305                 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5306             {
5307                 tmp   = (unsigned)lclNum;
5308                 found = true;
5309                 JITDUMP("reusing outgoing struct arg");
5310                 break;
5311             }
5312         }
5313         NEXT_HBV_BIT_SET;
5314     }
5315
5316     // Create the CopyBlk tree and insert it.
5317     if (!found)
5318     {
5319         // Get a new temp
5320         // Here we don't need the unsafe-value-class check, since the address of this temp is used only in the copyblk.
5321         tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5322         lvaSetStruct(tmp, copyBlkClass, false);
5323         fgOutgoingArgTemps->setBit(tmp);
5324     }
5325
5326     fgCurrentlyInUseArgTemps->setBit(tmp);
5327
5328     // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
5329     // allocated on the stack and their address to be passed.
5330     if (lclVarIsSIMDType(tmp))
5331     {
5332         lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5333     }
5334
5335     // Create a reference to the temp
5336     GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5337     dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5338
5339     // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5340     // to ref counting of the lclVars.
5341     lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5342
5343     GenTreePtr src;
5344     if (argx->gtOper == GT_OBJ)
5345     {
5346         argx->gtFlags = (argx->gtFlags & ~GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5347     }
5348     else
5349     {
5350         argx->gtFlags |= GTF_DONT_CSE;
5351     }
5352
5353     // Copy the valuetype to the temp
5354     unsigned   size    = info.compCompHnd->getClassSize(copyBlkClass);
5355     GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5356     copyBlk            = fgMorphCopyBlock(copyBlk);
5357
5358 #if FEATURE_FIXED_OUT_ARGS
5359
5360     // Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
5361     // On Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode.
5362     GenTreePtr arg = copyBlk;
5363
5364 #else // FEATURE_FIXED_OUT_ARGS
5365
5366     // Structs are always on the stack, and thus never need temps,
5367     // so we have to put the copy and temp all into one expression.
5368     GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5369
5370     // Change the expression to "(tmp=val),tmp"
5371     arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5372
5373 #endif // FEATURE_FIXED_OUT_ARGS
5374
5375     args->gtOp.gtOp1 = arg;
5376     call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5377
5378     return;
5379 }
5380
5381 #ifdef _TARGET_ARM_
5382 // See declaration for specification comment.
5383 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5384                                                    unsigned   firstArgRegNum,
5385                                                    regMaskTP* pArgSkippedRegMask)
5386 {
5387     assert(varDsc->lvPromoted);
5388     // There's no way to do these calculations without breaking abstraction and assuming that
5389     // integer register arguments are consecutive ints.  They are on ARM.
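    // For example (a sketch, assuming the struct's first register is r0): if the first
    // field ends in r0 and the next field starts at struct offset 8 (register offset 2),
    // the loop below records r1 as skipped in *pArgSkippedRegMask.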
5390
5391     // To start, figure out what register contains the last byte of the first argument.
5392     LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5393     unsigned   lastFldRegOfLastByte =
5394         (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5396
5397     // Now we're keeping track of the register that the last field ended in; see what registers
5398     // subsequent fields start in, and whether any are skipped.
5399     // (We assume here the invariant that the fields are sorted in offset order.)
5400     for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5401     {
5402         unsigned   fldVarNum    = varDsc->lvFieldLclStart + fldVarOffset;
5403         LclVarDsc* fldVarDsc    = &lvaTable[fldVarNum];
5404         unsigned   fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5405         assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5406         // This loop enumerates the offsets of any registers that were skipped: it starts at
5407         // the register after the one holding the last byte of the previous field, and walks up
5408         // to (but not including) the first register of the current field.
5409         for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5410              skippedRegOffsets++)
5411         {
5412             // If the register number would not be an arg reg, we're done.
5413             if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5414                 return;
5415             *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5416         }
5417         lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5418     }
5419 }
5420
5421 #endif // _TARGET_ARM_
5422
5423 //****************************************************************************
5424 //  fgFixupStructReturn:
5425 //    The companion to impFixupCallStructReturn.  Now that the importer is done,
5426 //    change the gtType to the precomputed native return type.
5427 //    Requires that callNode currently has a struct type.
5428 //
5429 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5430 {
5431     assert(varTypeIsStruct(callNode));
5432
5433     GenTreeCall* call              = callNode->AsCall();
5434     bool         callHasRetBuffArg = call->HasRetBufArg();
5435     bool         isHelperCall      = call->IsHelperCall();
5436
5437     // Decide on the proper return type for this call that currently returns a struct
5438     //
5439     CORINFO_CLASS_HANDLE        retClsHnd = call->gtRetClsHnd;
5440     Compiler::structPassingKind howToReturnStruct;
5441     var_types                   returnType;
5442
5443     // There are a couple of helper calls that say they return a TYP_STRUCT but
5444     // expect this method to re-type the node to a TYP_REF (what is in call->gtReturnType):
5445     //
5446     //    CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5447     //    CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5448     //    CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5449     //
5450     if (isHelperCall)
5451     {
5452         assert(!callHasRetBuffArg);
5453         assert(retClsHnd == NO_CLASS_HANDLE);
5454
5455         // Now that we are past the importer, re-type this node
5456         howToReturnStruct = SPK_PrimitiveType;
5457         returnType        = (var_types)call->gtReturnType;
5458     }
5459     else
5460     {
5461         returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5462     }
5463
5464     if (howToReturnStruct == SPK_ByReference)
5465     {
5466         assert(returnType == TYP_UNKNOWN);
5467         assert(callHasRetBuffArg);
5468     }
5469     else
5470     {
5471         assert(returnType != TYP_UNKNOWN);
5472
5473         if (returnType != TYP_STRUCT)
5474         {
5475             // Widen the primitive type if necessary
5476             returnType = genActualType(returnType);
5477         }
5478         call->gtType = returnType;
5479     }
5480
5481 #if FEATURE_MULTIREG_RET
5482     // Either we no longer have a struct, or we do and it is returned in registers or via a return buffer.
5483     assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5484 #else // !FEATURE_MULTIREG_RET
5485     // No more struct returns
5486     assert(call->TypeGet() != TYP_STRUCT);
5487 #endif
5488
5489 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5490     // If it was a struct return, it has been transformed into a call
5491     // with a return buffer (that returns TYP_VOID) or into a return
5492     // of a primitive/enregisterable type
5493     assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5494 #endif
5495 }
5496
5497 /*****************************************************************************
5498  *
5499  *  A little helper used to rearrange nested commutative operations. The
5500  *  effect is that nested associative, commutative operations are transformed
5501  *  into a 'left-deep' tree, i.e. into something like this:
5502  *
5503  *      (((a op b) op c) op d) op...
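 *
 *  For example, "a + (b + (c + d))" is reassociated into "((a + b) + c) + d",
 *  provided no overflow checks are needed and the operands' flags permit it.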
5504  */
5505
5506 #if REARRANGE_ADDS
5507
5508 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5509 {
5510     GenTreePtr op1;
5511     GenTreePtr op2;
5512     genTreeOps oper;
5513
5514     do
5515     {
5516         op1  = tree->gtOp.gtOp1;
5517         op2  = tree->gtOp.gtOp2;
5518         oper = tree->OperGet();
5519
5520         noway_assert(GenTree::OperIsCommutative(oper));
5521         noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5522         noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5523         noway_assert(oper == op2->gtOper);
5524
5525         // Commutativity doesn't hold if overflow checks are needed
5526
5527         if (tree->gtOverflowEx() || op2->gtOverflowEx())
5528         {
5529             return;
5530         }
5531
5532         if (gtIsActiveCSE_Candidate(op2))
5533         {
5534             // If we have marked op2 as a CSE candidate,
5535             // we can't perform a commutative reordering
5536             // because any value numbers that we computed for op2
5537             // will be incorrect after performing a commutative reordering
5538             //
5539             return;
5540         }
5541
5542         if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5543         {
5544             return;
5545         }
5546
5547         // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5548         if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5549         {
5550             return;
5551         }
5552
5553         if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5554         {
5555             // We could deal with this, but we were always broken and just hit the assert
5556             // below regarding flags, which means it's not frequent, so we will just bail out.
5557             // See #195514
5558             return;
5559         }
5560
5561         noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5562
5563         GenTreePtr ad1 = op2->gtOp.gtOp1;
5564         GenTreePtr ad2 = op2->gtOp.gtOp2;
5565
5566         // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a TYP_INT.
5567         // We cannot reorder such GT_OR trees.
5568         //
5569         if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5570         {
5571             break;
5572         }
5573
5574         /* Change "(x op (y op z))" to "(x op y) op z" */
5575         /* ie.    "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5576
5577         GenTreePtr new_op1 = op2;
5578
5579         new_op1->gtOp.gtOp1 = op1;
5580         new_op1->gtOp.gtOp2 = ad1;
5581
5582         /* Change the flags. */
5583
5584         // Make sure we aren't throwing away any flags
5585         noway_assert((new_op1->gtFlags &
5586                       ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5587                         GTF_REVERSE_OPS |             // The reverse ops flag also can be set, it will be re-calculated
5588                         GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5589
5590         new_op1->gtFlags =
5591             (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5592             (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5593
5594         /* Retype new_op1 if it has become (or is no longer) a GC pointer. */
5595
5596         if (varTypeIsGC(op1->TypeGet()))
5597         {
5598             noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5599                           oper == GT_ADD) || // byref(ref + (int+int))
5600                          (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5601                           oper == GT_OR)); // int(gcref | int(gcref|intval))
5602
5603             new_op1->gtType = tree->gtType;
5604         }
5605         else if (varTypeIsGC(ad2->TypeGet()))
5606         {
5607             // Neither ad1 nor op1 is GC, so new_op1 isn't either.
5608             noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5609             new_op1->gtType = TYP_I_IMPL;
5610         }
5611
5612         // If new_op1 is a new expression, assign it a new unique value number.
5613         // vnStore is null before the ValueNumber phase has run
5614         if (vnStore != nullptr)
5615         {
5616             // We can only keep the old value number on new_op1 if both op1 and ad2
5617             // have the same non-NoVN value numbers. Since op is commutative, comparing
5618             // only ad2 and op1 is enough.
5619             if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5620                 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5621                 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5622             {
5623                 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5624             }
5625         }
5626
5627         tree->gtOp.gtOp1 = new_op1;
5628         tree->gtOp.gtOp2 = ad2;
5629
5630         /* If 'new_op1' is now the same nested op, process it recursively */
5631
5632         if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5633         {
5634             fgMoveOpsLeft(new_op1);
5635         }
5636
5637         /* If 'ad2' is now the same nested op, process it.
5638          * Instead of recursing, we set up op1 and op2 for the next loop iteration.
5639          */
5640
5641         op1 = new_op1;
5642         op2 = ad2;
5643     } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5644
5645     return;
5646 }
5647
5648 #endif
5649
5650 /*****************************************************************************/
5651
5652 void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
5653 {
5654     GenTreeBoundsChk* bndsChk = nullptr;
5655     SpecialCodeKind   kind    = SCK_RNGCHK_FAIL;
5656
5657 #ifdef FEATURE_SIMD
5658     if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5659 #else  // FEATURE_SIMD
5660     if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5661 #endif // FEATURE_SIMD
5662     {
5663         bndsChk = tree->AsBoundsChk();
5664         kind    = tree->gtBoundsChk.gtThrowKind;
5665     }
5666     else
5667     {
5668         noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5669     }
5670
5671 #ifdef _TARGET_X86_
5672     unsigned callStkDepth = fgPtrArgCntCur;
5673 #else
5674     // only x86 pushes args
5675     const unsigned callStkDepth = 0;
5676 #endif
5677
5678     if (opts.MinOpts())
5679     {
5680         delay = false;
5681
5682         // we need to initialize this field
5683         if (fgGlobalMorph && bndsChk != nullptr)
5684         {
5685             bndsChk->gtStkDepth = callStkDepth;
5686         }
5687     }
5688
5689     if (!opts.compDbgCode)
5690     {
5691         if (delay || compIsForInlining())
5692         {
5693             /*  We delay this until after loop-oriented range check
5694                 analysis. For now we merely store the current stack
5695                 level in the tree node.
5696              */
5697             if (bndsChk != nullptr)
5698             {
5699                 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5700                 bndsChk->gtStkDepth = callStkDepth;
5701             }
5702         }
5703         else
5704         {
5705             /* Create/find the appropriate "range-fail" label */
5706
5707             // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5708             noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5709
5710             unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
5711
5712             BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5713
5714             /* Add the label to the indirection node */
5715
5716             if (bndsChk != nullptr)
5717             {
5718                 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5719             }
5720         }
5721     }
5722 }
5723
5724 /*****************************************************************************
5725  *
5726  *  Expand a GT_INDEX node and fully morph the child operands
5727  *
5728  *  The original GT_INDEX node is bashed into the GT_IND node that accesses
5729  *  the array element.  We expand the GT_INDEX node into a larger tree that
5730  *  evaluates the array base and index.  The simplest expansion is a GT_COMMA
5731  *  with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
5732  *  For complex array or index expressions one or more GT_COMMA assignments
5733  *  are inserted so that we only evaluate the array or index expressions once.
5734  *
5735  *  The fully expanded tree is then morphed.  This causes gtFoldExpr to
5736  *  perform local constant prop and reorder the constants in the tree and
5737  *  fold them.
5738  *
5739  *  We then parse the resulting array element expression in order to locate
5740  *  and label the constants and variables that occur in the tree.
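 *
 *  As a sketch (tree shapes only, not exact dump output), a range-checked "a[i]"
 *  with 4-byte elements expands into roughly:
 *
 *      COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
 *            IND(ADD(ADD(a, MUL(i, 4)), elemOffs)))
 *
 *  where elemOffs is the offset of the first element (set below).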
5741  */
5742
5743 const int MAX_ARR_COMPLEXITY   = 4;
5744 const int MAX_INDEX_COMPLEXITY = 4;
5745
5746 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5747 {
5748     noway_assert(tree->gtOper == GT_INDEX);
5749     GenTreeIndex* asIndex = tree->AsIndex();
5750
5751     var_types            elemTyp        = tree->TypeGet();
5752     unsigned             elemSize       = tree->gtIndex.gtIndElemSize;
5753     CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5754
5755     noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5756
5757 #ifdef FEATURE_SIMD
5758     if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5759     {
5760         // If this is a SIMD type, this is the point at which we lose the type information,
5761         // so we need to set the correct type on the GT_IND.
5762         // (We don't care about the base type here, so we only check, but don't retain, the return value).
5763         unsigned simdElemSize = 0;
5764         if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5765         {
5766             assert(simdElemSize == elemSize);
5767             elemTyp = getSIMDTypeForSize(elemSize);
5768             // This is the new type of the node.
5769             tree->gtType = elemTyp;
5770             // Now set elemStructType to null so that we don't confuse value numbering.
5771             elemStructType = nullptr;
5772         }
5773     }
5774 #endif // FEATURE_SIMD
5775
5776     GenTreePtr arrRef = asIndex->Arr();
5777     GenTreePtr index  = asIndex->Index();
5778
5779     // Set up the array length's offset in lenOffs
5780     // and the first element's offset in elemOffs
5781     ssize_t lenOffs;
5782     ssize_t elemOffs;
5783     if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5784     {
5785         lenOffs  = offsetof(CORINFO_String, stringLen);
5786         elemOffs = offsetof(CORINFO_String, chars);
5787         tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag; its bit is reused for GTF_IND_VOLATILE
5788     }
5789     else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5790     {
5791         lenOffs  = offsetof(CORINFO_RefArray, length);
5792         elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5793     }
5794     else // We have a standard array
5795     {
5796         lenOffs  = offsetof(CORINFO_Array, length);
5797         elemOffs = offsetof(CORINFO_Array, u1Elems);
5798     }
5799
5800     bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5801     bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5802
5803     GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5804     GenTreePtr indexDefn  = nullptr; // non-NULL if we need to allocate a temp for the index expression
5805     GenTreePtr bndsChk    = nullptr;
5806
5807     // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5808     if (chkd)
5809     {
5810         GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5811         GenTreePtr index2  = nullptr;
5812
5813         // If the arrRef expression involves an assignment, a call or reads from global memory,
5814         // then we *must* allocate a temporary in which to "localize" those values,
5815         // to ensure that the same values are used in the bounds check and the actual
5816         // dereference.
5817         // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5818         // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5819         // complexity is not exposed. (Without that condition there are cases of local struct
5820         // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5821         // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5822         //
5823         if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5824             gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5825         {
5826             unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5827             arrRefDefn            = gtNewTempAssign(arrRefTmpNum, arrRef);
5828             arrRef                = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5829             arrRef2               = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5830         }
5831         else
5832         {
5833             arrRef2 = gtCloneExpr(arrRef);
5834             noway_assert(arrRef2 != nullptr);
5835         }
5836
5837         // If the index expression involves an assignment, a call or reads from global memory,
5838         // we *must* allocate a temporary in which to "localize" those values,
5839         // to ensure that the same values are used in the bounds check and the actual
5840         // dereference.
5841         // Also we allocate the temporary when the index is sufficiently complex/expensive.
5842         //
5843         if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5844             gtComplexityExceeds(&index, MAX_INDEX_COMPLEXITY) || (index->OperGet() == GT_FIELD))
5845         {
5846             unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5847             indexDefn            = gtNewTempAssign(indexTmpNum, index);
5848             index                = gtNewLclvNode(indexTmpNum, index->TypeGet());
5849             index2               = gtNewLclvNode(indexTmpNum, index->TypeGet());
5850         }
5851         else
5852         {
5853             index2 = gtCloneExpr(index);
5854             noway_assert(index2 != nullptr);
5855         }
5856
5857         // Next introduce a GT_ARR_BOUNDS_CHECK node
5858         var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5859
5860 #ifdef _TARGET_64BIT_
5861         // The CLI Spec allows an array to be indexed by either an int32 or a native int.  In the case
5862         // of a 64-bit architecture this means the array index can potentially be a TYP_LONG, so in this case
5863         // the comparison will have to be widened to 64 bits.
5864         if (index->TypeGet() == TYP_I_IMPL)
5865         {
5866             bndsChkType = TYP_I_IMPL;
5867         }
5868 #endif // _TARGET_64BIT_
5869
5870         GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5871
5872         if (bndsChkType != TYP_INT)
5873         {
5874             arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5875         }
5876
5877         GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5878             GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5879
5880         bndsChk = arrBndsChk;
5881
5882         // Make sure to increment ref-counts if already ref-counted.
5883         if (lvaLocalVarRefCounted)
5884         {
5885             lvaRecursiveIncRefCounts(index);
5886             lvaRecursiveIncRefCounts(arrRef);
5887         }
5888
5889         // Now we'll switch to using the second copies for arrRef and index
5890         // to compute the address expression
5891
5892         arrRef = arrRef2;
5893         index  = index2;
5894     }
5895
5896     // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
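    // For instance, for an int[] on a 64-bit target this computes
    // "arrRef + ((index * 4) + offsetof(CORINFO_Array, u1Elems))"; the multiply is
    // skipped below when elemSize == 1.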
5897
5898     GenTreePtr addr;
5899
5900 #ifdef _TARGET_64BIT_
5901     // Widen 'index' on 64-bit targets
5902     if (index->TypeGet() != TYP_I_IMPL)
5903     {
5904         if (index->OperGet() == GT_CNS_INT)
5905         {
5906             index->gtType = TYP_I_IMPL;
5907         }
5908         else
5909         {
5910             index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
5911         }
5912     }
5913 #endif // _TARGET_64BIT_
5914
5915     /* Scale the index value if necessary */
5916     if (elemSize > 1)
5917     {
5918         GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5919
5920         // Fix 392756 WP7 Crossgen
5921         //
5922         // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5923         // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5924         // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5925         //
5926         size->gtFlags |= GTF_DONT_CSE;
5927
5928         /* Multiply by the array element size */
5929         addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5930     }
5931     else
5932     {
5933         addr = index;
5934     }
5935
5936     /* Add the object ref to the element's offset */
5937
5938     addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5939
5940     /* Add the first element's offset */
5941
5942     GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5943
5944     addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
5945
5946 #if SMALL_TREE_NODES
5947     assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
5948 #endif
5949
5950     // Change the original GT_INDEX node into a GT_IND node
5951     tree->SetOper(GT_IND);
5952
5953     // If the element type is floating point, notify the compiler
5954     // we'll potentially use floating point registers at the time of codegen.
5955     if (varTypeIsFloating(tree->gtType))
5956     {
5957         this->compFloatingPointUsed = true;
5958     }
5959
5960     // We've now consumed the GTF_INX_RNGCHK, and the node
5961     // is no longer a GT_INDEX node.
5962     tree->gtFlags &= ~GTF_INX_RNGCHK;
5963
5964     tree->gtOp.gtOp1 = addr;
5965
5966     // This is an array index expression.
5967     tree->gtFlags |= GTF_IND_ARR_INDEX;
5968
5969     /* An indirection will cause a GPF if the address is null */
5970     tree->gtFlags |= GTF_EXCEPT;
5971
5972     if (nCSE)
5973     {
5974         tree->gtFlags |= GTF_DONT_CSE;
5975     }
5976
5977     // Store information about it.
5978     GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5979
5980     // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5981
5982     GenTreePtr indTree = tree;
5983
5984     // Did we create a bndsChk tree?
5985     if (bndsChk)
5986     {
5987         // Use a GT_COMMA node to prepend the array bound check
5988         //
5989         tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5990
5991         /* Mark the indirection node as needing a range check */
5992         fgSetRngChkTarget(bndsChk);
5993     }
5994
5995     if (indexDefn != nullptr)
5996     {
5997         // Use a GT_COMMA node to prepend the index assignment
5998         //
5999         tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
6000     }
6001     if (arrRefDefn != nullptr)
6002     {
6003         // Use a GT_COMMA node to prepend the arrRef assignment
6004         //
6005         tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
6006     }
6007
6008     // Currently we morph the tree to perform some folding operations prior
6009     // to attaching fieldSeq info and labeling constant array index contributions
6010     //
6011     fgMorphTree(tree);
6012
6013     // Ideally we just want to proceed to attaching fieldSeq info and labeling the
6014     // constant array index contributions, but the morphing operation may have changed
6015     // the 'tree' into something that now unconditionally throws an exception.
6016     //
6017     // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
6018     // or it could be left unchanged.  If it is unchanged then we should not return;
6019     // instead we should proceed to attaching fieldSeq info, etc...
6020     //
6021     GenTreePtr arrElem = tree->gtEffectiveVal();
6022
6023     if (fgIsCommaThrow(tree))
6024     {
6025         if ((arrElem != indTree) ||         // A new tree node may have been created
6026             (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
6027         {
6028             return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
6029         }
6030     }
6031
6032     assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
6033
6034     addr = arrElem->gtOp.gtOp1;
6035
6036     assert(addr->TypeGet() == TYP_BYREF);
6037
6038     GenTreePtr cnsOff = nullptr;
6039     if (addr->OperGet() == GT_ADD)
6040     {
6041         if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
6042         {
6043             cnsOff = addr->gtOp.gtOp2;
6044             addr   = addr->gtOp.gtOp1;
6045         }
6046
6047         while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
6048         {
6049             assert(addr->TypeGet() == TYP_BYREF);
6050             GenTreePtr index = addr->gtOp.gtOp2;
6051
6052             // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6053             index->LabelIndex(this);
6054
6055             addr = addr->gtOp.gtOp1;
6056         }
6057         assert(addr->TypeGet() == TYP_REF);
6058     }
6059     else if (addr->OperGet() == GT_CNS_INT)
6060     {
6061         cnsOff = addr;
6062     }
6063
6064     FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
6065
6066     if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
6067     {
6068         // Assign it the [#FirstElem] field sequence
6069         //
6070         cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
6071     }
6072     else //  We have folded the first element's offset with the index expression
6073     {
6074         // Build the [#ConstantIndex, #FirstElem] field sequence
6075         //
6076         FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
6077         FieldSeqNode* fieldSeq          = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
6078
6079         if (cnsOff == nullptr) // It must have folded into a zero offset
6080         {
6081             // Record in the general zero-offset map.
6082             GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6083         }
6084         else
6085         {
6086             cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
6087         }
6088     }
6089
6090     return tree;
6091 }
6092
6093 #ifdef _TARGET_X86_
6094 /*****************************************************************************
6095  *
6096  *  Wrap fixed stack arguments of varargs functions so they are accessed through the
6097  *  varargs cookie, except for the cookie itself.
6098  *
6099  * Non-x86 platforms are allowed to access all arguments directly
6100  * so we don't need this code.
6101  *
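 * As a sketch, a fixed stack argument is rewritten into roughly:
 *
 *    IND(SUB(LCL_VAR lvaVarargsBaseOfStkArgs,
 *            CNS(lvStkOffs - rsCalleeRegArgCount * sizeof(void*) + lclOffs)))
 *
 * matching the tree built below.
 *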
6102  */
6103 GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6104 {
6105     /* For the fixed stack arguments of a varargs function, we need to go
6106         through the varargs cookie to access them, except for the
6107         cookie itself */
6108
6109     LclVarDsc* varDsc = &lvaTable[lclNum];
6110
6111     if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6112     {
6113         // Create a node representing the local pointing to the base of the args
6114         GenTreePtr ptrArg =
6115             gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
6116                           gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
6117                                         lclOffs));
6118
6119         // Access the argument through the local
6120         GenTreePtr tree;
6121         if (varType == TYP_STRUCT)
6122         {
6123             tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6124         }
6125         else
6126         {
6127             tree = gtNewOperNode(GT_IND, varType, ptrArg);
6128         }
6129         tree->gtFlags |= GTF_IND_TGTANYWHERE;
6130
6131         if (varDsc->lvAddrExposed)
6132         {
6133             tree->gtFlags |= GTF_GLOB_REF;
6134         }
6135
6136         return fgMorphTree(tree);
6137     }
6138
6139     return nullptr;
6140 }
6141 #endif
6142
6143 /*****************************************************************************
6144  *
6145  *  Transform the given GT_LCL_VAR tree for code generation.
6146  */
6147
6148 GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph)
6149 {
6150     assert(tree->gtOper == GT_LCL_VAR);
6151
6152     unsigned   lclNum  = tree->gtLclVarCommon.gtLclNum;
6153     var_types  varType = lvaGetRealType(lclNum);
6154     LclVarDsc* varDsc  = &lvaTable[lclNum];
6155
6156     if (varDsc->lvAddrExposed)
6157     {
6158         tree->gtFlags |= GTF_GLOB_REF;
6159     }
6160
6161 #ifdef _TARGET_X86_
6162     if (info.compIsVarArgs)
6163     {
6164         GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6165         if (newTree != nullptr)
6166         {
6167             if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6168             {
6169                 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6170             }
6171             return newTree;
6172         }
6173     }
6174 #endif // _TARGET_X86_
6175
6176     /* If not during the global morphing phase bail */
6177
6178     if (!fgGlobalMorph && !forceRemorph)
6179     {
6180         return tree;
6181     }
6182
6183     bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6184
6185     noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6186
6187     if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6188     {
6189 #if LOCAL_ASSERTION_PROP
6190         /* Assertion prop can tell us to omit adding a cast here */
6191         if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
6192         {
6193             return tree;
6194         }
6195 #endif
6196         /* Small-typed arguments and aliased locals are normalized on load.
6197            Other small-typed locals are normalized on store.
6198            They are also normalized on load under the debugger, since the debugger could write to the variable.
6199            If this is one of the former, insert a narrowing cast on the load.
6200                    ie. Convert: var-short --> cast-short(var-int) */
6201
6202         tree->gtType = TYP_INT;
6203         fgMorphTreeDone(tree);
6204         tree = gtNewCastNode(TYP_INT, tree, varType);
6205         fgMorphTreeDone(tree);
6206         return tree;
6207     }
6208
6209     return tree;
6210 }
6211
6212 /*****************************************************************************
6213   Grab a temp for big offset morphing.
6214   This method grabs a new temp if no temp of this "type" has been created yet;
6215   otherwise it returns the same cached one.
6216 */
6217 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6218 {
6219     unsigned lclNum = fgBigOffsetMorphingTemps[type];
6220
6221     if (lclNum == BAD_VAR_NUM)
6222     {
6223         // We haven't created a temp for this kind of type. Create one now.
6224         lclNum                         = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6225         fgBigOffsetMorphingTemps[type] = lclNum;
6226     }
6227     else
6228     {
6229         // We better get the right type.
6230         noway_assert(lvaTable[lclNum].TypeGet() == type);
6231     }
6232
6233     noway_assert(lclNum != BAD_VAR_NUM);
6234     return lclNum;
6235 }
6236
6237 /*****************************************************************************
6238  *
6239  *  Transform the given GT_FIELD tree for code generation.
6240  */
6241
6242 GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
6243 {
6244     assert(tree->gtOper == GT_FIELD);
6245
6246     CORINFO_FIELD_HANDLE symHnd          = tree->gtField.gtFldHnd;
6247     unsigned             fldOffset       = tree->gtField.gtFldOffset;
6248     GenTreePtr           objRef          = tree->gtField.gtFldObj;
6249     bool                 fieldMayOverlap = false;
6250     bool                 objIsLocal      = false;
6251
6252     if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6253     {
6254         // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
6255         // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6256         // simd field rewrites are sensitive to.
6257         fgMorphImplicitByRefArgs(objRef);
6258     }
6259
6260     noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6261                  ((tree->gtFlags & GTF_GLOB_REF) != 0));
6262
6263     if (tree->gtField.gtFldMayOverlap)
6264     {
6265         fieldMayOverlap = true;
6266         // Reset the flag because we may reuse the node.
6267         tree->gtField.gtFldMayOverlap = false;
6268     }
6269
6270 #ifdef FEATURE_SIMD
6271     // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6272     if (mac == nullptr)
6273     {
6274         GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6275         if (newTree != tree)
6276         {
6277             newTree = fgMorphSmpOp(newTree);
6278             return newTree;
6279         }
6280     }
6281     else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6282     {
6283         GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6284         if (lcl != nullptr)
6285         {
6286             lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6287         }
6288     }
6289 #endif
6290
6291     /* Is this an instance data member? */
6292
6293     if (objRef)
6294     {
6295         GenTreePtr addr;
6296         objIsLocal = objRef->IsLocal();
6297
6298         if (tree->gtFlags & GTF_IND_TLS_REF)
6299         {
6300             NO_WAY("instance field can not be a TLS ref.");
6301         }
6302
6303         /* We'll create the expression "*(objRef + mem_offs)" */
6304
6305         noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6306
6307         // An optimization for Contextful classes:
6308         // we unwrap the proxy when we have a 'this reference'
6309         if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6310         {
6311             objRef = fgUnwrapProxy(objRef);
6312         }
6313
6314         /*
6315             Now we have a tree like this:
6316
6317                                   +--------------------+
6318                                   |      GT_FIELD      |   tree
6319                                   +----------+---------+
6320                                              |
6321                               +--------------+-------------+
6322                               |   tree->gtField.gtFldObj   |
6323                               +--------------+-------------+
6324
6325
6326             We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6327
6328                                   +--------------------+
6329                                   |   GT_IND/GT_OBJ    |   tree
6330                                   +---------+----------+
6331                                             |
6332                                             |
6333                                   +---------+----------+
6334                                   |       GT_ADD       |   addr
6335                                   +---------+----------+
6336                                             |
6337                                           /   \
6338                                         /       \
6339                                       /           \
6340                          +-------------------+  +----------------------+
6341                          |       objRef      |  |     fldOffset        |
6342                          |                   |  | (when fldOffset !=0) |
6343                          +-------------------+  +----------------------+
6344
6345
6346             or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6347
6348
6349                                   +--------------------+
6350                                   |   GT_IND/GT_OBJ    |   tree
6351                                   +----------+---------+
6352                                              |
6353                                   +----------+---------+
6354                                   |       GT_COMMA     |  comma2
6355                                   +----------+---------+
6356                                              |
6357                                             / \
6358                                           /     \
6359                                         /         \
6360                                       /             \
6361                  +---------+----------+               +---------+----------+
6362            comma |      GT_COMMA      |               |  "+" (i.e. GT_ADD) |   addr
6363                  +---------+----------+               +---------+----------+
6364                            |                                     |
6365                          /   \                                  /  \
6366                        /       \                              /      \
6367                      /           \                          /          \
6368          +-----+-----+             +-----+-----+      +---------+   +-----------+
6369      asg |  GT_ASG   |         ind |   GT_IND  |      |  tmpLcl |   | fldOffset |
6370          +-----+-----+             +-----+-----+      +---------+   +-----------+
6371                |                         |
6372               / \                        |
6373             /     \                      |
6374           /         \                    |
6375    +-----+-----+   +-----+-----+   +-----------+
6376    |   tmpLcl  |   |   objRef  |   |   tmpLcl  |
6377    +-----------+   +-----------+   +-----------+
6378
6379
6380         */
6381
6382         var_types objRefType = objRef->TypeGet();
6383
6384         GenTreePtr comma = nullptr;
6385
6386         bool addedExplicitNullCheck = false;
6387
6388         // NULL mac means we encounter the GT_FIELD first.  This denotes a dereference of the field,
6389         // and thus is equivalent to a MACK_Ind with zero offset.
6390         MorphAddrContext defMAC(MACK_Ind);
6391         if (mac == nullptr)
6392         {
6393             mac = &defMAC;
6394         }
6395
6396         // This flag is set to enable the "conservative" style of explicit null-check insertion.
6397         // This means that we insert an explicit null check whenever we create byref by adding a
6398         // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6399         // dereferenced).  The alternative is "aggressive", which would not insert such checks (for
6400         // small offsets); in this plan, we would transfer some null-checking responsibility to
6401         // callees of methods taking byref parameters.  They would have to add explicit null checks
6402         // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6403         // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6404         // large).  To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6405         // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6406         // This is left here to point out how to implement it.
6407         CLANG_FORMAT_COMMENT_ANCHOR;
6408
6409 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
6410
6411         // If the objRef is a GT_ADDR node, it, itself, never requires null checking.  The expression
6412         // whose address is being taken is either a local or static variable, whose address is necessarily
6413         // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6414         if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
6415                                           (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
6416 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6417                                            || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
6418 #else
6419                                            || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
6420                                                (mac->m_totalOffset + fldOffset > 0))
6421 #endif
6422                                                )))
6423         {
6424 #ifdef DEBUG
6425             if (verbose)
6426             {
6427                 printf("Before explicit null check morphing:\n");
6428                 gtDispTree(tree);
6429             }
6430 #endif
6431
6432             //
6433             // Create the "comma" subtree
6434             //
6435             GenTreePtr asg = nullptr;
6436             GenTreePtr nullchk;
6437
6438             unsigned lclNum;
6439
6440             if (objRef->gtOper != GT_LCL_VAR)
6441             {
6442                 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6443
6444                 // Create the "asg" node
6445                 asg = gtNewTempAssign(lclNum, objRef);
6446             }
6447             else
6448             {
6449                 lclNum = objRef->gtLclVarCommon.gtLclNum;
6450             }
6451
6452             // Create the "nullchk" node.
6453         // Make it TYP_BYTE so we only dereference it for 1 byte.
6454             GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
6455             nullchk           = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6456
6457             nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6458
6459             // An indirection will cause a GPF if the address is null.
6460             nullchk->gtFlags |= GTF_EXCEPT;
6461
6462             compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6463             optMethodFlags |= OMF_HAS_NULLCHECK;
6464
6465             if (asg)
6466             {
6467                 // Create the "comma" node.
6468                 comma = gtNewOperNode(GT_COMMA,
6469                                       TYP_VOID, // We don't want to return anything from this "comma" node.
6470                                                 // Set the type to TYP_VOID, so we can select "cmp" instruction
6471                                                 // instead of "mov" instruction later on.
6472                                       asg, nullchk);
6473             }
6474             else
6475             {
6476                 comma = nullchk;
6477             }
6478
6479             addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6480
6481             addedExplicitNullCheck = true;
6482         }
6483         else if (fldOffset == 0)
6484         {
6485             // Generate the "addr" node.
6486             addr = objRef;
6487             FieldSeqNode* fieldSeq =
6488                 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6489             GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6490         }
6491         else
6492         {
6493             addr = objRef;
6494         }
6495
6496 #ifdef FEATURE_READYTORUN_COMPILER
6497         if (tree->gtField.gtFieldLookup.addr != nullptr)
6498         {
6499             GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
6500
6501             if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6502             {
6503                 baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
6504             }
6505
6506             addr =
6507                 gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
6508         }
6509 #endif
6510         if (fldOffset != 0)
6511         {
6512             // Generate the "addr" node.
6513             /* Add the member offset to the object's address */
6514             FieldSeqNode* fieldSeq =
6515                 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6516             addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6517                                  gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6518         }
6519
6520         // Now let's set the "tree" as a GT_IND tree.
6521
6522         tree->SetOper(GT_IND);
6523         tree->gtOp.gtOp1 = addr;
6524
6525         if (fgAddrCouldBeNull(addr))
6526         {
6527             // This indirection can cause a GPF if the address could be null.
6528             tree->gtFlags |= GTF_EXCEPT;
6529         }
6530
6531         if (addedExplicitNullCheck)
6532         {
6533             //
6534             // Create "comma2" node and link it to "tree".
6535             //
6536             GenTreePtr comma2;
6537             comma2 = gtNewOperNode(GT_COMMA,
6538                                    addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6539                                    comma, addr);
6540             tree->gtOp.gtOp1 = comma2;
6541         }
6542
6543 #ifdef DEBUG
6544         if (verbose)
6545         {
6546             if (addedExplicitNullCheck)
6547             {
6548                 printf("After adding explicit null check:\n");
6549                 gtDispTree(tree);
6550             }
6551         }
6552 #endif
6553     }
6554     else /* This is a static data member */
6555     {
6556         if (tree->gtFlags & GTF_IND_TLS_REF)
6557         {
6558             // Thread Local Storage static field reference
6559             //
6560             // Field ref is a TLS 'Thread-Local-Storage' reference
6561             //
6562             // Build this tree:  IND(*) #
6563             //                    |
6564             //                   ADD(I_IMPL)
6565             //                   / \
6566             //                  /  CNS(fldOffset)
6567             //                 /
6568             //                /
6569             //               /
6570             //             IND(I_IMPL) == [Base of this DLL's TLS]
6571             //              |
6572             //             ADD(I_IMPL)
6573             //             / \
6574             //            /   CNS(IdValue*4) or MUL
6575             //           /                      / \
6576             //          IND(I_IMPL)            /  CNS(4)
6577             //           |                    /
6578             //          CNS(TLS_HDL,0x2C)    IND
6579             //                                |
6580             //                               CNS(pIdAddr)
6581             //
6582             // # Denotes the original node
6583             //
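                 // For illustration only -- a hedged C-like sketch (variable
                 // names are illustrative, not VM APIs) of what the tree above
                 // computes on x86 Windows:
                 //
                 //     char** tlsSlots = *(char***)(TEB + 0x2C);  // IND(CNS TLS_HDL)
                 //     size_t slotOffs = (pIdAddr != NULL) ? (*pIdAddr) * 4 : IdValue * 4;
                 //     char*  tlsBase  = *(char**)((char*)tlsSlots + slotOffs);
                 //     value           = *(T*)(tlsBase + fldOffset);
                 //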
6584             void**   pIdAddr = nullptr;
6585             unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
6586
6587             //
6588             // If we can access the TLS DLL index ID value directly,
6589             // then pIdAddr will be NULL and
6590             //      IdValue will be the actual TLS DLL index ID
6591             //
6592             GenTreePtr dllRef = nullptr;
6593             if (pIdAddr == nullptr)
6594             {
6595                 if (IdValue != 0)
6596                 {
6597                     dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6598                 }
6599             }
6600             else
6601             {
6602                 dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
6603                 dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
6604                 dllRef->gtFlags |= GTF_IND_INVARIANT;
6605
6606                 /* Multiply the TLS index by 4 (the pointer size on x86) to get the slot's byte offset */
6607
6608                 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6609             }
6610
6611 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6612
6613             // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6614
6615             GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6616
6617             // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6618             if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6619             {
6620                 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6621                 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
6622             }
6623
6624             tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6625
6626             if (dllRef != nullptr)
6627             {
6628                 /* Add the dllRef */
6629                 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6630             }
6631
6632             /* indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
6633             tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6634
6635             if (fldOffset != 0)
6636             {
6637                 FieldSeqNode* fieldSeq =
6638                     fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6639                 GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6640
6641                 /* Add the TLS static field offset to the address */
6642
6643                 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6644             }
6645
6646             // Final indirection to get to the actual value of the TLS static field
6647
6648             tree->SetOper(GT_IND);
6649             tree->gtOp.gtOp1 = tlsRef;
6650
6651             noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6652         }
6653         else
6654         {
6655             // Normal static field reference
6656
6657             //
6658             // If we can access the static's address directly,
6659             // then pFldAddr will be NULL and
6660             //      fldAddr will be the actual address of the static field
6661             //
6662             void** pFldAddr = nullptr;
6663             void*  fldAddr  = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6664
6665             if (pFldAddr == nullptr)
6666             {
6667 #ifdef _TARGET_64BIT_
6668                 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
6669                 {
6670                     // The address is not directly addressable, so force it into a
6671                     // constant so that we handle it properly.
6672
6673                     GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6674                     addr->gtType    = TYP_I_IMPL;
6675                     FieldSeqNode* fieldSeq =
6676                         fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6677                     addr->gtIntCon.gtFieldSeq = fieldSeq;
6678                     // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6679                     if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6680                     {
6681                         tree->gtFlags &= ~GTF_FLD_INITCLASS;
6682                         addr->gtFlags |= GTF_ICON_INITCLASS;
6683                     }
6684
6685                     tree->SetOper(GT_IND);
6686                     // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6687                     // We must clear it when we transform the node.
6688                     // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6689                     // that the logic above does its own checking to determine whether a nullcheck is needed.
6690                     tree->gtFlags &= ~GTF_IND_ARR_LEN;
6691                     tree->gtOp.gtOp1 = addr;
6692
6693                     return fgMorphSmpOp(tree);
6694                 }
6695                 else
6696 #endif // _TARGET_64BIT_
6697                 {
6698                     // Only volatile or classinit could be set, and they map over to the CLS_VAR flags
6699                     noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
6700                     static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
6701                     static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
6702                     tree->SetOper(GT_CLS_VAR);
6703                     tree->gtClsVar.gtClsVarHnd = symHnd;
6704                     FieldSeqNode* fieldSeq =
6705                         fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6706                     tree->gtClsVar.gtFieldSeq = fieldSeq;
6707                 }
6708
6709                 return tree;
6710             }
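                 // For illustration only -- a hedged summary of the static field
                 // shapes produced here (C-like, names illustrative):
                 //
                 //     value = clsVar;           // GT_CLS_VAR          (directly addressable)
                 //     value = *(T*)fldAddr;     // GT_IND(icon)        (64-bit, no REL32 reloc)
                 //     value = **(T**)pFldAddr;  // GT_IND(GT_IND(icon)), built in the else below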
6711             else
6712             {
6713                 GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6714
6715                 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6716                 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6717                 {
6718                     tree->gtFlags &= ~GTF_FLD_INITCLASS;
6719                     addr->gtFlags |= GTF_ICON_INITCLASS;
6720                 }
6721
6722                 // There are two cases here: either the static is RVA-based,
6723                 // in which case the type of the FIELD node is not a GC type
6724                 // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
6725                 // a GC type and the handle to it is a TYP_BYREF into the GC heap,
6726                 // because handles to statics now go into the large object heap.
6727
6728                 var_types  handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6729                 GenTreePtr op1       = gtNewOperNode(GT_IND, handleTyp, addr);
6730                 op1->gtFlags |= GTF_IND_INVARIANT;
6731
6732                 tree->SetOper(GT_IND);
6733                 tree->gtOp.gtOp1 = op1;
6734             }
6735         }
6736     }
6737     noway_assert(tree->gtOper == GT_IND);
6738     // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6739     // We must clear it when we transform the node.
6740     // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6741     // that the logic above does its own checking to determine whether a nullcheck is needed.
6742     tree->gtFlags &= ~GTF_IND_ARR_LEN;
6743
6744     GenTreePtr res = fgMorphSmpOp(tree);
6745
6746     // If we have a struct type, this node would previously have been under a GT_ADDR,
6747     // and therefore would have been marked GTF_DONT_CSE.
6748     // TODO-1stClassStructs: revisit this.
6749     if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
6750     {
6751         res->gtFlags |= GTF_DONT_CSE;
6752     }
6753
6754     if (fldOffset == 0 && res->OperGet() == GT_IND)
6755     {
6756         GenTreePtr addr = res->gtOp.gtOp1;
6757         // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6758         FieldSeqNode* fieldSeq =
6759             fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6760         fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6761     }
6762
6763     return res;
6764 }
6765
6766 //------------------------------------------------------------------------------
6767 // fgMorphCallInline: attempt to inline a call
6768 //
6769 // Arguments:
6770 //    call         - call expression to inline, inline candidate
6771 //    inlineResult - result tracking and reporting
6772 //
6773 // Notes:
6774 //    Attempts to inline the call.
6775 //
6776 //    If successful, callee's IR is inserted in place of the call, and
6777 //    is marked with an InlineContext.
6778 //
6779 //    If unsuccessful, the transformations done in anticipation of a
6780 //    possible inline are undone, and the candidate flag on the call
6781 //    is cleared.
6782
6783 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
6784 {
6785     // The call must be a candidate for inlining.
6786     assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
6787
6788     // Attempt the inline
6789     fgMorphCallInlineHelper(call, inlineResult);
6790
6791     // We should have made up our minds one way or another....
6792     assert(inlineResult->IsDecided());
6793
6794     // If we failed to inline, we have a bit of work to do to cleanup
6795     if (inlineResult->IsFailure())
6796     {
6797
6798 #ifdef DEBUG
6799
6800         // Before we do any cleanup, create a failing InlineContext to
6801         // capture details of the inlining attempt.
6802         m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6803
6804 #endif
6805
6806         // It was an inline candidate, but we haven't expanded it.
6807         if (call->gtCall.gtReturnType != TYP_VOID)
6808         {
6809             // Detach the GT_CALL tree from the original statement by
6810             // hanging a "nothing" node on it. Later the "nothing" node will be removed
6811             // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6812
6813             noway_assert(fgMorphStmt->gtStmtExpr == call);
6814             fgMorphStmt->gtStmtExpr = gtNewNothingNode();
6815         }
6816
6817         // Clear the Inline Candidate flag so we can ensure later we tried
6818         // inlining all candidates.
6819         //
6820         call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6821     }
6822 }
6823
6824 /*****************************************************************************
6825  *  Helper to attempt to inline a call
6826  *  Sets success/failure in inline result
6827  *  If success, modifies current method's IR with inlinee's IR
6828  *  If failed, undoes any speculative modifications to current method
6829  */
6830
6831 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6832 {
6833     // Don't expect any surprises here.
6834     assert(result->IsCandidate());
6835
6836     if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6837     {
6838         // For now, attributing this to call site, though it's really
6839         // more of a budget issue (lvaCount currently includes all
6840         // caller and prospective callee locals). We still might be
6841         // able to inline other callees into this caller, or inline
6842         // this callee in other callers.
6843         result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6844         return;
6845     }
6846
6847     if (call->IsVirtual())
6848     {
6849         result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6850         return;
6851     }
6852
6853     // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6854     // and recursive tail calls as inline candidates.
6855     noway_assert(!call->IsTailPrefixedCall());
6856     noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6857
6858     /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6859        Although we have checked this in impCanInline, it is possible that later IL instructions
6860        might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6861     */
6862
6863     if (opts.compNeedSecurityCheck)
6864     {
6865         result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6866         return;
6867     }
6868
6869     //
6870     // Calling inlinee's compiler to inline the method.
6871     //
6872
6873     unsigned startVars = lvaCount;
6874
6875 #ifdef DEBUG
6876     if (verbose)
6877     {
6878         printf("Expanding INLINE_CANDIDATE in statement ");
6879         printTreeID(fgMorphStmt);
6880         printf(" in BB%02u:\n", compCurBB->bbNum);
6881         gtDispTree(fgMorphStmt);
6882         if (call->IsImplicitTailCall())
6883         {
6884             printf("Note: candidate is implicit tail call\n");
6885         }
6886     }
6887 #endif
6888
6889     impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6890
6891     //
6892     // Invoke the compiler to inline the call.
6893     //
6894
6895     fgInvokeInlineeCompiler(call, result);
6896
6897     if (result->IsFailure())
6898     {
6899         // Undo some changes made in anticipation of inlining...
6900
6901         // Zero out the used locals
6902         memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6903         for (unsigned i = startVars; i < lvaCount; i++)
6904         {
6905             new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
6906         }
6907
6908         lvaCount = startVars;
6909
6910 #ifdef DEBUG
6911         if (verbose)
6912         {
6913             // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6914         }
6915 #endif
6916
6917         return;
6918     }
6919
6920 #ifdef DEBUG
6921     if (verbose)
6922     {
6923         // printf("After inlining lvaCount=%d.\n", lvaCount);
6924     }
6925 #endif
6926 }
6927
6928 /*****************************************************************************
6929  *
6930  * Performs checks to see if this tail call can be optimized as epilog+jmp.
6931  */
6932 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6933 {
6934 #if FEATURE_FASTTAILCALL
6935     // Reaching here means that the return types of the caller and callee are tail call compatible.
6936     // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6937     //
6938     // In the implicit tail call case callSig may not be available, but it is guaranteed to be available
6939     // for explicit tail call cases.  The reason callSig may not be available for an implicit tail call is that
6940     // a call node might be marked as an inline candidate and could fail to be inlined, in which case
6941     // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
6942     // currently does not copy/set callSig.
6943     CLANG_FORMAT_COMMENT_ANCHOR;
6944
6945 #ifdef DEBUG
6946     if (callee->IsTailPrefixedCall())
6947     {
6948         assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6949                                             (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6950     }
6951 #endif
6952
6953     // Note on vararg methods:
6954     // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
6955     // But we can be sure that the in-coming arg area of the vararg caller is sufficient to hold its
6956     // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
6957     // out-going area required for the callee is bounded by the caller's fixed argument space.
6958     //
6959     // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
6960
6961     // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6962     unsigned nCallerArgs = info.compArgsCount;
6963
6964     // Count the callee args including implicit and hidden.
6965     // Note that GenericContext and VarargCookie are added by importer while
6966     // importing the call to gtCallArgs list along with explicit user args.
6967     unsigned nCalleeArgs = 0;
6968     if (callee->gtCallObjp) // thisPtr
6969     {
6970         nCalleeArgs++;
6971     }
6972
6973     if (callee->HasRetBufArg()) // RetBuf
6974     {
6975         nCalleeArgs++;
6976
6977         // If callee has RetBuf param, caller too must have it.
6978         // Otherwise go the slow route.
6979         if (info.compRetBuffArg == BAD_VAR_NUM)
6980         {
6981             return false;
6982         }
6983     }
6984
6985     // Count user args while tracking whether any of them is a multi-byte param
6986     // that cannot be passed in a register. Note that we don't need to count
6987     // non-standard and secret params passed in registers (e.g. R10, R11) since
6988     // these won't contribute to out-going arg size.
6989     bool hasMultiByteArgs = false;
6990     for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6991     {
6992         nCalleeArgs++;
6993
6994         assert(args->OperIsList());
6995         GenTreePtr argx = args->gtOp.gtOp1;
6996
6997         if (varTypeIsStruct(argx))
6998         {
6999             // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
7000             while (argx->gtOper == GT_COMMA)
7001             {
7002                 argx = argx->gtOp.gtOp2;
7003             }
7004
7005             // Get the size of the struct and see if it is register passable.
7006             CORINFO_CLASS_HANDLE objClass = nullptr;
7007
7008             if (argx->OperGet() == GT_OBJ)
7009             {
7010                 objClass = argx->AsObj()->gtClass;
7011             }
7012             else if (argx->IsLocal())
7013             {
7014                 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
7015             }
7016             if (objClass != nullptr)
7017             {
7018 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
7019
7020                 unsigned typeSize = 0;
7021                 hasMultiByteArgs  = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
7022
7023 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
7024                 // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers.
7025                 // Account for the second eightbyte in the nCalleeArgs.
7026                 // https://github.com/dotnet/coreclr/issues/2666
7027                 // TODO-CQ-Amd64-Unix/arm64:  Structs of size between 9 and 16 bytes are conservatively estimated
7028                 //                            as two args, since they need two registers whereas nCallerArgs is
7029                 //                            counting such an arg as one. This means we will not be optimizing
7030                 //                            certain calls even though it is technically possible.
7031
7032                 if (typeSize > TARGET_POINTER_SIZE)
7033                 {
7034                     unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
7035                     nCalleeArgs += extraArgRegsToAdd;
7036                 }
7037 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
7038
7039 #else
7040                 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7041                 unreached();
7042 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7043             }
7044             else
7045             {
7046                 hasMultiByteArgs = true;
7047             }
7048         }
7049     }
7050
7051     // Go the slow route if the callee has multi-byte params
7052     if (hasMultiByteArgs)
7053     {
7054         return false;
7055     }
7056
7057     // Reaching here means that the callee has only argument types which can be passed in
7058     // a register and which, if passed on the stack, occupy exactly one stack slot in the out-going arg area.
7059     // If we are passing args on the stack for the callee and it has more args passed on the stack than
7060     // the caller, then a fast tail call cannot be performed.
7061     //
7062     // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7063     // as non-interruptible for fast tail calls.
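         // Worked example (illustrative; assumes a target where MAX_REG_ARG == 4):
         // a caller with nCallerArgs == 6 calling a callee with nCalleeArgs == 8
         // fails the check below (8 > 4 and 6 < 8), since the callee would need
         // more out-going stack slots than the caller's in-coming arg area provides.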
7064     if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
7065     {
7066         return false;
7067     }
7068
7069     return true;
7070 #else
7071     return false;
7072 #endif
7073 }
7074
7075 /*****************************************************************************
7076  *
7077  *  Transform the given GT_CALL tree for tail call code generation.
7078  */
7079 void Compiler::fgMorphTailCall(GenTreeCall* call)
7080 {
7081     JITDUMP("fgMorphTailCall (before):\n");
7082     DISPTREE(call);
7083
7084 #if defined(_TARGET_ARM_)
7085     // For the helper-assisted tail calls, we need to push all the arguments
7086     // into a single list, and then add a few extra at the beginning
7087
7088     // Check for PInvoke call types that we don't handle in codegen yet.
7089     assert(!call->IsUnmanaged());
7090     assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7091
7092     // First move the this pointer (if any) onto the regular arg list
7093     GenTreePtr thisPtr = NULL;
7094     if (call->gtCallObjp)
7095     {
7096         GenTreePtr objp  = call->gtCallObjp;
7097         call->gtCallObjp = NULL;
7098
7099         if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7100         {
7101             thisPtr      = gtClone(objp, true);
7102             var_types vt = objp->TypeGet();
7103             if (thisPtr == NULL)
7104             {
7105                 // Too complex, so use a temp
7106                 unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7107                 GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
7108                 if (!call->IsVirtualVtable())
7109                 {
7110                     // Add an indirection to get the nullcheck
7111                     GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7112                     GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7113                     asg            = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7114                 }
7115                 objp    = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7116                 thisPtr = gtNewLclvNode(lclNum, vt);
7117             }
7118             else if (!call->IsVirtualVtable())
7119             {
7120                 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7121                 objp           = gtNewOperNode(GT_COMMA, vt, ind, objp);
7122                 thisPtr        = gtClone(thisPtr, true);
7123             }
7124
7125             call->gtFlags &= ~GTF_CALL_NULLCHECK;
7126         }
7127
7128         call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7129     }
7130
7131     // Add the extra VSD parameter if needed
7132     CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7133     if (call->IsVirtualStub())
7134     {
7135         flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7136
7137         GenTreePtr arg;
7138         if (call->gtCallType == CT_INDIRECT)
7139         {
7140             arg = gtClone(call->gtCallAddr, true);
7141             noway_assert(arg != NULL);
7142         }
7143         else
7144         {
7145             noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7146             ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7147             arg          = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7148
7149             // Change the call type, so we can add the extra indirection here, rather than in codegen
7150             call->gtCallAddr         = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7151             call->gtStubCallStubAddr = NULL;
7152             call->gtCallType         = CT_INDIRECT;
7153         }
7154         // Add the extra indirection to generate the real target
7155         call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
7156         call->gtFlags |= GTF_EXCEPT;
7157
7158         // And push the stub address onto the list of arguments
7159         call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7160     }
7161     else if (call->IsVirtualVtable())
7162     {
7163         // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
7164
7165         noway_assert(thisPtr != NULL);
7166
7167         GenTreePtr add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7168         GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7169         vtbl->gtFlags |= GTF_EXCEPT;
7170
7171         unsigned vtabOffsOfIndirection;
7172         unsigned vtabOffsAfterIndirection;
7173         unsigned isRelative;
7174         info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
7175                                                 &isRelative);
7176
7177         /* Get the appropriate vtable chunk */
7178
7179         if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
7180         {
7181             add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7182
7183             GenTreePtr indOffTree;
7184
7185             if (isRelative)
7186             {
7187                 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7188                                           nullptr DEBUGARG("virtual table call"));
7189             }
7190
7191             vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7192
7193             if (isRelative)
7194             {
7195                 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7196             }
7197         }
7198
7199         /* Now the appropriate vtable slot */
7200
7201         add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7202         vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
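             // For illustration only -- a hedged C-like sketch of the lookup built
             // above (names illustrative):
             //
             //     char* mt    = *(char**)(thisPtr + VPTR_OFFS);    // MethodTable
             //     char* cell  = mt + vtabOffsOfIndirection;        // 1st-level cell
             //     char* chunk = *(char**)cell;                     // absolute case
             //     // isRelative: the cell stores an offset from its own address,
             //     // so chunk = cell + *(ptrdiff_t*)cell instead
             //     void* target = *(void**)(chunk + vtabOffsAfterIndirection);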
7203
7204         // Switch this to a plain indirect call
7205         call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7206         assert(!call->IsVirtual());
7207         call->gtCallType = CT_INDIRECT;
7208
7209         call->gtCallAddr   = vtbl;
7210         call->gtCallCookie = NULL;
7211         call->gtFlags |= GTF_EXCEPT;
7212     }
7213
7214     // Now inject a placeholder for the real call target that codegen
7215     // will generate
7216     GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7217 #ifdef LEGACY_BACKEND
7218     codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
7219 #endif
7220     call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7221
7222     // Lastly inject the pointer for the copy routine
7223     noway_assert(call->callSig != NULL);
7224     void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7225     arg               = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7226     call->gtCallArgs  = gtNewListNode(arg, call->gtCallArgs);
7227
7228     // It is now a varargs tail call
7229     call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7230     call->gtFlags &= ~GTF_CALL_POP_ARGS;
7231
7232 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7233
7234     // x86 classic codegen doesn't require any morphing
7235
7236     // For the helper-assisted tail calls, we need to push all the arguments
7237     // into a single list, and then add a few extra at the beginning or end.
7238     //
7239     // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7240     //
7241     //      JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7242     //
7243     // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7244     // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7245     // for callTarget here which will be replaced later with callTarget in tail call lowering.
7246     //
7247     // For x86, the tailcall helper is defined as:
7248     //
7249     //      JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
7250     //      callTarget)
7251     //
7252     // Note that the special arguments are on the stack, whereas the function arguments follow
7253     // the normal convention: there might be register arguments in ECX and EDX. The stack will
7254     // look like (highest address at the top):
7255     //      first normal stack argument
7256     //      ...
7257     //      last normal stack argument
7258     //      numberOfOldStackArgs
7259     //      numberOfNewStackArgs
7260     //      flags
7261     //      callTarget
7262     //
7263     // Each special arg is 4 bytes.
7264     //
7265     // 'flags' is a bitmask where:
7266     //      1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7267     //          callee-saved registers for tailcall functions. Note that the helper assumes
7268     //          that the callee-saved registers live immediately below EBP, and must have been
7269     //          pushed in this order: EDI, ESI, EBX.
7270     //      2 == call target is a virtual stub dispatch.
7271     //
7272     // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7273     // on the custom calling convention.
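         // Illustrative prototypes for the two helper shapes described above
         // (a sketch, not the VM's actual declarations):
         //
         //     // AMD64: the special args are prepended
         //     void JIT_TailCall(void* copyRoutine, void* callTarget, ...);
         //
         //     // x86: the special args follow the normal stack args
         //     void JIT_TailCall(/* <function args>, */ int numberOfOldStackArgsWords,
         //                       int numberOfNewStackArgsWords, int flags, void* callTarget);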
7274
7275     // Check for PInvoke call types that we don't handle in codegen yet.
7276     assert(!call->IsUnmanaged());
7277     assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7278
7279     // Don't support tail calling helper methods
7280     assert(call->gtCallType != CT_HELPER);
7281
7282     // We take this route only for tail prefixed calls that cannot be dispatched as
7283     // fast tail calls
7284     assert(!call->IsImplicitTailCall());
7285     assert(!fgCanFastTailCall(call));
7286
7287     // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7288     // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7289     // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7290     // addition, for all platforms, we are going to change the call into a helper call. Our code
7291     // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7292     // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7293     // since special 'this' pointer handling will no longer kick in.
7294     //
7295     // Some call types, such as virtual vtable calls, require creating a call address expression
7296     // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7297     // to create a temporary that is assigned to the "this" pointer expression, and then use
7298     // that temp to create the call address expression. This temp creation embedded statement
7299     // will occur immediately before the "this" pointer argument, and then will be used for both
7300     // the "this" pointer argument as well as the call address expression. In the normal ordering,
7301     // the embedded statement establishing the "this" pointer temp will execute before both uses
7302     // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7303     // normal call argument list, and insert a placeholder which will hold the call address
7304     // expression. For non-x86, things are ok, because the order of execution of these is not
7305     // altered. However, for x86, the call address expression is inserted as the *last* argument
7306     // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7307     // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7308     // for those cases where call lowering creates an embedded form temp of "this", we will
7309     // create a temp here, early, that will later get morphed correctly.
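         // The shapes created below are, in sketch form (illustrative):
         //
         //     "this" has no side effects: thisPtr = COMMA(deref(clone("this")), "this")
         //     otherwise:                  thisPtr = COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
         //
         // where deref() is the GT_IND that provides the required null check.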
7310
7311     if (call->gtCallObjp)
7312     {
7313         GenTreePtr thisPtr = nullptr;
7314         GenTreePtr objp    = call->gtCallObjp;
7315         call->gtCallObjp   = nullptr;
7316
7317 #ifdef _TARGET_X86_
7318         if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7319         {
7320             // tmp = "this"
7321             unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7322             GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
7323
7324             // COMMA(tmp = "this", tmp)
7325             var_types  vt  = objp->TypeGet();
7326             GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7327             thisPtr        = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7328
7329             objp = thisPtr;
7330         }
7331 #endif // _TARGET_X86_
7332
7333 #if defined(_TARGET_X86_)
7334         // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
7335         // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
7336         // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
7337         if (call->NeedsNullCheck() || call->IsVirtualStub())
7338 #else
7339         if (call->NeedsNullCheck())
7340 #endif // defined(_TARGET_X86_)
7341         {
7342             // clone "this" if "this" has no side effects.
7343             if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7344             {
7345                 thisPtr = gtClone(objp, true);
7346             }
7347
7348             var_types vt = objp->TypeGet();
7349             if (thisPtr == nullptr)
7350             {
7351                 // create a temp if either "this" has side effects or "this" is too complex to clone.
7352
7353                 // tmp = "this"
7354                 unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7355                 GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
7356
7357                 // COMMA(tmp = "this", deref(tmp))
7358                 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7359                 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7360                 asg            = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7361
7362                 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7363                 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7364             }
7365             else
7366             {
7367                 // thisPtr = COMMA(deref("this"), "this")
7368                 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7369                 thisPtr        = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7370             }
7371
7372             call->gtFlags &= ~GTF_CALL_NULLCHECK;
7373         }
7374         else
7375         {
7376             thisPtr = objp;
7377         }
7378
7379         // During rationalization tmp="this" and the null check will
7380         // materialize as embedded stmts in the right execution order.
7381         assert(thisPtr != nullptr);
7382         call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7383     }
7384
7385 #if defined(_TARGET_AMD64_)
7386
7387     // Add the extra VSD parameter to arg list in case of VSD calls.
7388     // Tail call arg copying thunk will move this extra VSD parameter
7389     // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7390     // in Stublinkerx86.cpp for more details.
7391     CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7392     if (call->IsVirtualStub())
7393     {
7394         GenTreePtr stubAddrArg;
7395
7396         flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7397
7398         if (call->gtCallType == CT_INDIRECT)
7399         {
7400             stubAddrArg = gtClone(call->gtCallAddr, true);
7401             noway_assert(stubAddrArg != nullptr);
7402         }
7403         else
7404         {
7405             noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
7406
7407             ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7408             stubAddrArg  = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7409         }
7410
7411         // Push the stub address onto the list of arguments
7412         call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7413     }
7414
7415     // Now inject a placeholder for the real call target that Lower phase will generate.
7416     GenTreePtr arg   = gtNewIconNode(0, TYP_I_IMPL);
7417     call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7418
7419     // Inject the pointer for the copy routine to be used for struct copying
7420     noway_assert(call->callSig != nullptr);
7421     void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7422     arg               = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7423     call->gtCallArgs  = gtNewListNode(arg, call->gtCallArgs);
7424
7425 #else // !_TARGET_AMD64_
7426
7427     // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7428     // append to the list.
7429     GenTreeArgList** ppArg = &call->gtCallArgs;
7430     for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7431     {
7432         ppArg = (GenTreeArgList**)&args->gtOp2;
7433     }
7434     assert(ppArg != nullptr);
7435     assert(*ppArg == nullptr);
7436
7437     unsigned nOldStkArgsWords =
7438         (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7439     GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7440     *ppArg        = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7441     ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);
7442
7443     // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7444     // The constant will be replaced.
7445     GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7446     *ppArg        = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7447     ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);
7448
7449     // Inject a placeholder for the flags.
7450     // The constant will be replaced.
7451     GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7452     *ppArg        = gtNewListNode(arg1, nullptr);
7453     ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);
7454
7455     // Inject a placeholder for the real call target that the Lowering phase will generate.
7456     // The constant will be replaced.
7457     GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7458     *ppArg        = gtNewListNode(arg0, nullptr);
7459
7460 #endif // !_TARGET_AMD64_
7461
7462     // It is now a varargs tail call dispatched via helper.
7463     call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7464     call->gtFlags &= ~GTF_CALL_POP_ARGS;
7465
7466 #endif // _TARGET_*
7467
7468     JITDUMP("fgMorphTailCall (after):\n");
7469     DISPTREE(call);
7470 }
7471
7472 //------------------------------------------------------------------------------
7473 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7474 //
7475 //
7476 // Arguments:
7477 //    block  - basic block ending with a recursive fast tail call
7478 //    recursiveTailCall - recursive tail call to transform
7479 //
7480 // Notes:
7481 //    The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
7482
7483 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7484 {
7485     assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7486     GenTreePtr last = block->lastStmt();
7487     assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7488
7489     // Transform recursive tail call into a loop.
7490
7491     GenTreePtr earlyArgInsertionPoint = last;
7492     IL_OFFSETX callILOffset           = last->gtStmt.gtStmtILoffsx;
7493
7494     // Hoist arg setup statement for the 'this' argument.
7495     GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
7496     if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7497     {
7498         GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
7499         fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7500     }
7501
7502     // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7503     // then the temps need to be assigned to the method parameters. This is done so that the caller
7504     // parameters are not re-assigned before call arguments depending on them are evaluated.
7505     // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7506     // where the next temp or parameter assignment should be inserted.
7507
7508     // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7509     // while the second call argument (const 1) doesn't.
7510     // Basic block before tail recursion elimination:
7511     //  ***** BB04, stmt 1 (top level)
7512     //  [000037] ------------             *  stmtExpr  void  (top level) (IL 0x00A...0x013)
7513     //  [000033] --C - G------ - \--*  call      void   RecursiveMethod
7514     //  [000030] ------------ | / --*  const     int - 1
7515     //  [000031] ------------arg0 in rcx + --*  +int
7516     //  [000029] ------------ | \--*  lclVar    int    V00 arg1
7517     //  [000032] ------------arg1 in rdx    \--*  const     int    1
7518     //
7519     //
7520     //  Basic block after tail recursion elimination :
7521     //  ***** BB04, stmt 1 (top level)
7522     //  [000051] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
7523     //  [000030] ------------ | / --*  const     int - 1
7524     //  [000031] ------------ | / --*  +int
7525     //  [000029] ------------ | | \--*  lclVar    int    V00 arg1
7526     //  [000050] - A----------             \--* = int
7527     //  [000049] D------N----                \--*  lclVar    int    V02 tmp0
7528     //
7529     //  ***** BB04, stmt 2 (top level)
7530     //  [000055] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
7531     //  [000052] ------------ | / --*  lclVar    int    V02 tmp0
7532     //  [000054] - A----------             \--* = int
7533     //  [000053] D------N----                \--*  lclVar    int    V00 arg0
7534
7535     //  ***** BB04, stmt 3 (top level)
7536     //  [000058] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
7537     //  [000032] ------------ | / --*  const     int    1
7538     //  [000057] - A----------             \--* = int
7539     //  [000056] D------N----                \--*  lclVar    int    V01 arg1
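         //  In source-level terms the transformation is (a hedged sketch of the
         //  IR above, not actual C# or JIT output):
         //
         //      void RecursiveMethod(int arg0, int arg1)
         //      {
         //      top:
         //          ...
         //          int tmp0 = arg1 - 1;  // temps first, so params aren't clobbered early
         //          arg0 = tmp0;
         //          arg1 = 1;
         //          goto top;             // the BBJ_ALWAYS loop edge set at the end
         //      }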
7540
7541     GenTreePtr tmpAssignmentInsertionPoint   = last;
7542     GenTreePtr paramAssignmentInsertionPoint = last;
7543
7544     // Process early args. They may contain both setup statements for late args and actual args.
7545     // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7546     // below has the correct second argument.
7547     int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7548     for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7549          (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7550     {
7551         GenTreePtr earlyArg = earlyArgs->Current();
7552         if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7553         {
7554             if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7555             {
7556                 // This is a setup node so we need to hoist it.
7557                 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7558                 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7559             }
7560             else
7561             {
7562                 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7563                 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7564                 GenTreePtr       paramAssignStmt =
7565                     fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7566                                                           tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7567                 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7568                 {
7569                     // All temp assignments will happen before the first param assignment.
7570                     tmpAssignmentInsertionPoint = paramAssignStmt;
7571                 }
7572             }
7573         }
7574     }
7575
7576     // Process late args.
7577     int lateArgIndex = 0;
7578     for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7579          (lateArgIndex++, lateArgs = lateArgs->Rest()))
7580     {
7581         // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7582         GenTreePtr       lateArg        = lateArgs->Current();
7583         fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7584         GenTreePtr       paramAssignStmt =
7585             fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7586                                                   tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7587
7588         if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7589         {
7590             // All temp assignments will happen before the first param assignment.
7591             tmpAssignmentInsertionPoint = paramAssignStmt;
7592         }
7593     }
7594
7595     // If the method has starg.s 0 or ldarga.s 0, a special local (lvaArg0Var) is created so that
7596     // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7597     // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7598     if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7599     {
7600         var_types  thisType           = lvaTable[info.compThisArg].TypeGet();
7601         GenTreePtr arg0               = gtNewLclvNode(lvaArg0Var, thisType);
7602         GenTreePtr arg0Assignment     = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7603         GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7604         fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7605     }
7606
7607     // Remove the call
7608     fgRemoveStmt(block, last);
7609
7610     // Set the loop edge.
7611     block->bbJumpKind = BBJ_ALWAYS;
7612     block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
7613     fgAddRefPred(block->bbJumpDest, block);
7614     block->bbFlags &= ~BBF_HAS_JMP;
7615 }
7616
7617 //------------------------------------------------------------------------------
7618 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7619 //
7620 //
7621 // Arguments:
7622 //    arg  -  argument to assign
7623 //    argTabEntry  -  argument table entry corresponding to arg
7624 //    block  --- basic block the call is in
7625 //    callILOffset  -  IL offset of the call
7626 //    tmpAssignmentInsertionPoint  -  tree before which temp assignment should be inserted (if necessary)
7627 //    paramAssignmentInsertionPoint  -  tree before which parameter assignment should be inserted
7628 //
7629 // Return Value:
7630 //    parameter assignment statement if one was inserted; nullptr otherwise.
7631
7632 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr       arg,
7633                                                            fgArgTabEntryPtr argTabEntry,
7634                                                            BasicBlock*      block,
7635                                                            IL_OFFSETX       callILOffset,
7636                                                            GenTreePtr       tmpAssignmentInsertionPoint,
7637                                                            GenTreePtr       paramAssignmentInsertionPoint)
7638 {
7639     // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7640     // some argument trees may reference parameters directly.
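         // Example of the hazard (illustrative): for a recursive call F(b, a)
         // inside F(int a, int b), assigning 'a = b' directly would clobber 'a'
         // before it is read for the second argument; routing through temps
         // (t0 = b; t1 = a; a = t0; b = t1) preserves the original values.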
7641
7642     GenTreePtr argInTemp             = nullptr;
7643     unsigned   originalArgNum        = argTabEntry->argNum;
7644     bool       needToAssignParameter = true;
7645
7646     // TODO-CQ: enable calls with struct arguments passed in registers.
7647     noway_assert(!varTypeIsStruct(arg->TypeGet()));
7648
7649     if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7650     {
7651         // The argument is already assigned to a temp or is a const.
7652         argInTemp = arg;
7653     }
7654     else if (arg->OperGet() == GT_LCL_VAR)
7655     {
7656         unsigned   lclNum = arg->AsLclVar()->gtLclNum;
7657         LclVarDsc* varDsc = &lvaTable[lclNum];
7658         if (!varDsc->lvIsParam)
7659         {
7660             // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7661             argInTemp = arg;
7662         }
7663         else if (lclNum == originalArgNum)
7664         {
7665             // The argument is the same parameter local that we were about to assign so
7666             // we can skip the assignment.
7667             needToAssignParameter = false;
7668         }
7669     }
7670
7671     // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7672     // any caller parameters. Some common cases are handled above but we may be able to eliminate
7673     // more temp assignments.
7674
7675     GenTreePtr paramAssignStmt = nullptr;
7676     if (needToAssignParameter)
7677     {
7678         if (argInTemp == nullptr)
7679         {
7680             // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7681             // TODO: we can avoid a temp assignment if we can prove that the argument tree
7682             // doesn't involve any caller parameters.
7683             unsigned   tmpNum        = lvaGrabTemp(true DEBUGARG("arg temp"));
7684             GenTreePtr tempSrc       = arg;
7685             GenTreePtr tempDest      = gtNewLclvNode(tmpNum, tempSrc->gtType);
7686             GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7687             GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7688             fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7689             argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7690         }
7691
7692         // Now assign the temp to the parameter.
7693         LclVarDsc* paramDsc = lvaTable + originalArgNum;
7694         assert(paramDsc->lvIsParam);
7695         GenTreePtr paramDest       = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7696         GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7697         paramAssignStmt            = gtNewStmt(paramAssignNode, callILOffset);
7698
7699         fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7700     }
7701     return paramAssignStmt;
7702 }
7703
7704 /*****************************************************************************
7705  *
7706  *  Transform the given GT_CALL tree for code generation.
7707  */
7708
7709 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
7710 {
7711     if (call->CanTailCall())
7712     {
7713         // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7714         assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7715
7716         // It cannot be an inline candidate
7717         assert(!call->IsInlineCandidate());
7718
7719         const char* szFailReason   = nullptr;
7720         bool        hasStructParam = false;
7721         if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7722         {
7723             szFailReason = "Might turn into an intrinsic";
7724         }
7725
7726         if (opts.compNeedSecurityCheck)
7727         {
7728             szFailReason = "Needs security check";
7729         }
7730         else if (compLocallocUsed)
7731         {
7732             szFailReason = "Localloc used";
7733         }
7734 #ifdef _TARGET_AMD64_
7735         // Needed for Jit64 compat.
7736         // In future, enabling tail calls from methods that need GS cookie check
7737         // would require codegen side work to emit GS cookie check before a tail
7738         // call.
7739         else if (getNeedsGSSecurityCookie())
7740         {
7741             szFailReason = "GS Security cookie check";
7742         }
7743 #endif
7744 #ifdef DEBUG
7745         // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7746         else if (opts.compGcChecks)
7747         {
7748             szFailReason = "GcChecks";
7749         }
7750 #endif
7751 #if FEATURE_TAILCALL_OPT
7752         else
7753         {
7754             // We are still not sure whether it can be a tail call. Because, when converting
7755             // a call to an implicit tail call, we must check that there are no locals with
7756             // their address taken.  If this is the case, we have to assume that the address
7757             // has been leaked and the current stack frame must live until after the final
7758             // call.
7759
7760             // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
7761             // that lvHasLdAddrOp is much more conservative.  We cannot just base it on
7762             // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
7763             // during the morph stage. The reason for also checking lvAddrExposed is that in the case
7764             // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
7765             // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
7766             // never to be incorrect.
7767             //
7768             // TODO-Throughput: have a compiler-level flag to indicate whether the method has vars
7769             // whose address is taken. Such a flag could be set whenever lvHasLdAddrOp or lvAddrExposed
7770             // is set. This would avoid the need to iterate through all lcl vars of the current
7771             // method.  Right now, throughout the code base, we do not consistently use the 'set'
7772             // method to set the lvHasLdAddrOp and lvAddrExposed flags.
7773             unsigned   varNum;
7774             LclVarDsc* varDsc;
7775             bool       hasAddrExposedVars     = false;
7776             bool       hasStructPromotedParam = false;
7777             bool       hasPinnedVars          = false;
7778
7779             for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7780             {
7781                 // If the method is marked as an explicit tail call we will skip the
7782                 // following three hazard checks.
7783                 // We still must check for any struct parameters and set 'hasStructParam'
7784                 // so that we won't transform the recursive tail call into a loop.
7785                 //
7786                 if (call->IsImplicitTailCall())
7787                 {
7788                     if (varDsc->lvHasLdAddrOp)
7789                     {
7790                         hasAddrExposedVars = true;
7791                         break;
7792                     }
7793                     if (varDsc->lvAddrExposed)
7794                     {
7795                         if (lvaIsImplicitByRefLocal(varNum))
7796                         {
7797                             // The address of the implicit-byref is a non-address use of the pointer parameter.
7798                         }
7799                         else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
7800                         {
7801                             // The address of the implicit-byref's field is likewise a non-address use of the pointer
7802                             // parameter.
7803                         }
7804                         else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
7805                         {
7806                             // This temp was used for struct promotion bookkeeping.  It will not be used, and will have
7807                             // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
7808                             assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
7809                             assert(fgGlobalMorph);
7810                         }
7811                         else
7812                         {
7813                             hasAddrExposedVars = true;
7814                             break;
7815                         }
7816                     }
7817                     if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
7818                     {
7819                         hasStructPromotedParam = true;
7820                         break;
7821                     }
7822                     if (varDsc->lvPinned)
7823                     {
7824                         // A tail call removes the method from the stack, which means the pinning
7825                         // goes away for the callee.  We can't allow that.
7826                         hasPinnedVars = true;
7827                         break;
7828                     }
7829                 }
7830                 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7831                 {
7832                     hasStructParam = true;
7833                     // This prevents transforming a recursive tail call into a loop
7834                     // but doesn't prevent tail call optimization so we need to
7835                     // look at the rest of parameters.
7836                     continue;
7837                 }
7838             }
7839
7840             if (hasAddrExposedVars)
7841             {
7842                 szFailReason = "Local address taken";
7843             }
7844             if (hasStructPromotedParam)
7845             {
7846                 szFailReason = "Has Struct Promoted Param";
7847             }
7848             if (hasPinnedVars)
7849             {
7850                 szFailReason = "Has Pinned Vars";
7851             }
7852         }
7853 #endif // FEATURE_TAILCALL_OPT
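        // For illustration of the pinned-var hazard above (hypothetical C# source):
        //
        //     fixed (byte* p = buffer)
        //     {
        //         return Consume(p); // implicit tail call must be rejected here,
        //     }                      // since the pin on 'buffer' dies with this frame
        //
        // 'Consume' and 'buffer' are stand-ins for arbitrary user code.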
7854
7855         if (varTypeIsStruct(call))
7856         {
7857             fgFixupStructReturn(call);
7858         }
7859
7860         var_types callType = call->TypeGet();
7861
7862         // We have to ensure that we pass the incoming retValBuf as the
7863         // outgoing one. Using a temp will not do, as this function will
7864         // not regain control to do the copy.
7865
7866         if (info.compRetBuffArg != BAD_VAR_NUM)
7867         {
7868             noway_assert(callType == TYP_VOID);
7869             GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7870             if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7871             {
7872                 szFailReason = "Need to copy return buffer";
7873             }
7874         }
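        // E.g. (hypothetical example): in 'static BigStruct M() { return Callee(); }'
        // the caller's incoming ret-buff pointer must be forwarded to Callee unchanged;
        // copying through a temp would require control to return to M after the call,
        // which defeats the tail call, hence the bail-out above.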
7875
7876         // If this is an opportunistic tail call and cannot be dispatched as
7877         // a fast tail call, go the non-tail call route.  This is done for perf
7878         // reasons.
7879         //
7880         // Avoid the cost of determining whether the call can be dispatched as a
7881         // fast tail call if we already know that the tail call cannot be honored
7882         // for other reasons.
7883         bool canFastTailCall = false;
7884         if (szFailReason == nullptr)
7885         {
7886             canFastTailCall = fgCanFastTailCall(call);
7887             if (!canFastTailCall)
7888             {
7889                 // Implicit or opportunistic tail calls are always dispatched via the fast
7890                 // tail call mechanism and never via the tail call helper, for perf reasons.
7891                 if (call->IsImplicitTailCall())
7892                 {
7893                     szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7894                 }
7895 #ifndef LEGACY_BACKEND
7896                 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
7897                 {
7898                     // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
7899                     // dispatched as a fast tail call.
7900
7901                     // Methods with non-standard args will have indirection cell or cookie param passed
7902                     // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
7903                     // tail calling the target method and hence ".tail" prefix on such calls needs to be
7904                     // ignored.
7905                     //
7906                     // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
7907                     // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
7908                     // This is done by adding stubAddr as an additional arg before the original list of
7909                     // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7910                     // in Stublinkerx86.cpp.
7911                     szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
7912                                    "called via helper";
7913                 }
7914 #ifdef _TARGET_ARM64_
7915                 else
7916                 {
7917                     // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7918                     // So, bail out if we can't make fast tail call.
7919                     szFailReason = "Non-qualified fast tail call";
7920                 }
7921 #endif
7922 #endif // LEGACY_BACKEND
7923             }
7924         }
7925
7926         // Clear these flags before calling fgMorphCall() to avoid recursion.
7927         bool isTailPrefixed = call->IsTailPrefixedCall();
7928         call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7929
7930 #if FEATURE_TAILCALL_OPT
7931         call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
7932 #endif
7933
7934 #ifdef FEATURE_PAL
7935         if (!canFastTailCall && szFailReason == nullptr)
7936         {
7937             szFailReason = "Non fast tail calls disabled for PAL based systems.";
7938         }
7939 #endif // FEATURE_PAL
7940
7941         if (szFailReason != nullptr)
7942         {
7943 #ifdef DEBUG
7944             if (verbose)
7945             {
7946                 printf("\nRejecting tail call late for call ");
7947                 printTreeID(call);
7948                 printf(": %s\n", szFailReason);
7949             }
7950 #endif
7951
7952             // for non user funcs, we have no handles to report
7953             info.compCompHnd->reportTailCallDecision(nullptr,
7954                                                      (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7955                                                      isTailPrefixed, TAILCALL_FAIL, szFailReason);
7956
7957             goto NO_TAIL_CALL;
7958         }
7959
7960 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7961         // We enable shared-ret tail call optimization for recursive calls even if
7962         // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7963         if (gtIsRecursiveCall(call))
7964 #endif
7965         {
7966             // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7967             // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7968             if (compCurBB->bbJumpKind != BBJ_RETURN)
7969             {
7970                 compCurBB->bbJumpKind = BBJ_RETURN;
7971             }
7972         }
7973
7974         // Set this flag before calling fgMorphCall() to prevent inlining this call.
7975         call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7976
7977         bool fastTailCallToLoop = false;
7978 #if FEATURE_TAILCALL_OPT
7979         // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7980         // or return type is a struct that can be passed in a register.
7981         //
7982         // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
7983         // hidden generic context param or through keep alive thisptr), then while transforming a recursive
7984         // call to such a method requires that the generic context stored on stack slot be updated.  Right now,
7985         // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
7986         // a recursive call into a loop.  Another option is to modify gtIsRecursiveCall() to check that the
7987         // generic type parameters of both caller and callee generic method are the same.
7988         if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
7989             !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
7990         {
7991             call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7992             fastTailCallToLoop = true;
7993         }
7994 #endif
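        // Illustrative sketch of the loop transformation (an assumption about the
        // later phase, with a hypothetical C# source): a recursive fast tail call
        //
        //     static long Sum(int n, long acc) { return (n == 0) ? acc : Sum(n - 1, acc + n); }
        //
        // is flagged GTF_CALL_M_TAILCALL_TO_LOOP here, and fgMorphRecursiveFastTailCallIntoLoop
        // later rewrites it as parameter reassignment plus a jump back to the method
        // entry, morally equivalent to:
        //
        //     while (n != 0) { acc += n; n--; }
        //     return acc;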
7995
7996         // Do some target-specific transformations (before we process the args, etc.)
7997         // This is needed only for tail prefixed calls that cannot be dispatched as
7998         // fast calls.
7999         if (!canFastTailCall)
8000         {
8001             fgMorphTailCall(call);
8002         }
8003
8004         // Implementation note : If we optimize tailcall to do a direct jump
8005         // to the target function (after stomping on the return address, etc),
8006         // without using CORINFO_HELP_TAILCALL, we have to make certain that
8007         // we don't starve the hijacking logic (by stomping on the hijacked
8008         // return address etc).
8009
8010         // At this point, we are committed to do the tailcall.
8011         compTailCallUsed = true;
8012
8013         CorInfoTailCall tailCallResult;
8014
8015         if (fastTailCallToLoop)
8016         {
8017             tailCallResult = TAILCALL_RECURSIVE;
8018         }
8019         else if (canFastTailCall)
8020         {
8021             tailCallResult = TAILCALL_OPTIMIZED;
8022         }
8023         else
8024         {
8025             tailCallResult = TAILCALL_HELPER;
8026         }
8027
8028         // for non user funcs, we have no handles to report
8029         info.compCompHnd->reportTailCallDecision(nullptr,
8030                                                  (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8031                                                  isTailPrefixed, tailCallResult, nullptr);
8032
8033         // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
8034         // to avoid doing any extra work for the return value.
8035         call->gtType = TYP_VOID;
8036
8037 #ifdef DEBUG
8038         if (verbose)
8039         {
8040             printf("\nGTF_CALL_M_TAILCALL bit set for call ");
8041             printTreeID(call);
8042             printf("\n");
8043             if (fastTailCallToLoop)
8044             {
8045                 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8046                 printTreeID(call);
8047                 printf("\n");
8048             }
8049         }
8050 #endif
8051
8052         GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
8053
8054 #ifdef DEBUG
8055         // Tail call needs to be in one of the following IR forms
8056         //    Either a call stmt or
8057         //    GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8058         //    var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8059         //    GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8060         // In the above,
8061         //    GT_CASTS may be nested.
8062         genTreeOps stmtOper = stmtExpr->gtOper;
8063         if (stmtOper == GT_CALL)
8064         {
8065             noway_assert(stmtExpr == call);
8066         }
8067         else
8068         {
8069             noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8070             GenTreePtr treeWithCall;
8071             if (stmtOper == GT_RETURN)
8072             {
8073                 treeWithCall = stmtExpr->gtGetOp1();
8074             }
8075             else if (stmtOper == GT_COMMA)
8076             {
8077                 // Second operation must be nop.
8078                 noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
8079                 treeWithCall = stmtExpr->gtGetOp1();
8080             }
8081             else
8082             {
8083                 treeWithCall = stmtExpr->gtGetOp2();
8084             }
8085
8086             // Peel off casts
8087             while (treeWithCall->gtOper == GT_CAST)
8088             {
8089                 noway_assert(!treeWithCall->gtOverflow());
8090                 treeWithCall = treeWithCall->gtGetOp1();
8091             }
8092
8093             noway_assert(treeWithCall == call);
8094         }
8095 #endif
8096
8097         // For void calls, we would have created a GT_CALL in the stmt list.
8098         // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
8099         // For calls returning structs, we would have a void call, followed by a void return.
8100         // For debuggable code, it would be an assignment of the call to a temp.
8101         // We want to get rid of any of these extra trees, and just leave
8102         // the call.
8103         GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8104
8105 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
8106         // Legacy Jit64 Compat:
8107         // There could be any number of GT_NOPs between tail call and GT_RETURN.
8108         // That is, the tail call pattern could be one of the following:
8109         //  1) tail.call, nop*, ret
8110         //  2) tail.call, nop*, pop, nop*, ret
8111         //  3) var=tail.call, nop*, ret(var)
8112         //  4) var=tail.call, nop*, pop, ret
8113         //  5) comma(tail.call, nop), nop*, ret
8114         //
8115         // See impIsTailCallILPattern() for details on tail call IL patterns
8116         // that are supported.
8117         if (stmtExpr->gtOper != GT_RETURN)
8118         {
8119             // First delete all GT_NOPs after the call
8120             GenTreeStmt* morphStmtToRemove = nullptr;
8121             while (nextMorphStmt != nullptr)
8122             {
8123                 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8124                 if (!nextStmtExpr->IsNothingNode())
8125                 {
8126                     break;
8127                 }
8128
8129                 morphStmtToRemove = nextMorphStmt;
8130                 nextMorphStmt     = morphStmtToRemove->gtNextStmt;
8131                 fgRemoveStmt(compCurBB, morphStmtToRemove);
8132             }
8133
8134             // Check to see if there is a pop.
8135             // Since tail call is honored, we can get rid of the stmt corresponding to pop.
8136             if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
8137             {
8138                 // Note that the pop opcode may or may not result in a new stmt (for details see
8139                 // impImportBlockCode()). Hence, it is not possible to assert about the IR
8140                 // form generated by pop, but the pop tree must be side-effect free so that we
8141                 // can delete it safely.
8142                 GenTreeStmt* popStmt = nextMorphStmt;
8143                 nextMorphStmt        = nextMorphStmt->gtNextStmt;
8144
8145                 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
8146                 // the constituent nodes.
8147                 GenTreePtr popExpr          = popStmt->gtStmtExpr;
8148                 bool       isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
8149                 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
8150                 {
8151                     isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
8152                                        ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
8153                 }
8154                 noway_assert(isSideEffectFree);
8155                 fgRemoveStmt(compCurBB, popStmt);
8156             }
8157
8158             // Next delete any GT_NOP nodes after pop
8159             while (nextMorphStmt != nullptr)
8160             {
8161                 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
8162                 if (!nextStmtExpr->IsNothingNode())
8163                 {
8164                     break;
8165                 }
8166
8167                 morphStmtToRemove = nextMorphStmt;
8168                 nextMorphStmt     = morphStmtToRemove->gtNextStmt;
8169                 fgRemoveStmt(compCurBB, morphStmtToRemove);
8170             }
8171         }
8172 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
8173
8174         // Delete the GT_RETURN, if any.
8175         if (nextMorphStmt != nullptr)
8176         {
8177             GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
8178             noway_assert(retExpr->gtOper == GT_RETURN);
8179
8180             // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
8181             // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
8182             if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
8183             {
8184                 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
8185
8186                 GenTreePtr treeWithLcl = retExpr->gtGetOp1();
8187                 while (treeWithLcl->gtOper == GT_CAST)
8188                 {
8189                     noway_assert(!treeWithLcl->gtOverflow());
8190                     treeWithLcl = treeWithLcl->gtGetOp1();
8191                 }
8192
8193                 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
8194                              treeWithLcl->AsLclVarCommon()->gtLclNum);
8195             }
8196
8197             fgRemoveStmt(compCurBB, nextMorphStmt);
8198         }
8199
8200         fgMorphStmt->gtStmtExpr = call;
8201
8202         // Tail call via helper: The VM can't use return address hijacking if we're
8203         // not going to return, and the helper doesn't have enough info to safely poll,
8204         // so we poll before the tail call if the block isn't already safe.  Since
8205         // tail call via helper is a slow mechanism it doesn't matter whether we emit a
8206         // GC poll.  This is done to be in parity with Jit64. Also this avoids GC info
8207         // size increase if almost all methods are expected to be tail calls (e.g. F#).
8208         //
8209         // Note that we can avoid emitting a GC poll if we know that the current BB is
8210         // dominated by a GC-SafePoint block.  But we don't have dominator info at this
8211         // point.  One option is to just add a placeholder node for the GC poll (e.g. GT_GCPOLL)
8212         // here and remove it in lowering if the block is dominated by a GC-SafePoint.  For
8213         // now it is not clear whether optimizing slow tail calls is worth the effort.  As a
8214         // low-cost check, we check whether the first and current basic blocks are
8215         // GC-SafePoints.
8216         //
8217         // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
8218         // is going to mark the method as fully interruptible if the block containing this tail
8219         // call is reachable without executing any call.
8220         if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8221             !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8222         {
8223             // We didn't insert a poll block, so we need to morph the call now
8224             // (Normally it will get morphed when we get to the split poll block)
8225             GenTreePtr temp = fgMorphCall(call);
8226             noway_assert(temp == call);
8227         }
8228
8229         // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8230         // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8231         //
8232         // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8233         // hence mark it as BBJ_RETURN with BBF_JMP flag set.
8234         noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8235
8236         if (canFastTailCall)
8237         {
8238             compCurBB->bbFlags |= BBF_HAS_JMP;
8239         }
8240         else
8241         {
8242             compCurBB->bbJumpKind = BBJ_THROW;
8243         }
8244
8245         // For non-void calls, we return a placeholder which will be
8246         // used by the parent GT_RETURN node of this call.
8247
8248         GenTree* result = call;
8249         if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8250         {
8251 #ifdef FEATURE_HFA
8252             // Return a dummy node, as the return is already removed.
8253             if (callType == TYP_STRUCT)
8254             {
8255                 // This is a HFA, use float 0.
8256                 callType = TYP_FLOAT;
8257             }
8258 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8259             // Return a dummy node, as the return is already removed.
8260             if (varTypeIsStruct(callType))
8261             {
8262                 // This is a register-returned struct. Return a 0.
8263                 // The actual return registers are hacked in lower and the register allocator.
8264                 callType = TYP_INT;
8265             }
8266 #endif
8267 #ifdef FEATURE_SIMD
8268             // Return a dummy node, as the return is already removed.
8269             if (varTypeIsSIMD(callType))
8270             {
8271                 callType = TYP_DOUBLE;
8272             }
8273 #endif
8274             result = gtNewZeroConNode(genActualType(callType));
8275             result = fgMorphTree(result);
8276         }
8277
8278         return result;
8279     }
8280
8281 NO_TAIL_CALL:
8282
8283     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8284         (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8285 #ifdef FEATURE_READYTORUN_COMPILER
8286          || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8287 #endif
8288              ) &&
8289         (call == fgMorphStmt->gtStmtExpr))
8290     {
8291         // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8292         // Transform it into a null check.
8293
8294         GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
8295
8296         GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8297         nullCheck->gtFlags |= GTF_EXCEPT;
8298
8299         return fgMorphTree(nullCheck);
8300     }
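    // Illustrative IR sketch of the transformation above (shapes approximate):
    //
    //     CALL help CORINFO_HELP_VIRTUAL_FUNC_PTR(thisPtr, ...)   // result unused
    //
    // becomes
    //
    //     GT_IND<TYP_I_IMPL>(thisPtr)                             // GTF_EXCEPT set
    //
    // preserving only the NullReferenceException the helper would raise for a null 'this'.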
8301
8302     noway_assert(call->gtOper == GT_CALL);
8303
8304     //
8305     // Only count calls once (only in the global morph phase)
8306     //
8307     if (fgGlobalMorph)
8308     {
8309         if (call->gtCallType == CT_INDIRECT)
8310         {
8311             optCallCount++;
8312             optIndirectCallCount++;
8313         }
8314         else if (call->gtCallType == CT_USER_FUNC)
8315         {
8316             optCallCount++;
8317             if (call->IsVirtual())
8318             {
8319                 optIndirectCallCount++;
8320             }
8321         }
8322     }
8323
8324     // Couldn't inline - remember that this BB contains method calls
8325
8326     // If this is a 'regular' call, mark the basic block as
8327     // having a call (for computing full interruptibility).
8328     CLANG_FORMAT_COMMENT_ANCHOR;
8329
8330     if (IsGcSafePoint(call))
8331     {
8332         compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8333     }
8334
8335     // Morph Type.op_Equality and Type.op_Inequality
8336     // We need to do this before the arguments are morphed
8337     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8338     {
8339         CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
8340
8341         genTreeOps simpleOp = GT_CALL;
8342         if (methodID == CORINFO_INTRINSIC_TypeEQ)
8343         {
8344             simpleOp = GT_EQ;
8345         }
8346         else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
8347         {
8348             simpleOp = GT_NE;
8349         }
8350
8351         if (simpleOp == GT_EQ || simpleOp == GT_NE)
8352         {
8353             noway_assert(call->TypeGet() == TYP_INT);
8354
8355             // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType
8356             // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
8357             // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
8358             // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
8359             // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
8360             // optimization below.
8361
8362             GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
8363             GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
8364
8365             if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
8366             {
8367                 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
8368
8369                 // fgMorphSmpOp will further optimize the following patterns:
8370                 //  1. typeof(...) == typeof(...)
8371                 //  2. typeof(...) == obj.GetType()
8372                 return fgMorphTree(compare);
8373             }
8374         }
8375     }
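    // For example (illustrative C#): in
    //
    //     if (obj.GetType() == typeof(string)) { ... }
    //
    // the op_Equality call is replaced by a GT_EQ over the two RuntimeType references,
    // which fgMorphSmpOp can then fold further as noted above.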
8376
8377     // Make sure that return buffers for structs containing GC pointers (that aren't too large) are pointers into the stack.
8378     GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
8379                                    // copy-back).
8380     unsigned             retValTmpNum = BAD_VAR_NUM;
8381     CORINFO_CLASS_HANDLE structHnd    = nullptr;
8382     if (call->HasRetBufArg() &&
8383         call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8384     {
8385         // We're enforcing the invariant that return buffer pointers (at least for
8386         // struct return types containing GC pointers) are never pointers into the heap.
8387         // The large majority of cases are address of local variables, which are OK.
8388         // Otherwise, allocate a local of the given struct type, pass its address,
8389         // then assign from that into the proper destination.  (We don't need to do this
8390         // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8391         // will maintain the same invariant.)
8392
8393         GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
8394         assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8395         if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8396         {
8397             // We'll exempt helper calls from this, assuming that the helper implementation
8398             // follows the old convention, and does whatever barrier is required.
8399             if (call->gtCallType != CT_HELPER)
8400             {
8401                 structHnd = call->gtRetClsHnd;
8402                 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8403                     !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
8404                       dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8405                 {
8406                     origDest = dest;
8407
8408                     retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8409                     lvaSetStruct(retValTmpNum, structHnd, true);
8410                     dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8411                 }
8412             }
8413         }
8414
8415         call->gtCallArgs->gtOp.gtOp1 = dest;
8416     }
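    // Illustrative sketch of the substitution above (hypothetical example): for
    // 'this.field = MakeStruct();' where 'field' lives on the GC heap, the call is
    // effectively rewritten as
    //
    //     tmp = MakeStruct();   // 'tmp' is the new stack local; &tmp becomes the ret buff arg
    //     this.field = tmp;     // copy-back, built from 'origDest' below via gtNewCpObjNode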
8417
8418     /* Process the "normal" argument list */
8419     call = fgMorphArgs(call);
8420     noway_assert(call->gtOper == GT_CALL);
8421
8422     // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
8423     // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
8424     if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8425     {
8426         GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8427         if (value->IsIntegralConst(0))
8428         {
8429             assert(value->OperGet() == GT_CNS_INT);
8430
8431             GenTree* arr   = gtArgEntryByArgNum(call, 0)->node;
8432             GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8433
8434             // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8435             // the spill trees as well if necessary.
8436             GenTreeOp* argSetup = nullptr;
8437             for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8438             {
8439                 GenTree* const arg = earlyArgs->Current();
8440                 if (arg->OperGet() != GT_ASG)
8441                 {
8442                     continue;
8443                 }
8444
8445                 assert(arg != arr);
8446                 assert(arg != index);
8447
8448                 arg->gtFlags &= ~GTF_LATE_ARG;
8449
8450                 GenTree* op1 = argSetup;
8451                 if (op1 == nullptr)
8452                 {
8453                     op1 = gtNewNothingNode();
8454 #if DEBUG
8455                     op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8456 #endif // DEBUG
8457                 }
8458
8459                 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8460
8461 #if DEBUG
8462                 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8463 #endif // DEBUG
8464             }
8465
8466 #ifdef DEBUG
8467             auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8468                 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8469                 return WALK_CONTINUE;
8470             };
8471
8472             fgWalkTreePost(&arr, resetMorphedFlag);
8473             fgWalkTreePost(&index, resetMorphedFlag);
8474             fgWalkTreePost(&value, resetMorphedFlag);
8475 #endif // DEBUG
8476
8477             GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8478             GenTree* const arrIndexNode   = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8479             GenTree* const arrStore       = gtNewAssignNode(arrIndexNode, value);
8480             arrStore->gtFlags |= GTF_ASG;
8481
8482             GenTree* result = fgMorphTree(arrStore);
8483             if (argSetup != nullptr)
8484             {
8485                 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8486 #if DEBUG
8487                 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8488 #endif // DEBUG
8489             }
8490
8491             return result;
8492         }
8493     }
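    // For example (illustrative): 'arr[i] = null;' on a 'string[] arr' is imported as
    // CALL CORINFO_HELP_ARRADDR_ST(arr, i, null) and is rewritten here to a plain
    // GT_ASG(GT_INDEX(arr, i), 0), since a null store can never fail the array
    // covariance check that the helper exists to perform.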
8494
8495     // Optimize get_ManagedThreadId(get_CurrentThread)
8496     if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8497         info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8498     {
8499         noway_assert(origDest == nullptr);
8500         noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8501
8502         GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8503
8504         if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8505             info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8506                 CORINFO_INTRINSIC_GetCurrentManagedThread)
8507         {
8508             // substitute expression with call to helper
8509             GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
8510             JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8511             return fgMorphTree(newCall);
8512         }
8513     }
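    // In source terms (illustrative): 'Thread.CurrentThread.ManagedThreadId' collapses
    // from two intrinsic calls into the single helper call
    // CORINFO_HELP_GETCURRENTMANAGEDTHREADID.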
8514
8515     if (origDest != nullptr)
8516     {
8517         GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8518         // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8519         // var, which would allow the whole assignment to be optimized away to a NOP.  So in that case, make the
8520         // origDest into a comma that uses the var.  Note that the var doesn't have to be a temp for this to
8521         // be correct.
8522         if (origDest->OperGet() == GT_ASG)
8523         {
8524             if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8525             {
8526                 GenTreePtr var = origDest->gtOp.gtOp1;
8527                 origDest       = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8528                                          gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8529             }
8530         }
8531         GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8532         copyBlk            = fgMorphTree(copyBlk);
8533         GenTree* result    = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8534 #ifdef DEBUG
8535         result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8536 #endif
8537         return result;
8538     }
8539
8540     if (call->IsNoReturn())
8541     {
8542         //
8543         // If we know that the call does not return then we can set fgRemoveRestOfBlock
8544         // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8545         // As a result the compiler won't need to preserve live registers across the call.
8546         //
8547         // This isn't needed for tail calls, as there shouldn't be any code after the call anyway.
8548         // Besides, the tail call code is part of the epilog and converting the block to
8549         // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8550         // only for BBJ_RETURN blocks.
8551         //
8552         // Currently this doesn't work for non-void callees. Some of the code that handles
8553         // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8554         // do not have this flag by default. We could add the flag here but the proper solution
8555         // would be to replace the return expression with a local var node during inlining
8556         // so the rest of the call tree stays in a separate statement. That statement can then
8557         // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8558         //
8559
8560         if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8561         {
8562             fgRemoveRestOfBlock = true;
8563         }
8564     }
8565
8566     return call;
8567 }
8568
8569 /*****************************************************************************
8570  *
8571  *  Transform the given GTK_CONST tree for code generation.
8572  */
8573
8574 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
8575 {
8576     assert(tree->OperKind() & GTK_CONST);
8577
8578     /* Clear any exception flags or other unnecessary flags
8579      * that may have been set before folding this node to a constant */
8580
8581     tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8582
8583     if (tree->OperGet() != GT_CNS_STR)
8584     {
8585         return tree;
8586     }
8587
8588     // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8589     // guarantee slow performance for that block. Instead, cache the return value
8590     // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
8591
8592     if (compCurBB->bbJumpKind == BBJ_THROW)
8593     {
8594         CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8595         if (helper != CORINFO_HELP_UNDEF)
8596         {
8597             // For unimportant blocks, we want to construct the string lazily
8598
8599             GenTreeArgList* args;
8600             if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8601             {
8602                 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8603             }
8604             else
8605             {
8606                 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8607                                     gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8608             }
8609
8610             tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
8611             return fgMorphTree(tree);
8612         }
8613     }
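    // For example (illustrative): a literal referenced only on a throw path, as in
    //
    //     throw new ArgumentException("bad value");
    //
    // is materialized through the lazy helper at the (rarely executed) throw site
    // rather than being constructed up front.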
8614
8615     assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8616
8617     LPVOID         pValue;
8618     InfoAccessType iat =
8619         info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8620
8621     tree = gtNewStringLiteralNode(iat, pValue);
8622
8623     return fgMorphTree(tree);
8624 }
8625
8626 /*****************************************************************************
8627  *
8628  *  Transform the given GTK_LEAF tree for code generation.
8629  */
8630
8631 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
8632 {
8633     assert(tree->OperKind() & GTK_LEAF);
8634
8635     if (tree->gtOper == GT_LCL_VAR)
8636     {
8637         const bool forceRemorph = false;
8638         return fgMorphLocalVar(tree, forceRemorph);
8639     }
8640 #ifdef _TARGET_X86_
8641     else if (tree->gtOper == GT_LCL_FLD)
8642     {
8643         if (info.compIsVarArgs)
8644         {
8645             GenTreePtr newTree =
8646                 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8647             if (newTree != nullptr)
8648             {
8649                 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8650                 {
8651                     fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8652                 }
8653                 return newTree;
8654             }
8655         }
8656     }
8657 #endif // _TARGET_X86_
8658     else if (tree->gtOper == GT_FTN_ADDR)
8659     {
8660         CORINFO_CONST_LOOKUP addrInfo;
8661
8662 #ifdef FEATURE_READYTORUN_COMPILER
8663         if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8664         {
8665             addrInfo = tree->gtFptrVal.gtEntryPoint;
8666         }
8667         else
8668 #endif
8669         {
8670             info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8671         }
8672
8673         // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8674         //
8675         tree->SetOper(GT_CNS_INT);
8676         tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8677         tree->gtFlags |= GTF_ICON_FTN_ADDR;
8678
8679         switch (addrInfo.accessType)
8680         {
8681             case IAT_PPVALUE:
8682                 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8683                 tree->gtFlags |= GTF_IND_INVARIANT;
8684
8685                 __fallthrough;
8686
8687             case IAT_PVALUE:
8688                 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8689                 break;
8690
8691             case IAT_VALUE:
8692                 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8693                 break;
8694
8695             default:
8696                 noway_assert(!"Unknown addrInfo.accessType");
8697         }
8698
8699         return fgMorphTree(tree);
8700     }
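    // Summary of the access types handled above (shapes approximate):
    //   IAT_VALUE   - the address is known and embedded directly (wrapped in GT_NOP
    //                 to prevent constant folding);
    //   IAT_PVALUE  - the address lives in one indirection cell: GT_IND(cns);
    //   IAT_PPVALUE - two levels: GT_IND(GT_IND(cns)), the inner load invariant.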
8701
8702     return tree;
8703 }
8704
8705 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
8706 {
8707     GenTreeLclVarCommon* lclVarCmnTree;
8708     bool                 isEntire = false;
8709     if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
8710     {
8711         if (isEntire)
8712         {
8713             lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8714         }
8715         else
8716         {
8717             // We consider partial definitions to be modeled as uses followed by definitions.
8718             // This captures the idea that preceding defs are not necessarily made redundant
8719             // by this definition.
8720             lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8721         }
8722     }
8723 }
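// For illustration of fgAssignSetVarDef above (hypothetical example): storing to a
// 4-byte field of a 16-byte struct local is a partial definition and is tagged
// GTF_VAR_DEF | GTF_VAR_USEASG, while overwriting the whole struct is an entire
// definition and gets GTF_VAR_DEF alone.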
8724
8725 //------------------------------------------------------------------------
8726 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8727 //
8728 // Arguments:
8729 //    tree - The block assignment to be possibly morphed
8730 //
8731 // Return Value:
8732 //    The modified tree if successful, nullptr otherwise.
8733 //
8734 // Assumptions:
8735 //    'tree' must be a block assignment.
8736 //
8737 // Notes:
8738 //    If successful, this method always returns the incoming tree, modifying only
8739 //    its arguments.
8740
8741 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
8742 {
8743     // This must be a block assignment.
8744     noway_assert(tree->OperIsBlkOp());
8745     var_types asgType = tree->TypeGet();
8746
8747     GenTreePtr asg         = tree;
8748     GenTreePtr dest        = asg->gtGetOp1();
8749     GenTreePtr src         = asg->gtGetOp2();
8750     unsigned   destVarNum  = BAD_VAR_NUM;
8751     LclVarDsc* destVarDsc  = nullptr;
8752     GenTreePtr lclVarTree  = nullptr;
8753     bool       isCopyBlock = asg->OperIsCopyBlkOp();
8754     bool       isInitBlock = !isCopyBlock;
8755
8756     unsigned             size;
8757     CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8758 #ifdef FEATURE_SIMD
8759     // The importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD).
8760     // The SIMD type in question could be Vector2f, which is 8 bytes in size.
8761     // The check below makes sure that we don't turn that copyblk
8762     // into an assignment, since the rationalizer logic will transform the
8763     // copyblk appropriately. Otherwise, the transformation made in this
8764     // routine would defeat the rationalizer logic and we might end up with a
8765     // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
8766     // in codegen.
8767     // TODO-1stClassStructs: This is here to preserve old behavior.
8768     // It should be eliminated.
8769     if (src->OperGet() == GT_SIMD)
8770     {
8771         return nullptr;
8772     }
8773 #endif
8774
8775     if (dest->gtEffectiveVal()->OperIsBlk())
8776     {
8777         GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8778         size               = lhsBlk->Size();
8779         if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
8780         {
8781             destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8782             destVarDsc = &(lvaTable[destVarNum]);
8783         }
8784         if (lhsBlk->OperGet() == GT_OBJ)
8785         {
8786             clsHnd = lhsBlk->AsObj()->gtClass;
8787         }
8788     }
8789     else
8790     {
8791         // Is this an enregisterable struct that is already a simple assignment?
8792         // This can happen if we are re-morphing.
8793         if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8794         {
8795             return tree;
8796         }
8797         noway_assert(dest->OperIsLocal());
8798         lclVarTree = dest;
8799         destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8800         destVarDsc = &(lvaTable[destVarNum]);
8801         if (isCopyBlock)
8802         {
8803             clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8804             size   = info.compCompHnd->getClassSize(clsHnd);
8805         }
8806         else
8807         {
8808             size = destVarDsc->lvExactSize;
8809         }
8810     }
8811
8812     //
8813     //  See if we can do a simple transformation:
8814     //
8815     //          GT_ASG <TYP_size>
8816     //          /   \
8817     //      GT_IND GT_IND or CNS_INT
8818     //         |      |
8819     //       [dest] [src]
8820     //
8821
8822     if (size == REGSIZE_BYTES)
8823     {
8824         if (clsHnd == NO_CLASS_HANDLE)
8825         {
8826             // A register-sized cpblk can be treated as an integer assignment.
8827             asgType = TYP_I_IMPL;
8828         }
8829         else
8830         {
8831             BYTE gcPtr;
8832             info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8833             asgType = getJitGCType(gcPtr);
8834         }
8835     }
8836     else
8837     {
8838         switch (size)
8839         {
8840             case 1:
8841                 asgType = TYP_BYTE;
8842                 break;
8843             case 2:
8844                 asgType = TYP_SHORT;
8845                 break;
8846
8847 #ifdef _TARGET_64BIT_
8848             case 4:
8849                 asgType = TYP_INT;
8850                 break;
8851 #endif // _TARGET_64BIT_
8852         }
8853     }
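    // For example (illustrative): on a 64-bit target, a REGSIZE_BYTES copyblk whose
    // class layout reports a GC pointer becomes a TYP_REF assignment, while a 2-byte
    // block becomes TYP_SHORT and a 1-byte block TYP_BYTE.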
8854
8855     // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
8856     if (!varTypeIsStruct(asgType))
8857     {
8858         // For initBlk, a non-constant source is not going to allow us to fiddle
8859         // with the bits to create a single assignment.
8860         noway_assert(size <= REGSIZE_BYTES);
8861
8862         if (isInitBlock && !src->IsConstInitVal())
8863         {
8864             return nullptr;
8865         }
8866
8867         if (destVarDsc != nullptr)
8868         {
8869 #if LOCAL_ASSERTION_PROP
8870             // Kill everything about dest
8871             if (optLocalAssertionProp)
8872             {
8873                 if (optAssertionCount > 0)
8874                 {
8875                     fgKillDependentAssertions(destVarNum DEBUGARG(tree));
8876                 }
8877             }
8878 #endif // LOCAL_ASSERTION_PROP
8879
8880             // A previous incarnation of this code also required the local not to be
8881             // address-exposed (=taken).  That seems orthogonal to the decision of whether
8882             // to do field-wise assignments: being address-exposed will cause it to be
8883             // "dependently" promoted, so it will be in the right memory location.  One possible
8884             // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8885             // holes, whose contents could be meaningful in unsafe code.  If we decide that's a valid concern,
8886             // then we could compromise and say that being address-exposed plus having fields that do not
8887             // completely cover the struct's memory prevents field-wise assignments.  The same situation exists for the "src" decision.
8888             if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
8889             {
8890                 // Let fgMorphInitBlock handle it.  (Since we'll need to do field-var-wise assignments.)
8891                 return nullptr;
8892             }
8893             else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
8894             {
8895                 // Use the dest local var directly, as well as its type.
8896                 dest    = lclVarTree;
8897                 asgType = destVarDsc->lvType;
8898
8899                 // If the block operation had been a write to a local var of a small int type,
8900                 // of the exact size of the small int type, and the var is NormalizeOnStore,
8901                 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8902                 // have done that normalization.  If we're now making it into an assignment,
8903                 // the NormalizeOnStore will work, and it can be a full def.
8904                 if (destVarDsc->lvNormalizeOnStore())
8905                 {
8906                     dest->gtFlags &= (~GTF_VAR_USEASG);
8907                 }
8908             }
8909             else
8910             {
8911                 // Could be a non-promoted struct, or a floating point type local, or
8912                 // an int subject to a partial write.  Don't enregister.
8913                 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
8914
8915                 // Mark the local var tree as a definition point of the local.
8916                 lclVarTree->gtFlags |= GTF_VAR_DEF;
8917                 if (size < destVarDsc->lvExactSize)
8918                 { // If it's not a full-width assignment....
8919                     lclVarTree->gtFlags |= GTF_VAR_USEASG;
8920                 }
8921
8922                 if (dest == lclVarTree)
8923                 {
8924                     dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
8925                 }
8926             }
8927         }
8928
8929         // Check to ensure we don't have a reducible *(& ... )
8930         if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
8931         {
8932             GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
8933             // Ignore reinterpret casts between int/gc
8934             if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
8935             {
8936                 dest    = addrOp;
8937                 asgType = addrOp->TypeGet();
8938             }
8939         }
8940
8941         if (dest->gtEffectiveVal()->OperIsIndir())
8942         {
8943             // If we have no information about the destination, we have to assume it could
8944             // live anywhere (not just in the GC heap).
8945             // Mark the GT_IND node so that we use the correct write barrier helper in case
8946             // the field is a GC ref.
8947
8948             if (!fgIsIndirOfAddrOfLocal(dest))
8949             {
8950                 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8951                 tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8952             }
8953         }
8954
8955         LclVarDsc* srcVarDsc = nullptr;
8956         if (isCopyBlock)
8957         {
8958             if (src->OperGet() == GT_LCL_VAR)
8959             {
8960                 lclVarTree = src;
8961                 srcVarDsc  = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
8962             }
8963             else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
8964             {
8965                 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
8966             }
8967             if (srcVarDsc != nullptr)
8968             {
8969                 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
8970                 {
8971                     // Let fgMorphCopyBlock handle it.
8972                     return nullptr;
8973                 }
8974                 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8975                          size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8976                 {
8977                     // Use the src local var directly.
8978                     src = lclVarTree;
8979                 }
8980                 else
8981                 {
8982 #ifndef LEGACY_BACKEND
8983
8984                     // The source argument of the copyblk can potentially
8985                     // be accessed only through indir(addr(lclVar))
8986                     // or indir(lclVarAddr) in rational form and liveness
8987                     // won't account for these uses. Hence,
8988                     // we have to mark this local as address exposed so
8989                     // we don't delete it as a dead store later on.
8990                     unsigned lclVarNum                = lclVarTree->gtLclVarCommon.gtLclNum;
8991                     lvaTable[lclVarNum].lvAddrExposed = true;
8992                     lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8993
8994 #else  // LEGACY_BACKEND
8995                     lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8996 #endif // LEGACY_BACKEND
8997                     GenTree* srcAddr;
8998                     if (src == lclVarTree)
8999                     {
9000                         srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
9001                         src     = gtNewOperNode(GT_IND, asgType, srcAddr);
9002                     }
9003                     else
9004                     {
9005                         assert(src->OperIsIndir());
9006                     }
9007                 }
9008             }
9009             // If we have no information about the src, we have to assume it could
9010             // live anywhere (not just in the GC heap).
9011             // Mark the GT_IND node so that we use the correct write barrier helper in case
9012             // the field is a GC ref.
9013
9014             if (!fgIsIndirOfAddrOfLocal(src))
9015             {
9016                 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9017             }
9018         }
9019         else
9020         {
9021 // InitBlk
9022 #if FEATURE_SIMD
9023             if (varTypeIsSIMD(asgType))
9024             {
9025                 assert(!isCopyBlock); // Else we would have returned the tree above.
9026                 noway_assert(src->IsIntegralConst(0));
9027                 noway_assert(destVarDsc != nullptr);
9028
9029                 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
9030                 tree->gtOp.gtOp2 = src;
9031                 return tree;
9032             }
9033             else
9034 #endif
9035             {
9036                 if (src->OperIsInitVal())
9037                 {
9038                     src = src->gtGetOp1();
9039                 }
9040                 assert(src->IsCnsIntOrI());
9041                 // This will mutate the integer constant, in place, to be the correct
9042                 // value for the type we are using in the assignment.
9043                 src->AsIntCon()->FixupInitBlkValue(asgType);
9044             }
9045         }
9046
9047         // Ensure that the dest is setup appropriately.
9048         if (dest->gtEffectiveVal()->OperIsIndir())
9049         {
9050             dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
9051         }
9052
9053         // Ensure that the rhs is setup appropriately.
9054         if (isCopyBlock)
9055         {
9056             src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
9057         }
9058
9059         // Set the lhs and rhs on the assignment.
9060         if (dest != tree->gtOp.gtOp1)
9061         {
9062             asg->gtOp.gtOp1 = dest;
9063         }
9064         if (src != asg->gtOp.gtOp2)
9065         {
9066             asg->gtOp.gtOp2 = src;
9067         }
9068
9069         asg->ChangeType(asgType);
9070         dest->gtFlags |= GTF_DONT_CSE;
9071         asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9072         // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9073         asg->gtFlags &= ~GTF_REVERSE_OPS;
9074
9075 #ifdef DEBUG
9076         if (verbose)
9077         {
9078             printf("fgMorphOneAsgBlockOp (after):\n");
9079             gtDispTree(tree);
9080         }
9081 #endif
9082         return tree;
9083     }
9084
9085     return nullptr;
9086 }
9087
9088 //------------------------------------------------------------------------
9089 // fgMorphInitBlock: Perform the Morphing of a block initialization assignment
9090 //
9091 // Arguments:
9092 //    tree - a GT_ASG tree that performs a block initialization (OperIsInitBlkOp() is true);
9093 //           the child nodes for tree have already been Morphed
9094 //
9095 // Return Value:
9096 //    We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9097 //    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
9098 //    If we have performed struct promotion of the Dest() then we will try to
9099 //    perform a field by field assignment for each of the promoted struct fields
9100 //
9101 // Notes:
9102 //    If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
9103 //    if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9104 //    cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
9105
9106 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
9107 {
9108     // We must have the GT_ASG form of InitBlkOp.
9109     noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9110 #ifdef DEBUG
9111     bool morphed = false;
9112 #endif // DEBUG
9113
9114     GenTree* asg      = tree;
9115     GenTree* src      = tree->gtGetOp2();
9116     GenTree* origDest = tree->gtGetOp1();
9117
9118     GenTree* dest = fgMorphBlkNode(origDest, true);
9119     if (dest != origDest)
9120     {
9121         tree->gtOp.gtOp1 = dest;
9122     }
9123     tree->gtType = dest->TypeGet();
9124     // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT; its
9125     // type will still be the type of the original lclVar, so in that case we change it to TYP_INT).
9126     if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9127     {
9128         src->gtType = TYP_INT;
9129     }
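     // (E.g. constant propagation can turn "dest = localStructVar" into "dest = CNS_INT 0"
     // still typed as the struct; retyping the constant to TYP_INT gives the InitBlock a
     // sane scalar init value.)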
9130     JITDUMP("\nfgMorphInitBlock:");
9131
9132     GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9133     if (oneAsgTree)
9134     {
9135         JITDUMP(" using oneAsgTree.\n");
9136         tree = oneAsgTree;
9137     }
9138     else
9139     {
9140         GenTree*             destAddr          = nullptr;
9141         GenTree*             initVal           = src->OperIsInitVal() ? src->gtGetOp1() : src;
9142         GenTree*             blockSize         = nullptr;
9143         unsigned             blockWidth        = 0;
9144         FieldSeqNode*        destFldSeq        = nullptr;
9145         LclVarDsc*           destLclVar        = nullptr;
9146         bool                 destDoFldAsg      = false;
9147         unsigned             destLclNum        = BAD_VAR_NUM;
9148         bool                 blockWidthIsConst = false;
9149         GenTreeLclVarCommon* lclVarTree        = nullptr;
9150         if (dest->IsLocal())
9151         {
9152             lclVarTree = dest->AsLclVarCommon();
9153         }
9154         else
9155         {
9156             if (dest->OperIsBlk())
9157             {
9158                 destAddr   = dest->AsBlk()->Addr();
9159                 blockWidth = dest->AsBlk()->gtBlkSize;
9160             }
9161             else
9162             {
9163                 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
9164                 destAddr   = dest->gtGetOp1();
9165                 blockWidth = genTypeSize(dest->TypeGet());
9166             }
9167         }
9168         if (lclVarTree != nullptr)
9169         {
9170             destLclNum        = lclVarTree->gtLclNum;
9171             destLclVar        = &lvaTable[destLclNum];
9172             blockWidth        = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
9173             blockWidthIsConst = true;
9174         }
9175         else
9176         {
9177             if (dest->gtOper == GT_DYN_BLK)
9178             {
9179                 // The size must be an integer type
9180                 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
9181                 assert(varTypeIsIntegral(blockSize->gtType));
9182             }
9183             else
9184             {
9185                 assert(blockWidth != 0);
9186                 blockWidthIsConst = true;
9187             }
9188
9189             if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9190             {
9191                 destLclNum = lclVarTree->gtLclNum;
9192                 destLclVar = &lvaTable[destLclNum];
9193             }
9194         }
9195         if (destLclNum != BAD_VAR_NUM)
9196         {
9197 #if LOCAL_ASSERTION_PROP
9198             // Kill everything about destLclNum (and its field locals)
9199             if (optLocalAssertionProp)
9200             {
9201                 if (optAssertionCount > 0)
9202                 {
9203                     fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9204                 }
9205             }
9206 #endif // LOCAL_ASSERTION_PROP
9207
9208             if (destLclVar->lvPromoted && blockWidthIsConst)
9209             {
9210                 assert(initVal->OperGet() == GT_CNS_INT);
9211                 noway_assert(varTypeIsStruct(destLclVar));
9212                 noway_assert(!opts.MinOpts());
9213                 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
9214                 {
9215                     JITDUMP(" dest is address exposed and contains holes");
9216                 }
9217                 else
9218                 {
9219                     if (blockWidth == destLclVar->lvExactSize)
9220                     {
9221                         JITDUMP(" (destDoFldAsg=true)");
9222                         // We may decide later that a copyblk is required when this struct has holes
9223                         destDoFldAsg = true;
9224                     }
9225                     else
9226                     {
9227                         JITDUMP(" with mismatched size");
9228                     }
9229                 }
9230             }
9231         }
9232
9233         // Can we use field by field assignment for the dest?
9234         if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9235         {
9236             JITDUMP(" dest contains holes");
9237             destDoFldAsg = false;
9238         }
9239
9240         JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9241
9242         // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9243         // we need to change it back.
9244         if (!destDoFldAsg && !dest->OperIsBlk())
9245         {
9246             noway_assert(blockWidth != 0);
9247             tree->gtOp.gtOp1 = origDest;
9248             tree->gtType     = origDest->gtType;
9249         }
9250
9251         if (!destDoFldAsg && (destLclVar != nullptr))
9252         {
9253             // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9254             if (!destLclVar->lvRegStruct)
9255             {
9256                 // Mark it as DoNotEnregister.
9257                 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9258             }
9259         }
9260
9261         // Mark the dest struct as DoNotEnreg
9262         // when it is a LclVar struct and we are keeping the InitBlock
9263         // or the struct is not promoted
9264         //
9265         if (!destDoFldAsg)
9266         {
9267 #if CPU_USES_BLOCK_MOVE
9268             compBlkOpUsed = true;
9269 #endif
9270             dest             = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9271             tree->gtOp.gtOp1 = dest;
9272             tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9273         }
9274         else
9275         {
9276             // The initVal must be a constant of TYP_INT
9277             noway_assert(initVal->OperGet() == GT_CNS_INT);
9278             noway_assert(genActualType(initVal->gtType) == TYP_INT);
9279
9280             // The dest must be of a struct type.
9281             noway_assert(varTypeIsStruct(destLclVar));
9282
9283             //
9284             // Now, convert InitBlock to individual assignments
9285             //
9286
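             // Roughly: for a promoted struct with fields (int a; double b) and initVal 0,
             // the loop below produces COMMA(ASG(V<a>.int, 0), ASG(V<b>.double, 0.0)), i.e.
             // one assignment per promoted field with the constant widened to the field's type.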
9287             tree = nullptr;
9288             INDEBUG(morphed = true);
9289
9290             GenTreePtr dest;
9291             GenTreePtr srcCopy;
9292             unsigned   fieldLclNum;
9293             unsigned   fieldCnt = destLclVar->lvFieldCnt;
9294
9295             for (unsigned i = 0; i < fieldCnt; ++i)
9296             {
9297                 fieldLclNum = destLclVar->lvFieldLclStart + i;
9298                 dest        = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9299
9300                 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9301                 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9302                 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9303
9304                 srcCopy = gtCloneExpr(initVal);
9305                 noway_assert(srcCopy != nullptr);
9306
9307                 // need type of oper to be same as tree
9308                 if (dest->gtType == TYP_LONG)
9309                 {
9310                     srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9311                     // copy and extend the value
9312                     srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9313                     /* Change the type of srcCopy to TYP_LONG */
9314                     srcCopy->gtType = TYP_LONG;
9315                 }
9316                 else if (varTypeIsFloating(dest->gtType))
9317                 {
9318                     srcCopy->ChangeOperConst(GT_CNS_DBL);
9319                     // setup the bit pattern
9320                     memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9321                            sizeof(srcCopy->gtDblCon.gtDconVal));
9322                     /* Change the type of srcCopy to TYP_DOUBLE */
9323                     srcCopy->gtType = TYP_DOUBLE;
9324                 }
9325                 else
9326                 {
9327                     noway_assert(srcCopy->gtOper == GT_CNS_INT);
9328                     noway_assert(srcCopy->TypeGet() == TYP_INT);
9329                     // setup the bit pattern
9330                     memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9331                            sizeof(srcCopy->gtIntCon.gtIconVal));
9332                 }
9333
9334                 srcCopy->gtType = dest->TypeGet();
9335
9336                 asg = gtNewAssignNode(dest, srcCopy);
9337
9338 #if LOCAL_ASSERTION_PROP
9339                 if (optLocalAssertionProp)
9340                 {
9341                     optAssertionGen(asg);
9342                 }
9343 #endif // LOCAL_ASSERTION_PROP
9344
9345                 if (tree)
9346                 {
9347                     tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9348                 }
9349                 else
9350                 {
9351                     tree = asg;
9352                 }
9353             }
9354         }
9355     }
9356
9357 #ifdef DEBUG
9358     if (morphed)
9359     {
9360         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9361
9362         if (verbose)
9363         {
9364             printf("fgMorphInitBlock (after):\n");
9365             gtDispTree(tree);
9366         }
9367     }
9368 #endif
9369
9370     return tree;
9371 }
9372
9373 //------------------------------------------------------------------------
9374 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9375 //
9376 // Arguments:
9377 //    tree - the node to be modified.
9378 //    type - the type of indirection to change it to.
9379 //
9380 // Return Value:
9381 //    Returns the node, modified in place.
9382 //
9383 // Notes:
9384 //    This doesn't really warrant a separate method, but is here to abstract
9385 //    the fact that these nodes can be modified in-place.
9386
9387 GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9388 {
9389     tree->SetOper(GT_IND);
9390     tree->gtType = type;
9391     return tree;
9392 }
9393
9394 //------------------------------------------------------------------------
9395 // fgMorphGetStructAddr: Gets the address of a struct object
9396 //
9397 // Arguments:
9398 //    pTree    - the parent's pointer to the struct object node
9399 //    clsHnd   - the class handle for the struct type
9400 //    isRValue - true if this is a source (not dest)
9401 //
9402 // Return Value:
9403 //    Returns the address of the struct value, possibly modifying the existing tree to
9404 //    sink the address below any comma nodes (this is to canonicalize for value numbering).
9405 //    If this is a source, it will morph it to an GT_IND before taking its address,
9406 //    since it may not be remorphed (and we don't want blk nodes as rvalues).
9407
9408 GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9409 {
9410     GenTree* addr;
9411     GenTree* tree = *pTree;
9412     // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9413     // need to hang onto that for the purposes of value numbering.
9414     if (tree->OperIsIndir())
9415     {
9416         if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9417         {
9418             addr = tree->gtOp.gtOp1;
9419         }
9420         else
9421         {
9422             if (isRValue && tree->OperIsBlk())
9423             {
9424                 tree->ChangeOper(GT_IND);
9425             }
9426             addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9427         }
9428     }
9429     else if (tree->gtOper == GT_COMMA)
9430     {
9431         // If this is a comma, we're going to "sink" the GT_ADDR below it.
9432         (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9433         tree->gtType = TYP_BYREF;
9434         addr         = tree;
9435     }
9436     else
9437     {
9438         switch (tree->gtOper)
9439         {
9440             case GT_LCL_FLD:
9441             case GT_LCL_VAR:
9442             case GT_INDEX:
9443             case GT_FIELD:
9444             case GT_ARR_ELEM:
9445                 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9446                 break;
9447             default:
9448             {
9449                 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9450                 // not going to use "temp"
9451                 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9452                 addr          = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9453                 break;
9454             }
9455         }
9456     }
9457     *pTree = addr;
9458     return addr;
9459 }
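     // For example (a sketch): a source value COMMA(sideEffects, LCL_VAR struct V03) becomes
     // COMMA(sideEffects, ADDR(LCL_VAR V03)) typed TYP_BYREF, so the address expression keeps
     // the comma shape that CSE and value numbering expect.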
9460
9461 //------------------------------------------------------------------------
9462 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9463 //
9464 // Arguments:
9465 //    tree   - The struct type node
9466 //    isDest - True if this is the destination of the assignment
9467 //
9468 // Return Value:
9469 //    Returns the possibly-morphed node. The caller is responsible for updating
9470 //    the parent of this node.
9471
9472 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
9473 {
9474     if (tree->gtOper == GT_COMMA)
9475     {
9476         GenTree* effectiveVal = tree->gtEffectiveVal();
9477         GenTree* addr         = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9478 #ifdef DEBUG
9479         addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9480 #endif
9481         // In order to CSE and value number array index expressions and bounds checks,
9482         // the commas in which they are contained need to match.
9483         // The pattern is that the COMMA should be the address expression.
9484         // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9485         // TODO-1stClassStructs: Consider whether this can be improved.
9486         // Also consider whether some of this can be included in gtNewBlockVal (though note
9487         // that doing so may cause us to query the type system before we otherwise would).
9488         GenTree* lastComma = nullptr;
9489         for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
9490         {
9491             next->gtType = TYP_BYREF;
9492             lastComma    = next;
9493         }
9494         if (lastComma != nullptr)
9495         {
9496             noway_assert(lastComma->gtGetOp2() == effectiveVal);
9497             lastComma->gtOp.gtOp2 = addr;
9498             addr                  = tree;
9499         }
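         // E.g. COMMA(sideEff, LCL_VAR struct V05) becomes OBJ(COMMA(sideEff, ADDR(V05)))
         // below, with each comma along the spine retyped to TYP_BYREF.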
9500         var_types structType = effectiveVal->TypeGet();
9501         if (structType == TYP_STRUCT)
9502         {
9503             CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
9504             if (structHnd == NO_CLASS_HANDLE)
9505             {
9506                 tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
9507             }
9508             else
9509             {
9510                 tree = gtNewObjNode(structHnd, addr);
9511                 if (tree->OperGet() == GT_OBJ)
9512                 {
9513                     gtSetObjGcInfo(tree->AsObj());
9514                 }
9515             }
9516         }
9517         else
9518         {
9519             tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9520         }
9521 #ifdef DEBUG
9522         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9523 #endif
9524     }
9525
9526     if (!tree->OperIsBlk())
9527     {
9528         return tree;
9529     }
9530     GenTreeBlk* blkNode = tree->AsBlk();
9531     if (blkNode->OperGet() == GT_DYN_BLK)
9532     {
9533         if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9534         {
9535             unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9536             // A GT_BLK with size of zero is not supported,
9537             // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9538             if (size != 0)
9539             {
9540                 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9541                 blkNode->ChangeOper(GT_BLK);
9542                 blkNode->gtBlkSize = size;
9543             }
9544             else
9545             {
9546                 return tree;
9547             }
9548         }
9549         else
9550         {
9551             return tree;
9552         }
9553     }
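     // At this point a GT_DYN_BLK whose size morphed to a non-zero constant has become a
     // fixed-size GT_BLK; e.g. DYN_BLK(addr, CNS_INT 16) is now BLK<16>(addr).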
9554     if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9555         (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9556     {
9557         GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9558         if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9559         {
9560             lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUGARG(DNER_VMNeedsStackAddr));
9561         }
9562     }
9563
9564     return tree;
9565 }
9566
9567 //------------------------------------------------------------------------
9568 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9569 //
9570 // Arguments:
9571 //    tree     - The block operand
9572 //    asgType  - The type of the assignment
9573 //    blockWidth - The size of the block
9574 //    isDest     - true iff this is the destination of the assignment
9575 //
9576 // Return Value:
9577 //    Returns the morphed block operand
9578 //
9579 // Notes:
9580 //    This does the following:
9581 //    - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
9582 //    - Ensures that any COMMAs are above ADDR nodes.
9583 //    Although 'tree' WAS an operand of a block assignment, the assignment
9584 //    may have been retyped to be a scalar assignment.
9585
9586 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9587 {
9588     GenTree* effectiveVal = tree->gtEffectiveVal();
9589
9590     if (!varTypeIsStruct(asgType))
9591     {
9592         if (effectiveVal->OperIsIndir())
9593         {
9594             GenTree* addr = effectiveVal->AsIndir()->Addr();
9595             if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9596             {
9597                 effectiveVal = addr->gtGetOp1();
9598             }
9599             else if (effectiveVal->OperIsBlk())
9600             {
9601                 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9602             }
9603             else
9604             {
9605                 effectiveVal->gtType = asgType;
9606             }
9607         }
9608         else if (effectiveVal->TypeGet() != asgType)
9609         {
9610             GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9611             effectiveVal  = gtNewOperNode(GT_IND, asgType, addr);
9612         }
9613     }
9614     else
9615     {
9616         GenTreeIndir*        indirTree        = nullptr;
9617         GenTreeLclVarCommon* lclNode          = nullptr;
9618         bool                 needsIndirection = true;
9619
9620         if (effectiveVal->OperIsIndir())
9621         {
9622             indirTree     = effectiveVal->AsIndir();
9623             GenTree* addr = effectiveVal->AsIndir()->Addr();
9624             if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9625             {
9626                 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9627             }
9628         }
9629         else if (effectiveVal->OperGet() == GT_LCL_VAR)
9630         {
9631             lclNode = effectiveVal->AsLclVarCommon();
9632         }
9633 #ifdef FEATURE_SIMD
9634         if (varTypeIsSIMD(asgType))
9635         {
9636             if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9637                 (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
9638             {
9639                 assert(!isDest);
9640                 needsIndirection = false;
9641                 effectiveVal     = indirTree->Addr()->gtGetOp1();
9642             }
9643             if (effectiveVal->OperIsSIMD())
9644             {
9645                 needsIndirection = false;
9646             }
9647         }
9648 #endif // FEATURE_SIMD
9649         if (lclNode != nullptr)
9650         {
9651             LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
9652             if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
9653             {
9654 #ifndef LEGACY_BACKEND
9655                 effectiveVal     = lclNode;
9656                 needsIndirection = false;
9657 #endif // !LEGACY_BACKEND
9658             }
9659             else
9660             {
9661                 // This may be a lclVar that was determined to be address-exposed.
9662                 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9663             }
9664         }
9665         if (needsIndirection)
9666         {
9667             if (indirTree != nullptr)
9668             {
9669                 // We should never find a struct indirection on the lhs of an assignment.
9670                 assert(!isDest || indirTree->OperIsBlk());
9671                 if (!isDest && indirTree->OperIsBlk())
9672                 {
9673                     (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9674                 }
9675             }
9676             else
9677             {
9678                 GenTree* newTree;
9679                 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9680                 if (isDest)
9681                 {
9682                     CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9683                     if (clsHnd == NO_CLASS_HANDLE)
9684                     {
9685                         newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9686                     }
9687                     else
9688                     {
9689                         newTree = gtNewObjNode(clsHnd, addr);
9690                         if (isDest && (newTree->OperGet() == GT_OBJ))
9691                         {
9692                             gtSetObjGcInfo(newTree->AsObj());
9693                         }
9694                         if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9695                         {
9696                             // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9697                             // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9698                             // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9699                             // separately now to avoid excess diffs.
9700                             newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9701                         }
9702                     }
9703                 }
9704                 else
9705                 {
9706                     newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
9707                 }
9708                 effectiveVal = newTree;
9709             }
9710         }
9711     }
9712     tree = effectiveVal;
9713     return tree;
9714 }
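     // E.g. for a scalar assignment, an operand of the form IND(ADDR(LCL_VAR int V02))
     // collapses to LCL_VAR V02, while a TYP_STRUCT operand is (re)wrapped in an OBJ or BLK
     // so that the assignment retains a block shape.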
9715
9716 //------------------------------------------------------------------------
9717 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9718 //
9719 // Arguments:
9720 //    dest - the GT_OBJ or GT_STORE_OBJ
9721 //
9722 // Assumptions:
9723 //    The destination must be known (by the caller) to be on the stack.
9724 //
9725 // Notes:
9726 //    If we have a CopyObj with a dest on the stack, and its size is small enough
9727 //    to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9728 //    GC Unsafe CopyBlk that is non-interruptible.
9729 //    This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
9730 //
9731 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
9732 {
9733 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9734     assert(dest->gtGcPtrCount != 0);
9735     unsigned blockWidth = dest->AsBlk()->gtBlkSize;
9736 #ifdef DEBUG
9737     // The destination is known (by the caller) to be on the stack; the assert below verifies it.
9738     GenTree* destAddr    = dest->Addr();
9739     assert(destAddr->IsLocalAddrExpr() != nullptr);
9740 #endif
9741     if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
9742     {
9743         genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
9744         dest->SetOper(newOper);
9745         dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
9746     }
9747 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9748 }
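     // E.g. a 32-byte CopyObj whose dest is a stack-allocated struct becomes a GT_BLK (or
     // GT_STORE_BLK) with gtBlkOpGcUnsafe set, allowing codegen to emit the unrolled copy
     // inside a non-interruptible region.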
9749
9750 //------------------------------------------------------------------------
9751 // fgMorphCopyBlock: Perform the Morphing of block copy
9752 //
9753 // Arguments:
9754 //    tree - a block copy (i.e. an assignment with a block op on the lhs).
9755 //
9756 // Return Value:
9757 //    We can return the original block copy unmodified (least desirable, but always correct)
9758 //    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
9759 //    If we have performed struct promotion of the Source() or the Dest() then we will try to
9760 //    perform a field by field assignment for each of the promoted struct fields.
9761 //
9762 // Assumptions:
9763 //    The child nodes for tree have already been Morphed.
9764 //
9765 // Notes:
9766 //    If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
9767 //    When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
9768 //    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
9769 //    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9770 //    cannot use a field by field assignment and must leave the original block copy unmodified.
9771
9772 GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
9773 {
9774     noway_assert(tree->OperIsCopyBlkOp());
9775
9776     JITDUMP("\nfgMorphCopyBlock:");
9777
9778     bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
9779
9780     GenTree* asg  = tree;
9781     GenTree* rhs  = asg->gtGetOp2();
9782     GenTree* dest = asg->gtGetOp1();
9783
9784 #if FEATURE_MULTIREG_RET
9785     // If this is a multi-reg return, we will not do any morphing of this node.
9786     if (rhs->IsMultiRegCall())
9787     {
9788         assert(dest->OperGet() == GT_LCL_VAR);
9789         JITDUMP(" not morphing a multireg call return\n");
9790         return tree;
9791     }
9792 #endif // FEATURE_MULTIREG_RET
9793
9794     // If we have an array index on the lhs, we need to create an obj node.
9795
9796     dest = fgMorphBlkNode(dest, true);
9797     if (dest != asg->gtGetOp1())
9798     {
9799         asg->gtOp.gtOp1 = dest;
9800         if (dest->IsLocal())
9801         {
9802             dest->gtFlags |= GTF_VAR_DEF;
9803         }
9804     }
9805     asg->gtType = dest->TypeGet();
9806     rhs         = fgMorphBlkNode(rhs, false);
9807
9808     asg->gtOp.gtOp2 = rhs;
9809
9810     GenTreePtr oldTree    = tree;
9811     GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
9812
9813     if (oneAsgTree)
9814     {
9815         JITDUMP(" using oneAsgTree.\n");
9816         tree = oneAsgTree;
9817     }
9818     else
9819     {
9820         unsigned             blockWidth;
9821         bool                 blockWidthIsConst = false;
9822         GenTreeLclVarCommon* lclVarTree        = nullptr;
9823         GenTreeLclVarCommon* srcLclVarTree     = nullptr;
9824         unsigned             destLclNum        = BAD_VAR_NUM;
9825         LclVarDsc*           destLclVar        = nullptr;
9826         FieldSeqNode*        destFldSeq        = nullptr;
9827         bool                 destDoFldAsg      = false;
9828         GenTreePtr           destAddr          = nullptr;
9829         GenTreePtr           srcAddr           = nullptr;
9830         bool                 destOnStack       = false;
9831         bool                 hasGCPtrs         = false;
9832
9833         JITDUMP("block assignment to morph:\n");
9834         DISPTREE(asg);
9835
9836         if (dest->IsLocal())
9837         {
9838             blockWidthIsConst = true;
9839             destOnStack       = true;
9840             if (dest->gtOper == GT_LCL_VAR)
9841             {
9842                 lclVarTree = dest->AsLclVarCommon();
9843                 destLclNum = lclVarTree->gtLclNum;
9844                 destLclVar = &lvaTable[destLclNum];
9845                 if (destLclVar->lvType == TYP_STRUCT)
9846                 {
9847                     // It would be nice if lvExactSize always corresponded to the size of the struct,
9848                     // but it doesn't always for the temps that the importer creates when it spills side
9849                     // effects.
9850                     // TODO-Cleanup: Determine when this happens, and whether it can be changed.
9851                     blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
9852                 }
9853                 else
9854                 {
9855                     blockWidth = genTypeSize(destLclVar->lvType);
9856                 }
9857                 hasGCPtrs = destLclVar->lvStructGcCount != 0;
9858             }
9859             else
9860             {
9861                 assert(dest->TypeGet() != TYP_STRUCT);
9862                 assert(dest->gtOper == GT_LCL_FLD);
9863                 blockWidth = genTypeSize(dest->TypeGet());
9864                 destAddr   = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
9865                 destFldSeq = dest->AsLclFld()->gtFieldSeq;
9866             }
9867         }
9868         else
9869         {
9870             GenTree* effectiveDest = dest->gtEffectiveVal();
9871             if (effectiveDest->OperGet() == GT_IND)
9872             {
9873                 assert(dest->TypeGet() != TYP_STRUCT);
9874                 blockWidth        = genTypeSize(effectiveDest->TypeGet());
9875                 blockWidthIsConst = true;
9876                 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9877                 {
9878                     destAddr = dest->gtGetOp1();
9879                 }
9880             }
9881             else
9882             {
9883                 assert(effectiveDest->OperIsBlk());
9884                 GenTreeBlk* blk = effectiveDest->AsBlk();
9885
9886                 blockWidth        = blk->gtBlkSize;
9887                 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
9888                 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
9889                 {
9890                     destAddr = blk->Addr();
9891                 }
9892             }
9893             if (destAddr != nullptr)
9894             {
9895                 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
9896                 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9897                 {
9898                     destOnStack = true;
9899                     destLclNum  = lclVarTree->gtLclNum;
9900                     destLclVar  = &lvaTable[destLclNum];
9901                 }
9902             }
9903         }
9904
9905         if (destLclVar != nullptr)
9906         {
9907 #if LOCAL_ASSERTION_PROP
9908             // Kill everything about destLclNum (and its field locals)
9909             if (optLocalAssertionProp)
9910             {
9911                 if (optAssertionCount > 0)
9912                 {
9913                     fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9914                 }
9915             }
9916 #endif // LOCAL_ASSERTION_PROP
9917
9918             if (destLclVar->lvPromoted && blockWidthIsConst)
9919             {
9920                 noway_assert(varTypeIsStruct(destLclVar));
9921                 noway_assert(!opts.MinOpts());
9922
9923                 if (blockWidth == destLclVar->lvExactSize)
9924                 {
9925                     JITDUMP(" (destDoFldAsg=true)");
9926                     // We may decide later that a copyblk is required when this struct has holes
9927                     destDoFldAsg = true;
9928                 }
9929                 else
9930                 {
9931                     JITDUMP(" with mismatched dest size");
9932                 }
9933             }
9934         }
9935
9936         FieldSeqNode* srcFldSeq   = nullptr;
9937         unsigned      srcLclNum   = BAD_VAR_NUM;
9938         LclVarDsc*    srcLclVar   = nullptr;
9939         bool          srcDoFldAsg = false;
9940
9941         if (rhs->IsLocal())
9942         {
9943             srcLclVarTree = rhs->AsLclVarCommon();
9944             srcLclNum     = srcLclVarTree->gtLclNum;
9945             if (rhs->OperGet() == GT_LCL_FLD)
9946             {
9947                 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
9948             }
9949         }
9950         else if (rhs->OperIsIndir())
9951         {
9952             if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
9953             {
9954                 srcLclNum = srcLclVarTree->gtLclNum;
9955             }
9956             else
9957             {
9958                 srcAddr = rhs->gtOp.gtOp1;
9959             }
9960         }
9961
9962         if (srcLclNum != BAD_VAR_NUM)
9963         {
9964             srcLclVar = &lvaTable[srcLclNum];
9965
9966             if (srcLclVar->lvPromoted && blockWidthIsConst)
9967             {
9968                 noway_assert(varTypeIsStruct(srcLclVar));
9969                 noway_assert(!opts.MinOpts());
9970
9971                 if (blockWidth == srcLclVar->lvExactSize)
9972                 {
9973                     JITDUMP(" (srcDoFldAsg=true)");
9974                     // We may decide later that a copyblk is required when this struct has holes
9975                     srcDoFldAsg = true;
9976                 }
9977                 else
9978                 {
9979                     JITDUMP(" with mismatched src size");
9980                 }
9981             }
9982         }
9983
9984         // Check to see if we are required to do a copy block because the struct contains holes
9985         // and either the src or dest is externally visible
9986         //
9987         bool requiresCopyBlock   = false;
9988         bool srcSingleLclVarAsg  = false;
9989         bool destSingleLclVarAsg = false;
9990
9991         if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
9992         {
9993             // Self-assign; no effect.
9994             GenTree* nop = gtNewNothingNode();
9995             INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
9996             return nop;
9997         }
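         // (E.g. "s = s", or a copy where both operands reduce to the same local
         // and the same field sequence.)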
9998
9999         // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
10000         if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
10001         {
10002             requiresCopyBlock = true;
10003         }
10004
10005         // Can we use field by field assignment for the dest?
10006         if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
10007         {
10008             JITDUMP(" dest contains custom layout and contains holes");
10009             // C++ style CopyBlock with holes
10010             requiresCopyBlock = true;
10011         }
10012
10013         // Can we use field by field assignment for the src?
10014         if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
10015         {
10016             JITDUMP(" src contains custom layout and contains holes");
10017             // C++ style CopyBlock with holes
10018             requiresCopyBlock = true;
10019         }
10020
10021 #if defined(_TARGET_ARM_)
10022         if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
10023         {
10024             JITDUMP(" rhs is unaligned");
10025             requiresCopyBlock = true;
10026         }
10027
10028         if (asg->gtFlags & GTF_BLK_UNALIGNED)
10029         {
10030             JITDUMP(" asg is unaligned");
10031             requiresCopyBlock = true;
10032         }
10033 #endif // _TARGET_ARM_
10034
10035         if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
10036         {
10037             requiresCopyBlock = true;
10038         }
10039
10040         // Can't use field by field assignment if the src is a call.
10041         if (rhs->OperGet() == GT_CALL)
10042         {
10043             JITDUMP(" src is a call");
10044             // C++ style CopyBlock with holes
10045             requiresCopyBlock = true;
10046         }
10047
10048         // If we passed the above checks, then we will check these two
10049         if (!requiresCopyBlock)
10050         {
10051             // Are both dest and src promoted structs?
10052             if (destDoFldAsg && srcDoFldAsg)
10053             {
10054                 // Both structs should be of the same type, or each have a single field of the same type.
10055                 // If not we will use a copy block.
10056                 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10057                     lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10058                 {
10059                     unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10060                     unsigned srcFieldNum  = lvaTable[srcLclNum].lvFieldLclStart;
10061                     if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10062                         (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10063                     {
10064                         requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10065                         JITDUMP(" with mismatched types");
10066                     }
10067                 }
10068             }
10069             // Are neither dest or src promoted structs?
10070             else if (!destDoFldAsg && !srcDoFldAsg)
10071             {
10072                 requiresCopyBlock = true; // Leave as a CopyBlock
10073                 JITDUMP(" with no promoted structs");
10074             }
10075             else if (destDoFldAsg)
10076             {
10077                 // Match the following kinds of trees:
10078                 //  fgMorphTree BB01, stmt 9 (before)
10079                 //   [000052] ------------        const     int    8
10080                 //   [000053] -A--G-------     copyBlk   void
10081                 //   [000051] ------------           addr      byref
10082                 //   [000050] ------------              lclVar    long   V07 loc5
10083                 //   [000054] --------R---        <list>    void
10084                 //   [000049] ------------           addr      byref
10085                 //   [000048] ------------              lclVar    struct(P) V06 loc4
10086                 //                                              long   V06.h (offs=0x00) -> V17 tmp9
10087                 // Yields this transformation
10088                 //  fgMorphCopyBlock (after):
10089                 //   [000050] ------------        lclVar    long   V07 loc5
10090                 //   [000085] -A----------     =         long
10091                 //   [000083] D------N----        lclVar    long   V17 tmp9
10092                 //
10093                 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10094                     (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10095                 {
10096                     // Reject the following tree:
10097                     //  - seen on x86chk    jit\jit64\hfa\main\hfa_sf3E_r.exe
10098                     //
10099                     //  fgMorphTree BB01, stmt 6 (before)
10100                     //   [000038] -------------        const     int    4
10101                     //   [000039] -A--G--------     copyBlk   void
10102                     //   [000037] -------------           addr      byref
10103                     //   [000036] -------------              lclVar    int    V05 loc3
10104                     //   [000040] --------R----        <list>    void
10105                     //   [000035] -------------           addr      byref
10106                     //   [000034] -------------              lclVar    struct(P) V04 loc2
10107                     //                                          float  V04.f1 (offs=0x00) -> V13 tmp6
10108                     // As this would transform into
10109                     //   float V13 = int V05
10110                     //
10111                     unsigned  fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10112                     var_types destType    = lvaTable[fieldLclNum].TypeGet();
10113                     if (srcLclVar->TypeGet() == destType)
10114                     {
10115                         srcSingleLclVarAsg = true;
10116                     }
10117                 }
10118             }
10119             else
10120             {
10121                 assert(srcDoFldAsg);
10122                 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10123                 //
10124                 //               [000240] -----+------             /--*  lclVar    struct(P) V18 tmp9
10125                 //                                                  /--*    byref  V18._value (offs=0x00) -> V30 tmp21
10126                 //               [000245] -A------R---             *  =         struct (copy)
10127                 //               [000244] -----+------             \--*  obj(8)    struct
10128                 //               [000243] -----+------                \--*  addr      byref
10129                 //               [000242] D----+-N----                   \--*  lclVar    byref  V28 tmp19
10130                 //
10131                 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10132                     (blockWidth == genTypeSize(destLclVar->TypeGet())))
10133                 {
10134                     // Check for type agreement
10135                     unsigned  fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10136                     var_types srcType     = lvaTable[fieldLclNum].TypeGet();
10137                     if (destLclVar->TypeGet() == srcType)
10138                     {
10139                         destSingleLclVarAsg = true;
10140                     }
10141                 }
10142             }
10143         }
10144
10145         // If we require a copy block, then set both of the field assign bools to false
10146         if (requiresCopyBlock)
10147         {
10148             // If a copy block is required then we won't do field by field assignments
10149             destDoFldAsg = false;
10150             srcDoFldAsg  = false;
10151         }
10152
10153         JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10154
10155         // Mark the dest/src structs as DoNotEnreg
10156         // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
10157         // or the struct is not promoted
10158         //
10159         if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10160         {
10161             if (!destLclVar->lvRegStruct)
10162             {
10163                 // Mark it as DoNotEnregister.
10164                 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10165             }
10166         }
10167
10168         if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10169         {
10170             if (!srcLclVar->lvRegStruct)
10171             {
10172                 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10173             }
10174         }
10175
10176         if (requiresCopyBlock)
10177         {
10178 #if CPU_USES_BLOCK_MOVE
10179             compBlkOpUsed = true;
10180 #endif
10181             var_types asgType = dest->TypeGet();
10182             dest              = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
10183             asg->gtOp.gtOp1   = dest;
10184             asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10185
10186             // Note that the unrolling of CopyBlk is only implemented on some platforms.
10187             // Currently that includes x64 and ARM but not x86: the code generation for this
10188             // construct requires the ability to mark certain regions of the generated code
10189             // as non-interruptible, and the GC encoding for the latter platform does not
10190             // have this capability.
10191
10192             // If we have a CopyObj with a dest on the stack
10193             // we will convert it into an GC Unsafe CopyBlk that is non-interruptible
10194             // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10195             // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10196             //
10197             if (destOnStack && (dest->OperGet() == GT_OBJ))
10198             {
10199                 fgMorphUnsafeBlk(dest->AsObj());
10200             }
10201
10202             // Eliminate the "OBJ or BLK" node on the rhs.
10203             rhs             = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
10204             asg->gtOp.gtOp2 = rhs;
10205
10206 #ifdef LEGACY_BACKEND
10207             if (!rhs->OperIsIndir())
10208             {
10209                 noway_assert(rhs->gtOper == GT_LCL_VAR);
10210                 GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
10211                 rhs              = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
10212             }
10213 #endif // LEGACY_BACKEND
10214             // Formerly, liveness did not consider copyblk arguments of simple types as being
10215             // a use or def, so these variables were marked as address-exposed.
10216             // TODO-1stClassStructs: This should no longer be needed.
10217             if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
10218             {
10219                 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10220                 lvaTable[srcLclNum].lvAddrExposed = true;
10221             }
10222
10223             if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
10224             {
10225                 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10226                 lvaTable[destLclNum].lvAddrExposed = true;
10227             }
10228
10229             goto _Done;
10230         }
10231
10232         //
10233         // Otherwise we convert this CopyBlock into individual field by field assignments
10234         //
10235         tree = nullptr;
10236
10237         GenTreePtr src;
10238         GenTreePtr addrSpill            = nullptr;
10239         unsigned   addrSpillTemp        = BAD_VAR_NUM;
10240         bool       addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10241
10242         unsigned fieldCnt = DUMMY_INIT(0);
10243
10244         if (destDoFldAsg && srcDoFldAsg)
10245         {
10246             // To do fieldwise assignments for both sides, they'd better be the same struct type!
10247             // All of these conditions were checked above...
10248             assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10249             assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10250
10251             fieldCnt = destLclVar->lvFieldCnt;
10252             goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10253                                 // assignments.
10254         }
10255         else if (destDoFldAsg)
10256         {
10257             fieldCnt = destLclVar->lvFieldCnt;
10258             rhs      = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10259             if (srcAddr == nullptr)
10260             {
10261                 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10262             }
10263         }
10264         else
10265         {
10266             assert(srcDoFldAsg);
10267             fieldCnt = srcLclVar->lvFieldCnt;
10268             dest     = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10269             if (dest->OperIsBlk())
10270             {
10271                 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10272             }
10273             destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10274         }
10275
10276         if (destDoFldAsg)
10277         {
10278             noway_assert(!srcDoFldAsg);
10279             if (gtClone(srcAddr))
10280             {
10281                 // srcAddr is simple expression. No need to spill.
10282                 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10283             }
10284             else
10285             {
10286                 // srcAddr is complex expression. Clone and spill it (unless the destination is
10287                 // a struct local that only has one field, in which case we'd only use the
10288                 // address value once...)
10289                 if (destLclVar->lvFieldCnt > 1)
10290                 {
10291                     addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10292                     noway_assert(addrSpill != nullptr);
10293                 }
10294             }
10295         }
10296
10297         if (srcDoFldAsg)
10298         {
10299             noway_assert(!destDoFldAsg);
10300
10301             // If we're doing field-wise stores, to an address within a local, and we copy
10302             // the address into "addrSpill", do *not* declare the original local var node in the
10303             // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10304             // field-wise assignments as an "indirect" assignment to the local.
10305             // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10306             // we clone it.)
10307             if (lclVarTree != nullptr)
10308             {
10309                 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10310             }
10311
10312             if (gtClone(destAddr))
10313             {
10314                 // destAddr is simple expression. No need to spill
10315                 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10316             }
10317             else
10318             {
10319                 // destAddr is complex expression. Clone and spill it (unless
10320                 // the source is a struct local that only has one field, in which case we'd only
10321                 // use the address value once...)
10322                 if (srcLclVar->lvFieldCnt > 1)
10323                 {
10324                     addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10325                     noway_assert(addrSpill != nullptr);
10326                 }
10327
10328                 // TODO-CQ: this should be based on a more general
10329                 // "BaseAddress" method, that handles fields of structs, before or after
10330                 // morphing.
10331                 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10332                 {
10333                     if (addrSpill->gtOp.gtOp1->IsLocal())
10334                     {
10335                         // We will *not* consider this to define the local, but rather have each individual
10336                         // field assignment be a definition.
10337                         addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10338                         assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10339                                PROMOTION_TYPE_INDEPENDENT);
10340                         addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10341                                                      // local stack frame
10342                     }
10343                 }
10344             }
10345         }
10346
10347         if (addrSpill != nullptr)
10348         {
10349             // Spill the (complex) address to a BYREF temp.
10350             // Note, at most one address may need to be spilled.
10351             addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10352
10353             lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10354
10355             if (addrSpillIsStackDest)
10356             {
10357                 lvaTable[addrSpillTemp].lvStackByref = true;
10358             }
10359
10360             tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10361
10362             // If we are assigning the address of a LclVar here,
10363             // liveness does not account for this kind of address-taken use.
10364             //
10365             // We have to mark this local as address exposed so
10366             // that we don't delete the definition for this LclVar
10367             // as a dead store later on.
10368             //
10369             if (addrSpill->OperGet() == GT_ADDR)
10370             {
10371                 GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
10372                 if (addrOp->IsLocal())
10373                 {
10374                     unsigned lclVarNum                = addrOp->gtLclVarCommon.gtLclNum;
10375                     lvaTable[lclVarNum].lvAddrExposed = true;
10376                     lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10377                 }
10378             }
10379         }
10380
10381     _AssignFields:
10382
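        // Build one assignment per promoted field: each iteration constructs the
        // field-sized dest and src trees and appends the assignment via GT_COMMA.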
10383         for (unsigned i = 0; i < fieldCnt; ++i)
10384         {
10385             FieldSeqNode* curFieldSeq = nullptr;
10386             if (destDoFldAsg)
10387             {
10388                 noway_assert(destLclNum != BAD_VAR_NUM);
10389                 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10390                 dest                 = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10391                 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10392                 if (destAddr != nullptr)
10393                 {
10394                     noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10395                     dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10396                 }
10397                 else
10398                 {
10399                     noway_assert(lclVarTree != nullptr);
10400                     dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10401                 }
10402                 // Don't CSE the lhs of an assignment.
10403                 dest->gtFlags |= GTF_DONT_CSE;
10404             }
10405             else
10406             {
10407                 noway_assert(srcDoFldAsg);
10408                 noway_assert(srcLclNum != BAD_VAR_NUM);
10409                 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10410
10411                 if (destSingleLclVarAsg)
10412                 {
10413                     noway_assert(fieldCnt == 1);
10414                     noway_assert(destLclVar != nullptr);
10415                     noway_assert(addrSpill == nullptr);
10416
10417                     dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10418                 }
10419                 else
10420                 {
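                    // Compute the base address for this field's store: either reuse the
                    // spilled address temp or clone the (simple) destAddr expression.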
10421                     if (addrSpill)
10422                     {
10423                         assert(addrSpillTemp != BAD_VAR_NUM);
10424                         dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10425                     }
10426                     else
10427                     {
10428                         dest = gtCloneExpr(destAddr);
10429                         noway_assert(dest != nullptr);
10430
10431                         // Is the address of a local?
10432                         GenTreeLclVarCommon* lclVarTree = nullptr;
10433                         bool                 isEntire   = false;
10434                         bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
10435                         if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10436                         {
10437                             lclVarTree->gtFlags |= GTF_VAR_DEF;
10438                             if (!isEntire)
10439                             {
10440                                 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10441                             }
10442                         }
10443                     }
10444
10445                     GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10446                     // Have to set the field sequence -- which means we need the field handle.
10447                     CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10448                     CORINFO_FIELD_HANDLE fieldHnd =
10449                         info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10450                     curFieldSeq                          = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10451                     fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10452
10453                     dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10454
10455                     dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
10456
10457                     // !!! The destination could be on the stack. !!!
10458                     // This flag will let us choose the correct write barrier.
10459                     dest->gtFlags |= GTF_IND_TGTANYWHERE;
10460                 }
10461             }
10462
10463             if (srcDoFldAsg)
10464             {
10465                 noway_assert(srcLclNum != BAD_VAR_NUM);
10466                 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10467                 src                  = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10468
10469                 noway_assert(srcLclVarTree != nullptr);
10470                 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10471                 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
10472                 // but they are when they are under a GT_ADDR.
10473                 src->gtFlags |= GTF_DONT_CSE;
10474             }
10475             else
10476             {
10477                 noway_assert(destDoFldAsg);
10478                 noway_assert(destLclNum != BAD_VAR_NUM);
10479                 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10480
10481                 if (srcSingleLclVarAsg)
10482                 {
10483                     noway_assert(fieldCnt == 1);
10484                     noway_assert(srcLclVar != nullptr);
10485                     noway_assert(addrSpill == nullptr);
10486
10487                     src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
10488                 }
10489                 else
10490                 {
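                    // Likewise for the source: reuse the spilled address temp or clone
                    // the (simple) srcAddr expression.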
10491                     if (addrSpill)
10492                     {
10493                         assert(addrSpillTemp != BAD_VAR_NUM);
10494                         src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10495                     }
10496                     else
10497                     {
10498                         src = gtCloneExpr(srcAddr);
10499                         noway_assert(src != nullptr);
10500                     }
10501
10502                     CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10503                     CORINFO_FIELD_HANDLE fieldHnd =
10504                         info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10505                     curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10506
10507                     src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10508                                         new (this, GT_CNS_INT)
10509                                             GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
10510
10511                     src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
10512                 }
10513             }
10514
10515             noway_assert(dest->TypeGet() == src->TypeGet());
10516
10517             asg = gtNewAssignNode(dest, src);
10518
10519             // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10520             // and it was of a local, record the assignment as an indirect update of a local.
10521             if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10522             {
10523                 curFieldSeq   = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
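                // The assignment is 'entire' when the field store covers the whole local.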
10524                 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
10525                 IndirectAssignmentAnnotation* pIndirAnnot =
10526                     new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
10527                 GetIndirAssignMap()->Set(asg, pIndirAnnot);
10528             }
10529
10530 #if LOCAL_ASSERTION_PROP
10531             if (optLocalAssertionProp)
10532             {
10533                 optAssertionGen(asg);
10534             }
10535 #endif // LOCAL_ASSERTION_PROP
10536
10537             if (tree)
10538             {
10539                 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10540             }
10541             else
10542             {
10543                 tree = asg;
10544             }
10545         }
10546     }
10547
10548     if (isLateArg)
10549     {
10550         tree->gtFlags |= GTF_LATE_ARG;
10551     }
10552
10553 #ifdef DEBUG
10554     if (tree != oldTree)
10555     {
10556         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10557     }
10558
10559     if (verbose)
10560     {
10561         printf("\nfgMorphCopyBlock (after):\n");
10562         gtDispTree(tree);
10563     }
10564 #endif
10565
10566 _Done:
10567     return tree;
10568 }
10569
10570 // Insert conversions and normalize the tree to make it amenable to
10571 // register-based FP architectures.
10572 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
10573 {
10574     if (tree->OperIsArithmetic())
10575     {
10576         if (varTypeIsFloating(tree))
10577         {
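            // Cast any mismatched operand so that both operands agree with the node's
            // floating-point result type.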
10578             GenTreePtr op1 = tree->gtOp.gtOp1;
10579             GenTreePtr op2 = tree->gtGetOp2();
10580
10581             if (op1->TypeGet() != tree->TypeGet())
10582             {
10583                 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
10584             }
10585             if (op2->TypeGet() != tree->TypeGet())
10586             {
10587                 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
10588             }
10589         }
10590     }
10591     else if (tree->OperIsCompare())
10592     {
10593         GenTreePtr op1 = tree->gtOp.gtOp1;
10594
10595         if (varTypeIsFloating(op1))
10596         {
10597             GenTreePtr op2 = tree->gtGetOp2();
10598             assert(varTypeIsFloating(op2));
10599
10600             if (op1->TypeGet() != op2->TypeGet())
10601             {
10602                 // both had better be floating point, just one wider than the other
10603                 if (op1->TypeGet() == TYP_FLOAT)
10604                 {
10605                     assert(op2->TypeGet() == TYP_DOUBLE);
10606                     tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
10607                 }
10608                 else if (op2->TypeGet() == TYP_FLOAT)
10609                 {
10610                     assert(op1->TypeGet() == TYP_DOUBLE);
10611                     tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
10612                 }
10613             }
10614         }
10615     }
10616
10617     return tree;
10618 }
10619
10620 //--------------------------------------------------------------------------------------------------------------
10621 // fgMorphRecognizeBoxNullable:
10622 //   Recognize this pattern:
10623 //
10624 //   stmtExpr  void  (IL 0x000...  ???)
10625 //     return    int
10626 //             CNS_INT     ref    null
10627 //         EQ/NE/GT        int
10628 //             CALL help ref    HELPER.CORINFO_HELP_BOX_NULLABLE
10629 //                 CNS_INT(h)  long   0x7fed96836c8 class
10630 //                 ADDR      byref
10631 //                     FIELD struct value
10632 //                         LCL_VAR ref V00 this
10633 //
10634 //   which comes from this code:
10635 //
10636 //      return this.value==null;
10637 //
10638 //   and transform it into
10639 //
10640 //   stmtExpr  void  (IL 0x000...  ???)
10641 //     return    int
10642 //             CNS_INT     ref    null
10643 //         EQ/NE/GT        int
10644 //             IND bool
10645 //                 ADDR      byref
10646 //                     FIELD struct value
10647 //                         LCL_VAR ref V00 this
10648 //
10649 // Arguments:
10650 //       compare - Compare tree to optimize.
10651 //
10652 // Return Value:
10653 //       A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found;
10654 //       the original tree otherwise.
10655 //
10656
10657 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
10658 {
10659     GenTree*     op1 = compare->gtOp.gtOp1;
10660     GenTree*     op2 = compare->gtOp.gtOp2;
10661     GenTree*     opCns;
10662     GenTreeCall* opCall;
10663
10664     if (op1->IsCnsIntOrI() && op2->IsHelperCall())
10665     {
10666         opCns  = op1;
10667         opCall = op2->AsCall();
10668     }
10669     else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
10670     {
10671         opCns  = op2;
10672         opCall = op1->AsCall();
10673     }
10674     else
10675     {
10676         return compare;
10677     }
10678
10679     if (!opCns->IsIntegralConst(0))
10680     {
10681         return compare;
10682     }
10683
10684     if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
10685     {
10686         return compare;
10687     }
10688
10689     // Get the nullable struct argument
10690     GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
10691
10692     // Check for cases that are unsafe to optimize and return the unchanged tree
10693     if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0))
10694     {
10695         return compare;
10696     }
10697
10698     // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset
10699     GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg);
10700
10701     if (opCall == op1)
10702     {
10703         compare->gtOp.gtOp1 = newOp;
10704     }
10705     else
10706     {
10707         compare->gtOp.gtOp2 = newOp;
10708     }
10709
10710     return compare;
10711 }
10712
10713 #ifdef FEATURE_SIMD
10714
10715 //--------------------------------------------------------------------------------------------------------------
10716 // getSIMDStructFromField:
10717 //   Checks whether the field belongs to a SIMD struct. If it does, return the GenTreePtr for
10718 //   the struct node, along with the base type, field index and SIMD size; if not, return nullptr.
10719 //   Normally, if the tree node comes from a SIMD lclvar that is not used in any SIMD intrinsic,
10720 //   we return nullptr, since in that case the SIMD struct should be treated as a regular struct.
10721 //   However, to get the SIMD struct node regardless, ignoreUsedInSIMDIntrinsic can be set to
10722 //   true; the lvIsUsedInSIMDIntrinsic check is then skipped, and the SIMD struct node is
10723 //   returned whenever the struct is a SIMD struct.
10724 //
10725 // Arguments:
10726 //       tree - GenTreePtr. This node is checked to see whether it is a field that belongs to a
10727 //               SIMD struct used in a SIMD intrinsic.
10728 //       pBaseTypeOut - var_types pointer; if the tree node is the one we want, *pBaseTypeOut is
10729 //                      set to the SIMD lclvar's base type.
10730 //       indexOut - unsigned pointer; if the tree is used in a SIMD intrinsic, *indexOut is set to
10731 //                  the index number of this field.
10732 //       simdSizeOut - unsigned pointer; if the tree is used in a SIMD intrinsic, *simdSizeOut is
10733 //                     set to the size of the SIMD struct that this tree belongs to.
10734 //       ignoreUsedInSIMDIntrinsic - bool. If set to true, this function skips the
10735 //                                   lvIsUsedInSIMDIntrinsic check.
10736 //
10737 // Return Value:
10738 //       A GenTreePtr that points to the SIMD lclvar tree the field belongs to. If the field is
10739 //       not related to a SIMD intrinsic, return nullptr.
10740 //
10741
10742 GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
10743                                             var_types* pBaseTypeOut,
10744                                             unsigned*  indexOut,
10745                                             unsigned*  simdSizeOut,
10746                                             bool       ignoreUsedInSIMDIntrinsic /*false*/)
10747 {
10748     GenTreePtr ret = nullptr;
10749     if (tree->OperGet() == GT_FIELD)
10750     {
10751         GenTreePtr objRef = tree->gtField.gtFldObj;
10752         if (objRef != nullptr)
10753         {
10754             GenTreePtr obj = nullptr;
10755             if (objRef->gtOper == GT_ADDR)
10756             {
10757                 obj = objRef->gtOp.gtOp1;
10758             }
10759             else if (ignoreUsedInSIMDIntrinsic)
10760             {
10761                 obj = objRef;
10762             }
10763             else
10764             {
10765                 return nullptr;
10766             }
10767
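            // The object must be a SIMD-typed local (or a SIMD intrinsic node) for this
            // field access to be treated as a SIMD element access.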
10768             if (isSIMDTypeLocal(obj))
10769             {
10770                 unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
10771                 LclVarDsc* varDsc = &lvaTable[lclNum];
10772                 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
10773                 {
10774                     *simdSizeOut  = varDsc->lvExactSize;
10775                     *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
10776                     ret           = obj;
10777                 }
10778             }
10779             else if (obj->OperGet() == GT_SIMD)
10780             {
10781                 ret                   = obj;
10782                 GenTreeSIMD* simdNode = obj->AsSIMD();
10783                 *simdSizeOut          = simdNode->gtSIMDSize;
10784                 *pBaseTypeOut         = simdNode->gtSIMDBaseType;
10785             }
10786         }
10787     }
10788     if (ret != nullptr)
10789     {
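        // The element index is the field offset divided by the element (base type) size.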
10790         unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
10791         *indexOut             = tree->gtField.gtFldOffset / BaseTypeSize;
10792     }
10793     return ret;
10794 }
10795
10796 /*****************************************************************************
10797 *  If a read operation tries to access a SIMD struct field, transform the
10798 *  operation into the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
10799 *  Otherwise, return the old tree.
10800 *  Argument:
10801 *   tree - GenTreePtr. If this pointer points to a SIMD struct which is used in a SIMD
10802 *          intrinsic, it is morphed into the SIMD intrinsic SIMDIntrinsicGetItem.
10803 *  Return:
10804 *   A GenTreePtr which points to the new tree. If the tree is not for a SIMD intrinsic,
10805 *   return the old tree.
10806 */
10807
10808 GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
10809 {
10810     unsigned   index          = 0;
10811     var_types  baseType       = TYP_UNKNOWN;
10812     unsigned   simdSize       = 0;
10813     GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
10814     if (simdStructNode != nullptr)
10815     {
10816         assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10817         GenTree* op2 = gtNewIconNode(index);
10818         tree         = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
10819 #ifdef DEBUG
10820         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10821 #endif
10822     }
10823     return tree;
10824 }
10825
10826 /*****************************************************************************
10827 *  Transform an assignment to a SIMD struct field into the SIMD intrinsic
10828 *  SIMDIntrinsicSet*, and return the new tree. If it is not such an assignment,
10829 *  then return the old tree.
10830 *  Argument:
10831 *   tree - GenTreePtr. If this pointer points to a SIMD struct which is used in a SIMD
10832 *          intrinsic, it is morphed into a SIMD intrinsic set.
10833 *  Return:
10834 *   A GenTreePtr which points to the new tree. If the tree is not for a SIMD intrinsic,
10835 *   return the old tree.
10836 */
10837
10838 GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
10839 {
10840     assert(tree->OperGet() == GT_ASG);
10841     GenTreePtr op1 = tree->gtGetOp1();
10842     GenTreePtr op2 = tree->gtGetOp2();
10843
10844     unsigned   index         = 0;
10845     var_types  baseType      = TYP_UNKNOWN;
10846     unsigned   simdSize      = 0;
10847     GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
10848     if (simdOp1Struct != nullptr)
10849     {
10850         // Generate the simd set intrinsic
10851         assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
10852
10853         SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
10854         switch (index)
10855         {
10856             case 0:
10857                 simdIntrinsicID = SIMDIntrinsicSetX;
10858                 break;
10859             case 1:
10860                 simdIntrinsicID = SIMDIntrinsicSetY;
10861                 break;
10862             case 2:
10863                 simdIntrinsicID = SIMDIntrinsicSetZ;
10864                 break;
10865             case 3:
10866                 simdIntrinsicID = SIMDIntrinsicSetW;
10867                 break;
10868             default:
10869                 noway_assert(!"There is no set intrinsic for index bigger than 3");
10870         }
10871
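        // Clone the struct node to serve as the assignment target; the original struct
        // node becomes the first operand of the SIMD set intrinsic.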
10872         GenTreePtr target = gtClone(simdOp1Struct);
10873         assert(target != nullptr);
10874         GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
10875         tree->gtOp.gtOp1    = target;
10876         tree->gtOp.gtOp2    = simdTree;
10877 #ifdef DEBUG
10878         tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10879 #endif
10880     }
10881
10882     return tree;
10883 }
10884
10885 #endif // FEATURE_SIMD
10886
10887 /*****************************************************************************
10888  *
10889  *  Transform the given GTK_SMPOP tree for code generation.
10890  */
10891
10892 #ifdef _PREFAST_
10893 #pragma warning(push)
10894 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
10895 #endif
10896 GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
10897 {
10898     // This extra scope is a workaround for a gcc bug:
10899     // the inline destructor for ALLOCA_CHECK confuses the control
10900     // flow, and gcc thinks that the function never returns.
10901     {
10902         ALLOCA_CHECK();
10903         assert(tree->OperKind() & GTK_SMPOP);
10904
10905         /* The steps in this function are :
10906            o Perform required preorder processing
10907            o Process the first, then second operand, if any
10908            o Perform required postorder morphing
10909            o Perform optional postorder morphing if optimizing
10910          */
10911
10912         bool isQmarkColon = false;
10913
10914 #if LOCAL_ASSERTION_PROP
10915         AssertionIndex origAssertionCount = DUMMY_INIT(0);
10916         AssertionDsc*  origAssertionTab   = DUMMY_INIT(NULL);
10917
10918         AssertionIndex thenAssertionCount = DUMMY_INIT(0);
10919         AssertionDsc*  thenAssertionTab   = DUMMY_INIT(NULL);
10920 #endif
10921
10922         if (fgGlobalMorph)
10923         {
10924             tree = fgMorphForRegisterFP(tree);
10925         }
10926
10927         genTreeOps oper = tree->OperGet();
10928         var_types  typ  = tree->TypeGet();
10929         GenTreePtr op1  = tree->gtOp.gtOp1;
10930         GenTreePtr op2  = tree->gtGetOp2IfPresent();
10931
10932         /*-------------------------------------------------------------------------
10933          * First do any PRE-ORDER processing
10934          */
10935
10936         switch (oper)
10937         {
10938             // Some arithmetic operators need to use a helper call to the EE
10939             int helper;
10940
10941             case GT_ASG:
10942                 tree = fgDoNormalizeOnStore(tree);
10943                 /* fgDoNormalizeOnStore can change op2 */
10944                 noway_assert(op1 == tree->gtOp.gtOp1);
10945                 op2 = tree->gtOp.gtOp2;
10946
10947 #ifdef FEATURE_SIMD
10948                 {
10949                     // We should check whether op2 should be assigned to a SIMD field or not.
10950                     // If it is, we should translate the tree to a SIMD intrinsic.
10951                     assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
10952                     GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
10953                     typ                = tree->TypeGet();
10954                     op1                = tree->gtGetOp1();
10955                     op2                = tree->gtGetOp2();
10956 #ifdef DEBUG
10957                     assert((tree == newTree) && (tree->OperGet() == oper));
10958                     if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
10959                     {
10960                         tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
10961                     }
10962 #endif // DEBUG
10963                 }
10964 #endif
10965
10966                 __fallthrough;
10967
10968             case GT_ASG_ADD:
10969             case GT_ASG_SUB:
10970             case GT_ASG_MUL:
10971             case GT_ASG_DIV:
10972             case GT_ASG_MOD:
10973             case GT_ASG_UDIV:
10974             case GT_ASG_UMOD:
10975             case GT_ASG_OR:
10976             case GT_ASG_XOR:
10977             case GT_ASG_AND:
10978             case GT_ASG_LSH:
10979             case GT_ASG_RSH:
10980             case GT_ASG_RSZ:
10981             case GT_CHS:
10982
10983                 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
10984                 // Previously, the "lhs" (addr) of a block op was CSE'd.  So, to duplicate the former
10985                 // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
10986                 // TODO-1stClassStructs: improve this.
10987                 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
10988                 {
10989                     op1->gtFlags |= GTF_DONT_CSE;
10990                 }
10991                 break;
10992
10993             case GT_ADDR:
10994
10995                 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
10996                 op1->gtFlags |= GTF_DONT_CSE;
10997                 break;
10998
10999             case GT_QMARK:
11000             case GT_JTRUE:
11001
11002                 noway_assert(op1);
11003
11004                 if (op1->OperKind() & GTK_RELOP)
11005                 {
11006                     noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
11007                     /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
11008                        not need to materialize the result as a 0 or 1. */
11009
11010                     /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
11011                     op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
11012
11013                     // Request that the codegen for op1 sets the condition flags
11014                     // when it generates the code for op1.
11015                     //
11016                     // Codegen for op1 must set the condition flags if
11017                     // this method returns true.
11018                     //
11019                     op1->gtRequestSetFlags();
11020                 }
11021                 else
11022                 {
11023                     GenTreePtr effOp1 = op1->gtEffectiveVal();
11024                     noway_assert((effOp1->gtOper == GT_CNS_INT) &&
11025                                  (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
11026                 }
11027                 break;
11028
11029             case GT_COLON:
11030 #if LOCAL_ASSERTION_PROP
11031                 if (optLocalAssertionProp)
11032 #endif
11033                 {
11034                     isQmarkColon = true;
11035                 }
11036                 break;
11037
11038             case GT_INDEX:
11039                 return fgMorphArrayIndex(tree);
11040
11041             case GT_CAST:
11042                 return fgMorphCast(tree);
11043
11044             case GT_MUL:
11045
11046 #ifndef _TARGET_64BIT_
11047                 if (typ == TYP_LONG)
11048                 {
11049                     /* For (long)int1 * (long)int2, we don't actually do the
11050                        casts, and just multiply the 32 bit values, which will
11051                        give us the 64 bit result in edx:eax */
11052
11053                     noway_assert(op2);
11054                     if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
11055                          genActualType(op1->CastFromType()) == TYP_INT &&
11056                          genActualType(op2->CastFromType()) == TYP_INT) &&
11057                         !op1->gtOverflow() && !op2->gtOverflow())
11058                     {
11059                         // The casts have to be of the same signedness.
11060                         if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11061                         {
11062                             // We see if we can force an int constant to change its signedness
11063                             GenTreePtr constOp;
11064                             if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11065                                 constOp = op1;
11066                             else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11067                                 constOp = op2;
11068                             else
11069                                 goto NO_MUL_64RSLT;
11070
11071                             if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11072                                 constOp->gtFlags ^= GTF_UNSIGNED;
11073                             else
11074                                 goto NO_MUL_64RSLT;
11075                         }
11076
11077                         // The only combination that can overflow
11078                         if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11079                             goto NO_MUL_64RSLT;
11080
11081                         /* Remaining combinations can never overflow during long mul. */
11082
11083                         tree->gtFlags &= ~GTF_OVERFLOW;
11084
11085                         /* Do unsigned mul only if the casts were unsigned */
11086
11087                         tree->gtFlags &= ~GTF_UNSIGNED;
11088                         tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11089
11090                         /* Since we are committing to GTF_MUL_64RSLT, we don't want
11091                            the casts to be folded away. So morph the castees directly */
11092
11093                         op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11094                         op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11095
11096                         // Propagate side effect flags up the tree
11097                         op1->gtFlags &= ~GTF_ALL_EFFECT;
11098                         op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11099                         op2->gtFlags &= ~GTF_ALL_EFFECT;
11100                         op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11101
11102                         // If the GT_MUL can be altogether folded away, we should do that.
11103
11104                         if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11105                             opts.OptEnabled(CLFLG_CONSTANTFOLD))
11106                         {
11107                             tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11108                             tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11109                             noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11110                             tree = gtFoldExprConst(tree);
11111                             noway_assert(tree->OperIsConst());
11112                             return tree;
11113                         }
11114
11115                         tree->gtFlags |= GTF_MUL_64RSLT;
11116
11117                         // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11118                         tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
11119
11120                         // Insert GT_NOP nodes for the cast operands so that they do not get folded
11121                         // And propagate the new flags. We don't want to CSE the casts because
11122                         // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11123
11124                         if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11125                         {
11126                             op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11127                             op1->gtFlags &= ~GTF_ALL_EFFECT;
11128                             op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11129                         }
11130
11131                         if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11132                         {
11133                             op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11134                             op2->gtFlags &= ~GTF_ALL_EFFECT;
11135                             op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11136                         }
11137
11138                         op1->gtFlags |= GTF_DONT_CSE;
11139                         op2->gtFlags |= GTF_DONT_CSE;
11140
11141                         tree->gtFlags &= ~GTF_ALL_EFFECT;
11142                         tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11143
11144                         goto DONE_MORPHING_CHILDREN;
11145                     }
11146                     else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11147                     {
11148                     NO_MUL_64RSLT:
11149                         if (tree->gtOverflow())
11150                             helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11151                         else
11152                             helper = CORINFO_HELP_LMUL;
11153
11154                         goto USE_HELPER_FOR_ARITH;
11155                     }
11156                     else
11157                     {
11158                         /* We are seeing this node again. We have decided to use
11159                            GTF_MUL_64RSLT, so leave it alone. */
11160
11161                         assert(tree->gtIsValid64RsltMul());
11162                     }
11163                 }
11164 #endif // !_TARGET_64BIT_
11165                 break;
11166
11167             case GT_DIV:
11168
11169 #ifndef _TARGET_64BIT_
11170                 if (typ == TYP_LONG)
11171                 {
11172                     helper = CORINFO_HELP_LDIV;
11173                     goto USE_HELPER_FOR_ARITH;
11174                 }
11175
11176 #if USE_HELPERS_FOR_INT_DIV
11177                 if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
11178                 {
11179                     helper = CORINFO_HELP_DIV;
11180                     goto USE_HELPER_FOR_ARITH;
11181                 }
11182 #endif
11183 #endif // !_TARGET_64BIT_
11184
11185 #ifndef LEGACY_BACKEND
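                // If the divisor is a cast of an integer constant, fold it now so that
                // later phases see a constant divisor.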
11186                 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11187                 {
11188                     op2 = gtFoldExprConst(op2);
11189                 }
11190 #endif // !LEGACY_BACKEND
11191                 break;
11192
11193             case GT_UDIV:
11194
11195 #ifndef _TARGET_64BIT_
11196                 if (typ == TYP_LONG)
11197                 {
11198                     helper = CORINFO_HELP_ULDIV;
11199                     goto USE_HELPER_FOR_ARITH;
11200                 }
11201 #if USE_HELPERS_FOR_INT_DIV
11202                 if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
11203                 {
11204                     helper = CORINFO_HELP_UDIV;
11205                     goto USE_HELPER_FOR_ARITH;
11206                 }
11207 #endif
11208 #endif // !_TARGET_64BIT_
11209                 break;
11210
11211             case GT_MOD:
11212
11213                 if (varTypeIsFloating(typ))
11214                 {
11215                     helper = CORINFO_HELP_DBLREM;
11216                     noway_assert(op2);
11217                     if (op1->TypeGet() == TYP_FLOAT)
11218                     {
11219                         if (op2->TypeGet() == TYP_FLOAT)
11220                         {
11221                             helper = CORINFO_HELP_FLTREM;
11222                         }
11223                         else
11224                         {
11225                             tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
11226                         }
11227                     }
11228                     else if (op2->TypeGet() == TYP_FLOAT)
11229                     {
11230                         tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
11231                     }
11232                     goto USE_HELPER_FOR_ARITH;
11233                 }
11234
11235                 // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
11236                 // A similar optimization for signed mod will not work for a negative perfectly divisible
11237                 // HI-word. To make it correct, we would need to divide without the sign and then flip the
11238                 // result sign after mod. This requires 18 opcodes + flow, making it not worthwhile to inline.
11239                 goto ASSIGN_HELPER_FOR_MOD;
11240
11241             case GT_UMOD:
11242
11243 #ifdef _TARGET_ARMARCH_
11244 //
11245 // Note: for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11246 //
11247 #else  // !_TARGET_ARMARCH_
11248                 /* If this is an unsigned long mod with op2 which is a cast to long from a
11249                    constant int, then don't morph to a call to the helper.  This can be done
11250                    faster inline using idiv.
11251                 */
11252
11253                 noway_assert(op2);
11254                 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11255                     ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11256                     ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11257                 {
11258                     if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
11259                         op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
11260                         op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
11261                         (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
11262                     {
11263                         tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
11264                         noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
11265                     }
11266
11267                     if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11268                         op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11269                     {
11270                         tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11271                         noway_assert(op1->TypeGet() == TYP_LONG);
11272
11273                         // Update flags for op1 morph
11274                         tree->gtFlags &= ~GTF_ALL_EFFECT;
11275
11276                         tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11277
11278                         // If op1 is a constant, then do constant folding of the division operator
11279                         if (op1->gtOper == GT_CNS_NATIVELONG)
11280                         {
11281                             tree = gtFoldExpr(tree);
11282                         }
11283                         return tree;
11284                     }
11285                 }
11286 #endif // _TARGET_ARMARCH_
11287
11288             ASSIGN_HELPER_FOR_MOD:
11289
11290                 // For "val % 1", return 0 if op1 doesn't have any side effects
11291                 // and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
11292                 // because it may contain CSE expressions that we haven't yet examined.
11293                 //
11294                 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11295                 {
11296                     if (op2->IsIntegralConst(1))
11297                     {
11298                         GenTreePtr zeroNode = gtNewZeroConNode(typ);
11299 #ifdef DEBUG
11300                         zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11301 #endif
11302                         DEBUG_DESTROY_NODE(tree);
11303                         return zeroNode;
11304                     }
11305                 }
11306
11307 #ifndef _TARGET_64BIT_
11308                 if (typ == TYP_LONG)
11309                 {
11310                     helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11311                     goto USE_HELPER_FOR_ARITH;
11312                 }
11313
11314 #if USE_HELPERS_FOR_INT_DIV
11315                 if (typ == TYP_INT)
11316                 {
11317                     if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
11318                     {
11319                         helper = CORINFO_HELP_UMOD;
11320                         goto USE_HELPER_FOR_ARITH;
11321                     }
11322                     else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
11323                     {
11324                         helper = CORINFO_HELP_MOD;
11325                         goto USE_HELPER_FOR_ARITH;
11326                     }
11327                 }
11328 #endif
11329 #endif // !_TARGET_64BIT_
11330
11331 #ifndef LEGACY_BACKEND
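                // As with GT_DIV above, fold a cast-of-constant divisor to a bare constant.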
11332                 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11333                 {
11334                     op2 = gtFoldExprConst(op2);
11335                 }
11336
11337 #ifdef _TARGET_ARM64_
11338
11339                 // For ARM64 we don't have a remainder instruction,
11340                 // The architecture manual suggests the following transformation to
11341                 // generate code for such an operator:
11342                 //
11343                 // a % b = a - (a / b) * b;
11344                 //
11345                 // NOTE: we should never need to perform this transformation when remorphing, since global morphing
11346                 //       should already have done so and we do not introduce new modulus nodes in later phases.
11347                 assert(!optValnumCSE_phase);
11348                 tree = fgMorphModToSubMulDiv(tree->AsOp());
11349                 op1  = tree->gtOp.gtOp1;
11350                 op2  = tree->gtOp.gtOp2;
11351 #else  //_TARGET_ARM64_
11352                 // If b is not a power of 2 constant then lowering replaces a % b
11353                 // with a - (a / b) * b and applies magic division optimization to
11354                 // a / b. The code may already contain an a / b expression (e.g.
11355                 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11356                 // If we convert % to / here we give CSE the opportunity to eliminate
11357                 // the redundant division. If there's no redundant division then
11358                 // nothing is lost, lowering would have done this transform anyway.
11359
11360                 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11361                 {
11362                     ssize_t divisorValue    = op2->AsIntCon()->IconValue();
11363                     size_t  absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11364                                                                            : static_cast<size_t>(abs(divisorValue));
11365
11366                     if (!isPow2(absDivisorValue))
11367                     {
11368                         tree = fgMorphModToSubMulDiv(tree->AsOp());
11369                         op1  = tree->gtOp.gtOp1;
11370                         op2  = tree->gtOp.gtOp2;
11371                     }
11372                 }
11373 #endif //_TARGET_ARM64_
11374 #endif // !LEGACY_BACKEND
11375                 break;
11376
11377             USE_HELPER_FOR_ARITH:
11378             {
11379                 /* We have to morph these arithmetic operations into helper calls
11380                    before morphing the arguments (preorder), else the arguments
11381                    won't get correct values of fgPtrArgCntCur.
11382                    However, try to fold the tree first in case we end up with a
11383                    simple node which won't need a helper call at all */
11384
11385                 noway_assert(tree->OperIsBinary());
11386
11387                 GenTreePtr oldTree = tree;
11388
11389                 tree = gtFoldExpr(tree);
11390
11391                 // Were we able to fold it?
11392                 // Note that gtFoldExpr may return a non-leaf even if successful
11393                 // e.g. for something like "expr / 1" - see also bug #290853
11394                 if (tree->OperIsLeaf() || (oldTree != tree))
11395                 {
11396                     return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11397                 }
11398
11399                 // Did we fold it into a comma node with throw?
11400                 if (tree->gtOper == GT_COMMA)
11401                 {
11402                     noway_assert(fgIsCommaThrow(tree));
11403                     return fgMorphTree(tree);
11404                 }
11405             }
11406                 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11407
11408             case GT_RETURN:
11409                 // normalize small integer return values
11410                 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
11411                     (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
11412                 {
11413                     // Small-typed return values are normalized by the callee
11414                     op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
11415
11416                     // Propagate GTF_COLON_COND
11417                     op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11418
11419                     tree->gtOp.gtOp1 = fgMorphCast(op1);
11420
11421                     // Propagate side effect flags
11422                     tree->gtFlags &= ~GTF_ALL_EFFECT;
11423                     tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11424
11425                     return tree;
11426                 }
11427                 break;
11428
11429             case GT_EQ:
11430             case GT_NE:
11431
11432                 // Check for typeof(...) == obj.GetType()
11433                 // Also check for typeof(...) == typeof(...)
11434                 // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
11435                 // type handles and instances of System.Type
11436                 // If this invariant is ever broken, the optimization will need updating
11437                 CLANG_FORMAT_COMMENT_ANCHOR;
11438
11439 #ifdef LEGACY_BACKEND
11440                 if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
11441                     ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11442                      (op1->gtCall.gtCallType == CT_HELPER)) &&
11443                     ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
11444                      (op2->gtCall.gtCallType == CT_HELPER)))
11445 #else
11446                 if ((((op1->gtOper == GT_INTRINSIC) &&
11447                       (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11448                      ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
11449                     (((op2->gtOper == GT_INTRINSIC) &&
11450                       (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
11451                      ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
11452 #endif
11453                 {
11454                     GenTreePtr pGetClassFromHandle;
11455                     GenTreePtr pGetType;
11456
11457 #ifdef LEGACY_BACKEND
11458                     bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
11459                     bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
11460 #else
11461                     bool bOp1ClassFromHandle =
11462                         op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
11463                     bool bOp2ClassFromHandle =
11464                         op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
11465 #endif
11466
11467                     // Optimize typeof(...) == typeof(...)
11468                     // Typically this occurs in generic code that attempts a type switch
11469                     // e.g. typeof(T) == typeof(int)
11470
11471                     if (bOp1ClassFromHandle && bOp2ClassFromHandle)
11472                     {
11473                         GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
11474                         GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
11475
11476                         GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
11477
11478                         compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11479
11480                         // Morph and return
11481                         return fgMorphTree(compare);
11482                     }
11483                     else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
11484                     {
11485                         //
11486                         // Now check for GetClassFromHandle(handle) == obj.GetType()
11487                         //
11488
11489                         if (bOp1ClassFromHandle)
11490                         {
11491                             pGetClassFromHandle = tree->gtOp.gtOp1;
11492                             pGetType            = op2;
11493                         }
11494                         else
11495                         {
11496                             pGetClassFromHandle = tree->gtOp.gtOp2;
11497                             pGetType            = op1;
11498                         }
11499
11500                         GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
11501                         GenTreePtr pConstLiteral               = pGetClassFromHandleArgument;
11502
11503                         // Unwrap GT_NOP node used to prevent constant folding
11504                         if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
11505                         {
11506                             pConstLiteral = pConstLiteral->gtOp.gtOp1;
11507                         }
11508
11509                         // In the ngen case, we have to go through an indirection to get the right handle.
11510                         if (pConstLiteral->gtOper == GT_IND)
11511                         {
11512                             pConstLiteral = pConstLiteral->gtOp.gtOp1;
11513                         }
11514 #ifdef LEGACY_BACKEND
11515
11516                         if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
11517                             info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
11518                                 CORINFO_INTRINSIC_Object_GetType &&
11519 #else
11520                         if ((pGetType->gtOper == GT_INTRINSIC) &&
11521                             (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
11522 #endif
11523                             pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
11524                         {
11525                             CORINFO_CLASS_HANDLE clsHnd =
11526                                 CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
11527
11528                             if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
11529                             {
11530                                 // Method Table tree
11531                                 CLANG_FORMAT_COMMENT_ANCHOR;
11532 #ifdef LEGACY_BACKEND
11533                                 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
11534 #else
11535                                 GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
11536 #endif
11537                                 objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
11538                                 compCurBB->bbFlags |= BBF_HAS_VTABREF;
11539                                 optMethodFlags |= OMF_HAS_VTABLEREF;
11540
11541                                 // Method table constant
11542                                 GenTreePtr cnsMT = pGetClassFromHandleArgument;
11543
11544                                 GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
11545
11546                                 compare->gtFlags |=
11547                                     tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
11548
11549                                 // Morph and return
11550                                 return fgMorphTree(compare);
11551                             }
11552                         }
11553                     }
11554                 }
11555
11556                 __fallthrough;
11557
11558             case GT_GT:
11559
11560                 // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT.
11561                 if ((oper != GT_GT) || tree->IsUnsigned())
11562                 {
11563                     fgMorphRecognizeBoxNullable(tree);
11564                 }
11565
11566                 op1 = tree->gtOp.gtOp1;
11567                 op2 = tree->gtGetOp2IfPresent();
11568
11569                 break;
11570
11571 #ifdef _TARGET_ARM_
11572             case GT_INTRINSIC:
11573                 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11574                 {
11575                     switch (tree->TypeGet())
11576                     {
11577                         case TYP_DOUBLE:
11578                             return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11579                         case TYP_FLOAT:
11580                             return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11581                         default:
11582                             unreached();
11583                     }
11584                 }
11585                 break;
11586 #endif
11587
11588             default:
11589                 break;
11590         }
11591
11592 #if !CPU_HAS_FP_SUPPORT
11593         tree = fgMorphToEmulatedFP(tree);
11594 #endif
11595
11596         /* Could this operator throw an exception? */
11597         if (fgGlobalMorph && tree->OperMayThrow())
11598         {
11599             if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
11600             {
11601                 /* Mark the tree node as potentially throwing an exception */
11602                 tree->gtFlags |= GTF_EXCEPT;
11603             }
11604         }
11605
11606         /*-------------------------------------------------------------------------
11607          * Process the first operand, if any
11608          */
11609
11610         if (op1)
11611         {
11612
11613 #if LOCAL_ASSERTION_PROP
11614             // If we are entering the "then" part of a Qmark-Colon we must
11615             // save the state of the current copy assignment table
11616             // so that we can restore this state when entering the "else" part
11617             if (isQmarkColon)
11618             {
11619                 noway_assert(optLocalAssertionProp);
11620                 if (optAssertionCount)
11621                 {
11622                     noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11623                     unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
11624                     origAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
11625                     origAssertionCount = optAssertionCount;
11626                     memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11627                 }
11628                 else
11629                 {
11630                     origAssertionCount = 0;
11631                     origAssertionTab   = nullptr;
11632                 }
11633             }
11634 #endif // LOCAL_ASSERTION_PROP
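                  // Sketch of why this is needed: in "cond ? (x = y) : (x = z)" the
                  // copy assertion "x == y" created while morphing the "then" arm is
                  // valid only on that path, so we snapshot the table here and restore
                  // it before morphing the "else" arm.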
11635
11636             // We might need a new MorphAddressContext context.  (These are used to convey
11637             // parent context about how addresses being calculated will be used; see the
11638             // specification comment for MorphAddrContext for full details.)
11639             // Assume it's an Ind context to start.
11640             MorphAddrContext  subIndMac1(MACK_Ind);
11641             MorphAddrContext* subMac1 = mac;
11642             if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11643             {
11644                 switch (tree->gtOper)
11645                 {
11646                     case GT_ADDR:
11647                         if (subMac1 == nullptr)
11648                         {
11649                             subMac1         = &subIndMac1;
11650                             subMac1->m_kind = MACK_Addr;
11651                         }
11652                         break;
11653                     case GT_COMMA:
11654                         // In a comma, the incoming context only applies to the rightmost arg of the
11655                         // comma list.  The left arg (op1) gets a fresh context.
11656                         subMac1 = nullptr;
11657                         break;
11658                     case GT_OBJ:
11659                     case GT_BLK:
11660                     case GT_DYN_BLK:
11661                     case GT_IND:
11662                         subMac1 = &subIndMac1;
11663                         break;
11664                     default:
11665                         break;
11666                 }
11667             }
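                  // For instance, when morphing IND(COMMA(s, p)), the Ind context
                  // describes how the comma's value 'p' will be used; 's' is evaluated
                  // only for its side effects, so it starts with a fresh context.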
11668
11669             // For additions, if we're in an IND context keep track of whether
11670             // all offsets added to the address are constant, and their sum.
11671             if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11672             {
11673                 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11674                 GenTreePtr otherOp = tree->gtOp.gtOp2;
11675                 // Is the other operand a constant?
11676                 if (otherOp->IsCnsIntOrI())
11677                 {
11678                     ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11679                     totalOffset += otherOp->gtIntConCommon.IconValue();
11680                     if (totalOffset.IsOverflow())
11681                     {
11682                         // We will consider an offset so large as to overflow as "not a constant" --
11683                         // we will do a null check.
11684                         subMac1->m_allConstantOffsets = false;
11685                     }
11686                     else
11687                     {
11688                         subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11689                     }
11690                 }
11691                 else
11692                 {
11693                     subMac1->m_allConstantOffsets = false;
11694                 }
11695             }
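                  // Sketch: while morphing IND(ADD(ADD(p, 8), 16)) in an Ind context,
                  // m_totalOffset accumulates to 24. A consumer of the context can then
                  // decide whether the constant offset is small enough for the implicit
                  // null check on the indirection to still cover 'p', or whether an
                  // explicit null check is required.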
11696
11697             // If gtOp1 is a GT_FIELD, we need to pass down the mac if
11698             // its parent is GT_ADDR, since the address of the field
11699             // is part of an ongoing address computation. Otherwise
11700             // op1 represents the value of the field and so any address
11701             // calculations it does are in a new context.
11702             if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
11703             {
11704                 subMac1 = nullptr;
11705
11706                 // The impact of this field's value to any ongoing
11707                 // address computation is handled below when looking
11708                 // at op2.
11709             }
11710
11711             tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11712
11713 #if LOCAL_ASSERTION_PROP
11714             // If we are exiting the "then" part of a Qmark-Colon we must
11715             // save the state of the current copy assignment table
11716             // so that we can merge this state with the "else" part exit
11717             if (isQmarkColon)
11718             {
11719                 noway_assert(optLocalAssertionProp);
11720                 if (optAssertionCount)
11721                 {
11722                     noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11723                     unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
11724                     thenAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
11725                     thenAssertionCount = optAssertionCount;
11726                     memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
11727                 }
11728                 else
11729                 {
11730                     thenAssertionCount = 0;
11731                     thenAssertionTab   = nullptr;
11732                 }
11733             }
11734 #endif // LOCAL_ASSERTION_PROP
11735
11736             /* Morphing along with folding and inlining may have changed the
11737              * side effect flags, so we have to reset them
11738              *
11739              * NOTE: Don't reset the exception flags on nodes that may throw */
11740
11741             assert(tree->gtOper != GT_CALL);
11742
11743             if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
11744             {
11745                 tree->gtFlags &= ~GTF_CALL;
11746             }
11747
11748             if (!tree->OperMayThrow())
11749             {
11750                 tree->gtFlags &= ~GTF_EXCEPT;
11751             }
11752
11753             /* Propagate the new flags */
11754             tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
11755
11756             // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar itself does.
11757             // The same applies to clsVar.
11758             if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
11759             {
11760                 tree->gtFlags &= ~GTF_GLOB_REF;
11761             }
11762         } // if (op1)
11763
11764         /*-------------------------------------------------------------------------
11765          * Process the second operand, if any
11766          */
11767
11768         if (op2)
11769         {
11770
11771 #if LOCAL_ASSERTION_PROP
11772             // If we are entering the "else" part of a Qmark-Colon we must
11773             // reset the state of the current copy assignment table
11774             if (isQmarkColon)
11775             {
11776                 noway_assert(optLocalAssertionProp);
11777                 optAssertionReset(0);
11778                 if (origAssertionCount)
11779                 {
11780                     size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11781                     memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11782                     optAssertionReset(origAssertionCount);
11783                 }
11784             }
11785 #endif // LOCAL_ASSERTION_PROP
11786
11787             // We might need a new MorphAddressContext context to use in evaluating op2.
11788             // (These are used to convey parent context about how addresses being calculated
11789             // will be used; see the specification comment for MorphAddrContext for full details.)
11790             // Assume it's an Ind context to start.
11791             switch (tree->gtOper)
11792             {
11793                 case GT_ADD:
11794                     if (mac != nullptr && mac->m_kind == MACK_Ind)
11795                     {
11796                         GenTreePtr otherOp = tree->gtOp.gtOp1;
11797                         // Is the other operand a constant?
11798                         if (otherOp->IsCnsIntOrI())
11799                         {
11800                             mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11801                         }
11802                         else
11803                         {
11804                             mac->m_allConstantOffsets = false;
11805                         }
11806                     }
11807                     break;
11808                 default:
11809                     break;
11810             }
11811
11812             // If gtOp2 is a GT_FIELD, we must be taking its value,
11813             // so it should evaluate its address in a new context.
11814             if (op2->gtOper == GT_FIELD)
11815             {
11816                 // The impact of this field's value to any ongoing
11817                 // address computation is handled above when looking
11818                 // at op1.
11819                 mac = nullptr;
11820             }
11821
11822             tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11823
11824             /* Propagate the side effect flags from op2 */
11825
11826             tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11827
11828 #if LOCAL_ASSERTION_PROP
11829             // If we are exiting the "else" part of a Qmark-Colon we must
11830             // merge the state of the current copy assignment table with
11831             // that of the exit of the "then" part.
11832             if (isQmarkColon)
11833             {
11834                 noway_assert(optLocalAssertionProp);
11835                 // If either exit table has zero entries then
11836                 // the merged table also has zero entries
11837                 if (optAssertionCount == 0 || thenAssertionCount == 0)
11838                 {
11839                     optAssertionReset(0);
11840                 }
11841                 else
11842                 {
11843                     size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11844                     if ((optAssertionCount != thenAssertionCount) ||
11845                         (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11846                     {
11847                         // Yes they are different so we have to find the merged set
11848                         // Iterate over the copy asgn table removing any entries
11849                         // that do not have an exact match in the thenAssertionTab
11850                         AssertionIndex index = 1;
11851                         while (index <= optAssertionCount)
11852                         {
11853                             AssertionDsc* curAssertion = optGetAssertion(index);
11854
11855                             for (unsigned j = 0; j < thenAssertionCount; j++)
11856                             {
11857                                 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11858
11859                                 // Do the left sides match?
11860                                 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11861                                     (curAssertion->assertionKind == thenAssertion->assertionKind))
11862                                 {
11863                                     // Do the right sides match?
11864                                     if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
11865                                         (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
11866                                     {
11867                                         goto KEEP;
11868                                     }
11869                                     else
11870                                     {
11871                                         goto REMOVE;
11872                                     }
11873                                 }
11874                             }
11875                         //
11876                         // If we fall out of the loop above then we didn't find
11877                         // any matching entry in the thenAssertionTab, so the assertion
11878                         // must have been killed on that path; remove it here.
11879                         //
11880                         REMOVE:
11881                             // The data at optAssertionTabPrivate[index] is to be removed
11882                             CLANG_FORMAT_COMMENT_ANCHOR;
11883 #ifdef DEBUG
11884                             if (verbose)
11885                             {
11886                                 printf("The QMARK-COLON ");
11887                                 printTreeID(tree);
11888                                 printf(" removes assertion candidate #%d\n", index);
11889                             }
11890 #endif
11891                             optAssertionRemove(index);
11892                             continue;
11893                         KEEP:
11894                             // The data at optAssertionTabPrivate[index] is to be kept
11895                             index++;
11896                         }
11897                     }
11898                 }
11899             }
11900 #endif    // LOCAL_ASSERTION_PROP
11901         } // if (op2)
11902
11903     DONE_MORPHING_CHILDREN:
11904
11905 /*-------------------------------------------------------------------------
11906  * Now do POST-ORDER processing
11907  */
11908
11909 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
11910         // Variable shifts of a long end up being helper calls, so mark the tree as such. This
11911         // is potentially too conservative, since they'll get treated as having side effects.
11912         // It is important to mark them as calls so if they are part of an argument list,
11913         // they will get sorted and processed properly (for example, it is important to handle
11914         // all nested calls before putting struct arguments in the argument registers). We
11915         // could mark the trees just before argument processing, but it would require a full
11916         // tree walk of the argument tree, so we just do it here, instead, even though we'll
11917         // mark non-argument trees (that will still get converted to calls, anyway).
11918         if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
11919         {
11920             tree->gtFlags |= GTF_CALL;
11921         }
11922 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
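              // For example, on a 32-bit target "lngVal << count" with a non-constant
              // 'count' is later expanded into a helper call (a variable shift of a
              // 64-bit value), which is why GTF_CALL is set on the tree here.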
11923
11924         if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
11925             (op2 && !varTypeIsGC(op2->TypeGet())))
11926         {
11927             // The tree is really not GC but was marked as such. Now that the
11928             // children have been unmarked, unmark the tree too.
11929
11930             // Remember that GT_COMMA inherits its type only from op2
11931             if (tree->gtOper == GT_COMMA)
11932             {
11933                 tree->gtType = genActualType(op2->TypeGet());
11934             }
11935             else
11936             {
11937                 tree->gtType = genActualType(op1->TypeGet());
11938             }
11939         }
11940
11941         GenTreePtr oldTree = tree;
11942
11943         GenTreePtr qmarkOp1 = nullptr;
11944         GenTreePtr qmarkOp2 = nullptr;
11945
11946         if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
11947         {
11948             qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
11949             qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
11950         }
11951
11952         // Try to fold it; maybe we get lucky.
11953         tree = gtFoldExpr(tree);
11954
11955         if (oldTree != tree)
11956         {
11957             /* if gtFoldExpr returned op1 or op2 then we are done */
11958             if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
11959             {
11960                 return tree;
11961             }
11962
11963             /* If we created a comma-throw tree then we need to morph op1 */
11964             if (fgIsCommaThrow(tree))
11965             {
11966                 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
11967                 fgMorphTreeDone(tree);
11968                 return tree;
11969             }
11970
11971             return tree;
11972         }
11973         else if (tree->OperKind() & GTK_CONST)
11974         {
11975             return tree;
11976         }
11977
11978         /* gtFoldExpr could have used setOper to change the oper */
11979         oper = tree->OperGet();
11980         typ  = tree->TypeGet();
11981
11982         /* gtFoldExpr could have changed op1 and op2 */
11983         op1 = tree->gtOp.gtOp1;
11984         op2 = tree->gtGetOp2IfPresent();
11985
11986         // Do we have an integer compare operation?
11987         //
11988         if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
11989         {
11990             // Are we comparing against zero?
11991             //
11992             if (op2->IsIntegralConst(0))
11993             {
11994                 // Request that the codegen for op1 sets the condition flags
11995                 // when it generates the code for op1.
11996                 //
11997                 // Codegen for op1 must set the condition flags if
11998                 // this method returns true.
11999                 //
12000                 op1->gtRequestSetFlags();
12001             }
12002         }
12003         /*-------------------------------------------------------------------------
12004          * Perform the required oper-specific postorder morphing
12005          */
12006
12007         GenTreePtr           temp;
12008         GenTreePtr           cns1, cns2;
12009         GenTreePtr           thenNode;
12010         GenTreePtr           elseNode;
12011         size_t               ival1, ival2;
12012         GenTreePtr           lclVarTree;
12013         GenTreeLclVarCommon* lclVarCmnTree;
12014         FieldSeqNode*        fieldSeq = nullptr;
12015
12016         switch (oper)
12017         {
12018             case GT_ASG:
12019
12020                 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
12021                 if (lclVarTree != nullptr)
12022                 {
12023                     lclVarTree->gtFlags |= GTF_VAR_DEF;
12024                 }
12025
12026                 if (op1->gtEffectiveVal()->OperIsConst())
12027                 {
12028                     op1              = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
12029                     tree->gtOp.gtOp1 = op1;
12030                 }
12031
12032                 /* If we are storing a small type, we might be able to omit a cast */
12033                 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
12034                 {
12035                     if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
12036                     {
12037                         var_types castType = op2->CastToType();
12038
12039                         // If we are performing a narrowing cast and
12040                         // castType is larger or the same as op1's type
12041                         // then we can discard the cast.
12042
12043                         if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
12044                         {
12045                             tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
12046                         }
12047                     }
12048                     else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
12049                     {
12050                         /* We don't need to zero extend the setcc instruction */
12051                         op2->gtType = TYP_BYTE;
12052                     }
12053                 }
12054                 // If we introduced a CSE we may need to undo the optimization above
12055                 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
12056                 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
12057                 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
12058                 {
12059                     unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
12060                     LclVarDsc* varDsc = &lvaTable[varNum];
12061
12062                     /* We again need to zero extend the setcc instruction */
12063                     op2->gtType = varDsc->TypeGet();
12064                 }
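                      // Illustrative examples of the two cases above (a sketch):
                      //  - "byteLcl = (byte)intVal": the store writes only 8 bits, so the
                      //    narrowing cast is redundant and is discarded.
                      //  - "byteLcl = (x < y)": the compare's 0/1 result already fits in a
                      //    byte, so it can produce TYP_BYTE directly (and the CSE case above
                      //    restores the wider type once the GT_IND has been replaced).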
12065                 fgAssignSetVarDef(tree);
12066
12067                 __fallthrough;
12068
12069             case GT_ASG_ADD:
12070             case GT_ASG_SUB:
12071             case GT_ASG_MUL:
12072             case GT_ASG_DIV:
12073             case GT_ASG_MOD:
12074             case GT_ASG_UDIV:
12075             case GT_ASG_UMOD:
12076             case GT_ASG_OR:
12077             case GT_ASG_XOR:
12078             case GT_ASG_AND:
12079             case GT_ASG_LSH:
12080             case GT_ASG_RSH:
12081             case GT_ASG_RSZ:
12082
12083                 /* We can't CSE the LHS of an assignment */
12084                 /* We also must set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
12085                 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12086                 {
12087                     op1->gtFlags |= GTF_DONT_CSE;
12088                 }
12089                 break;
12090
12091             case GT_EQ:
12092             case GT_NE:
12093
12094                 /* Make sure we're allowed to do this */
12095
12096                 if (optValnumCSE_phase)
12097                 {
12098                     // It is not safe to reorder/delete CSE's
12099                     break;
12100                 }
12101
12102                 cns2 = op2;
12103
12104                 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12105
12106                 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12107                 {
12108                     op1 = tree->gtOp.gtOp1;
12109
12110                     /* Since this can occur repeatedly we use a while loop */
12111
12112                     while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
12113                            (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
12114                            (op1->gtOverflow() == false))
12115                     {
12116                         /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
12117
12118                         ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12119                         ival2 = cns2->gtIntCon.gtIconVal;
12120
12121                         if (op1->gtOper == GT_ADD)
12122                         {
12123                             ival2 -= ival1;
12124                         }
12125                         else
12126                         {
12127                             ival2 += ival1;
12128                         }
12129                         cns2->gtIntCon.gtIconVal = ival2;
12130
12131 #ifdef _TARGET_64BIT_
12132                         // We need to re-sign-extend or truncate the constant as needed.
12133                         cns2->AsIntCon()->TruncateOrSignExtend32();
12134 #endif // _TARGET_64BIT_
12135
12136                         op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12137                     }
12138                 }
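                      // Worked example: "(x + 5) == 8" folds to "x == 3" and
                      // "(x - 2) != 10" folds to "x != 12"; because the loop repeats,
                      // "((x + 1) + 2) == 7" eventually becomes "x == 4".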
12139
12140                 //
12141                 // Here we look for the following tree
12142                 //
12143                 //                        EQ/NE
12144                 //                        /  \
12145                 //                      op1   CNS 0/1
12146                 //
12147                 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12148
12149                 // cast to unsigned allows test for both 0 and 1
12150                 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12151                 {
12152                     ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12153                 }
12154                 else // cast to UINT64 allows test for both 0 and 1
12155                     if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12156                 {
12157                     ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12158                 }
12159
12160                 if (ival2 != INT_MAX)
12161                 {
12162                     // This optimization requires a comma whose second operand is a relop
12163                     //
12164                     if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
12165                     {
12166                         // Here we look for the following transformation
12167                         //
12168                         //                  EQ/NE                    Possible REVERSE(RELOP)
12169                         //                  /  \                           /      \
12170                         //               COMMA CNS 0/1             ->   COMMA   relop_op2
12171                         //              /   \                          /    \
12172                         //             x  RELOP                       x     relop_op1
12173                         //               /    \
12174                         //         relop_op1  relop_op2
12175                         //
12176                         //
12177                         //
12178                         GenTreePtr comma = op1;
12179                         GenTreePtr relop = comma->gtOp.gtOp2;
12180
12181                         GenTreePtr relop_op1 = relop->gtOp.gtOp1;
12182
12183                         bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12184
12185                         if (reverse)
12186                         {
12187                             gtReverseCond(relop);
12188                         }
12189
12190                         relop->gtOp.gtOp1 = comma;
12191                         comma->gtOp.gtOp2 = relop_op1;
12192
12193                         // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12194                         comma->gtFlags &= ~GTF_ALL_EFFECT;
12195                         comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12196                         comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12197
12198                         noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12199                         noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
12200                         relop->gtFlags |=
12201                             tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12202
12203                         return relop;
12204                     }
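                          // Worked example of the transformation above:
                          // "NE(COMMA(s, LT(a, b)), 0)" becomes "LT(COMMA(s, a), b)"
                          // (no reversal), while "EQ(COMMA(s, LT(a, b)), 0)" reverses
                          // the relop to yield "GE(COMMA(s, a), b)".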
12205
12206                     if (op1->gtOper == GT_COMMA)
12207                     {
12208                         // Here we look for the following tree
12209                         // and when the LCL_VAR is a temp we can fold the tree:
12210                         //
12211                         //                        EQ/NE                  EQ/NE
12212                         //                        /  \                   /  \
12213                         //                     COMMA  CNS 0/1  ->     RELOP CNS 0/1
12214                         //                     /   \                   / \
12215                         //                   ASG  LCL_VAR
12216                         //                  /  \
12217                         //           LCL_VAR   RELOP
12218                         //                      / \
12219                         //
12220
12221                         GenTreePtr asg = op1->gtOp.gtOp1;
12222                         GenTreePtr lcl = op1->gtOp.gtOp2;
12223
12224                         /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
12225                         if (asg->gtOper != GT_ASG)
12226                         {
12227                             goto SKIP;
12228                         }
12229
12230                         /* The right side of the comma must be a LCL_VAR temp */
12231                         if (lcl->gtOper != GT_LCL_VAR)
12232                         {
12233                             goto SKIP;
12234                         }
12235
12236                         unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12237                         noway_assert(lclNum < lvaCount);
12238
12239                         /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12240                         if (!lvaTable[lclNum].lvIsTemp)
12241                         {
12242                             goto SKIP;
12243                         }
12244
12245 #if FEATURE_ANYCSE
12246                         /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12247                         // Fix 383856 X86/ARM ILGEN
12248                         if (lclNumIsCSE(lclNum))
12249                         {
12250                             goto SKIP;
12251                         }
12252 #endif
12253
12254                         /* We also must be assigning the result of a RELOP */
12255                         if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12256                         {
12257                             goto SKIP;
12258                         }
12259
12260                         /* Both of the LCL_VARs must match */
12261                         if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12262                         {
12263                             goto SKIP;
12264                         }
12265
12266                         /* If right side of asg is not a RELOP then skip */
12267                         if (!asg->gtOp.gtOp2->OperIsCompare())
12268                         {
12269                             goto SKIP;
12270                         }
12271
12272                         LclVarDsc* varDsc = lvaTable + lclNum;
12273
12274                         /* Set op1 to the right side of asg, (i.e. the RELOP) */
12275                         op1 = asg->gtOp.gtOp2;
12276
12277                         DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12278                         DEBUG_DESTROY_NODE(lcl);
12279
12280                         /* This local variable should never be used again */
12281                         // <BUGNUM>
12282                         // VSW 184221: Set RefCnt to zero to indicate that this local var
12283                         // is not used any more. (Keep the lvType as-is.)
12284                         // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
12285                         // And then emitter::emitEndCodeGen will assert in the following line:
12286                         //        noway_assert( dsc->lvTracked);
12287                         // </BUGNUM>
12288                         noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
12289                                      varDsc->lvRefCnt == 2    // Or, we assume this tmp should only be used here,
12290                                                               // and it only shows up twice.
12291                                      );
12292                         lvaTable[lclNum].lvRefCnt = 0;
12293                         lvaTable[lclNum].lvaResetSortAgainFlag(this);
12294                     }
12295
12296                     if (op1->OperIsCompare())
12297                     {
12298                         // Here we look for the following tree
12299                         //
12300                         //                        EQ/NE           ->      RELOP/!RELOP
12301                         //                        /  \                       /    \
12302                         //                     RELOP  CNS 0/1
12303                         //                     /   \
12304                         //
12305                         // Note that we will remove/destroy the EQ/NE node and move
12306                         // the RELOP up into its location.
12307
12308                         /* Here we reverse the RELOP if necessary */
12309
12310                         bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12311
12312                         if (reverse)
12313                         {
12314                             gtReverseCond(op1);
12315                         }
12316
12317                         /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12318                         op1->gtType = tree->gtType;
12319
12320                         noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12321                         op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12322
12323                         DEBUG_DESTROY_NODE(tree);
12324                         return op1;
12325                     }
12326
12327                     //
12328                     // Now we check for a compare with the result of an '&' operator
12329                     //
12330                     // Here we look for the following transformation:
12331                     //
12332                     //                        EQ/NE                  EQ/NE
12333                     //                        /  \                   /  \
12334                     //                      AND   CNS 0/1  ->      AND   CNS 0
12335                     //                     /   \                  /   \
12336                     //                RSZ/RSH   CNS 1            x     CNS (1 << y)
12337                     //                  /  \
12338                     //                 x   CNS_INT +y
12339
12340                     if (op1->gtOper == GT_AND)
12341                     {
12342                         GenTreePtr andOp    = op1;
12343                         GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
12344
12345                         if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12346                         {
12347                             goto SKIP;
12348                         }
12349
12350                         if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12351                         {
12352                             goto SKIP;
12353                         }
12354
12355                         ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12356
12357                         if (shiftAmount < 0)
12358                         {
12359                             goto SKIP;
12360                         }
12361
12362                         if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12363                         {
12364                             goto SKIP;
12365                         }
12366
12367                         if (andOp->gtType == TYP_INT)
12368                         {
12369                             if (shiftAmount > 31)
12370                             {
12371                                 goto SKIP;
12372                             }
12373
12374                             UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12375
12376                             andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12377
12378                             // Reverse the cond if necessary
12379                             if (ival2 == 1)
12380                             {
12381                                 gtReverseCond(tree);
12382                                 cns2->gtIntCon.gtIconVal = 0;
12383                                 oper                     = tree->gtOper;
12384                             }
12385                         }
12386                         else if (andOp->gtType == TYP_LONG)
12387                         {
12388                             if (shiftAmount > 63)
12389                             {
12390                                 goto SKIP;
12391                             }
12392
12393                             UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12394
12395                             andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12396
12397                             // Reverse the cond if necessary
12398                             if (ival2 == 1)
12399                             {
12400                                 gtReverseCond(tree);
12401                                 cns2->gtIntConCommon.SetLngValue(0);
12402                                 oper = tree->gtOper;
12403                             }
12404                         }
12405
12406                         andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12407
12408                         DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12409                         DEBUG_DESTROY_NODE(rshiftOp);
12410                     }
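                          // Worked example: "((x >> 3) & 1) != 0" becomes "(x & 8) != 0",
                          // and "((x >> 3) & 1) == 1" reverses the condition to yield
                          // "(x & 8) != 0" as well.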
12411                 } // END if (ival2 != INT_MAX)
12412
12413             SKIP:
12414                 /* Now check for compares with small constant longs that can be cast to int */
12415
12416                 if (!cns2->OperIsConst())
12417                 {
12418                     goto COMPARE;
12419                 }
12420
12421                 if (cns2->TypeGet() != TYP_LONG)
12422                 {
12423                     goto COMPARE;
12424                 }
12425
12426                 /* Is the constant 31 bits or smaller? */
12427
12428                 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12429                 {
12430                     goto COMPARE;
12431                 }
12432
12433                 /* Is the first comparand a mask operation of type long? */
12434
12435                 if (op1->gtOper != GT_AND)
12436                 {
12437                     /* Another interesting case: cast from int */
12438
12439                     if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12440                         !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12441                         !op1->gtOverflow())              // cannot be an overflow checking cast
12442                     {
12443                         /* Simply make this into an integer comparison */
12444
12445                         tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12446                         tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
12447                     }
12448
12449                     goto COMPARE;
12450                 }
12451
12452                 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12453
12454                 /* Is the result of the mask effectively an INT? */
12455
12456                 GenTreePtr andMask;
12457                 andMask = op1->gtOp.gtOp2;
12458                 if (andMask->gtOper != GT_CNS_NATIVELONG)
12459                 {
12460                     goto COMPARE;
12461                 }
12462                 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12463                 {
12464                     goto COMPARE;
12465                 }
12466
12467                 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12468
12469                 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
12470
12471                 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12472
12473                 noway_assert(andMask == op1->gtOp.gtOp2);
12474
12475                 ival1 = (int)andMask->gtIntConCommon.LngValue();
12476                 andMask->SetOper(GT_CNS_INT);
12477                 andMask->gtType             = TYP_INT;
12478                 andMask->gtIntCon.gtIconVal = ival1;
12479
12480                 /* now change the type of the AND node */
12481
12482                 op1->gtType = TYP_INT;
12483
12484                 /* finally we replace the comparand */
12485
12486                 ival2 = (int)cns2->gtIntConCommon.LngValue();
12487                 cns2->SetOper(GT_CNS_INT);
12488                 cns2->gtType = TYP_INT;
12489
12490                 noway_assert(cns2 == op2);
12491                 cns2->gtIntCon.gtIconVal = ival2;
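                      // Worked example: "(lngVal & 0xFF) == 10" (all TYP_LONG) is rewritten
                      // as the INT compare "((int)lngVal & 0xFF) == 10", since both the mask
                      // and the comparand fit in 32 bits.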
12492
12493                 goto COMPARE;
12494
12495             case GT_LT:
12496             case GT_LE:
12497             case GT_GE:
12498             case GT_GT:
12499
12500                 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12501                 {
12502                     if (op2->gtOper == GT_CNS_INT)
12503                     {
12504                         cns2 = op2;
12505                         /* Check for "expr relop 1" */
12506                         if (cns2->IsIntegralConst(1))
12507                         {
12508                             /* Check for "expr >= 1" */
12509                             if (oper == GT_GE)
12510                             {
12511                                 /* Change to "expr > 0" */
12512                                 oper = GT_GT;
12513                                 goto SET_OPER;
12514                             }
12515                             /* Check for "expr < 1" */
12516                             else if (oper == GT_LT)
12517                             {
12518                                 /* Change to "expr <= 0" */
12519                                 oper = GT_LE;
12520                                 goto SET_OPER;
12521                             }
12522                         }
12523                         /* Check for "expr relop -1" */
12524                         else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12525                         {
12526                             /* Check for "expr <= -1" */
12527                             if (oper == GT_LE)
12528                             {
12529                                 /* Change to "expr < 0" */
12530                                 oper = GT_LT;
12531                                 goto SET_OPER;
12532                             }
12533                             /* Check for "expr > -1" */
12534                             else if (oper == GT_GT)
12535                             {
12536                                 /* Change to "expr >= 0" */
12537                                 oper = GT_GE;
12538
12539                             SET_OPER:
12540                                 // If we get here we should be changing 'oper'
12541                                 assert(tree->OperGet() != oper);
12542
12543                                 // Keep the old ValueNumber for 'tree' as the new expr
12544                                 // will still compute the same value as before
12545                                 tree->SetOper(oper, GenTree::PRESERVE_VN);
12546                                 cns2->gtIntCon.gtIconVal = 0;
12547
12548                                 // vnStore is null before the ValueNumber phase has run
12549                                 if (vnStore != nullptr)
12550                                 {
12551                                     // Update the ValueNumber for 'cns2', as we just changed it to 0
12552                                     fgValueNumberTreeConst(cns2);
12553                                 }
12554
12555                                 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12556                             }
12557                         }
12558                     }
12559                 }
12560                 else // we have an unsigned comparison
12561                 {
12562                     if (op2->IsIntegralConst(0))
12563                     {
12564                         if ((oper == GT_GT) || (oper == GT_LE))
12565                         {
12566                             // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12567                             // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails
12568                             // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12569                             // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12570                             // occurs as a result of branch inversion.
12571                             oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12572                             tree->SetOper(oper, GenTree::PRESERVE_VN);
12573                             tree->gtFlags &= ~GTF_UNSIGNED;
12574                         }
12575                     }
12576                 }
12577
12578             COMPARE:
12579
12580                 noway_assert(tree->OperKind() & GTK_RELOP);
12581
12582 #ifdef LEGACY_BACKEND
12583                 /* Check if the result of the comparison is used for a jump.
12584                  * If not then only the int (i.e. 32 bit) case is handled in
12585                  * the code generator through the (x86) "set" instructions.
12586                  * For the rest of the cases, the simplest way is to
12587                  * "simulate" the comparison with ?:
12588                  *
12589                  * On ARM, we previously used the IT instruction, but the IT instructions
12590                  * have mostly been declared obsolete and off-limits, so all cases on ARM
12591                  * get converted to ?: */
12592
12593                 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
12594                 {
12595                     /* We convert it to "(CMP_TRUE) ? (1):(0)" */
12596
12597                     op1 = tree;
12598                     op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12599                     op1->gtRequestSetFlags();
12600
12601                     op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
12602                     op2 = fgMorphTree(op2);
12603
12604                     tree = gtNewQmarkNode(TYP_INT, op1, op2);
12605
12606                     fgMorphTreeDone(tree);
12607
12608                     return tree;
12609                 }
12610 #endif // LEGACY_BACKEND
12611                 break;
12612
12613 #ifdef LEGACY_BACKEND
12614             case GT_QMARK:
12615
12616                 /* If op1 is a comma throw node then we won't be keeping op2 */
12617                 if (fgIsCommaThrow(op1))
12618                 {
12619                     break;
12620                 }
12621
12622                 /* Get hold of the two branches */
12623
12624                 noway_assert(op2->OperGet() == GT_COLON);
12625                 elseNode = op2->AsColon()->ElseNode();
12626                 thenNode = op2->AsColon()->ThenNode();
12627
12628                 /* Try to hoist assignments out of qmark colon constructs.
12629                    i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
12630
12631                 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
12632                     thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
12633                     thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
12634                 {
12635                     noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
12636
12637                     GenTreePtr asg    = thenNode;
12638                     GenTreePtr colon  = op2;
12639                     colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
12640                     colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
12641                     tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
12642                     asg->gtOp.gtOp2              = tree;
12643
12644                     // Asg will have all the flags that the QMARK had
12645                     asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
12646
12647                     // Colon flag won't have the flags that x had.
12648                     colon->gtFlags &= ~GTF_ALL_EFFECT;
12649                     colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12650
12651                     DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
12652                     DEBUG_DESTROY_NODE(elseNode);
12653
12654                     return asg;
12655                 }
12656
12657                 /* If the 'else' branch is empty swap the two branches and reverse the condition */
12658
12659                 if (elseNode->IsNothingNode())
12660                 {
12661                     /* This can only happen for VOID ?: */
12662                     noway_assert(op2->gtType == TYP_VOID);
12663
12664                     /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
12665                     if (thenNode->IsNothingNode())
12666                     {
12667                         // We may be able to throw away op1 (unless it has side-effects)
12668
12669                         if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12670                         {
12671                             /* Just return a Nop node */
12672                             return thenNode;
12673                         }
12674                         else
12675                         {
12676                             /* Just return the relop, but clear the special flags.  Note
12677                                that we can't do that for longs and floats (see code under
12678                                COMPARE label above) */
12679
12680                             if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
12681                             {
12682                                 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12683                                 return op1;
12684                             }
12685                         }
12686                     }
12687                     else
12688                     {
12689                         GenTreePtr tmp = elseNode;
12690
12691                         op2->AsColon()->ElseNode() = elseNode = thenNode;
12692                         op2->AsColon()->ThenNode() = thenNode = tmp;
12693                         gtReverseCond(op1);
12694                     }
12695                 }
12696
12697 #if !defined(_TARGET_ARM_)
12698                 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
12699                 //
12700                 // Don't do this optimization for ARM: we always require assignment
12701                 // to boolean to remain ?:, since we don't have any way to generate
12702                 // this with straight-line code, like x86 does using setcc (at least
12703                 // after the IT instruction is deprecated).
12704
12705                 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
12706                     thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
12707                 {
12708                     ival1 = thenNode->gtIntCon.gtIconVal;
12709                     ival2 = elseNode->gtIntCon.gtIconVal;
12710
12711                     // Is one constant 0 and the other 1?
12712                     if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
12713                     {
12714                         // If the constants are {1, 0}, reverse the condition
12715                         if (ival1 == 1)
12716                         {
12717                             gtReverseCond(op1);
12718                         }
12719
12720                         // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
12721                         // needs to materialize the result as a 0 or 1.
12722                         noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
12723                         op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12724
12725                         DEBUG_DESTROY_NODE(tree);
12726                         DEBUG_DESTROY_NODE(op2);
12727
12728                         return op1;
12729                     }
12730                 }
12731 #endif // !_TARGET_ARM_
12732
12733                 break; // end case GT_QMARK
12734 #endif                 // LEGACY_BACKEND
12735
12736             case GT_MUL:
12737
12738 #ifndef _TARGET_64BIT_
12739                 if (typ == TYP_LONG)
12740                 {
12741                     // This must be GTF_MUL_64RSLT
12742                     assert(tree->gtIsValid64RsltMul());
12743                     return tree;
12744                 }
12745 #endif // _TARGET_64BIT_
12746                 goto CM_OVF_OP;
12747
12748             case GT_SUB:
12749
12750                 if (tree->gtOverflow())
12751                 {
12752                     goto CM_OVF_OP;
12753                 }
12754
12755                 // TODO #4104: there are a lot of other places where
12756                 // this condition is not checked before transformations.
12757                 if (fgGlobalMorph)
12758                 {
12759                     /* Check for "op1 - cns2"; we change it to "op1 + (-cns2)" */
12760
12761                     noway_assert(op2);
12762                     if (op2->IsCnsIntOrI())
12763                     {
12764                         /* Negate the constant and change the node to be "+" */
12765
12766                         op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12767                         oper = GT_ADD;
12768                         tree->ChangeOper(oper);
12769                         goto CM_ADD_OP;
12770                     }
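                          // For example, "x - 7" becomes "x + (-7)", allowing the
                          // commutative GT_ADD morphing below to fold and reassociate
                          // constants.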
12771
12772                     /* Check for "cns1 - op2"; we change it to "(cns1 + (-op2))" */
12773
12774                     noway_assert(op1);
12775                     if (op1->IsCnsIntOrI())
12776                     {
12777                         noway_assert(varTypeIsIntOrI(tree));
12778
12779                         // The type of the new GT_NEG node should be the same as the
12780                         // type of the tree, i.e. tree->gtType.
12781                         tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2);
12782                         fgMorphTreeDone(op2);
12783
12784                         oper = GT_ADD;
12785                         tree->ChangeOper(oper);
12786                         goto CM_ADD_OP;
12787                     }
12788
12789                     /* No match - exit */
12790                 }
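                // For example, "x - 5" is rewritten above to "x + (-5)", and "5 - x"
                // to "5 + GT_NEG(x)"; both shapes then share the GT_ADD handling at
                // CM_ADD_OP (constant commuting, folding, and so on).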
12791                 break;
12792
12793 #ifdef _TARGET_ARM64_
12794             case GT_DIV:
12795                 if (!varTypeIsFloating(tree->gtType))
12796                 {
12797                     // Codegen for this instruction needs to be able to throw two exceptions:
12798                     fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12799                     fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12800                 }
12801                 break;
12802             case GT_UDIV:
12803                 // Codegen for this instruction needs to be able to throw one exception:
12804                 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12805                 break;
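                // For example, a signed "x / y" can raise SCK_OVERFLOW (INT_MIN / -1
                // overflows) as well as SCK_DIV_BY_ZERO, while an unsigned division
                // can only raise SCK_DIV_BY_ZERO; the refs above create the throw
                // blocks that codegen will jump to.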
12806 #endif
12807
12808             case GT_ADD:
12809
12810             CM_OVF_OP:
12811                 if (tree->gtOverflow())
12812                 {
12813                     tree->gtRequestSetFlags();
12814
12815                     // Add the exception-throwing basic block to jump to on overflow
12816
12817                     fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12818
12819                     // We can't do any commutative morphing for overflow instructions
12820
12821                     break;
12822                 }
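                // For example, a C# "checked(a + b)" arrives here with gtOverflow()
                // set and takes the early break above; unchecked adds fall through
                // to the commutative morphing at CM_ADD_OP.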
12823
12824             CM_ADD_OP:
12825
12826             case GT_OR:
12827             case GT_XOR:
12828             case GT_AND:
12829
12830                 /* Commute any non-REF constants to the right */
12831
12832                 noway_assert(op1);
12833                 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12834                 {
12835                     // TODO-Review: We used to assert here that
12836                     // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12837                     // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12838                     // and would sometimes hit this assertion.  This may indicate a missed "remorph".
12839                     // Task is to re-enable this assertion and investigate.
12840
12841                     /* Swap the operands */
12842                     tree->gtOp.gtOp1 = op2;
12843                     tree->gtOp.gtOp2 = op1;
12844
12845                     op1 = op2;
12846                     op2 = tree->gtOp.gtOp2;
12847                 }
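                // For example, "5 + x" becomes "x + 5" here, so the folding logic
                // below only ever needs to look for a constant in op2.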
12848
12849                 /* See if we can fold GT_ADD nodes. */
12850
12851                 if (oper == GT_ADD)
12852                 {
12853                     /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
12854
12855                     if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12856                         op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12857                         !op1->gtOverflow() && !op2->gtOverflow())
12858                     {
12859                         cns1 = op1->gtOp.gtOp2;
12860                         cns2 = op2->gtOp.gtOp2;
12861                         cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12862 #ifdef _TARGET_64BIT_
12863                         if (cns1->TypeGet() == TYP_INT)
12864                         {
12865                             // we need to properly re-sign-extend or truncate after adding two int constants above
12866                             cns1->AsIntCon()->TruncateOrSignExtend32();
12867                         }
12868 #endif //_TARGET_64BIT_
12869
12870                         tree->gtOp.gtOp2 = cns1;
12871                         DEBUG_DESTROY_NODE(cns2);
12872
12873                         op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
12874                         op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
12875                         DEBUG_DESTROY_NODE(op2);
12876                         op2 = tree->gtOp.gtOp2;
12877                     }
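                    // For example, "(x + 2) + (y + 3)" is reassociated above into
                    // "(x + y) + 5", exposing a single constant to later folding.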
12878
12879                     if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
12880                     {
12881                         /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
12882
12883                         if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
12884                             !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
12885                         {
12886                             cns1 = op1->gtOp.gtOp2;
12887                             op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
12888                                                              op2->gtIntConCommon.IconValue());
12889 #ifdef _TARGET_64BIT_
12890                             if (op2->TypeGet() == TYP_INT)
12891                             {
12892                                 // we need to properly re-sign-extend or truncate after adding two int constants above
12893                                 op2->AsIntCon()->TruncateOrSignExtend32();
12894                             }
12895 #endif //_TARGET_64BIT_
12896
12897                             if (cns1->OperGet() == GT_CNS_INT)
12898                             {
12899                                 op2->gtIntCon.gtFieldSeq =
12900                                     GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
12901                             }
12902                             DEBUG_DESTROY_NODE(cns1);
12903
12904                             tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12905                             DEBUG_DESTROY_NODE(op1);
12906                             op1 = tree->gtOp.gtOp1;
12907                         }
12908
12909                         // Fold (x + 0).
12910
12911                         if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
12912                         {
12913
12914                             // If this addition is adding an offset to a null pointer,
12915                             // avoid the work and yield the null pointer immediately.
12916                             // Dereferencing the pointer in either case will have the
12917                             // same effect.
12918
12919                             if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
12920                                 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
12921                             {
12922                                 op2->gtType = tree->gtType;
12923                                 DEBUG_DESTROY_NODE(op1);
12924                                 DEBUG_DESTROY_NODE(tree);
12925                                 return op2;
12926                             }
12927
12928                             // Remove the addition iff it won't change the tree type
12929                             // to TYP_REF.
12930
12931                             if (!gtIsActiveCSE_Candidate(op2) &&
12932                                 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
12933                             {
12934                                 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
12935                                     (op2->gtIntCon.gtFieldSeq != nullptr) &&
12936                                     (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
12937                                 {
12938                                     fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
12939                                 }
12940
12941                                 DEBUG_DESTROY_NODE(op2);
12942                                 DEBUG_DESTROY_NODE(tree);
12943
12944                                 return op1;
12945                             }
12946                         }
12947                     }
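                    // For example, "(x + 2) + 3" folds above to "x + 5"; "x + 0"
                    // drops the addition entirely, and adding a side-effect-free
                    // offset to a null pointer yields the null pointer itself.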
12948                 }
12949                 /* See if we can fold GT_MUL by const nodes */
12950                 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
12951                 {
12952 #ifndef _TARGET_64BIT_
12953                     noway_assert(typ <= TYP_UINT);
12954 #endif // !_TARGET_64BIT_
12955                     noway_assert(!tree->gtOverflow());
12956
12957                     ssize_t mult            = op2->gtIntConCommon.IconValue();
12958                     bool    op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12959                                            op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
12960
12961                     assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
12962
12963                     if (mult == 0)
12964                     {
12965                         // We may be able to throw away op1 (unless it has side-effects)
12966
12967                         if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12968                         {
12969                             DEBUG_DESTROY_NODE(op1);
12970                             DEBUG_DESTROY_NODE(tree);
12971                             return op2; // Just return the "0" node
12972                         }
12973
12974                         // We need to keep op1 for the side-effects. Hang it off
12975                         // a GT_COMMA node
12976
12977                         tree->ChangeOper(GT_COMMA);
12978                         return tree;
12979                     }
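                    // For example, "x * 0" folds to the constant 0 outright, unless
                    // "x" has side effects, in which case the tree becomes
                    // "COMMA(x, 0)" so those effects are preserved.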
12980
12981                     size_t abs_mult      = (mult >= 0) ? mult : -mult;
12982                     size_t lowestBit     = genFindLowestBit(abs_mult);
12983                     bool   changeToShift = false;
12984
12985                     // is it a power of two? (positive or negative)
12986                     if (abs_mult == lowestBit)
12987                     {
12988                         // If negative, negate (min-int does not need negation)
12989                         if (mult < 0 && mult != SSIZE_T_MIN)
12990                         {
12991                             tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12992                             fgMorphTreeDone(op1);
12993                         }
12994
12995                         // If "op2" is a constant array index, the other multiplicand must be a constant.
12996                         // Transfer the annotation to the other one.
12997                         if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12998                             op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
12999                         {
13000                             assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
13001                             GenTreePtr otherOp = op1;
13002                             if (otherOp->OperGet() == GT_NEG)
13003                             {
13004                                 otherOp = otherOp->gtOp.gtOp1;
13005                             }
13006                             assert(otherOp->OperGet() == GT_CNS_INT);
13007                             assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
13008                             otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
13009                         }
13010
13011                         if (abs_mult == 1)
13012                         {
13013                             DEBUG_DESTROY_NODE(op2);
13014                             DEBUG_DESTROY_NODE(tree);
13015                             return op1;
13016                         }
13017
13018                         /* Change the multiplication into a shift by log2(val) bits */
13019                         op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
13020                         changeToShift = true;
13021                     }
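                    // Illustrative sketch of the power-of-two path above:
                    //     GT_MUL(x, 8)   ==>  GT_LSH(x, 3)
                    //     GT_MUL(x, -8)  ==>  GT_LSH(GT_NEG(x), 3)
                    //     GT_MUL(x, -1)  ==>  GT_NEG(x)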
13022 #if LEA_AVAILABLE
13023                     else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
13024                     {
13025                         int     shift  = genLog2(lowestBit);
13026                         ssize_t factor = abs_mult >> shift;
13027
13028                         if (factor == 3 || factor == 5 || factor == 9)
13029                         {
13030                             // If negative, negate (min-int does not need negation)
13031                             if (mult < 0 && mult != SSIZE_T_MIN)
13032                             {
13033                                 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
13034                                 fgMorphTreeDone(op1);
13035                             }
13036
13037                             GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
13038                             if (op2IsConstIndex)
13039                             {
13040                                 factorIcon->AsIntCon()->gtFieldSeq =
13041                                     GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
13042                             }
13043
13044                             // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
13045                             tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
13046                             fgMorphTreeDone(op1);
13047
13048                             op2->gtIntConCommon.SetIconValue(shift);
13049                             changeToShift = true;
13050                         }
13051                     }
13052 #endif // LEA_AVAILABLE
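                    // For example, when avoiding integer multiplies, "x * 40"
                    // (40 == 5 << 3) becomes "GT_LSH(GT_MUL(x, 5), 3)" above, where
                    // the multiply by 5 maps onto an LEA scaled-index form.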
13053                     if (changeToShift)
13054                     {
13055                         // vnStore is null before the ValueNumber phase has run
13056                         if (vnStore != nullptr)
13057                         {
13058                             // Update the ValueNumber for 'op2', as we just changed the constant
13059                             fgValueNumberTreeConst(op2);
13060                         }
13061                         oper = GT_LSH;
13062                         // Keep the old ValueNumber for 'tree' as the new expr
13063                         // will still compute the same value as before
13064                         tree->ChangeOper(oper, GenTree::PRESERVE_VN);
13065
13066                         goto DONE_MORPHING_CHILDREN;
13067                     }
13068                 }
13069                 else if (fgOperIsBitwiseRotationRoot(oper))
13070                 {
13071                     tree = fgRecognizeAndMorphBitwiseRotation(tree);
13072
13073                     // fgRecognizeAndMorphBitwiseRotation may return a new tree
13074                     oper = tree->OperGet();
13075                     typ  = tree->TypeGet();
13076                     op1  = tree->gtOp.gtOp1;
13077                     op2  = tree->gtOp.gtOp2;
13078                 }
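                // For example, fgRecognizeAndMorphBitwiseRotation can turn a pattern
                // such as "(x << c) | (x >> (32 - c))" on a 32-bit operand into a
                // rotate node, which is why oper/typ/op1/op2 are refreshed above.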
13079
13080                 break;
13081
13082             case GT_CHS:
13083             case GT_NOT:
13084             case GT_NEG:
13085
13086                 /* Any constant cases should have been folded earlier */
13087                 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13088                 break;
13089
13090             case GT_CKFINITE:
13091
13092                 noway_assert(varTypeIsFloating(op1->TypeGet()));
13093
13094                 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
13095                 break;
13096
13097             case GT_OBJ:
13098                 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13099                 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13100                 // is a local or clsVar, even if it has been address-exposed.
13101                 if (op1->OperGet() == GT_ADDR)
13102                 {
13103                     tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13104                 }
13105                 break;
13106
13107             case GT_IND:
13108
13109                 // Cannot remove a GT_IND if it is currently a CSE candidate.
13110                 if (gtIsActiveCSE_Candidate(tree))
13111                 {
13112                     break;
13113                 }
13114
13115                 bool foldAndReturnTemp;
13116                 foldAndReturnTemp = false;
13117                 temp              = nullptr;
13118                 ival1             = 0;
13119
13120                 /* Try to Fold *(&X) into X */
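                // For example, "*(&lcl)" collapses to "lcl" when the types match
                // exactly; the cases below handle promoted fields, small types, and
                // zero-offset field sequences, where more care is needed.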
13121                 if (op1->gtOper == GT_ADDR)
13122                 {
13123                     // Cannot remove a GT_ADDR if it is currently a CSE candidate.
13124                     if (gtIsActiveCSE_Candidate(op1))
13125                     {
13126                         break;
13127                     }
13128
13129                     temp = op1->gtOp.gtOp1; // X
13130
13131                     // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13132                     // they are the *same* struct type.  In fact, they almost certainly aren't.  If the
13133                     // address has an associated field sequence, that identifies this case; go through
13134                     // the "lcl_fld" path rather than this one.
13135                     FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13136                     if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13137                     {
13138                         foldAndReturnTemp = true;
13139                     }
13140                     else if (temp->OperIsLocal())
13141                     {
13142                         unsigned   lclNum = temp->gtLclVarCommon.gtLclNum;
13143                         LclVarDsc* varDsc = &lvaTable[lclNum];
13144
13145                         // We will try to optimize when we have a promoted struct with a zero lvFldOffset
13146                         if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13147                         {
13148                             noway_assert(varTypeIsStruct(varDsc));
13149
13150                             // We will try to optimize when we have a single field struct that is being struct promoted
13151                             if (varDsc->lvFieldCnt == 1)
13152                             {
13153                                 unsigned lclNumFld = varDsc->lvFieldLclStart;
13154                                 // just grab the promoted field
13155                                 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
13156
13157                                 // Also make sure that the tree type matches the fieldVarType and that its
13158                                 // lvFldOffset is zero
13159                                 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13160                                 {
13161                                     // We can just use the existing promoted field LclNum
13162                                     temp->gtLclVarCommon.SetLclNum(lclNumFld);
13163                                     temp->gtType = fieldVarDsc->TypeGet();
13164
13165                                     foldAndReturnTemp = true;
13166                                 }
13167                             }
13168                         }
13169                         // If the type of the IND (typ) is a "small int", and the type of the local has the
13170                         // same width, then we can reduce to just the local variable -- it will be
13171                         // correctly normalized, and signed/unsigned differences won't matter.
13172                         //
13173                         // The below transformation cannot be applied if the local var needs to be normalized on load.
13174                         else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13175                                  !lvaTable[lclNum].lvNormalizeOnLoad())
13176                         {
13177                             tree->gtType = typ = temp->TypeGet();
13178                             foldAndReturnTemp  = true;
13179                         }
13180                         else
13181                         {
13182                             // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
13183                             // nullptr)
13184                             assert(fieldSeq == nullptr);
13185                             bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13186                             assert(b || fieldSeq == nullptr);
13187
13188                             if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13189                             {
13190                                 // Append the field sequence, change the type.
13191                                 temp->AsLclFld()->gtFieldSeq =
13192                                     GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13193                                 temp->gtType = typ;
13194
13195                                 foldAndReturnTemp = true;
13196                             }
13197                         }
13198                         // Otherwise we will fold this into a GT_LCL_FLD below
13199                         //   where we check (temp != nullptr)
13200                     }
13201                     else // !temp->OperIsLocal()
13202                     {
13203                         // We don't try to fold away the GT_IND/GT_ADDR for this case
13204                         temp = nullptr;
13205                     }
13206                 }
13207                 else if (op1->OperGet() == GT_ADD)
13208                 {
13209                     /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
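                    // For example, an access such as "*(int*)(&structLcl + 4)" can
                    // later become a GT_LCL_FLD at offset 4, so the address of the
                    // local never needs to be materialized into a register.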
13210
13211                     if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13212                         (!(opts.MinOpts() || opts.compDbgCode)))
13213                     {
13214                         // No overflow arithmetic with pointers
13215                         noway_assert(!op1->gtOverflow());
13216
13217                         temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13218                         if (!temp->OperIsLocal())
13219                         {
13220                             temp = nullptr;
13221                             break;
13222                         }
13223
13224                         // Cannot remove the GT_ADDR if it is currently a CSE candidate.
13225                         if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13226                         {
13227                             break;
13228                         }
13229
13230                         ival1    = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13231                         fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13232
13233                         // Does the address have an associated zero-offset field sequence?
13234                         FieldSeqNode* addrFieldSeq = nullptr;
13235                         if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13236                         {
13237                             fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13238                         }
13239
13240                         if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13241                         {
13242                             noway_assert(!varTypeIsGC(temp->TypeGet()));
13243                             foldAndReturnTemp = true;
13244                         }
13245                         else
13246                         {
13247                             // The emitter can't handle large offsets
13248                             if (ival1 != (unsigned short)ival1)
13249                             {
13250                                 break;
13251                             }
13252
13253                             // The emitter can get confused by invalid offsets
13254                             if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13255                             {
13256                                 break;
13257                             }
13258
13259 #ifdef _TARGET_ARM_
13260                             // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13261                             //
13262                             if (varTypeIsFloating(typ))
13263                             {
13264                                 if ((ival1 % emitTypeSize(typ)) != 0)
13265                                 {
13266                                     tree->gtFlags |= GTF_IND_UNALIGNED;
13267                                     break;
13268                                 }
13269                             }
13270 #endif
13271                         }
13272                         // Now we can fold this into a GT_LCL_FLD below
13273                         //   where we check (temp != nullptr)
13274                     }
13275                 }
13276
13277                 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13278                 // - We may have a load of a local where the load has a different type than the local
13279                 // - We may have a load of a local plus an offset
13280                 //
13281                 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13282                 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13283                 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13284                 // out-of-bounds w.r.t. the local).
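                // For example, an 8-byte struct local loaded as TYP_INT at offset 4
                // becomes a GT_LCL_FLD with gtLclOffs == 4, while a TYP_INT load at
                // offset 6 would run past the end of the local and is not folded.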
13285                 if ((temp != nullptr) && !foldAndReturnTemp)
13286                 {
13287                     assert(temp->OperIsLocal());
13288
13289                     const unsigned   lclNum = temp->AsLclVarCommon()->gtLclNum;
13290                     LclVarDsc* const varDsc = &lvaTable[lclNum];
13291
13292                     const var_types tempTyp = temp->TypeGet();
13293                     const bool      useExactSize =
13294                         varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13295                     const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13296
13297                     // Make sure we do not enregister this lclVar.
13298                     lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13299
13300                     // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13301                     // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13302                     // lclVar and must not extend beyond the end of the lclVar.
13303                     if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
13304                     {
13305                         // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1',
13306                         // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival1'.
13307                         // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13308                         //
13309                         if (temp->OperGet() == GT_LCL_FLD)
13310                         {
13311                             temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
13312                             temp->AsLclFld()->gtFieldSeq =
13313                                 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13314                         }
13315                         else
13316                         {
13317                             temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
13318                             temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
13319                             if (fieldSeq != nullptr)
13320                             { // If it does represent a field, note that.
13321                                 temp->AsLclFld()->gtFieldSeq = fieldSeq;
13322                             }
13323                         }
13324                         temp->gtType      = tree->gtType;
13325                         foldAndReturnTemp = true;
13326                     }
13327                 }
13328
13329                 if (foldAndReturnTemp)
13330                 {
13331                     assert(temp != nullptr);
13332                     assert(temp->TypeGet() == typ);
13333                     assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13334
13335                     // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it may already be set
13336                     // on 'temp' because a GT_ADDR always marks its operand with GTF_DONT_CSE.
13337                     temp->gtFlags &= ~GTF_DONT_CSE;
13338                     temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13339
13340                     if (op1->OperGet() == GT_ADD)
13341                     {
13342                         DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13343                         DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13344                     }
13345                     DEBUG_DESTROY_NODE(op1);  // GT_ADD or GT_ADDR
13346                     DEBUG_DESTROY_NODE(tree); // GT_IND
13347
13348                     // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
13349                     // normalization.
13350                     if (temp->OperIs(GT_LCL_VAR))
13351                     {
13352 #ifdef DEBUG
13353                         // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear,
13354                         // while the node in question has it set (as it has already been morphed).
13355                         temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
13356 #endif // DEBUG
13357                         const bool forceRemorph = true;
13358                         temp                    = fgMorphLocalVar(temp, forceRemorph);
13359 #ifdef DEBUG
13360                         // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
13361                         // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
13362                         // returns.
13363                         temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13364 #endif // DEBUG
13365                     }
13366
13367                     return temp;
13368                 }
13369
13370                 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13371                 // could result in an invalid value number for the newly generated GT_IND node.
13372                 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13373                 {
13374                     // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13375                     // TBD: this transformation is currently necessary for correctness -- it might
13376                     // be good to analyze the failures that result if we don't do this, and fix them
13377                     // in other ways.  Ideally, this should be optional.
13378                     GenTreePtr commaNode = op1;
13379                     unsigned   treeFlags = tree->gtFlags;
13380                     commaNode->gtType    = typ;
13381                     commaNode->gtFlags   = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13382                                                                            // dangerous, clear the GTF_REVERSE_OPS at
13383                                                                            // least.
13384 #ifdef DEBUG
13385                     commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13386 #endif
13387                     while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13388                     {
13389                         commaNode          = commaNode->gtOp.gtOp2;
13390                         commaNode->gtType  = typ;
13391                         commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13392                                                                              // dangerous, clear the GTF_REVERSE_OPS at
13393                                                                              // least.
13394 #ifdef DEBUG
13395                         commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13396 #endif
13397                     }
13398                     bool      wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13399                     ArrayInfo arrInfo;
13400                     if (wasArrIndex)
13401                     {
13402                         bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13403                         assert(b);
13404                         GetArrayInfoMap()->Remove(tree);
13405                     }
13406                     tree         = op1;
13407                     op1          = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
13408                     op1->gtFlags = treeFlags;
13409                     if (wasArrIndex)
13410                     {
13411                         GetArrayInfoMap()->Set(op1, arrInfo);
13412                     }
13413 #ifdef DEBUG
13414                     op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13415 #endif
13416                     commaNode->gtOp.gtOp2 = op1;
13417                     return tree;
13418                 }
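                // For example, "IND(COMMA(stmt, addr))" becomes
                // "COMMA(stmt, IND(addr))": the indirection sinks onto the last
                // comma operand, and any array-index annotation moves with it.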
13419
13420                 break;
13421
13422             case GT_ADDR:
13423
13424                 // Cannot remove op1 if it is currently a CSE candidate.
13425                 if (gtIsActiveCSE_Candidate(op1))
13426                 {
13427                     break;
13428                 }
13429
13430                 if (op1->OperGet() == GT_IND)
13431                 {
13432                     if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13433                     {
13434                         // Cannot remove a GT_ADDR if it is currently a CSE candidate.
13435                         if (gtIsActiveCSE_Candidate(tree))
13436                         {
13437                             break;
13438                         }
13439
13440                         // Perform the transform ADDR(IND(...)) == (...).
13441                         GenTreePtr addr = op1->gtOp.gtOp1;
13442
13443                         noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13444
13445                         DEBUG_DESTROY_NODE(op1);
13446                         DEBUG_DESTROY_NODE(tree);
13447
13448                         return addr;
13449                     }
13450                 }
13451                 else if (op1->OperGet() == GT_OBJ)
13452                 {
13453                     // Cannot remove a GT_ADDR if it is currently a CSE candidate.
13454                     if (gtIsActiveCSE_Candidate(tree))
13455                     {
13456                         break;
13457                     }
13458
13459                     // Perform the transform ADDR(OBJ(...)) == (...).
13460                     GenTreePtr addr = op1->AsObj()->Addr();
13461
13462                     noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13463
13464                     DEBUG_DESTROY_NODE(op1);
13465                     DEBUG_DESTROY_NODE(tree);
13466
13467                     return addr;
13468                 }
13469                 else if (op1->gtOper == GT_CAST)
13470                 {
13471                     GenTreePtr casting = op1->gtCast.CastOp();
13472                     if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13473                     {
13474                         DEBUG_DESTROY_NODE(op1);
13475                         tree->gtOp.gtOp1 = op1 = casting;
13476                     }
13477                 }
13478                 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13479                 {
13480                     // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13481                     // (Be sure to mark "z" as an l-value...)
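                    // For example, "ADDR(COMMA(stmt, IND(p)))" becomes
                    // "COMMA(stmt, ADDR(IND(p)))"; the inner IND is only needed for
                    // its address, so it is marked GTF_IND_NONFAULTING below.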
13482                     GenTreePtr commaNode = op1;
13483                     while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13484                     {
13485                         commaNode = commaNode->gtOp.gtOp2;
13486                     }
13487                     // The top-level addr might be annotated with a zeroOffset field.
13488                     FieldSeqNode* zeroFieldSeq = nullptr;
13489                     bool          isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13490                     tree                       = op1;
13491                     commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13492
13493                     // If the node we're about to put under a GT_ADDR is an indirection, it
13494                     // doesn't need to be materialized, since we only want the addressing mode. Because
13495                     // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13496                     // as a side effect.
13497                     GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13498                     if (commaOp2->OperIsBlk())
13499                     {
13500                         commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13501                     }
13502                     if (commaOp2->gtOper == GT_IND)
13503                     {
13504                         commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13505                     }
13506
13507                     op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13508
13509                     if (isZeroOffset)
13510                     {
13511                         // Transfer the annotation to the new GT_ADDR node.
13512                         GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
13513                     }
13514                     commaNode->gtOp.gtOp2 = op1;
13515                     // Originally, I gave all the comma nodes type "byref".  But the ADDR(IND(x)) == x transform
13516                     // might give op1 a type different from byref (like, say, native int).  So now go back and give
13517                     // all the comma nodes the type of op1.
13518                     // TODO: the comma flag update below is conservative and can be improved.
13519                     // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13520                     // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13521                     commaNode = tree;
13522                     while (commaNode->gtOper == GT_COMMA)
13523                     {
13524                         commaNode->gtType = op1->gtType;
13525                         commaNode->gtFlags |= op1->gtFlags;
13526 #ifdef DEBUG
13527                         commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13528 #endif
13529                         commaNode = commaNode->gtOp.gtOp2;
13530                     }
13531
13532                     return tree;
13533                 }
13534
13535                 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13536                 op1->gtFlags |= GTF_DONT_CSE;
13537                 break;
13538
13539             case GT_COLON:
13540                 if (fgGlobalMorph)
13541                 {
13542                     /* Mark the nodes that are conditionally executed */
13543                     fgWalkTreePre(&tree, gtMarkColonCond);
13544                 }
13545                 /* Since we're doing this postorder we clear this if it got set by a child */
13546                 fgRemoveRestOfBlock = false;
13547                 break;
13548
13549             case GT_COMMA:
13550
13551                 /* Special case: trees that don't produce a value */
13552                 if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
13553                     fgIsThrow(op2))
13554                 {
13555                     typ = tree->gtType = TYP_VOID;
13556                 }
13557
13558                 // If we are in the Valuenum CSE phase then don't morph away anything as these
13559                 // nodes may have CSE defs/uses in them.
13560                 //
13561                 if (!optValnumCSE_phase)
13562                 {
13563                     // Extract the side effects from the left side of the comma.  Since they don't "go" anywhere, this
13564                     // is all we need.
13565
13566                     GenTreePtr op1SideEffects = nullptr;
13567                     // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13568                     // hoisted expressions in loops.
13569                     gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13570                     if (op1SideEffects)
13571                     {
13572                         // Replace the left hand side with the side effect list.
13573                         tree->gtOp.gtOp1 = op1SideEffects;
13574                         tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
13575                     }
13576                     else
13577                     {
13578                         /* The left operand is worthless, throw it away */
13579                         if (lvaLocalVarRefCounted)
13580                         {
13581                             lvaRecursiveDecRefCounts(op1);
13582                         }
13583                         op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13584                         DEBUG_DESTROY_NODE(tree);
13585                         DEBUG_DESTROY_NODE(op1);
13586                         return op2;
13587                     }
13588
13589                     /* If the right operand is just a void nop node, throw it away */
13590                     if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13591                     {
13592                         op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13593                         DEBUG_DESTROY_NODE(tree);
13594                         DEBUG_DESTROY_NODE(op2);
13595                         return op1;
13596                     }
13597                 }
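                // For example, "COMMA(x + 1, y)" has no side effects on the left and
                // reduces to just "y", while "COMMA(call(), y)" keeps the call as
                // the extracted side-effect list.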
13598
13599                 break;
13600
13601             case GT_JTRUE:
13602
13603                 /* Special case if fgRemoveRestOfBlock is set to true */
13604                 if (fgRemoveRestOfBlock)
13605                 {
13606                     if (fgIsCommaThrow(op1, true))
13607                     {
13608                         GenTreePtr throwNode = op1->gtOp.gtOp1;
13609                         noway_assert(throwNode->gtType == TYP_VOID);
13610
13611                         return throwNode;
13612                     }
13613
13614                     noway_assert(op1->OperKind() & GTK_RELOP);
13615                     noway_assert(op1->gtFlags & GTF_EXCEPT);
13616
13617                     // We need to keep op1 for the side-effects. Hang it off
13618                     // a GT_COMMA node
13619
13620                     tree->ChangeOper(GT_COMMA);
13621                     tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13622
13623                     // Additionally, since we're eliminating the JTRUE,
13624                     // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
13625                     // So we change it into a GT_COMMA as well.
13626                     op1->ChangeOper(GT_COMMA);
13627                     op1->gtType = op1->gtOp.gtOp1->gtType;
13628
13629                     return tree;
13630                 }
13631
13632             default:
13633                 break;
13634         }
13635
13636         assert(oper == tree->gtOper);
13637
13638         // If we are in the Valuenum CSE phase then don't morph away anything as these
13639         // nodes may have CSE defs/uses in them.
13640         //
13641         if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13642         {
13643             /* Check for op1 as a GT_COMMA with an unconditional throw node */
13644             if (op1 && fgIsCommaThrow(op1, true))
13645             {
13646                 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13647                 {
13648                     /* We can safely throw out the rest of the statements */
13649                     fgRemoveRestOfBlock = true;
13650                 }
13651
13652                 GenTreePtr throwNode = op1->gtOp.gtOp1;
13653                 noway_assert(throwNode->gtType == TYP_VOID);
13654
13655                 if (oper == GT_COMMA)
13656                 {
13657                     /* Both tree and op1 are GT_COMMA nodes */
13658                     /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13659                     tree->gtOp.gtOp1 = throwNode;
13660                     return tree;
13661                 }
13662                 else if (oper != GT_NOP)
13663                 {
13664                     if (genActualType(typ) == genActualType(op1->gtType))
13665                     {
13666                         /* The types match so, return the comma throw node as the new tree */
13667                         return op1;
13668                     }
13669                     else
13670                     {
13671                         if (typ == TYP_VOID)
13672                         {
13673                             // Return the throw node
13674                             return throwNode;
13675                         }
13676                         else
13677                         {
13678                             GenTreePtr commaOp2 = op1->gtOp.gtOp2;
13679
13680                             // need type of oper to be same as tree
13681                             if (typ == TYP_LONG)
13682                             {
13683                                 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13684                                 commaOp2->gtIntConCommon.SetLngValue(0);
13685                                 /* Change the types of oper and commaOp2 to TYP_LONG */
13686                                 op1->gtType = commaOp2->gtType = TYP_LONG;
13687                             }
13688                             else if (varTypeIsFloating(typ))
13689                             {
13690                                 commaOp2->ChangeOperConst(GT_CNS_DBL);
13691                                 commaOp2->gtDblCon.gtDconVal = 0.0;
13692                                 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13693                                 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13694                             }
13695                             else
13696                             {
13697                                 commaOp2->ChangeOperConst(GT_CNS_INT);
13698                                 commaOp2->gtIntConCommon.SetIconValue(0);
13699                                 /* Change the types of oper and commaOp2 to TYP_INT */
13700                                 op1->gtType = commaOp2->gtType = TYP_INT;
13701                             }
13702
13703                             /* Return the GT_COMMA node as the new tree */
13704                             return op1;
13705                         }
13706                     }
13707                 }
13708             }
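            // For example, if op1 is "COMMA(throw, v)" the rest of the expression is
            // unreachable: a GT_COMMA tree grafts the throw into its op1, a
            // same-typed tree returns the comma throw as-is, a TYP_VOID tree returns
            // just the throw, and otherwise the comma's value operand is rewritten
            // to a zero constant of the tree's type before returning it.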
13709
13710             /* Check for op2 as a GT_COMMA with an unconditional throw */
13711
13712             if (op2 && fgIsCommaThrow(op2, true))
13713             {
13714                 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13715                 {
13716                     /* We can safely throw out the rest of the statements */
13717                     fgRemoveRestOfBlock = true;
13718                 }
13719
13720                 // If op1 has no side-effects
13721                 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13722                 {
13723                     // If tree is an asg node
13724                     if (tree->OperIsAssignment())
13725                     {
13726                         /* Return the throw node as the new tree */
13727                         return op2->gtOp.gtOp1;
13728                     }
13729
13730                     if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13731                     {
13732                         /* Return the throw node as the new tree */
13733                         return op2->gtOp.gtOp1;
13734                     }
13735
13736                     // If tree is a comma node
13737                     if (tree->OperGet() == GT_COMMA)
13738                     {
13739                         /* Return the throw node as the new tree */
13740                         return op2->gtOp.gtOp1;
13741                     }
13742
13743                     /* for the shift nodes the type of op2 can differ from the tree type */
13744                     if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13745                     {
13746                         noway_assert(GenTree::OperIsShiftOrRotate(oper));
13747
13748                         GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13749
13750                         commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13751                         commaOp2->gtIntConCommon.SetLngValue(0);
13752
13753                         /* Change the types of oper and commaOp2 to TYP_LONG */
13754                         op2->gtType = commaOp2->gtType = TYP_LONG;
13755                     }
13756
13757                     if ((genActualType(typ) == TYP_INT) &&
13758                         (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13759                     {
13760                         // An example case is comparison (say GT_GT) of two longs or floating point values.
13761
13762                         GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13763
13764                         commaOp2->ChangeOperConst(GT_CNS_INT);
13765                         commaOp2->gtIntCon.gtIconVal = 0;
13766                         /* Change the types of oper and commaOp2 to TYP_INT */
13767                         op2->gtType = commaOp2->gtType = TYP_INT;
13768                     }
13769
13770                     if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13771                     {
13772                         noway_assert(tree->OperGet() == GT_ADD);
13773
13774                         GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13775
13776                         commaOp2->ChangeOperConst(GT_CNS_INT);
13777                         commaOp2->gtIntCon.gtIconVal = 0;
13778                         /* Change the types of oper and commaOp2 to TYP_BYREF */
13779                         op2->gtType = commaOp2->gtType = TYP_BYREF;
13780                     }
13781
13782                     /* types should now match */
13783                     noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13784
13785                     /* Return the GT_COMMA node as the new tree */
13786                     return op2;
13787                 }
13788             }
13789         }
13790
13791         /*-------------------------------------------------------------------------
13792          * Optional morphing is done if tree transformations are permitted
13793          */
13794
13795         if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13796         {
13797             return tree;
13798         }
13799
13800         tree = fgMorphSmpOpOptional(tree->AsOp());
13801
13802     } // extra scope for gcc workaround
13803     return tree;
13804 }
13805 #ifdef _PREFAST_
13806 #pragma warning(pop)
13807 #endif
13808
13809 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13810 {
13811     genTreeOps oper = tree->gtOper;
13812     GenTree*   op1  = tree->gtOp1;
13813     GenTree*   op2  = tree->gtOp2;
13814     var_types  typ  = tree->TypeGet();
13815
13816     if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
13817     {
13818         /* Swap the operands so that the more expensive one is 'op1' */
13819
13820         if (tree->gtFlags & GTF_REVERSE_OPS)
13821         {
13822             tree->gtOp1 = op2;
13823             tree->gtOp2 = op1;
13824
13825             op2 = op1;
13826             op1 = tree->gtOp1;
13827
13828             tree->gtFlags &= ~GTF_REVERSE_OPS;
13829         }
13830
13831         if (oper == op2->gtOper)
13832         {
13833             /*  Reorder nested operators at the same precedence level to be
13834                 left-recursive. For example, change "(a+(b+c))" to the
13835                 equivalent expression "((a+b)+c)".
13836              */
13837
13838             /* Things are handled differently for floating-point operators */
13839
13840             if (!varTypeIsFloating(tree->TypeGet()))
13841             {
13842                 fgMoveOpsLeft(tree);
13843                 op1 = tree->gtOp1;
13844                 op2 = tree->gtOp2;
13845             }
13846         }
13847     }
13848
13849 #if REARRANGE_ADDS
13850
13851     /* Change "((x+icon)+y)" to "((x+y)+icon)"
13852        Don't reorder floating-point operations */
13853
13854     if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13855         varTypeIsIntegralOrI(typ))
13856     {
13857         GenTreePtr ad2 = op1->gtOp.gtOp2;
13858
13859         if (!op2->OperIsConst() && ad2->OperIsConst())
13860         {
13861             // This takes
13862             //       + (tree)
13863             //      / \
13864             //     /   \
13865             //    /     \
13866             //   + (op1) op2
13867             //  / \
13868             //     \
13869             //     ad2
13870             //
13871             // And it swaps ad2 and op2.  If (op2) is varTypeIsGC, then this implies that (tree) is
13872             // varTypeIsGC.  If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
13873             // (op1) with a child that is varTypeIsGC.  If we encounter that situation, make (op1) the same
13874             // type as (tree).
13875             //
13876             // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
13877             // necessary
13878
13879             if (varTypeIsGC(op2->TypeGet()))
13880             {
13881                 noway_assert(varTypeIsGC(typ));
13882                 op1->gtType = typ;
13883             }
13884             tree->gtOp2 = ad2;
13885
13886             op1->gtOp.gtOp2 = op2;
13887             op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13888
13889             op2 = tree->gtOp2;
13890         }
13891     }
13892
13893 #endif
13894
13895     /*-------------------------------------------------------------------------
13896      * Perform optional oper-specific postorder morphing
13897      */
13898
13899     switch (oper)
13900     {
13901         genTreeOps cmop;
13902         bool       dstIsSafeLclVar;
13903
13904         case GT_ASG:
13905             /* We'll convert "a = a <op> x" into "a <op>= x"                     */
13906             /*     and also  "a = x <op> a" into "a <op>= x" for commutative ops */
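            /* e.g. (illustrative) "a = a + 3" becomes "a += 3" (GT_ASG_ADD) */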
13907             CLANG_FORMAT_COMMENT_ANCHOR;
13908
13909             if (typ == TYP_LONG)
13910             {
13911                 break;
13912             }
13913
13914             if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13915             {
13916                 if (tree->OperIsCopyBlkOp())
13917                 {
13918                     return fgMorphCopyBlock(tree);
13919                 }
13920                 else
13921                 {
13922                     return fgMorphInitBlock(tree);
13923                 }
13924             }
13925
13926             /* Make sure we're allowed to do this */
13927
13928             if (optValnumCSE_phase)
13929             {
13930                 // It is not safe to reorder/delete CSE's
13931                 break;
13932             }
13933
13934             /* Are we assigning to a GT_LCL_VAR ? */
13935
13936             dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
13937
13938             /* If we have a GT_LCL_VAR, then is the address taken? */
13939             if (dstIsSafeLclVar)
13940             {
13941                 unsigned   lclNum = op1->gtLclVarCommon.gtLclNum;
13942                 LclVarDsc* varDsc = lvaTable + lclNum;
13943
13944                 noway_assert(lclNum < lvaCount);
13945
13946                 /* Is the address taken? */
13947                 if (varDsc->lvAddrExposed)
13948                 {
13949                     dstIsSafeLclVar = false;
13950                 }
13951                 else if (op2->gtFlags & GTF_ASG)
13952                 {
13953                     break;
13954                 }
13955             }
13956
13957             if (!dstIsSafeLclVar)
13958             {
13959                 if (op2->gtFlags & GTF_ASG)
13960                 {
13961                     break;
13962                 }
13963
13964                 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13965                 {
13966                     break;
13967                 }
13968             }
13969
13970             /* Special case: a cast that can be thrown away */
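            /* e.g. (illustrative) in "*(short*)p = (short)i" the cast to short
               is redundant: the two-byte store truncates anyway, so dropping
               the cast loses no precision */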
13971
13972             if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13973             {
13974                 var_types srct;
13975                 var_types cast;
13976                 var_types dstt;
13977
13978                 srct = op2->gtCast.CastOp()->TypeGet();
13979                 cast = (var_types)op2->CastToType();
13980                 dstt = op1->TypeGet();
13981
13982                 /* Make sure these are all ints and precision is not lost */
13983
13984                 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
13985                 {
13986                     op2 = tree->gtOp2 = op2->gtCast.CastOp();
13987                 }
13988             }
13989
13990             /* Make sure we have the operator range right */
13991
13992             static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
13993             static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
13994             static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
13995             static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
13996             static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
13997             static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");
13998
13999             static_assert(GT_OR == GT_ADD + 7, "bad oper value");
14000             static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
14001             static_assert(GT_AND == GT_ADD + 9, "bad oper value");
14002
14003             static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
14004             static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
14005             static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");
14006
14007             /* Check for a suitable operator on the RHS */
14008
14009             cmop = op2->OperGet();
14010
14011             switch (cmop)
14012             {
14013                 case GT_NEG:
14014                     // GT_CHS only supported for integer types
14015                     if (varTypeIsFloating(tree->TypeGet()))
14016                     {
14017                         break;
14018                     }
14019
14020                     goto ASG_OP;
14021
14022                 case GT_MUL:
14023                     // GT_ASG_MUL only supported for floating point types
14024                     if (!varTypeIsFloating(tree->TypeGet()))
14025                     {
14026                         break;
14027                     }
14028
14029                     __fallthrough;
14030
14031                 case GT_ADD:
14032                 case GT_SUB:
14033                     if (op2->gtOverflow())
14034                     {
14035                         /* Disable folding into "<op>=" if the result can be
14036                            visible to anyone, as <op> may throw an exception and
14037                            the assignment should not proceed.
14038                            We are safe with an assignment to a local variable.
14039                          */
14040                         if (ehBlockHasExnFlowDsc(compCurBB))
14041                         {
14042                             break;
14043                         }
14044                         if (!dstIsSafeLclVar)
14045                         {
14046                             break;
14047                         }
14048                     }
14049 #ifndef _TARGET_AMD64_
14050                     // This is hard for byte-operations as we need to make
14051                     // sure both operands are in RBM_BYTE_REGS.
14052                     if (varTypeIsByte(op2->TypeGet()))
14053                         break;
14054 #endif // _TARGET_AMD64_
14055                     goto ASG_OP;
14056
14057                 case GT_DIV:
14058                 case GT_UDIV:
14059                     // GT_ASG_DIV only supported for floating point types
14060                     if (!varTypeIsFloating(tree->TypeGet()))
14061                     {
14062                         break;
14063                     }
14064                     __fallthrough;

14065                 case GT_LSH:
14066                 case GT_RSH:
14067                 case GT_RSZ:
14068                 case GT_OR:
14069                 case GT_XOR:
14070                 case GT_AND:
14071                 ASG_OP:
14072                 {
14073                     bool bReverse       = false;
14074                     bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
14075                     if (bAsgOpFoldable)
14076                     {
14077                         if (bReverse)
14078                         {
14079                             // We will transform this from "a = x <op> a" to "a <op>= x"
14080                             // so we can now destroy the duplicate "a"
14081                             DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
14082                             op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
14083                         }
14084
14085                         /* Special case: "x |= -1" and "x &= 0" */
14086                         if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
14087                             ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
14088                         {
14089                             /* Simply change to an assignment */
14090                             tree->gtOp2 = op2->gtOp.gtOp2;
14091                             break;
14092                         }
14093
14094                         if (cmop == GT_NEG)
14095                         {
14096                             /* This is "x = -x;", use the flipsign operator */
14097
14098                             tree->ChangeOper(GT_CHS);
14099
14100                             if (op1->gtOper == GT_LCL_VAR)
14101                             {
14102                                 op1->gtFlags |= GTF_VAR_USEASG;
14103                             }
14104
14105                             tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
14106
14107                             break;
14108                         }
14109
14110                         if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
14111                         {
14112                             // Changing from x = x op y to x op= y when x is a small integer type
14113                             // makes the op size smaller (originally the op size was 32 bits, after
14114                             // sign or zero extension of x, and there is an implicit truncation in the
14115                             // assignment).
14116                             // This is ok in most cases because the upper bits were
14117                             // lost when assigning the op result to a small type var,
14118                             // but it may not be ok for the right shift operation where the higher bits
14119                             // could be shifted into the lower bits and preserved.
14120                             // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
14121                             // (sbyte)x >>signed y)) as does unsigned right shift
14122                             // ((ubyte)((int)(ubyte)x >>unsigned y) == (ubyte)x >>unsigned y), but a signed
14123                             // right shift of an unsigned small type may give the
14124                             // wrong result:
14125                             // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
14126                             // but  (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
14127                             // The result becomes correct if we use >>unsigned instead of >>signed.
14128                             noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
14129                             cmop = GT_RSZ;
14130                         }
14131
14132                         /* Replace with an assignment operator */
14133                         noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
14134                         noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
14135                         noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
14136                         noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
14137                         noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
14138                         noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
14139                         noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
14140                         noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
14141
14142                         tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
14143                         tree->gtOp2 = op2->gtOp.gtOp2;
14144
14145                         /* Propagate GTF_OVERFLOW */
14146
14147                         if (op2->gtOverflowEx())
14148                         {
14149                             tree->gtType = op2->gtType;
14150                             tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
14151                         }
14152
14153 #if FEATURE_SET_FLAGS
14154
14155                         /* Propagate GTF_SET_FLAGS */
14156                         if (op2->gtSetFlags())
14157                         {
14158                             tree->gtRequestSetFlags();
14159                         }
14160
14161 #endif // FEATURE_SET_FLAGS
14162
14163                         DEBUG_DESTROY_NODE(op2);
14164                         op2 = tree->gtOp2;
14165
14166                         /* The target is used as well as being defined */
14167                         if (op1->OperIsLocal())
14168                         {
14169                             op1->gtFlags &= ~GTF_VAR_USEDEF;
14170                             op1->gtFlags |= GTF_VAR_USEASG;
14171                         }
14172
14173 #if CPU_HAS_FP_SUPPORT
14174                         /* Check for the special case "x += y * x;" */
14175
14176                         // Only += and -= qualify here; the result will be a
                        // GT_ASG_MUL, which is only supported for floating point types
14177                         if (cmop != GT_ADD && cmop != GT_SUB)
14178                         {
14179                             break;
14180                         }
14181
14182                         if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
14183                         {
14184                             if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14185                             {
14186                                 /* Change "x += x * y" into "x *= (y + 1)" */
14187
14188                                 op2 = op2->gtOp.gtOp2;
14189                             }
14190                             else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
14191                             {
14192                                 /* Change "x += y * x" into "x *= (y + 1)" */
14193
14194                                 op2 = op2->gtOp.gtOp1;
14195                             }
14196                             else
14197                             {
14198                                 break;
14199                             }
14200
14201                             op1 = gtNewDconNode(1.0);
14202
14203                             /* Now make the "*=" node */
14204
14205                             if (cmop == GT_ADD)
14206                             {
14207                                 /* Change "x += x * y" into "x *= (y + 1)" */
14208
14209                                 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
14210                             }
14211                             else
14212                             {
14213                                 /* Change "x -= x * y" into "x *= (1 - y)" */
14214
14215                                 noway_assert(cmop == GT_SUB);
14216                                 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
14217                             }
14218                             tree->ChangeOper(GT_ASG_MUL);
14219                         }
14220 #endif // CPU_HAS_FP_SUPPORT
14221                     }
14222                 }
14223
14224                 break;
14225
14226                 case GT_NOT:
14227
14228                     /* Is the destination identical to the first RHS sub-operand? */
14229
14230                     if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14231                     {
14232                         /* This is "x = ~x" which is the same as "x ^= -1"
14233                          * Transform the node into a GT_ASG_XOR */
14234
14235                         noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
14236
14237                         op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
14238
14239                         cmop = GT_XOR;
14240                         goto ASG_OP;
14241                     }
14242
14243                     break;
14244                 default:
14245                     break;
14246             }
14247
14248             break;
14249
14250         case GT_MUL:
14251
14252             /* Check for the case "(val + icon) * icon" */
14253
14254             if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14255             {
14256                 GenTreePtr add = op1->gtOp.gtOp2;
14257
14258                 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14259                 {
14260                     if (tree->gtOverflow() || op1->gtOverflow())
14261                     {
14262                         break;
14263                     }
14264
14265                     ssize_t imul = op2->gtIntCon.gtIconVal;
14266                     ssize_t iadd = add->gtIntCon.gtIconVal;
14267
14268                     /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
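                    /* e.g. (illustrative) "(x + 3) * 5" morphs into "(x * 5) + 15" */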
14269
14270                     oper = GT_ADD;
14271                     tree->ChangeOper(oper);
14272
14273                     op2->gtIntCon.gtIconVal = iadd * imul;
14274
14275                     op1->ChangeOper(GT_MUL);
14276
14277                     add->gtIntCon.gtIconVal = imul;
14278 #ifdef _TARGET_64BIT_
14279                     if (add->gtType == TYP_INT)
14280                     {
14281                         // we need to properly re-sign-extend or truncate after multiplying two int constants above
14282                         add->AsIntCon()->TruncateOrSignExtend32();
14283                     }
14284 #endif //_TARGET_64BIT_
14285                 }
14286             }
14287
14288             break;
14289
14290         case GT_DIV:
14291
14292             /* For "val / 1", just return "val" */
14293
14294             if (op2->IsIntegralConst(1))
14295             {
14296                 DEBUG_DESTROY_NODE(tree);
14297                 return op1;
14298             }
14299
14300             break;
14301
14302         case GT_LSH:
14303
14304             /* Check for the case "(val + icon) << icon" */
14305
14306             if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
14307             {
14308                 GenTreePtr cns = op1->gtOp.gtOp2;
14309
14310                 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14311                 {
14312                     ssize_t ishf = op2->gtIntConCommon.IconValue();
14313                     ssize_t iadd = cns->gtIntConCommon.IconValue();
14314
14315                     // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14316
14317                     /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
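                    /* e.g. (illustrative) "(val + 3) << 2" morphs into "(val << 2) + 12" */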
14318
14319                     tree->ChangeOper(GT_ADD);
14320                     ssize_t result = iadd << ishf;
14321                     op2->gtIntConCommon.SetIconValue(result);
14322 #ifdef _TARGET_64BIT_
14323                     if (op1->gtType == TYP_INT)
14324                     {
14325                         op2->AsIntCon()->TruncateOrSignExtend32();
14326                     }
14327 #endif // _TARGET_64BIT_
14328
14329                     // we are reusing the shift amount node here, but the type we want is that of the shift result
14330                     op2->gtType = op1->gtType;
14331
14332                     if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14333                         cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14334                     {
14335                         assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14336                         op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14337                     }
14338
14339                     op1->ChangeOper(GT_LSH);
14340
14341                     cns->gtIntConCommon.SetIconValue(ishf);
14342                 }
14343             }
14344
14345             break;
14346
14347         case GT_XOR:
14348
14349             if (!optValnumCSE_phase)
14350             {
14351                 /* "x ^ -1" is "~x" */
14352
14353                 if (op2->IsIntegralConst(-1))
14354                 {
14355                     tree->ChangeOper(GT_NOT);
14356                     tree->gtOp2 = nullptr;
14357                     DEBUG_DESTROY_NODE(op2);
14358                 }
14359                 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14360                 {
14361                     /* "binaryVal ^ 1" is "!binaryVal" */
14362                     gtReverseCond(op1);
14363                     DEBUG_DESTROY_NODE(op2);
14364                     DEBUG_DESTROY_NODE(tree);
14365                     return op1;
14366                 }
14367             }
14368
14369             break;
14370
14371         case GT_INIT_VAL:
14372             // Initialization values for initBlk have special semantics - their lower
14373             // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14374             // which enables them to get a VNForZero, and be propagated.
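            // e.g. (illustrative) an init value of 0xAB fills every byte, so a
            // 4-byte block becomes 0xABABABAB; a bare 0 stays 0, so it can get
            // VNForZero and be propagated.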
14375             if (op1->IsIntegralConst(0))
14376             {
14377                 return op1;
14378             }
14379             break;
14380
14381         default:
14382             break;
14383     }
14384     return tree;
14385 }
14386
14387 //------------------------------------------------------------------------
14388 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14389 // (see ECMA III 3.55 and III.3.56).
14390 //
14391 // Arguments:
14392 //    tree - The GT_MOD/GT_UMOD tree to morph
14393 //
14394 // Returns:
14395 //    The morphed tree
14396 //
14397 // Notes:
14398 //    For ARM64 we don't have a remainder instruction so this transform is
14399 //    always done. For XARCH this transform is done if we know that magic
14400 //    division will be used, in that case this transform allows CSE to
14401 //    eliminate the redundant div from code like "x = a / 3; y = a % 3;".
14402 //
14403 //    This method will produce the above expression if 'a' and 'b' are
14404 //    leaf nodes; otherwise, if either of them is not a leaf, it will spill
14405 //    its value into a temporary variable. An example:
14406 //    (x * 2 - 1) % (y + 1) ->  t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
14407 //
14408 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14409 {
14410     if (tree->OperGet() == GT_MOD)
14411     {
14412         tree->SetOper(GT_DIV);
14413     }
14414     else if (tree->OperGet() == GT_UMOD)
14415     {
14416         tree->SetOper(GT_UDIV);
14417     }
14418     else
14419     {
14420         noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14421     }
14422
14423     var_types type        = tree->gtType;
14424     GenTree*  denominator = tree->gtOp2;
14425     GenTree*  numerator   = tree->gtOp1;
14426
14427     if (!numerator->OperIsLeaf())
14428     {
14429         numerator = fgMakeMultiUse(&tree->gtOp1);
14430     }
14431     else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
14432     {
14433         // Morphing introduces new lclVar references. Increase ref counts
14434         lvaIncRefCnts(numerator);
14435     }
14436
14437     if (!denominator->OperIsLeaf())
14438     {
14439         denominator = fgMakeMultiUse(&tree->gtOp2);
14440     }
14441     else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
14442     {
14443         // Morphing introduces new lclVar references. Increase ref counts
14444         lvaIncRefCnts(denominator);
14445     }
14446
14447     // The numerator and denominator may have been assigned to temps, in which case
14448     // their defining assignments are in the current tree. Therefore, we need to
14449     // set the execution order accordingly on the nodes we create.
14450     // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14451     // be set to be evaluated in reverse order.
14452     //
14453     GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14454     assert(!mul->IsReverseOp());
14455     GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14456     sub->gtFlags |= GTF_REVERSE_OPS;
14457
14458 #ifdef DEBUG
14459     sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14460 #endif
14461
14462     return sub;
14463 }
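
// Illustrative sketch (editorial; not part of the JIT): the identity the
// transform above relies on, written directly in C++. For integral a and b
// with b != 0 (and excluding the INT_MIN / -1 overflow case):
//
//   int ModViaSubMulDiv(int a, int b)
//   {
//       int q = a / b;    // the GT_DIV produced by SetOper above
//       return a - q * b; // the GT_SUB/GT_MUL wrapper built around it
//   }
//
// e.g. a = 7, b = 3: q = 2 and 7 - 2 * 3 == 1 == 7 % 3.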
14464
14465 //------------------------------------------------------------------------------
14466 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14467 //
14468 //
14469 // Arguments:
14470 //    oper  - Operation to check
14471 //
14472 // Return Value:
14473 //    True if the operation can be a root of a bitwise rotation tree; false otherwise.
14474
14475 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14476 {
14477     return (oper == GT_OR) || (oper == GT_XOR);
14478 }
14479
14480 //------------------------------------------------------------------------------
14481 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14482 //                                      an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14483 //
14484 // Arguments:
14485 //    tree  - tree to check for a rotation pattern
14486 //
14487 // Return Value:
14488 //    An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14489 //
14490 // Assumption:
14491 //    The input is a GT_OR or a GT_XOR tree.
14492
14493 GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
14494 {
14495 #ifndef LEGACY_BACKEND
14496     //
14497     // Check for a rotation pattern, e.g.,
14498     //
14499     //                         OR                      ROL
14500     //                      /      \                   / \
14501     //                    LSH      RSZ      ->        x   y
14502     //                    / \      / \
14503     //                   x  AND   x  AND
14504     //                      / \      / \
14505     //                     y  31   ADD  31
14506     //                             / \
14507     //                            NEG 32
14508     //                             |
14509     //                             y
14510     // The patterns recognized:
14511     // (x << (y & M)) op (x >>> ((-y + N) & M))
14512     // (x >>> ((-y + N) & M)) op (x << (y & M))
14513     //
14514     // (x << y) op (x >>> (-y + N))
14515     // (x >>> (-y + N)) op (x << y)
14516     //
14517     // (x >>> (y & M)) op (x << ((-y + N) & M))
14518     // (x << ((-y + N) & M)) op (x >>> (y & M))
14519     //
14520     // (x >>> y) op (x << (-y + N))
14521     // (x << (-y + N)) op (x >>> y)
14522     //
14523     // (x << c1) op (x >>> c2)
14524     // (x >>> c1) op (x << c2)
14525     //
14526     // where
14527     // c1 and c2 are const
14528     // c1 + c2 == bitsize(x)
14529     // N == bitsize(x)
14530     // M is const
14531     // M & (N - 1) == N - 1
14532     // op is either | or ^
14533
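    // Illustrative sketch (editorial; not part of the JIT): a source-level
    // rotate idiom matching the first pattern pair with N == 32 and M == 31,
    // which this method morphs into a single GT_ROL:
    //
    //   uint32_t Rol(uint32_t x, uint32_t y)
    //   {
    //       return (x << (y & 31)) | (x >> ((32 - y) & 31));
    //   }
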
14534     if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14535     {
14536         // We can't do anything if the tree has assignments, calls, or volatile
14537         // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14538         // thrown by the original tree will be thrown by the transformed tree as well.
14539         return tree;
14540     }
14541
14542     genTreeOps oper = tree->OperGet();
14543     assert(fgOperIsBitwiseRotationRoot(oper));
14544
14545     // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14546     GenTreePtr op1            = tree->gtGetOp1();
14547     GenTreePtr op2            = tree->gtGetOp2();
14548     GenTreePtr leftShiftTree  = nullptr;
14549     GenTreePtr rightShiftTree = nullptr;
14550     if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14551     {
14552         leftShiftTree  = op1;
14553         rightShiftTree = op2;
14554     }
14555     else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14556     {
14557         leftShiftTree  = op2;
14558         rightShiftTree = op1;
14559     }
14560     else
14561     {
14562         return tree;
14563     }
14564
14565     // Check if the trees representing the value to shift are identical.
14566     // We already checked that there are no side effects above.
14567     if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14568     {
14569         GenTreePtr rotatedValue           = leftShiftTree->gtGetOp1();
14570         var_types  rotatedValueActualType = genActualType(rotatedValue->gtType);
14571         ssize_t    rotatedValueBitSize    = genTypeSize(rotatedValueActualType) * 8;
14572         noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14573         GenTreePtr leftShiftIndex  = leftShiftTree->gtGetOp2();
14574         GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
14575
14576         // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
14577         // shouldn't be masked for the transformation to be valid. If additional
14578         // higher bits are not masked, the transformation is still valid since the result
14579         // of MSIL shift instructions is unspecified if the shift amount is greater or equal
14580         // than the width of the value being shifted.
14581         ssize_t minimalMask    = rotatedValueBitSize - 1;
14582         ssize_t leftShiftMask  = -1;
14583         ssize_t rightShiftMask = -1;
14584
14585         if (leftShiftIndex->OperGet() == GT_AND)
14586         {
14587             if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14588             {
14589                 leftShiftMask  = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14590                 leftShiftIndex = leftShiftIndex->gtGetOp1();
14591             }
14592             else
14593             {
14594                 return tree;
14595             }
14596         }
14597
14598         if (rightShiftIndex->OperGet() == GT_AND)
14599         {
14600             if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14601             {
14602                 rightShiftMask  = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14603                 rightShiftIndex = rightShiftIndex->gtGetOp1();
14604             }
14605             else
14606             {
14607                 return tree;
14608             }
14609         }
14610
14611         if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14612         {
14613             // The shift index is overmasked; e.g., we have
14614             // something like (x << (y & 15)) or
14615             // (x >> ((32 - y) & 15)) with 32-bit x.
14616             // The transformation is not valid.
14617             return tree;
14618         }
14619
14620         GenTreePtr shiftIndexWithAdd    = nullptr;
14621         GenTreePtr shiftIndexWithoutAdd = nullptr;
14622         genTreeOps rotateOp             = GT_NONE;
14623         GenTreePtr rotateIndex          = nullptr;
14624
14625         if (leftShiftIndex->OperGet() == GT_ADD)
14626         {
14627             shiftIndexWithAdd    = leftShiftIndex;
14628             shiftIndexWithoutAdd = rightShiftIndex;
14629             rotateOp             = GT_ROR;
14630         }
14631         else if (rightShiftIndex->OperGet() == GT_ADD)
14632         {
14633             shiftIndexWithAdd    = rightShiftIndex;
14634             shiftIndexWithoutAdd = leftShiftIndex;
14635             rotateOp             = GT_ROL;
14636         }
14637
14638         if (shiftIndexWithAdd != nullptr)
14639         {
14640             if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14641             {
14642                 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14643                 {
14644                     if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14645                     {
14646                         if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14647                         {
14648                             // We found one of these patterns:
14649                             // (x << (y & M)) | (x >>> ((-y + N) & M))
14650                             // (x << y) | (x >>> (-y + N))
14651                             // (x >>> (y & M)) | (x << ((-y + N) & M))
14652                             // (x >>> y) | (x << (-y + N))
14653                             // where N == bitsize(x), M is const, and
14654                             // M & (N - 1) == N - 1
14655                             CLANG_FORMAT_COMMENT_ANCHOR;
14656
14657 #ifndef _TARGET_64BIT_
14658                             if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14659                             {
14660                                 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14661                                 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14662                                 // to add helpers for GT_ROL and GT_ROR.
14663                                 return tree;
14664                             }
14665 #endif
14666
14667                             rotateIndex = shiftIndexWithoutAdd;
14668                         }
14669                     }
14670                 }
14671             }
14672         }
14673         else if (leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI())
14674         {
14675             if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14676             {
14677                 // We found this pattern:
14678                 // (x << c1) | (x >>> c2)
14679                 // where c1 and c2 are const and c1 + c2 == bitsize(x)
14680                 rotateOp    = GT_ROL;
14681                 rotateIndex = leftShiftIndex;
14682             }
14683         }
14684
14685         if (rotateIndex != nullptr)
14686         {
14687             noway_assert(GenTree::OperIsRotate(rotateOp));
14688
14689             unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14690
14691             // We can use the same tree only during global morph; reusing the tree in a later morph
14692             // may invalidate value numbers.
14693             if (fgGlobalMorph)
14694             {
14695                 tree->gtOp.gtOp1 = rotatedValue;
14696                 tree->gtOp.gtOp2 = rotateIndex;
14697                 tree->ChangeOper(rotateOp);
14698
14699                 unsigned childFlags = 0;
14700                 for (GenTree* op : tree->Operands())
14701                 {
14702                     childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14703                 }
14704
14705                 // The parent's flags should be a superset of its operands' flags
14706                 noway_assert((inputTreeEffects & childFlags) == childFlags);
14707             }
14708             else
14709             {
14710                 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14711                 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14712             }
14713
14714             return tree;
14715         }
14716     }
14717 #endif // LEGACY_BACKEND
14718     return tree;
14719 }
14720
14721 #if !CPU_HAS_FP_SUPPORT
14722 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
14723 {
14724
14725     genTreeOps oper = tree->OperGet();
14726     var_types  typ  = tree->TypeGet();
14727     GenTreePtr op1  = tree->gtOp.gtOp1;
14728     GenTreePtr op2  = tree->gtGetOp2IfPresent();
14729
14730     /*
14731         We have to use helper calls for all FP operations:
14732
14733             FP operators that operate on FP values
14734             casts to and from FP
14735             comparisons of FP values
14736      */
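    /* Illustrative (editorial): e.g. "f1 + f2" with TYP_FLOAT operands becomes a
       call to the CPX_R4_ADD helper, with both values passed as stack arguments
       (see the argument list built below) */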
14737
14738     if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
14739     {
14740         int        helper;
14741         GenTreePtr args;
14742         size_t     argc = genTypeStSz(typ);
14743
14744         /* Not all FP operations need helper calls */
14745
14746         switch (oper)
14747         {
14748             case GT_ASG:
14749             case GT_IND:
14750             case GT_LIST:
14751             case GT_ADDR:
14752             case GT_COMMA:
14753                 return tree;
14754         }
14755
14756 #ifdef DEBUG
14757
14758         /* If the result isn't FP, it better be a compare or cast */
14759
14760         if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14761             gtDispTree(tree);
14762
14763         noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14764 #endif
14765
14766         /* Keep track of how many arguments we're passing */
14767
14768         fgPtrArgCntCur += argc;
14769
14770         /* Is this a binary operator? */
14771
14772         if (op2)
14773         {
14774             /* Add the second operand to the argument count */
14775
14776             fgPtrArgCntCur += argc;
14777             argc *= 2;
14778
14779             /* What kind of an operator do we have? */
14780
14781             switch (oper)
14782             {
14783                 case GT_ADD:
14784                     helper = CPX_R4_ADD;
14785                     break;
14786                 case GT_SUB:
14787                     helper = CPX_R4_SUB;
14788                     break;
14789                 case GT_MUL:
14790                     helper = CPX_R4_MUL;
14791                     break;
14792                 case GT_DIV:
14793                     helper = CPX_R4_DIV;
14794                     break;
14795                 // case GT_MOD: helper = CPX_R4_REM; break;
14796
14797                 case GT_EQ:
14798                     helper = CPX_R4_EQ;
14799                     break;
14800                 case GT_NE:
14801                     helper = CPX_R4_NE;
14802                     break;
14803                 case GT_LT:
14804                     helper = CPX_R4_LT;
14805                     break;
14806                 case GT_LE:
14807                     helper = CPX_R4_LE;
14808                     break;
14809                 case GT_GE:
14810                     helper = CPX_R4_GE;
14811                     break;
14812                 case GT_GT:
14813                     helper = CPX_R4_GT;
14814                     break;
14815
14816                 default:
14817 #ifdef DEBUG
14818                     gtDispTree(tree);
14819 #endif
14820                     noway_assert(!"unexpected FP binary op");
14821                     break;
14822             }
14823
14824             args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14825         }
14826         else
14827         {
14828             switch (oper)
14829             {
14830                 case GT_RETURN:
14831                     return tree;
14832
14833                 case GT_CAST:
14834                     noway_assert(!"FP cast");
14835
14836                 case GT_NEG:
14837                     helper = CPX_R4_NEG;
14838                     break;
14839
14840                 default:
14841 #ifdef DEBUG
14842                     gtDispTree(tree);
14843 #endif
14844                     noway_assert(!"unexpected FP unary op");
14845                     break;
14846             }
14847
14848             args = gtNewArgList(tree->gtOp.gtOp1);
14849         }
14850
14851         /* If we have double result/operands, modify the helper */
14852
14853         if (typ == TYP_DOUBLE)
14854         {
14855             static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG);
14856             static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD);
14857             static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB);
14858             static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL);
14859             static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV);
14860
14861             helper++;
14862         }
14863         else
14864         {
14865             noway_assert(tree->OperIsCompare());
14866
14867             static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ);
14868             static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE);
14869             static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT);
14870             static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE);
14871             static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE);
14872             static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT);
14873         }
14874
14875         tree = fgMorphIntoHelperCall(tree, helper, args);
14876
14877         if (fgPtrArgCntMax < fgPtrArgCntCur)
14878         {
14879             JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
14880             fgPtrArgCntMax = fgPtrArgCntCur;
14881         }
14882
14883         fgPtrArgCntCur -= argc;
14884         return tree;
14885
    }

    // Note (editorial): this GT_RETURN handling was an orphaned 'case' label with
    // no enclosing switch; because this function is only compiled when
    // CPU_HAS_FP_SUPPORT is 0, which no current target sets, the error went
    // unnoticed. It is restructured here as a plain test on 'oper'.
    if (oper == GT_RETURN)
    {
14887
14888             if (op1)
14889             {
14890
14891                 if (compCurBB == genReturnBB)
14892                 {
14893                     /* This is the 'exitCrit' call at the exit label */
14894
14895                     noway_assert(op1->gtType == TYP_VOID);
14896                     noway_assert(op2 == nullptr);
14897
14898                     tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14899
14900                     return tree;
14901                 }
14902
14903                 /* This is a (real) return value -- check its type */
14904                 CLANG_FORMAT_COMMENT_ANCHOR;
14905
14906 #ifdef DEBUG
14907                 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14908                 {
14909                     bool allowMismatch = false;
14910
14911                     // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14912                     if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14913                         (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14914                         allowMismatch = true;
14915
14916                     if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14917                         allowMismatch = true;
14918
14919                     if (!allowMismatch)
14920                         NO_WAY("Return type mismatch");
14921                 }
14922 #endif
14923             }
14925     }
14926     return tree;
14927 }
14928 #endif
14929
14930 /*****************************************************************************
14931  *
14932  *  Transform the given tree for code generation and return an equivalent tree.
14933  */
14934
14935 GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
14936 {
14937     assert(tree);
14938     assert(tree->gtOper != GT_STMT);
14939
14940 #ifdef DEBUG
14941     if (verbose)
14942     {
14943         if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14944         {
14945             noway_assert(!"JitBreakMorphTree hit");
14946         }
14947     }
14948 #endif
14949
14950 #ifdef DEBUG
14951     int thisMorphNum = 0;
14952     if (verbose && treesBeforeAfterMorph)
14953     {
14954         thisMorphNum = morphNum++;
14955         printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14956         gtDispTree(tree);
14957     }
14958 #endif
14959
14960     if (fgGlobalMorph)
14961     {
14962         // Apply any rewrites for implicit byref arguments before morphing the
14963         // tree.
14964
14965         if (fgMorphImplicitByRefArgs(tree))
14966         {
14967 #ifdef DEBUG
14968             if (verbose && treesBeforeAfterMorph)
14969             {
14970                 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
14971                 gtDispTree(tree);
14972             }
14973 #endif
14974         }
14975     }
14976
14977 /*-------------------------------------------------------------------------
14978  * fgMorphTree() can potentially replace a tree with another, and the
14979  * caller has to store the return value correctly.
14980  * Turn this on to always make a copy of "tree" here to shake out
14981  * hidden/unupdated references.
14982  */
14983
14984 #ifdef DEBUG
14985
14986     if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14987     {
14988         GenTreePtr copy;
14989
14990 #ifdef SMALL_TREE_NODES
14991         if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14992         {
14993             copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14994         }
14995         else
14996 #endif
14997         {
14998             copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
14999         }
15000
15001         copy->CopyFrom(tree, this);
15002
15003 #if defined(LATE_DISASM)
15004         // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
15005         if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
15006         {
15007             copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
15008             copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
15009         }
15010 #endif
15011
15012         DEBUG_DESTROY_NODE(tree);
15013         tree = copy;
15014     }
15015 #endif // DEBUG
15016
15017     if (fgGlobalMorph)
15018     {
15019         /* Ensure that we haven't morphed this node already */
15020         assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15021
15022 #if LOCAL_ASSERTION_PROP
15023         /* Before morphing the tree, we try to propagate any active assertions */
15024         if (optLocalAssertionProp)
15025         {
15026             /* Do we have any active assertions? */
15027
15028             if (optAssertionCount > 0)
15029             {
15030                 GenTreePtr newTree = tree;
15031                 while (newTree != nullptr)
15032                 {
15033                     tree = newTree;
15034                     /* newTree is non-null if we propagated an assertion */
15035                     newTree = optAssertionProp(apFull, tree, nullptr);
15036                 }
15037                 assert(tree != nullptr);
15038             }
15039         }
15040         PREFAST_ASSUME(tree != nullptr);
15041 #endif
15042     }
15043
15044     /* Save the original un-morphed tree for fgMorphTreeDone */
15045
15046     GenTreePtr oldTree = tree;
15047
15048     /* Figure out what kind of a node we have */
15049
15050     unsigned kind = tree->OperKind();
15051
15052     /* Is this a constant node? */
15053
15054     if (kind & GTK_CONST)
15055     {
15056         tree = fgMorphConst(tree);
15057         goto DONE;
15058     }
15059
15060     /* Is this a leaf node? */
15061
15062     if (kind & GTK_LEAF)
15063     {
15064         tree = fgMorphLeaf(tree);
15065         goto DONE;
15066     }
15067
15068     /* Is it a 'simple' unary/binary operator? */
15069
15070     if (kind & GTK_SMPOP)
15071     {
15072         tree = fgMorphSmpOp(tree, mac);
15073         goto DONE;
15074     }
15075
15076     /* See what kind of a special operator we have here */
15077
15078     switch (tree->OperGet())
15079     {
15080         case GT_FIELD:
15081             tree = fgMorphField(tree, mac);
15082             break;
15083
15084         case GT_CALL:
15085             tree = fgMorphCall(tree->AsCall());
15086             break;
15087
15088         case GT_ARR_BOUNDS_CHECK:
15089 #ifdef FEATURE_SIMD
15090         case GT_SIMD_CHK:
15091 #endif // FEATURE_SIMD
15092         {
15093             fgSetRngChkTarget(tree);
15094
15095             GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
15096             bndsChk->gtIndex          = fgMorphTree(bndsChk->gtIndex);
15097             bndsChk->gtArrLen         = fgMorphTree(bndsChk->gtArrLen);
15098             // If the index is a comma(throw, x), just return that.
15099             if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
15100             {
15101                 tree = bndsChk->gtIndex;
15102             }
15103
15104             // Propagate effects flags upwards
15105             bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
15106             bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
15107
15108             // Otherwise, we don't change the tree.
15109         }
15110         break;
15111
15112         case GT_ARR_ELEM:
15113             tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
15114             tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15115
15116             unsigned dim;
15117             for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15118             {
15119                 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
15120                 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
15121             }
15122             if (fgGlobalMorph)
15123             {
15124                 fgSetRngChkTarget(tree, false);
15125             }
15126             break;
15127
15128         case GT_ARR_OFFSET:
15129             tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15130             tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15131             tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15132             tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15133             tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15134             tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15135             if (fgGlobalMorph)
15136             {
15137                 fgSetRngChkTarget(tree, false);
15138             }
15139             break;
15140
15141         case GT_CMPXCHG:
15142             tree->gtCmpXchg.gtOpLocation  = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15143             tree->gtCmpXchg.gtOpValue     = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15144             tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15145             break;
15146
15147         case GT_STORE_DYN_BLK:
15148             tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
15149             __fallthrough;
15150         case GT_DYN_BLK:
15151             tree->gtDynBlk.Addr()        = fgMorphTree(tree->gtDynBlk.Addr());
15152             tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15153             break;
15154
15155         default:
15156 #ifdef DEBUG
15157             gtDispTree(tree);
15158 #endif
15159             noway_assert(!"unexpected operator");
15160     }
15161 DONE:
15162
15163     fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
15164
15165     return tree;
15166 }
15167
15168 #if LOCAL_ASSERTION_PROP
15169 //------------------------------------------------------------------------
15170 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
15171 //
15172 // Arguments:
15173 //    lclNum - The varNum of the lclVar for which we're killing assertions.
15174 //    tree   - (DEBUG only) the tree responsible for killing its assertions.
15175 //
15176 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
15177 {
15178     /* All dependent assertions are killed here */
15179
15180     ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15181
15182     if (killed)
15183     {
15184         AssertionIndex index = optAssertionCount;
15185         while (killed && (index > 0))
15186         {
15187             if (BitVecOps::IsMember(apTraits, killed, index - 1))
15188             {
15189 #ifdef DEBUG
15190                 AssertionDsc* curAssertion = optGetAssertion(index);
15191                 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15192                              ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15193                 if (verbose)
15194                 {
15195                     printf("\nThe assignment ");
15196                     printTreeID(tree);
15197                     printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15198                     optPrintAssertion(curAssertion);
15199                 }
15200 #endif
15201                 // Remove this bit from the killed mask
15202                 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15203
15204                 optAssertionRemove(index);
15205             }
15206
15207             index--;
15208         }
15209
15210         // killed mask should now be zero
15211         noway_assert(BitVecOps::IsEmpty(apTraits, killed));
15212     }
15213 }

15214 //------------------------------------------------------------------------
15215 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
15216 //
15217 // Arguments:
15218 //    lclNum - The varNum of the lclVar for which we're killing assertions.
15219 //    tree   - (DEBUG only) the tree responsible for killing its assertions.
15220 //
15221 // Notes:
15222 //    For structs and struct fields, it will invalidate the children and parent
15223 //    respectively.
15224 //    Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
15225 //
15226 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
15227 {
15228     LclVarDsc* varDsc = &lvaTable[lclNum];
15229
15230     if (varDsc->lvPromoted)
15231     {
15232         noway_assert(varTypeIsStruct(varDsc));
15233
15234         // Kill the field locals.
15235         for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
15236         {
15237             fgKillDependentAssertionsSingle(i DEBUGARG(tree));
15238         }
15239
15240         // Kill the struct local itself.
15241         fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15242     }
15243     else if (varDsc->lvIsStructField)
15244     {
15245         // Kill the field local.
15246         fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15247
15248         // Kill the parent struct.
15249         fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
15250     }
15251     else
15252     {
15253         fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15254     }
15255 }
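
// Illustrative (editorial): for a promoted struct local V01 with field locals
// V02 and V03, an assignment to V01 kills the assertions that depend on V02,
// V03, and then on V01 itself; an assignment to field V02 kills those on V02
// and on its parent V01.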
15256 #endif // LOCAL_ASSERTION_PROP
15257
15258 /*****************************************************************************
15259  *
15260  *  This function is called to complete the morphing of a tree node.
15261  *  It should only be called once for each node.
15262  *  If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
15263  *  to enforce the invariant that each node is only morphed once.
15264  *  If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
15265  *  by an equivalent tree.
15266  *
15267  */
15268
15269 void Compiler::fgMorphTreeDone(GenTreePtr tree,
15270                                GenTreePtr oldTree /* == NULL */
15271                                DEBUGARG(int morphNum))
15272 {
15273 #ifdef DEBUG
15274     if (verbose && treesBeforeAfterMorph)
15275     {
15276         printf("\nfgMorphTree (after %d):\n", morphNum);
15277         gtDispTree(tree);
15278         printf(""); // in our logic this causes a flush
15279     }
15280 #endif
15281
15282     if (!fgGlobalMorph)
15283     {
15284         return;
15285     }
15286
15287     if ((oldTree != nullptr) && (oldTree != tree))
15288     {
15289         /* Ensure that we have morphed this node */
15290         assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
15291
15292 #ifdef DEBUG
15293         TransferTestDataToNode(oldTree, tree);
15294 #endif
15295     }
15296     else
15297     {
15298         // Ensure that we haven't morphed this node already
15299         assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15300     }
15301
15302     if (tree->OperKind() & GTK_CONST)
15303     {
15304         goto DONE;
15305     }
15306
15307 #if LOCAL_ASSERTION_PROP
15308
15309     if (!optLocalAssertionProp)
15310     {
15311         goto DONE;
15312     }
15313
15314     /* Do we have any active assertions? */
15315
15316     if (optAssertionCount > 0)
15317     {
15318         /* Is this an assignment to a local variable */
15319         GenTreeLclVarCommon* lclVarTree = nullptr;
15320         if (tree->DefinesLocal(this, &lclVarTree))
15321         {
15322             unsigned lclNum = lclVarTree->gtLclNum;
15323             noway_assert(lclNum < lvaCount);
15324             fgKillDependentAssertions(lclNum DEBUGARG(tree));
15325         }
15326     }
15327
15328     /* If this tree makes a new assertion - make it available */
15329     optAssertionGen(tree);
15330
15331 #endif // LOCAL_ASSERTION_PROP
15332
15333 DONE:;
15334
15335 #ifdef DEBUG
15336     /* Mark this node as being morphed */
15337     tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15338 #endif
15339 }
15340
15341 /*****************************************************************************
15342  *
15343  *  Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15344  *  Returns true if we modified the flow graph
15345  */
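/*  An illustrative sketch (not from the original source): after inlining or
 *  constant folding, a guard such as
 *
 *      if (sizeof(void*) == 8) { ... }   // the condition morphs to a constant
 *
 *  leaves a GT_JTRUE over a GT_CNS_INT; this function then rewrites the block
 *  to BBJ_ALWAYS (constant != 0) or BBJ_NONE (constant == 0) and removes the
 *  edge to the never-taken successor.
 */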
15346
15347 bool Compiler::fgFoldConditional(BasicBlock* block)
15348 {
15349     bool result = false;
15350
15351     // We don't want to make any code unreachable
15352     if (opts.compDbgCode || opts.MinOpts())
15353     {
15354         return false;
15355     }
15356
15357     if (block->bbJumpKind == BBJ_COND)
15358     {
15359         noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15360
15361         GenTreePtr stmt = block->bbTreeList->gtPrev;
15362
15363         noway_assert(stmt->gtNext == nullptr);
15364
15365         if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15366         {
15367             noway_assert(fgRemoveRestOfBlock);
15368
15369             /* Unconditional throw - transform the basic block into a BBJ_THROW */
15370             fgConvertBBToThrowBB(block);
15371
15372             /* Remove 'block' from the predecessor list of 'block->bbNext' */
15373             fgRemoveRefPred(block->bbNext, block);
15374
15375             /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15376             fgRemoveRefPred(block->bbJumpDest, block);
15377
15378 #ifdef DEBUG
15379             if (verbose)
15380             {
15381                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15382                 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15383             }
15384 #endif
15385             goto DONE_COND;
15386         }
15387
15388         noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
15389
15390         /* Did we fold the conditional? */
15391
15392         noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15393         GenTreePtr cond;
15394         cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15395
15396         if (cond->OperKind() & GTK_CONST)
15397         {
15398             /* Yippee - we folded the conditional!
15399              * Remove the conditional statement */
15400
15401             noway_assert(cond->gtOper == GT_CNS_INT);
15402             noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
15403
15404             /* Remove the statement from bbTreeList - no need to update
15405              * the reference counts since there are no lcl vars */
15406             fgRemoveStmt(block, stmt);
15407
15408             // block is a BBJ_COND that we are folding the conditional for
15409             // bTaken is the path that will always be taken from block
15410             // bNotTaken is the path that will never be taken from block
15411             //
15412             BasicBlock* bTaken;
15413             BasicBlock* bNotTaken;
15414
15415             if (cond->gtIntCon.gtIconVal != 0)
15416             {
15417                 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15418                 block->bbJumpKind = BBJ_ALWAYS;
15419                 bTaken            = block->bbJumpDest;
15420                 bNotTaken         = block->bbNext;
15421             }
15422             else
15423             {
15424                 /* Unmark the loop if we are removing a backwards branch: */
15425                 /* the dest block must also be marked as a loop head, and */
15426                 /* we must be able to reach the backedge block            */
15427                 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15428                     fgReachable(block->bbJumpDest, block))
15429                 {
15430                     optUnmarkLoopBlocks(block->bbJumpDest, block);
15431                 }
15432
15433                 /* JTRUE 0 - transform the basic block into a BBJ_NONE   */
15434                 block->bbJumpKind = BBJ_NONE;
15435                 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15436                 bTaken    = block->bbNext;
15437                 bNotTaken = block->bbJumpDest;
15438             }
15439
15440             if (fgHaveValidEdgeWeights)
15441             {
15442                 // We are removing an edge from block to bNotTaken
15443                 // and we have already computed the edge weights, so
15444                 // we will try to adjust some of the weights
15445                 //
15446                 flowList*   edgeTaken = fgGetPredForBlock(bTaken, block);
15447                 BasicBlock* bUpdated  = nullptr; // non-NULL if we updated the weight of an internal block
15448
15449                 // We examine the taken edge (block -> bTaken):
15450                 // if block has a valid profile weight and bTaken does not, we try to adjust bTaken's weight;
15451                 // else if bTaken has a valid profile weight and block does not, we try to adjust block's weight.
15452                 // We can only adjust the block weights when the edge (block -> bTaken) is the only edge into bTaken.
15453                 //
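                // An illustrative example: if block has profile weight 100 and
                // bTaken (with a single in edge) has no profile weight, bTaken
                // inherits weight 100 and the (block -> bTaken) edge min/max
                // weights both become 100.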
15454                 if (block->hasProfileWeight())
15455                 {
15456                     // The edge weights for (block -> bTaken) are 100% of block's weight
15457                     edgeTaken->flEdgeWeightMin = block->bbWeight;
15458                     edgeTaken->flEdgeWeightMax = block->bbWeight;
15459
15460                     if (!bTaken->hasProfileWeight())
15461                     {
15462                         if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15463                         {
15464                             // Update the weight of bTaken
15465                             bTaken->inheritWeight(block);
15466                             bUpdated = bTaken;
15467                         }
15468                     }
15469                 }
15470                 else if (bTaken->hasProfileWeight())
15471                 {
15472                     if (bTaken->countOfInEdges() == 1)
15473                     {
15474                         // There is only one in edge to bTaken
15475                         edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15476                         edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15477
15478                         // Update the weight of block
15479                         block->inheritWeight(bTaken);
15480                         bUpdated = block;
15481                     }
15482                 }
15483
15484                 if (bUpdated != nullptr)
15485                 {
15486                     flowList* edge;
15487                     // Now fix the weights of the edges out of 'bUpdated'
15488                     switch (bUpdated->bbJumpKind)
15489                     {
15490                         case BBJ_NONE:
15491                             edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15492                             edge->flEdgeWeightMax = bUpdated->bbWeight;
15493                             break;
15494                         case BBJ_COND:
15495                             edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15496                             edge->flEdgeWeightMax = bUpdated->bbWeight;
15497                             __fallthrough;
15498                         case BBJ_ALWAYS:
15499                             edge                  = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15500                             edge->flEdgeWeightMax = bUpdated->bbWeight;
15501                             break;
15502                         default:
15503                             // We don't handle BBJ_SWITCH
15504                             break;
15505                     }
15506                 }
15507             }
15508
15509             /* modify the flow graph */
15510
15511             /* Remove 'block' from the predecessor list of 'bNotTaken' */
15512             fgRemoveRefPred(bNotTaken, block);
15513
15514 #ifdef DEBUG
15515             if (verbose)
15516             {
15517                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15518                 printf("BB%02u becomes a %s", block->bbNum,
15519                        block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15520                 if (block->bbJumpKind == BBJ_ALWAYS)
15521                 {
15522                     printf(" to BB%02u", block->bbJumpDest->bbNum);
15523                 }
15524                 printf("\n");
15525             }
15526 #endif
15527
15528             /* If the block was a loop condition, we may have to modify
15529              * the loop table */
15530
15531             for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15532             {
15533                 /* Some loops may already have been removed by
15534                  * loop unrolling or conditional folding */
15535
15536                 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15537                 {
15538                     continue;
15539                 }
15540
15541                 /* We are only interested in the loop bottom */
15542
15543                 if (optLoopTable[loopNum].lpBottom == block)
15544                 {
15545                     if (cond->gtIntCon.gtIconVal == 0)
15546                     {
15547                         /* This was a bogus loop (condition always false)
15548                          * Remove the loop from the table */
15549
15550                         optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15551 #ifdef DEBUG
15552                         if (verbose)
15553                         {
15554                             printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
15555                                    optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
15556                         }
15557 #endif
15558                     }
15559                 }
15560             }
15561         DONE_COND:
15562             result = true;
15563         }
15564     }
15565     else if (block->bbJumpKind == BBJ_SWITCH)
15566     {
15567         noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15568
15569         GenTreePtr stmt = block->bbTreeList->gtPrev;
15570
15571         noway_assert(stmt->gtNext == nullptr);
15572
15573         if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15574         {
15575             noway_assert(fgRemoveRestOfBlock);
15576
15577             /* Unconditional throw - transform the basic block into a BBJ_THROW */
15578             fgConvertBBToThrowBB(block);
15579
15580             /* update the flow graph */
15581
15582             unsigned     jumpCnt = block->bbJumpSwt->bbsCount;
15583             BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15584
15585             for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15586             {
15587                 BasicBlock* curJump = *jumpTab;
15588
15589                 /* Remove 'block' from the predecessor list of 'curJump' */
15590                 fgRemoveRefPred(curJump, block);
15591             }
15592
15593 #ifdef DEBUG
15594             if (verbose)
15595             {
15596                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15597                 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
15598             }
15599 #endif
15600             goto DONE_SWITCH;
15601         }
15602
15603         noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
15604
15605         /* Did we fold the conditional? */
15606
15607         noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15608         GenTreePtr cond;
15609         cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15610
15611         if (cond->OperKind() & GTK_CONST)
15612         {
15613             /* Yippee - we folded the conditional!
15614              * Remove the conditional statement */
15615
15616             noway_assert(cond->gtOper == GT_CNS_INT);
15617
15618             /* Remove the statement from bbTreeList - no need to update
15619              * the reference counts since there are no lcl vars */
15620             fgRemoveStmt(block, stmt);
15621
15622             /* modify the flow graph */
15623
15624             /* Find the actual jump target */
15625             unsigned switchVal;
15626             switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15627             unsigned jumpCnt;
15628             jumpCnt = block->bbJumpSwt->bbsCount;
15629             BasicBlock** jumpTab;
15630             jumpTab = block->bbJumpSwt->bbsDstTab;
15631             bool foundVal;
15632             foundVal = false;
15633
15634             for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15635             {
15636                 BasicBlock* curJump = *jumpTab;
15637
15638                 assert(curJump->countOfInEdges() > 0);
15639
15640                 // If val matches switchVal, or we are at the last entry and
15641                 // never found the switch value, then set the new jump dest
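                // (An illustrative sketch: with jumpCnt == 4 and switchVal == 1,
                // the second target becomes the unconditional successor; with
                // switchVal out of range, the last entry, i.e. the default
                // case, is used.)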
15642
15643                 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15644                 {
15645                     if (curJump != block->bbNext)
15646                     {
15647                         /* transform the basic block into a BBJ_ALWAYS */
15648                         block->bbJumpKind = BBJ_ALWAYS;
15649                         block->bbJumpDest = curJump;
15650
15651                         // A GC poll is only needed for backward jumps; if this jump is forward, clear BBF_NEEDS_GCPOLL.
15652                         if (curJump->bbNum > block->bbNum)
15653                         {
15654                             block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15655                         }
15656                     }
15657                     else
15658                     {
15659                         /* transform the basic block into a BBJ_NONE */
15660                         block->bbJumpKind = BBJ_NONE;
15661                         block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15662                     }
15663                     foundVal = true;
15664                 }
15665                 else
15666                 {
15667                     /* Remove 'block' from the predecessor list of 'curJump' */
15668                     fgRemoveRefPred(curJump, block);
15669                 }
15670             }
15671 #ifdef DEBUG
15672             if (verbose)
15673             {
15674                 printf("\nConditional folded at BB%02u\n", block->bbNum);
15675                 printf("BB%02u becomes a %s", block->bbNum,
15676                        block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15677                 if (block->bbJumpKind == BBJ_ALWAYS)
15678                 {
15679                     printf(" to BB%02u", block->bbJumpDest->bbNum);
15680                 }
15681                 printf("\n");
15682             }
15683 #endif
15684         DONE_SWITCH:
15685             result = true;
15686         }
15687     }
15688     return result;
15689 }
15690
15691 //*****************************************************************************
15692 //
15693 // Morphs a single statement in a block.
15694 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
15695 //
15696 // Returns true  if 'stmt' was removed from the block.
15697 // Returns false if 'stmt' is still in the block (even if other statements were removed).
15698 //
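// A hedged usage sketch (the calling phase and its message are hypothetical):
//
//     if (fgMorphBlockStmt(block, stmt DEBUGARG("some phase")))
//     {
//         // 'stmt' was removed from 'block'; do not reference it again.
//     }
//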
15699
15700 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15701 {
15702     assert(block != nullptr);
15703     assert(stmt != nullptr);
15704
15705     compCurBB   = block;
15706     compCurStmt = stmt;
15707
15708     GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15709
15710     // Bug 1106830 - During the CSE phase we can't just remove
15711     // morph->gtOp.gtOp2 as it could contain CSE expressions.
15712     // Doing so leads to a noway_assert in OptCSE.cpp when
15713     // searching for the removed CSE ref (using gtFindLink).
15714     //
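    // (For reference: a comma-throw has roughly the shape
    //  GT_COMMA(<throw helper GT_CALL>, <unreachable value>), so gtOp1 below
    //  is the helper call that we keep.)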
15715     if (!optValnumCSE_phase)
15716     {
15717         // Check for morph as a GT_COMMA with an unconditional throw
15718         if (fgIsCommaThrow(morph, true))
15719         {
15720 #ifdef DEBUG
15721             if (verbose)
15722             {
15723                 printf("Folding a top-level fgIsCommaThrow stmt\n");
15724                 printf("Removing op2 as unreachable:\n");
15725                 gtDispTree(morph->gtOp.gtOp2);
15726                 printf("\n");
15727             }
15728 #endif
15729             // Use the call as the new stmt
15730             morph = morph->gtOp.gtOp1;
15731             noway_assert(morph->gtOper == GT_CALL);
15732         }
15733
15734         // we can get a throw as a statement root
15735         if (fgIsThrow(morph))
15736         {
15737 #ifdef DEBUG
15738             if (verbose)
15739             {
15740                 printf("We have a top-level fgIsThrow stmt\n");
15741                 printf("Removing the rest of block as unreachable:\n");
15742             }
15743 #endif
15744             noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15745             fgRemoveRestOfBlock = true;
15746         }
15747     }
15748
15749     stmt->gtStmtExpr = morph;
15750
15751     if (lvaLocalVarRefCounted)
15752     {
15753         // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
15754         lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
15755     }
15756
15757     // Can the entire tree be removed?
15758     bool removedStmt = fgCheckRemoveStmt(block, stmt);
15759
15760     // Or is this the last statement of a conditional branch that was just folded?
15761     if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15762     {
15763         if (fgFoldConditional(block))
15764         {
15765             if (block->bbJumpKind != BBJ_THROW)
15766             {
15767                 removedStmt = true;
15768             }
15769         }
15770     }
15771
15772     if (!removedStmt)
15773     {
15774         // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
15775         gtSetStmtInfo(stmt);
15776
15777         // Have to re-link the nodes for this statement
15778         fgSetStmtSeq(stmt);
15779     }
15780
15781 #ifdef DEBUG
15782     if (verbose)
15783     {
15784         printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
15785         gtDispTree(morph);
15786         printf("\n");
15787     }
15788 #endif
15789
15790     if (fgRemoveRestOfBlock)
15791     {
15792         // Remove the rest of the stmts in the block
15793         for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15794         {
15795             fgRemoveStmt(block, stmt);
15796         }
15797
15798         // The rest of the block has been removed and we will always throw an exception.
15799
15800         // Update successors of block
15801         fgRemoveBlockAsPred(block);
15802
15803         // For compDbgCode, we prepend an empty BB (BBJ_NONE) as the firstBB;
15804         // we should not convert it to a ThrowBB.
15805         if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15806         {
15807             // Convert block to a throw bb
15808             fgConvertBBToThrowBB(block);
15809         }
15810
15811 #ifdef DEBUG
15812         if (verbose)
15813         {
15814             printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
15815         }
15816 #endif
15817         fgRemoveRestOfBlock = false;
15818     }
15819
15820     return removedStmt;
15821 }
15822
15823 /*****************************************************************************
15824  *
15825  *  Morph the statements of the given block.
15826  *  This function should be called just once for a block. Use fgMorphBlockStmt()
15827  *  for reentrant calls.
15828  */
15829
15830 void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
15831 {
15832     fgRemoveRestOfBlock = false;
15833
15834     noway_assert(fgExpandInline == false);
15835
15836     /* Make the current basic block address available globally */
15837
15838     compCurBB = block;
15839
15840     *mult = *lnot = *loadw = false;
15841
15842     fgCurrentlyInUseArgTemps = hashBv::Create(this);
15843
15844     GenTreeStmt* stmt = block->firstStmt();
15845     GenTreePtr   prev = nullptr;
15846     for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15847     {
15848         assert(stmt->gtOper == GT_STMT);
15849
15850         if (fgRemoveRestOfBlock)
15851         {
15852             fgRemoveStmt(block, stmt);
15853             continue;
15854         }
15855 #ifdef FEATURE_SIMD
15856         if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
15857         {
15858             fgMorphCombineSIMDFieldAssignments(block, stmt);
15859         }
15860 #endif
15861
15862         fgMorphStmt     = stmt;
15863         compCurStmt     = stmt;
15864         GenTreePtr tree = stmt->gtStmtExpr;
15865
15866 #ifdef DEBUG
15867         compCurStmtNum++;
15868         if (stmt == block->bbTreeList)
15869         {
15870             block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
15871         }
15872
15873         unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15874
15875         if (verbose)
15876         {
15877             printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
15878             gtDispTree(tree);
15879         }
15880 #endif
15881
15882         /* Morph this statement tree */
15883
15884         GenTreePtr morph = fgMorphTree(tree);
15885
15886         // mark any outgoing arg temps as free so we can reuse them in the next statement.
15887
15888         fgCurrentlyInUseArgTemps->ZeroAll();
15889
15890         // Has fgMorphStmt been sneakily changed?
15891
15892         if (stmt->gtStmtExpr != tree)
15893         {
15894             /* This must be a tail call. Ignore 'morph' and carry on with
15895                the tail-call node */
15896
15897             morph = stmt->gtStmtExpr;
15898             noway_assert(compTailCallUsed);
15899             noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15900             noway_assert(stmt->gtNextStmt == nullptr);
15901
15902             GenTreeCall* call = morph->AsCall();
15903             // Could either be
15904             //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15905             //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15906             //     a jmp.
15907             noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15908                          (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15909                           (compCurBB->bbFlags & BBF_HAS_JMP)));
15910         }
15911         else if (block != compCurBB)
15912         {
15913             /* This must be a tail call that caused a GCPoll to get
15914                injected. We haven't actually morphed the call yet,
15915                but the flag still got set; clear it here... */
15916             CLANG_FORMAT_COMMENT_ANCHOR;
15917
15918 #ifdef DEBUG
15919             tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15920 #endif
15921
15922             noway_assert(compTailCallUsed);
15923             noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15924             noway_assert(stmt->gtNextStmt == nullptr);
15925
15926             GenTreeCall* call = morph->AsCall();
15927
15928             // Could either be
15929             //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15930             //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15931             //     a jmp.
15932             noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15933                          (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15934                           (compCurBB->bbFlags & BBF_HAS_JMP)));
15935         }
15936
15937 #ifdef DEBUG
15938         if (compStressCompile(STRESS_CLONE_EXPR, 30))
15939         {
15940             // Clone all the trees to stress gtCloneExpr()
15941
15942             if (verbose)
15943             {
15944                 printf("\nfgMorphTree (stressClone from):\n");
15945                 gtDispTree(morph);
15946             }
15947
15948             morph = gtCloneExpr(morph);
15949             noway_assert(morph);
15950
15951             if (verbose)
15952             {
15953                 printf("\nfgMorphTree (stressClone to):\n");
15954                 gtDispTree(morph);
15955             }
15956         }
15957
15958         /* If the hash value changed, we modified the tree during morphing */
15959         if (verbose)
15960         {
15961             unsigned newHash = gtHashValue(morph);
15962             if (newHash != oldHash)
15963             {
15964                 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
15965                 gtDispTree(morph);
15966             }
15967         }
15968 #endif
15969
15970         /* Check for morph as a GT_COMMA with an unconditional throw */
15971         if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15972         {
15973             /* Use the call as the new stmt */
15974             morph = morph->gtOp.gtOp1;
15975             noway_assert(morph->gtOper == GT_CALL);
15976             noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15977
15978             fgRemoveRestOfBlock = true;
15979         }
15980
15981         stmt->gtStmtExpr = tree = morph;
15982
15983         noway_assert(fgPtrArgCntCur == 0);
15984
15985         if (fgRemoveRestOfBlock)
15986         {
15987             continue;
15988         }
15989
15990         /* Has the statement been optimized away? */
15991
15992         if (fgCheckRemoveStmt(block, stmt))
15993         {
15994             continue;
15995         }
15996
15997         /* Check if this block ends with a conditional branch that can be folded */
15998
15999         if (fgFoldConditional(block))
16000         {
16001             continue;
16002         }
16003
16004         if (ehBlockHasExnFlowDsc(block))
16005         {
16006             continue;
16007         }
16008
16009 #if OPT_MULT_ADDSUB
16010
16011         /* Note whether we have two or more +=/-= operators in a row */
16012
16013         if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
16014         {
16015             if (prev && prev->gtOper == tree->gtOper)
16016             {
16017                 *mult = true;
16018             }
16019         }
16020
16021 #endif
16022
16023         /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
16024
16025         if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
16026         {
16027             *loadw = true;
16028         }
16029     }
16030
16031     if (fgRemoveRestOfBlock)
16032     {
16033         if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
16034         {
16035             GenTreePtr first = block->bbTreeList;
16036             noway_assert(first);
16037             GenTreePtr last = first->gtPrev;
16038             noway_assert(last && last->gtNext == nullptr);
16039             GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
16040
16041             if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
16042                 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
16043             {
16044                 GenTreePtr op1 = lastStmt->gtOp.gtOp1;
16045
16046                 if (op1->OperKind() & GTK_RELOP)
16047                 {
16048                     /* Unmark the comparison node with GTF_RELOP_JMP_USED */
16049                     op1->gtFlags &= ~GTF_RELOP_JMP_USED;
16050                 }
16051
16052                 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
16053             }
16054         }
16055
16056         /* Mark block as a BBJ_THROW block */
16057         fgConvertBBToThrowBB(block);
16058     }
16059
16060     noway_assert(fgExpandInline == false);
16061
16062 #if FEATURE_FASTTAILCALL
16063     GenTreePtr recursiveTailCall = nullptr;
16064     if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
16065     {
16066         fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
16067     }
16068 #endif
16069
16070 #ifdef DEBUG
16071     compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
16072 #endif
16073
16074     // Reset this back so that it doesn't leak out and impact other blocks
16075     fgRemoveRestOfBlock = false;
16076 }
16077
16078 /*****************************************************************************
16079  *
16080  *  Morph the blocks of the method.
16081  *  Returns true if the basic block list is modified.
16082  *  This function should be called just once.
16083  */
16084
16085 void Compiler::fgMorphBlocks()
16086 {
16087 #ifdef DEBUG
16088     if (verbose)
16089     {
16090         printf("\n*************** In fgMorphBlocks()\n");
16091     }
16092 #endif
16093
16094     /* Since fgMorphTree can be called after various optimizations to re-arrange
16095      * the nodes, we need a global flag to signal whether we are in the one-pass
16096      * global morphing */
16097
16098     fgGlobalMorph = true;
16099
16100 #if LOCAL_ASSERTION_PROP
16101     //
16102     // Local assertion prop is enabled if we are optimizing
16103     //
16104     optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
16105
16106     if (optLocalAssertionProp)
16107     {
16108         //
16109         // Initialize for local assertion prop
16110         //
16111         optAssertionInit(true);
16112     }
16113 #elif ASSERTION_PROP
16114     //
16115     // If LOCAL_ASSERTION_PROP is not set
16116     // and we have global assertion prop
16117     // then local assertion prop is always off
16118     //
16119     optLocalAssertionProp = false;
16120
16121 #endif
16122
16123     /*-------------------------------------------------------------------------
16124      * Process all basic blocks in the function
16125      */
16126
16127     BasicBlock* block = fgFirstBB;
16128     noway_assert(block);
16129
16130 #ifdef DEBUG
16131     compCurStmtNum = 0;
16132 #endif
16133
16134     do
16135     {
16136 #if OPT_MULT_ADDSUB
16137         bool mult = false;
16138 #endif
16139
16140 #if OPT_BOOL_OPS
16141         bool lnot = false;
16142 #endif
16143
16144         bool loadw = false;
16145
16146 #ifdef DEBUG
16147         if (verbose)
16148         {
16149             printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
16150         }
16151 #endif
16152
16153 #if LOCAL_ASSERTION_PROP
16154         if (optLocalAssertionProp)
16155         {
16156             //
16157             // Clear out any currently recorded assertion candidates
16158             // before processing each basic block;
16159             // also, we must handle QMARK-COLON specially
16160             //
16161             optAssertionReset(0);
16162         }
16163 #endif
16164
16165         /* Process all statement trees in the basic block */
16166
16167         GenTreePtr tree;
16168
16169         fgMorphStmts(block, &mult, &lnot, &loadw);
16170
16171 #if OPT_MULT_ADDSUB
16172
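        // A sketch of the transformation attempted below (illustrative only):
        //
        //     i += 3;    // GT_ASG_ADD of a constant into a local
        //     i += 5;    // same oper, same local, constant source
        //
        // is folded into a single "i += 8;" and the second statement is
        // unlinked, after verifying that combining the constants cannot
        // itself overflow when the operators are overflow-checked.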
16173         if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
16174         {
16175             for (tree = block->bbTreeList; tree; tree = tree->gtNext)
16176             {
16177                 assert(tree->gtOper == GT_STMT);
16178                 GenTreePtr last = tree->gtStmt.gtStmtExpr;
16179
16180                 if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
16181                 {
16182                     GenTreePtr temp;
16183                     GenTreePtr next;
16184
16185                     GenTreePtr dst1 = last->gtOp.gtOp1;
16186                     GenTreePtr src1 = last->gtOp.gtOp2;
16187
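                    // Note (an editorial observation, hedged): 'last' is a
                    // GT_ASG_ADD or GT_ASG_SUB here, so IsCnsIntOrI() cannot be
                    // true and this early-out always fires; the constant-source
                    // test that seems intended is the src1 check just below.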
16188                     if (!last->IsCnsIntOrI())
16189                     {
16190                         goto NOT_CAFFE;
16191                     }
16192
16193                     if (dst1->gtOper != GT_LCL_VAR)
16194                     {
16195                         goto NOT_CAFFE;
16196                     }
16197                     if (!src1->IsCnsIntOrI())
16198                     {
16199                         goto NOT_CAFFE;
16200                     }
16201
16202                     for (;;)
16203                     {
16204                         GenTreePtr dst2;
16205                         GenTreePtr src2;
16206
16207                         /* Look at the next statement */
16208
16209                         temp = tree->gtNext;
16210                         if (!temp)
16211                         {
16212                             goto NOT_CAFFE;
16213                         }
16214
16215                         noway_assert(temp->gtOper == GT_STMT);
16216                         next = temp->gtStmt.gtStmtExpr;
16217
16218                         if (next->gtOper != last->gtOper)
16219                         {
16220                             goto NOT_CAFFE;
16221                         }
16222                         if (next->gtType != last->gtType)
16223                         {
16224                             goto NOT_CAFFE;
16225                         }
16226
16227                         dst2 = next->gtOp.gtOp1;
16228                         src2 = next->gtOp.gtOp2;
16229
16230                         if (dst2->gtOper != GT_LCL_VAR)
16231                         {
16232                             goto NOT_CAFFE;
16233                         }
16234                         if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
16235                         {
16236                             goto NOT_CAFFE;
16237                         }
16238
16239                         if (!src2->IsCnsIntOrI())
16240                         {
16241                             goto NOT_CAFFE;
16242                         }
16243
16244                         if (last->gtOverflow() != next->gtOverflow())
16245                         {
16246                             goto NOT_CAFFE;
16247                         }
16248
16249                         const ssize_t i1    = src1->gtIntCon.gtIconVal;
16250                         const ssize_t i2    = src2->gtIntCon.gtIconVal;
16251                         const ssize_t itemp = i1 + i2;
16252
16253                         /* if the operators are checking for overflow, check for overflow of the operands */
16254
16255                         if (next->gtOverflow())
16256                         {
16257                             if (next->TypeGet() == TYP_LONG)
16258                             {
16259                                 if (next->gtFlags & GTF_UNSIGNED)
16260                                 {
16261                                     ClrSafeInt<UINT64> si1(i1);
16262                                     if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
16263                                     {
16264                                         goto NOT_CAFFE;
16265                                     }
16266                                 }
16267                                 else
16268                                 {
16269                                     ClrSafeInt<INT64> si1(i1);
16270                                     if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
16271                                     {
16272                                         goto NOT_CAFFE;
16273                                     }
16274                                 }
16275                             }
16276                             else if (next->gtFlags & GTF_UNSIGNED)
16277                             {
16278                                 ClrSafeInt<UINT32> si1(i1);
16279                                 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
16280                                 {
16281                                     goto NOT_CAFFE;
16282                                 }
16283                             }
16284                             else
16285                             {
16286                                 ClrSafeInt<INT32> si1(i1);
16287                                 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
16288                                 {
16289                                     goto NOT_CAFFE;
16290                                 }
16291                             }
16292                         }
16293
16294                         /* Fold the two increments/decrements into one */
16295
16296                         src1->gtIntCon.gtIconVal = itemp;
16297 #ifdef _TARGET_64BIT_
16298                         if (src1->gtType == TYP_INT)
16299                         {
16300                             src1->AsIntCon()->TruncateOrSignExtend32();
16301                         }
16302 #endif //_TARGET_64BIT_
16303
16304                         /* Remove the second statement completely */
16305
16306                         noway_assert(tree->gtNext == temp);
16307                         noway_assert(temp->gtPrev == tree);
16308
16309                         if (temp->gtNext)
16310                         {
16311                             noway_assert(temp->gtNext->gtPrev == temp);
16312
16313                             temp->gtNext->gtPrev = tree;
16314                             tree->gtNext         = temp->gtNext;
16315                         }
16316                         else
16317                         {
16318                             tree->gtNext = nullptr;
16319
16320                             noway_assert(block->bbTreeList->gtPrev == temp);
16321
16322                             block->bbTreeList->gtPrev = tree;
16323                         }
16324                     }
16325                 }
16326
16327             NOT_CAFFE:;
16328             }
16329         }
16330
16331 #endif
16332
16333         /* Are we using a single return block? */
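        // When merging returns into genReturnBB, each "return expr;" is
        // rewritten, roughly, as:
        //
        //     genReturnLocal = expr;   // GT_ASG, marked GTF_DONT_CSE
        //     goto genReturnBB;        // the block becomes BBJ_ALWAYS
        //
        // and a void "return;" simply becomes the unconditional jump.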
16334
16335         if (block->bbJumpKind == BBJ_RETURN)
16336         {
16337             if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
16338             {
16339                 /* We'll jump to the genReturnBB */
16340                 CLANG_FORMAT_COMMENT_ANCHOR;
16341
16342 #if !defined(_TARGET_X86_)
16343                 if (info.compFlags & CORINFO_FLG_SYNCH)
16344                 {
16345                     fgConvertSyncReturnToLeave(block);
16346                 }
16347                 else
16348 #endif // !_TARGET_X86_
16349                 {
16350                     block->bbJumpKind = BBJ_ALWAYS;
16351                     block->bbJumpDest = genReturnBB;
16352                     fgReturnCount--;
16353                 }
16354
16355                 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
16356                 // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
16357                 // Such blocks do materialize as part of inlining.
16358                 //
16359                 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
16360                 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
16361                 // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
16362                 // is BAD_VAR_NUM.
16363                 //
16364                 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
16365
16366                 GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
16367                 GenTreePtr ret  = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
16368
16369                 // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
16370                 if (genReturnLocal != BAD_VAR_NUM)
16371                 {
16372                     // Method must be returning a value other than TYP_VOID.
16373                     noway_assert(compMethodHasRetVal());
16374
16375                     // This block must be ending with a GT_RETURN
16376                     noway_assert(last != nullptr);
16377                     noway_assert(last->gtOper == GT_STMT);
16378                     noway_assert(last->gtNext == nullptr);
16379                     noway_assert(ret != nullptr);
16380
16381                     // GT_RETURN must have non-null operand as the method is returning the value assigned to
16382                     // genReturnLocal
16383                     noway_assert(ret->OperGet() == GT_RETURN);
16384                     noway_assert(ret->gtGetOp1() != nullptr);
16385
16386                     GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
16387
16388                     last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
16389
16390                     // make sure that copy-prop ignores this assignment.
16391                     last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
16392                 }
16393                 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
16394                 {
16395                     // This block ends with a GT_RETURN
16396                     noway_assert(last != nullptr);
16397                     noway_assert(last->gtOper == GT_STMT);
16398                     noway_assert(last->gtNext == nullptr);
16399
16400                     // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
16401                     noway_assert(ret->TypeGet() == TYP_VOID);
16402                     noway_assert(ret->gtGetOp1() == nullptr);
16403
16404                     fgRemoveStmt(block, last);
16405                 }
16406
16407 #ifdef DEBUG
16408                 if (verbose)
16409                 {
16410                     printf("morph BB%02u to point at onereturn.  New block is\n", block->bbNum);
16411                     fgTableDispBasicBlock(block);
16412                 }
16413 #endif
16414             }
16415         }
16416
16417         block = block->bbNext;
16418     } while (block);
16419
16420     /* We are done with the global morphing phase */
16421
16422     fgGlobalMorph = false;
16423
16424 #ifdef DEBUG
16425     if (verboseTrees)
16426     {
16427         fgDispBasicBlocks(true);
16428     }
16429 #endif
16430 }
16431
16432 //------------------------------------------------------------------------
16433 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
16434 //
16435 // Notes:
16436 //    fgPtrArgCntMax records the maximum number of pushed arguments.
16437 //    Depending upon this maximum number of pushed arguments,
16438 //    we may need to use an EBP frame or be partially interruptible.
16439 //    This functionality has been factored out of fgSetOptions() because
16440 //    the Rationalizer can create new calls.
16441 //
16442 // Assumptions:
16443 //    This must be called before isFramePointerRequired() is called, because it is a
16444 //    phased variable (can only be written before it has been read).
16445 //
16446 void Compiler::fgCheckArgCnt()
16447 {
16448     if (!compCanEncodePtrArgCntMax())
16449     {
16450 #ifdef DEBUG
16451         if (verbose)
16452         {
16453             printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
16454                    "interruptible\n");
16455         }
16456 #endif
16457         genInterruptible = false;
16458     }
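    // A hedged note on the check below: sizeof(unsigned) is 4 on the targets
    // supported here, so four or more pushed arguments exceed what the
    // ESP-based encoding can describe, and an EBP frame is forced.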
16459     if (fgPtrArgCntMax >= sizeof(unsigned))
16460     {
16461 #ifdef DEBUG
16462         if (verbose)
16463         {
16464             printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
16465         }
16466 #endif
16467         codeGen->setFramePointerRequired(true);
16468     }
16469 }
16470
16471 /*****************************************************************************
16472  *
16473  *  Make some decisions about the kind of code to generate.
16474  */
16475
16476 void Compiler::fgSetOptions()
16477 {
16478 #ifdef DEBUG
16479     /* Should we force fully interruptible code ? */
16480     if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16481     {
16482         noway_assert(!codeGen->isGCTypeFixed());
16483         genInterruptible = true;
16484     }
16485 #endif
16486
16487     if (opts.compDbgCode)
16488     {
16489         assert(!codeGen->isGCTypeFixed());
16490         genInterruptible = true; // debugging is easier this way ...
16491     }
16492
16493     /* Assume we won't need an explicit stack frame if this is allowed */
16494
16495     // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16496     // the callee-saved registers.
16497     noway_assert(!compTailCallUsed || !compLocallocUsed);
16498
16499     if (compLocallocUsed)
16500     {
16501         codeGen->setFramePointerRequired(true);
16502     }
16503
16504 #ifdef _TARGET_X86_
16505
16506     if (compTailCallUsed)
16507         codeGen->setFramePointerRequired(true);
16508
16509 #endif // _TARGET_X86_
16510
16511     if (!opts.genFPopt)
16512     {
16513         codeGen->setFramePointerRequired(true);
16514     }
16515
16516     // Assert that the EH table has been initialized by now. Note that
16517     // compHndBBtabAllocCount never decreases; it is a high-water mark
16518     // of table allocation. In contrast, compHndBBtabCount does shrink
16519     // if we delete a dead EH region, and if it shrinks to zero, the
16520     // table pointer compHndBBtab is unreliable.
16521     assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16522
16523 #ifdef _TARGET_X86_
16524
16525     // Note: this case, and the !X86 case below, should both use the
16526     // !X86 path. This would require a few more changes for X86 to use
16527     // compHndBBtabCount (the current number of EH clauses) instead of
16528     // info.compXcptnsCount (the number of EH clauses in IL), such as
16529     // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16530     // an EH clause that we delete as statically dead code before we
16531     // get here, leaving no EH clauses left, and thus no requirement
16532     // to use a frame pointer because of EH. But until all the code uses
16533     // the same test, leave info.compXcptnsCount here.
16534     if (info.compXcptnsCount > 0)
16535     {
16536         codeGen->setFramePointerRequiredEH(true);
16537     }
16538
16539 #else // !_TARGET_X86_
16540
16541     if (compHndBBtabCount > 0)
16542     {
16543         codeGen->setFramePointerRequiredEH(true);
16544     }
16545
16546 #endif // _TARGET_X86_
16547
16548 #ifdef UNIX_X86_ABI
16549     if (info.compXcptnsCount > 0)
16550     {
16551         assert(!codeGen->isGCTypeFixed());
16552         // Enforce fully interruptible codegen for funclet unwinding
16553         genInterruptible = true;
16554     }
16555 #endif // UNIX_X86_ABI
16556
16557     fgCheckArgCnt();
16558
16559     if (info.compCallUnmanaged)
16560     {
16561         codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16562     }
16563
16564     if (info.compPublishStubParam)
16565     {
16566         codeGen->setFramePointerRequiredGCInfo(true);
16567     }
16568
16569     if (opts.compNeedSecurityCheck)
16570     {
16571         codeGen->setFramePointerRequiredGCInfo(true);
16572
16573 #ifndef JIT32_GCENCODER
16574
16575         // The decoder only reports objects in frames with exceptions if the frame
16576         // is fully interruptible.
16577         // Even if there is no catch or other way to resume execution in this frame,
16578         // the VM requires the security object to remain alive until later, so
16579         // frames with security objects must be fully interruptible.
16580         genInterruptible = true;
16581
16582 #endif // JIT32_GCENCODER
16583     }
16584
16585     if (compIsProfilerHookNeeded())
16586     {
16587         codeGen->setFramePointerRequired(true);
16588     }
16589
16590     if (info.compIsVarArgs)
16591     {
16592         // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16593         codeGen->setFramePointerRequiredGCInfo(true);
16594     }
16595
16596     if (lvaReportParamTypeArg())
16597     {
16598         codeGen->setFramePointerRequiredGCInfo(true);
16599     }
16600
16601     // printf("method will %s be fully interruptible\n", genInterruptible ? "   " : "not");
16602 }
16603
16604 /*****************************************************************************/
16605
16606 GenTreePtr Compiler::fgInitThisClass()
16607 {
16608     noway_assert(!compIsForInlining());
16609
16610     CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16611
16612     if (!kind.needsRuntimeLookup)
16613     {
16614         return fgGetSharedCCtor(info.compClassHnd);
16615     }
16616     else
16617     {
16618 #ifdef FEATURE_READYTORUN_COMPILER
16619         // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16620         if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16621         {
16622             CORINFO_RESOLVED_TOKEN resolvedToken;
16623             memset(&resolvedToken, 0, sizeof(resolvedToken));
16624
16625             // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16626             // This covers the case of a generic method on a non-generic type.
16627             if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16628             {
16629                 resolvedToken.hClass = info.compClassHnd;
16630                 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16631             }
16632
16633             // We need a runtime lookup.
16634             GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16635
16636             // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16637             // base of the class that owns the method being compiled". If we're in this method, it means we're not
16638             // inlining and there's no ambiguity.
16639             return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16640                                              gtNewArgList(ctxTree), &kind);
16641         }
16642 #endif
16643
16644         // Collectible types require that, for shared generic code, if we use the generic context parameter,
16645         // we report it. (This is a conservative approach; we could detect some cases, particularly when the
16646         // context parameter is 'this', where we don't need the eager reporting logic.)
16647         lvaGenericsContextUseCount++;
16648
16649         switch (kind.runtimeLookupKind)
16650         {
16651             case CORINFO_LOOKUP_THISOBJ:
16652                 // This code takes a 'this' pointer, but we need to pass the static method desc to get the right
16653                 // point in the hierarchy.
16654                 {
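                    // Roughly: CORINFO_HELP_INITINSTCLASS(*(this), methodHnd),
                    // where the indirection loads the object's method table
                    // (vtable) pointer to identify the exact runtime type.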
16655                     GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16656                     // Vtable pointer of this object
16657                     vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16658                     vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16659                     GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16660
16661                     return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16662                                                gtNewArgList(vtTree, methodHnd));
16663                 }
16664
16665             case CORINFO_LOOKUP_CLASSPARAM:
16666             {
16667                 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16668                 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
16669             }
16670
16671             case CORINFO_LOOKUP_METHODPARAM:
16672             {
16673                 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16674                 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16675                                            gtNewArgList(gtNewIconNode(0), methHndTree));
16676             }
16677         }
16678     }
16679
16680     noway_assert(!"Unknown LOOKUP_KIND");
16681     UNREACHABLE();
16682 }
16683
16684 #ifdef DEBUG
16685 /*****************************************************************************
16686  *
16687  *  Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
16688  *  except for the allowed "? 1 : 0" pattern.
16689  */
16690 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
16691 {
16692     if ((*tree)->OperGet() == GT_QMARK)
16693     {
16694         fgCheckQmarkAllowedForm(*tree);
16695     }
16696     return WALK_CONTINUE;
16697 }
16698
16699 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
16700 {
16701     assert(tree->OperGet() == GT_QMARK);
16702 #ifndef LEGACY_BACKEND
16703     assert(!"Qmarks beyond morph disallowed.");
16704 #else  // LEGACY_BACKEND
16705     GenTreePtr colon = tree->gtOp.gtOp2;
16706
16707     assert(colon->gtOp.gtOp1->IsIntegralConst(0));
16708     assert(colon->gtOp.gtOp2->IsIntegralConst(1));
16709 #endif // LEGACY_BACKEND
16710 }
16711
16712 /*****************************************************************************
16713  *
16714  *  Verify that the importer has created GT_QMARK nodes in a way we can
16715  *  process them. The following is allowed:
16716  *
16717  *  1. A top level qmark. Top level qmark is of the form:
16718  *      a) (bool) ? (void) : (void) OR
16719  *      b) V0N = (bool) ? (type) : (type)
16720  *
16721  *  2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
16722  *     of either op1 of colon or op2 of colon but not a child of any other
16723  *     operator.
16724  */
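// An allowed nested shape, for example (illustrative):
//     V03 = c1 ? (c2 ? x : y) : z     // a qmark may appear only under a colon operand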
16725 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
16726 {
16727     GenTreePtr topQmark = fgGetTopLevelQmark(expr);
16728
16729     // If the top level Qmark is null, then scan the tree to make sure
16730     // there are no qmarks within it.
16731     if (topQmark == nullptr)
16732     {
16733         fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16734     }
16735     else
16736     {
16737         // We could probably expand the cond node also, but don't think the extra effort is necessary,
16738         // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
16739         fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
16740
16741         fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16742         fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
16743     }
16744 }
16745 #endif // DEBUG
16746
16747 /*****************************************************************************
16748  *
16749  *  Get the top level GT_QMARK node in a given "expr", return NULL if such a
16750  *  node is not present. If the top level GT_QMARK node is assigned to a
16751  *  GT_LCL_VAR, then return the lcl node in ppDst.
16752  *
16753  */
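// For example (a sketch): given "V05 = cond ? x : y", this returns the
// GT_QMARK node and sets *ppDst to the V05 GT_LCL_VAR node; for a bare
// top-level qmark it returns the qmark and leaves *ppDst as nullptr.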
16754 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
16755 {
16756     if (ppDst != nullptr)
16757     {
16758         *ppDst = nullptr;
16759     }
16760
16761     GenTreePtr topQmark = nullptr;
16762     if (expr->gtOper == GT_QMARK)
16763     {
16764         topQmark = expr;
16765     }
16766     else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16767     {
16768         topQmark = expr->gtOp.gtOp2;
16769         if (ppDst != nullptr)
16770         {
16771             *ppDst = expr->gtOp.gtOp1;
16772         }
16773     }
16774     return topQmark;
16775 }
16776
16777 /*********************************************************************************
16778  *
16779  *  For a castclass helper call,
16780  *  Importer creates the following tree:
16781  *      tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
16782  *
16783  *  This method splits the qmark expression created by the importer into the
16784  *  following blocks: (block, asg, cond1, cond2, helper, remainder)
16785  *  Notice that op1 is the result for both conditions, so we coalesce these
16786  *  assignments into a single block instead of two blocks, which would result in a nested diamond.
16787  *
16788  *                       +---------->-----------+
16789  *                       |          |           |
16790  *                       ^          ^           v
16791  *                       |          |           |
16792  *  block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16793  *
16794  *  We expect to achieve the following codegen:
16795  *     mov      rsi, rdx                           tmp = op1                  // asgBlock
16796  *     test     rsi, rsi                           goto skip if tmp == null ? // cond1Block
16797  *     je       SKIP
16798  *     mov      rcx, 0x76543210                    cns = op2                  // cond2Block
16799  *     cmp      qword ptr [rsi], rcx               goto skip if *tmp == op2
16800  *     je       SKIP
16801  *     call     CORINFO_HELP_CHKCASTCLASS_SPECIAL  tmp = helper(cns, tmp)     // helperBlock
16802  *     mov      rsi, rax
16803  *  SKIP:                                                                     // remainderBlock
16804  *     tmp has the result.
16805  *
16806  */
16807 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
16808 {
16809 #ifdef DEBUG
16810     if (verbose)
16811     {
16812         printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
16813         fgDispBasicBlocks(block, block, true);
16814     }
16815 #endif // DEBUG
16816
16817     GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16818
16819     GenTreePtr dst   = nullptr;
16820     GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16821     noway_assert(dst != nullptr);
16822
16823     assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
16824
16825     // Get cond, true, false exprs for the qmark.
16826     GenTreePtr condExpr  = qmark->gtGetOp1();
16827     GenTreePtr trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
16828     GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16829
16830     // Get cond, true, false exprs for the nested qmark.
16831     GenTreePtr nestedQmark = falseExpr;
16832     GenTreePtr cond2Expr;
16833     GenTreePtr true2Expr;
16834     GenTreePtr false2Expr;
16835
16836     if (nestedQmark->gtOper == GT_QMARK)
16837     {
16838         cond2Expr  = nestedQmark->gtGetOp1();
16839         true2Expr  = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
16840         false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
16841
16842         assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
16843         cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
16844     }
16845     else
16846     {
16847         // This is a rare case that arises when we are doing minopts and encounter isinst of null;
16848         // gtFoldExpr was still able to optimize away part of the tree (but not all).
16849         // That means it does not match our pattern.
16850
16851         // Rather than write code to handle this case, just fake up some nodes to make it match the common
16852         // case.  Synthesize a comparison that is always true, and for the result-on-true, use the
16853         // entire subtree we expected to be the nested question op.
16854
16855         cond2Expr  = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
16856         true2Expr  = nestedQmark;
16857         false2Expr = gtNewIconNode(0, TYP_I_IMPL);
16858     }
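    // A sketch of the synthesized shape in that fallback case:
    //   cond2Expr:  GT_EQ(0, 0)  -- a comparison that always evaluates true
    //   true2Expr:  the partially-folded subtree that stood where the nested qmark was
    //   false2Expr: 0            -- present only so the tree matches the common pattern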
16859     assert(false2Expr->OperGet() == trueExpr->OperGet());
16860
16861     // Clear flags as they are now going to be part of JTRUE.
16862     assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16863     condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16864
16865     // Create the chain of blocks. See method header comment.
16866     // The order of blocks after this is the following:
16867     //     block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
16868     //
16869     // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
16870     // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16871     // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16872     // remainderBlock will still be GC safe.
16873     unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16874     BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16875     fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16876
16877     BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
16878     BasicBlock* cond2Block  = fgNewBBafter(BBJ_COND, block, true);
16879     BasicBlock* cond1Block  = fgNewBBafter(BBJ_COND, block, true);
16880     BasicBlock* asgBlock    = fgNewBBafter(BBJ_NONE, block, true);
16881
16882     remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16883
16884     // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16885     // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16886     if ((block->bbFlags & BBF_INTERNAL) == 0)
16887     {
16888         helperBlock->bbFlags &= ~BBF_INTERNAL;
16889         cond2Block->bbFlags &= ~BBF_INTERNAL;
16890         cond1Block->bbFlags &= ~BBF_INTERNAL;
16891         asgBlock->bbFlags &= ~BBF_INTERNAL;
16892         helperBlock->bbFlags |= BBF_IMPORTED;
16893         cond2Block->bbFlags |= BBF_IMPORTED;
16894         cond1Block->bbFlags |= BBF_IMPORTED;
16895         asgBlock->bbFlags |= BBF_IMPORTED;
16896     }
16897
16898     // Chain the flow correctly.
16899     fgAddRefPred(asgBlock, block);
16900     fgAddRefPred(cond1Block, asgBlock);
16901     fgAddRefPred(cond2Block, cond1Block);
16902     fgAddRefPred(helperBlock, cond2Block);
16903     fgAddRefPred(remainderBlock, helperBlock);
16904     fgAddRefPred(remainderBlock, cond1Block);
16905     fgAddRefPred(remainderBlock, cond2Block);
16906
16907     cond1Block->bbJumpDest = remainderBlock;
16908     cond2Block->bbJumpDest = remainderBlock;
16909
16910     // Set the weights; some are guesses.
16911     asgBlock->inheritWeight(block);
16912     cond1Block->inheritWeight(block);
16913     cond2Block->inheritWeightPercentage(cond1Block, 50);
16914     helperBlock->inheritWeightPercentage(cond2Block, 50);
16915
16916     // Append cond1 as JTRUE to cond1Block
16917     GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
16918     GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16919     fgInsertStmtAtEnd(cond1Block, jmpStmt);
16920
16921     // Append cond2 as JTRUE to cond2Block
16922     jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
16923     jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16924     fgInsertStmtAtEnd(cond2Block, jmpStmt);
16925
16926     // AsgBlock should get tmp = op1 assignment.
16927     trueExpr            = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
16928     GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16929     fgInsertStmtAtEnd(asgBlock, trueStmt);
16930
16931     // Since we are adding the helper call in the JTRUE false path, reverse cond2 and append the helper.
16932     gtReverseCond(cond2Expr);
16933     GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
16934     GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
16935     fgInsertStmtAtEnd(helperBlock, helperStmt);
16936
16937     // Finally remove the nested qmark stmt.
16938     fgRemoveStmt(block, stmt);
16939
16940 #ifdef DEBUG
16941     if (verbose)
16942     {
16943         printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
16944         fgDispBasicBlocks(block, remainderBlock, true);
16945     }
16946 #endif // DEBUG
16947 }
16948
16949 /*****************************************************************************
16950  *
16951  *  Expand a statement with a top level qmark node. There are three cases, based
16952  *  on whether the qmark has both "true" and "false" arms, or just one of them.
16953  *
16954  *     S0;
16955  *     C ? T : F;
16956  *     S1;
16957  *
16958  *     Generates ===>
16959  *
16960  *                       bbj_always
16961  *                       +---->------+
16962  *                 false |           |
16963  *     S0 -->-- ~C -->-- T   F -->-- S1
16964  *              |            |
16965  *              +--->--------+
16966  *              bbj_cond(true)
16967  *
16968  *     -----------------------------------------
16969  *
16970  *     S0;
16971  *     C ? T : NOP;
16972  *     S1;
16973  *
16974  *     Generates ===>
16975  *
16976  *                 false
16977  *     S0 -->-- ~C -->-- T -->-- S1
16978  *              |                |
16979  *              +-->-------------+
16980  *              bbj_cond(true)
16981  *
16982  *     -----------------------------------------
16983  *
16984  *     S0;
16985  *     C ? NOP : F;
16986  *     S1;
16987  *
16988  *     Generates ===>
16989  *
16990  *                false
16991  *     S0 -->-- C -->-- F -->-- S1
16992  *              |               |
16993  *              +-->------------+
16994  *              bbj_cond(true)
16995  *
16996  *  If the qmark assigns to a variable, then create tmps for "then"
16997  *  and "else" results and assign the temp to the variable as a writeback step.
16998  */
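// An illustrative source-level sketch: for "x = cond ? a : b" (both arms present),
// condBlock tests the reversed condition and jumps to elseBlock when it holds;
// thenBlock assigns a and jumps over elseBlock (which assigns b) to remainderBlock.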
16999 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
17000 {
17001     GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17002
17003     // Retrieve the Qmark node to be expanded.
17004     GenTreePtr dst   = nullptr;
17005     GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
17006     if (qmark == nullptr)
17007     {
17008         return;
17009     }
17010
17011     if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
17012     {
17013         fgExpandQmarkForCastInstOf(block, stmt);
17014         return;
17015     }
17016
17017 #ifdef DEBUG
17018     if (verbose)
17019     {
17020         printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
17021         fgDispBasicBlocks(block, block, true);
17022     }
17023 #endif // DEBUG
17024
17025     // Retrieve the operands.
17026     GenTreePtr condExpr  = qmark->gtGetOp1();
17027     GenTreePtr trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
17028     GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
17029
17030     assert(condExpr->gtFlags & GTF_RELOP_QMARK);
17031     condExpr->gtFlags &= ~GTF_RELOP_QMARK;
17032
17033     assert(!varTypeIsFloating(condExpr->TypeGet()));
17034
17035     bool hasTrueExpr  = (trueExpr->OperGet() != GT_NOP);
17036     bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
17037     assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
17038
17039     // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
17040     //     block ... condBlock ... elseBlock ... remainderBlock
17041     //
17042     // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
17043     // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
17044     // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
17045     // remainderBlock will still be GC safe.
17046     unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
17047     BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
17048     fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
17049
17050     BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
17051     BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
17052
17053     // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
17054     // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
17055     if ((block->bbFlags & BBF_INTERNAL) == 0)
17056     {
17057         condBlock->bbFlags &= ~BBF_INTERNAL;
17058         elseBlock->bbFlags &= ~BBF_INTERNAL;
17059         condBlock->bbFlags |= BBF_IMPORTED;
17060         elseBlock->bbFlags |= BBF_IMPORTED;
17061     }
17062
17063     remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
17064
17065     condBlock->inheritWeight(block);
17066
17067     fgAddRefPred(condBlock, block);
17068     fgAddRefPred(elseBlock, condBlock);
17069     fgAddRefPred(remainderBlock, elseBlock);
17070
17071     BasicBlock* thenBlock = nullptr;
17072     if (hasTrueExpr && hasFalseExpr)
17073     {
17074         //                       bbj_always
17075         //                       +---->------+
17076         //                 false |           |
17077         //     S0 -->-- ~C -->-- T   F -->-- S1
17078         //              |            |
17079         //              +--->--------+
17080         //              bbj_cond(true)
17081         //
17082         gtReverseCond(condExpr);
17083         condBlock->bbJumpDest = elseBlock;
17084
17085         thenBlock             = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
17086         thenBlock->bbJumpDest = remainderBlock;
17087         if ((block->bbFlags & BBF_INTERNAL) == 0)
17088         {
17089             thenBlock->bbFlags &= ~BBF_INTERNAL;
17090             thenBlock->bbFlags |= BBF_IMPORTED;
17091         }
17092
17093         elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
17094
17095         fgAddRefPred(thenBlock, condBlock);
17096         fgAddRefPred(remainderBlock, thenBlock);
17097
17098         thenBlock->inheritWeightPercentage(condBlock, 50);
17099         elseBlock->inheritWeightPercentage(condBlock, 50);
17100     }
17101     else if (hasTrueExpr)
17102     {
17103         //                 false
17104         //     S0 -->-- ~C -->-- T -->-- S1
17105         //              |                |
17106         //              +-->-------------+
17107         //              bbj_cond(true)
17108         //
17109         gtReverseCond(condExpr);
17110         condBlock->bbJumpDest = remainderBlock;
17111         fgAddRefPred(remainderBlock, condBlock);
17112         // Since we have no false expr, reuse the block we already created as the 'then' block.
17113         thenBlock = elseBlock;
17114         elseBlock = nullptr;
17115
17116         thenBlock->inheritWeightPercentage(condBlock, 50);
17117     }
17118     else if (hasFalseExpr)
17119     {
17120         //                false
17121         //     S0 -->-- C -->-- F -->-- S1
17122         //              |               |
17123         //              +-->------------+
17124         //              bbj_cond(true)
17125         //
17126         condBlock->bbJumpDest = remainderBlock;
17127         fgAddRefPred(remainderBlock, condBlock);
17128
17129         elseBlock->inheritWeightPercentage(condBlock, 50);
17130     }
17131
17132     GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
17133     GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17134     fgInsertStmtAtEnd(condBlock, jmpStmt);
17135
17136     // Remove the original qmark statement.
17137     fgRemoveStmt(block, stmt);
17138
17139     // Since this is a top level qmark, either it has a dst, in which case we
17140     // assign the true and false expressions into that local, or it is void
17141     // and we don't bother assigning.
17142     unsigned lclNum = BAD_VAR_NUM;
17143     if (dst != nullptr)
17144     {
17145         assert(dst->gtOper == GT_LCL_VAR);
17146         lclNum = dst->gtLclVar.gtLclNum;
17147     }
17148     else
17149     {
17150         assert(qmark->TypeGet() == TYP_VOID);
17151     }
17152
17153     if (hasTrueExpr)
17154     {
17155         if (dst != nullptr)
17156         {
17157             trueExpr = gtNewTempAssign(lclNum, trueExpr);
17158         }
17159         GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17160         fgInsertStmtAtEnd(thenBlock, trueStmt);
17161     }
17162
17163     // Assign the falseExpr into the dst or tmp, insert in elseBlock
17164     if (hasFalseExpr)
17165     {
17166         if (dst != nullptr)
17167         {
17168             falseExpr = gtNewTempAssign(lclNum, falseExpr);
17169         }
17170         GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
17171         fgInsertStmtAtEnd(elseBlock, falseStmt);
17172     }
17173
17174 #ifdef DEBUG
17175     if (verbose)
17176     {
17177         printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
17178         fgDispBasicBlocks(block, remainderBlock, true);
17179     }
17180 #endif // DEBUG
17181 }
17182
17183 /*****************************************************************************
17184  *
17185  *  Expand GT_QMARK nodes from the flow graph into basic blocks.
17186  *
17187  */
17188
17189 void Compiler::fgExpandQmarkNodes()
17190 {
17191     if (compQmarkUsed)
17192     {
17193         for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17194         {
17195             for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17196             {
17197                 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17198 #ifdef DEBUG
17199                 fgPreExpandQmarkChecks(expr);
17200 #endif
17201                 fgExpandQmarkStmt(block, stmt);
17202             }
17203         }
17204 #ifdef DEBUG
17205         fgPostExpandQmarkChecks();
17206 #endif
17207     }
17208     compQmarkRationalized = true;
17209 }
17210
17211 #ifdef DEBUG
17212 /*****************************************************************************
17213  *
17214  *  Make sure we don't have any more GT_QMARK nodes.
17215  *
17216  */
17217 void Compiler::fgPostExpandQmarkChecks()
17218 {
17219     for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17220     {
17221         for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17222         {
17223             GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
17224             fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
17225         }
17226     }
17227 }
17228 #endif
17229
17230 /*****************************************************************************
17231  *
17232  *  Transform all basic blocks for codegen.
17233  */
17234
17235 void Compiler::fgMorph()
17236 {
17237     noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
17238
17239     fgOutgoingArgTemps = nullptr;
17240
17241 #ifdef DEBUG
17242     if (verbose)
17243     {
17244         printf("*************** In fgMorph()\n");
17245     }
17246     if (verboseTrees)
17247     {
17248         fgDispBasicBlocks(true);
17249     }
17250 #endif // DEBUG
17251
17252     // Insert call to class constructor as the first basic block if
17253     // we were asked to do so.
17254     if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
17255                                     impTokenLookupContextHandle /* context */) &
17256         CORINFO_INITCLASS_USE_HELPER)
17257     {
17258         fgEnsureFirstBBisScratch();
17259         fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
17260     }
17261
17262 #ifdef DEBUG
17263     if (opts.compGcChecks)
17264     {
17265         for (unsigned i = 0; i < info.compArgsCount; i++)
17266         {
17267             if (lvaTable[i].TypeGet() == TYP_REF)
17268             {
17269                 // confirm that the argument is a GC pointer (for debugging (GC stress))
17270                 GenTreePtr      op   = gtNewLclvNode(i, TYP_REF);
17271                 GenTreeArgList* args = gtNewArgList(op);
17272                 op                   = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
17273
17274                 fgEnsureFirstBBisScratch();
17275                 fgInsertStmtAtEnd(fgFirstBB, op);
17276             }
17277         }
17278     }
17279
17280     if (opts.compStackCheckOnRet)
17281     {
17282         lvaReturnEspCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
17283         lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
17284     }
17285
17286     if (opts.compStackCheckOnCall)
17287     {
17288         lvaCallEspCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
17289         lvaTable[lvaCallEspCheck].lvType = TYP_INT;
17290     }
17291 #endif // DEBUG
17292
17293     /* Filter out unimported BBs */
17294
17295     fgRemoveEmptyBlocks();
17296
17297 #ifdef DEBUG
17298     /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17299     fgDebugCheckBBlist(false, false);
17300 #endif // DEBUG
17301
17302     EndPhase(PHASE_MORPH_INIT);
17303
17304     /* Inline */
17305     fgInline();
17306 #if 0
17307     JITDUMP("trees after inlining\n");
17308     DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17309 #endif
17310
17311     RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
17312
17313     EndPhase(PHASE_MORPH_INLINE);
17314
17315     /* Add any internal blocks/trees we may need */
17316
17317     fgAddInternal();
17318
17319 #if OPT_BOOL_OPS
17320     fgMultipleNots = false;
17321 #endif
17322
17323 #ifdef DEBUG
17324     /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17325     fgDebugCheckBBlist(false, false);
17326 #endif // DEBUG
17327
17328     fgRemoveEmptyTry();
17329
17330     EndPhase(PHASE_EMPTY_TRY);
17331
17332     fgRemoveEmptyFinally();
17333
17334     EndPhase(PHASE_EMPTY_FINALLY);
17335
17336     fgMergeFinallyChains();
17337
17338     EndPhase(PHASE_MERGE_FINALLY_CHAINS);
17339
17340     fgCloneFinally();
17341
17342     EndPhase(PHASE_CLONE_FINALLY);
17343
17344     fgUpdateFinallyTargetFlags();
17345
17346     /* For x64 and ARM64 we need to mark irregular parameters */
17347     fgMarkImplicitByRefArgs();
17348
17349     /* Promote struct locals if necessary */
17350     fgPromoteStructs();
17351
17352     /* Now it is time to figure out which locals are address-taken. */
17353     fgMarkAddressExposedLocals();
17354
17355     EndPhase(PHASE_STR_ADRLCL);
17356
17357     /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
17358        analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
17359     fgRetypeImplicitByRefArgs();
17360
17361 #ifdef DEBUG
17362     /* Now that address-taken locals and implicit byrefs are marked, we can safely apply stress. */
17363     lvaStressLclFld();
17364     fgStress64RsltMul();
17365 #endif // DEBUG
17366
17367     EndPhase(PHASE_MORPH_IMPBYREF);
17368
17369     /* Morph the trees in all the blocks of the method */
17370
17371     fgMorphBlocks();
17372
17373     /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
17374     fgMarkDemotedImplicitByRefArgs();
17375
17376     EndPhase(PHASE_MORPH_GLOBAL);
17377
17378 #if 0
17379     JITDUMP("trees after fgMorphBlocks\n");
17380     DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17381 #endif
17382
17383     /* Decide the kind of code we want to generate */
17384
17385     fgSetOptions();
17386
17387     fgExpandQmarkNodes();
17388
17389 #ifdef DEBUG
17390     compCurBB = nullptr;
17391 #endif // DEBUG
17392 }
17393
17394 /*****************************************************************************
17395  *
17396  *  Promoting struct locals
17397  */
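// A sketch of the transformation (names are hypothetical): given a local
//     struct Pair { int a; int b; };  // local V02 of type Pair
// promotion introduces two int field locals, say V07 and V08, so that later
// phases can rewrite V02.a and V02.b into direct uses of V07 and V08
// (see fgMorphStructField below).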
17398 void Compiler::fgPromoteStructs()
17399 {
17400 #ifdef DEBUG
17401     if (verbose)
17402     {
17403         printf("*************** In fgPromoteStructs()\n");
17404     }
17405 #endif // DEBUG
17406
17407     if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
17408     {
17409         return;
17410     }
17411
17412     if (fgNoStructPromotion)
17413     {
17414         return;
17415     }
17416
17417 #if 0
17418     // The code in this #if has been useful in debugging struct promotion issues, by
17419     // enabling selective enablement of the struct promotion optimization according to
17420     // method hash.
17421 #ifdef DEBUG
17422     unsigned methHash = info.compMethodHash();
17423     char* lostr = getenv("structpromohashlo");
17424     unsigned methHashLo = 0;
17425     if (lostr != NULL)
17426     {
17427         sscanf_s(lostr, "%x", &methHashLo);
17428     }
17429     char* histr = getenv("structpromohashhi");
17430     unsigned methHashHi = UINT32_MAX;
17431     if (histr != NULL)
17432     {
17433         sscanf_s(histr, "%x", &methHashHi);
17434     }
17435     if (methHash < methHashLo || methHash > methHashHi)
17436     {
17437         return;
17438     }
17439     else
17440     {
17441         printf("Promoting structs for method %s, hash = 0x%x.\n",
17442                info.compFullName, info.compMethodHash());
17443         printf("");         // in our logic this causes a flush
17444     }
17445 #endif // DEBUG
17446 #endif // 0
17447
17448     if (info.compIsVarArgs)
17449     {
17450         return;
17451     }
17452
17453     if (getNeedsGSSecurityCookie())
17454     {
17455         return;
17456     }
17457
17458 #ifdef DEBUG
17459     if (verbose)
17460     {
17461         printf("\nlvaTable before fgPromoteStructs\n");
17462         lvaTableDump();
17463     }
17464 #endif // DEBUG
17465
17466     // The lvaTable might grow as we grab temps. Make a local copy here.
17467     unsigned startLvaCount = lvaCount;
17468
17469     //
17470     // Loop through the original lvaTable, looking for struct locals to promote.
17471     //
17472     lvaStructPromotionInfo structPromotionInfo;
17473     bool                   tooManyLocals = false;
17474
17475     for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
17476     {
17477         // Whether this var got promoted
17478         bool       promotedVar = false;
17479         LclVarDsc* varDsc      = &lvaTable[lclNum];
17480
17481         // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
17482         // its fields.  Instead, we will attempt to enregister the entire struct.
17483         if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
17484         {
17485             varDsc->lvRegStruct = true;
17486         }
17487         // Don't promote if we have reached the tracking limit.
17488         else if (lvaHaveManyLocals())
17489         {
17490             // Print the message the first time we detect this condition
17491             if (!tooManyLocals)
17492             {
17493                 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
17494             }
17495             tooManyLocals = true;
17496         }
17497         else if (varTypeIsStruct(varDsc))
17498         {
17499             bool shouldPromote;
17500
17501             lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
17502             if (structPromotionInfo.canPromote)
17503             {
17504                 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
17505             }
17506             else
17507             {
17508                 shouldPromote = false;
17509             }
17510
17511 #if 0
17512             // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
17513             // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
17514             static int structPromoVarNum = 0;
17515             structPromoVarNum++;
17516             if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
17517 #endif // 0
17518
17519             if (shouldPromote)
17520             {
17521                 // Promote this struct local var.
17522                 lvaPromoteStructVar(lclNum, &structPromotionInfo);
17523                 promotedVar = true;
17524
17525 #ifdef _TARGET_ARM_
17526                 if (structPromotionInfo.requiresScratchVar)
17527                 {
17528                     // Ensure that the scratch variable is allocated, in case we
17529                     // pass a promoted struct as an argument.
17530                     if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
17531                     {
17532                         lvaPromotedStructAssemblyScratchVar =
17533                             lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
17534                         lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
17535                     }
17536                 }
17537 #endif // _TARGET_ARM_
17538             }
17539         }
17540
17541         if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
17542         {
17543             // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
17544             // we will treat it as a reg struct.
17545             varDsc->lvRegStruct = true;
17546         }
17547     }
17548
17549 #ifdef DEBUG
17550     if (verbose)
17551     {
17552         printf("\nlvaTable after fgPromoteStructs\n");
17553         lvaTableDump();
17554     }
17555 #endif // DEBUG
17556 }
17557
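// fgMorphStructField: tree walk callback that rewrites a GT_FIELD of a promoted
// (or "normed") struct local into a direct local access. A sketch of the
// promoted case (locals are hypothetical):
//     GT_FIELD[offset 4](GT_ADDR(GT_LCL_VAR V02))  ->  GT_LCL_VAR V08
// where V08 is the field local that lvaGetFieldLocal finds at offset 4.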
17558 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
17559 {
17560     noway_assert(tree->OperGet() == GT_FIELD);
17561
17562     GenTreePtr objRef = tree->gtField.gtFldObj;
17563     GenTreePtr obj    = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
17564     noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
17565
17566     /* Is this an instance data member? */
17567
17568     if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
17569     {
17570         unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
17571         LclVarDsc* varDsc = &lvaTable[lclNum];
17572
17573         if (varTypeIsStruct(obj))
17574         {
17575             if (varDsc->lvPromoted)
17576             {
17577                 // Promoted struct
17578                 unsigned fldOffset     = tree->gtField.gtFldOffset;
17579                 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17580                 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17581
17582                 if (lvaIsImplicitByRefLocal(lclNum))
17583                 {
17584                     // Keep track of the number of appearances of each promoted implicit
17585                     // byref (here during struct promotion, which happens during address-exposed
17586                     // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
17587                     // byref params when deciding if it's legal to elide certain copies of them.
17588                     // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
17589                     // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
17590                     // chance, so we have to check now.
17591                     JITDUMP(
17592                         "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
17593                         varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
17594                     varDsc->lvRefCnt++;
17595                 }
17596
17597                 tree->SetOper(GT_LCL_VAR);
17598                 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
17599                 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
17600                 tree->gtFlags &= GTF_NODE_MASK;
17601                 tree->gtFlags &= ~GTF_GLOB_REF;
17602
17603                 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17604                 if (parent->gtOper == GT_ASG)
17605                 {
17606                     if (parent->gtOp.gtOp1 == tree)
17607                     {
17608                         tree->gtFlags |= GTF_VAR_DEF;
17609                         tree->gtFlags |= GTF_DONT_CSE;
17610                     }
17611
17612                     // Promotion of a struct containing struct fields, where the field
17613                     // is a struct with a single pointer-sized scalar field: in
17614                     // this case struct promotion uses the type of the underlying
17615                     // scalar field as the type of the struct field instead of recursively
17616                     // promoting. This can lead to a case where we have a block-asgn
17617                     // with its RHS replaced with a scalar type. Mark the RHS value as
17618                     // DONT_CSE so that assertion prop will not do const propagation.
17619                     // This is required because if the RHS of a block-asg is a
17620                     // constant, then it would be incorrectly interpreted as an init-block.
17621                     //
17622                     // TODO - This can also be avoided if we implement recursive struct
17623                     // promotion.
17624                     if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
17625                     {
17626                         tree->gtFlags |= GTF_DONT_CSE;
17627                     }
17628                 }
17629 #ifdef DEBUG
17630                 if (verbose)
17631                 {
17632                     printf("Replacing the field in promoted struct with a local var:\n");
17633                     fgWalkPre->printModified = true;
17634                 }
17635 #endif // DEBUG
17636                 return WALK_SKIP_SUBTREES;
17637             }
17638         }
17639         else
17640         {
17641             // Normed struct
17642             // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
17643             // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
17644             // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
17645             // there is one extremely rare case where that won't be true. An enum type is a special value type
17646             // that contains exactly one element of a primitive integer type (that, for CLS programs is named
17647             // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
17648             // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
17649             // ldfld. For example:
17650             //
17651             //  .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
17652             //  {
17653             //    .field public specialname rtspecialname int16 value__
17654             //    .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
17655             //  }
17656             //  .method public hidebysig static void  Main() cil managed
17657             //  {
17658             //     .locals init (valuetype mynamespace.e_t V_0)
17659             //     ...
17660             //     ldloca.s   V_0
17661             //     ldflda     int16 mynamespace.e_t::value__
17662             //     ...
17663             //  }
17664             //
17665             // Normally, compilers will not generate the ldflda, since it is superfluous.
17666             //
17667             // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
17668             // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
17669             // mismatch like this, don't do this morphing. The local var may end up getting marked as
17670             // address taken, and the appropriate SHORT load will be done from memory in that case.
17671
17672             if (tree->TypeGet() == obj->TypeGet())
17673             {
17674                 if (lvaIsImplicitByRefLocal(lclNum))
17675                 {
17676                     // Keep track of the number of appearances of each promoted implicit
17677                     // byref (here during struct promotion, which happens during address-exposed
17678                     // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
17679                     // byref params when deciding if it's legal to elide certain copies of them.
17680                     // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
17681                     // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
17682                     // chance, so we have to check now.
17683                     JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
17684                             varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
17685                     varDsc->lvRefCnt++;
17686                 }
17687
17688                 tree->ChangeOper(GT_LCL_VAR);
17689                 tree->gtLclVarCommon.SetLclNum(lclNum);
17690                 tree->gtFlags &= GTF_NODE_MASK;
17691
17692                 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17693                 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17694                 {
17695                     tree->gtFlags |= GTF_VAR_DEF;
17696                     tree->gtFlags |= GTF_DONT_CSE;
17697                 }
17698 #ifdef DEBUG
17699                 if (verbose)
17700                 {
17701                     printf("Replacing the field in normed struct with the local var:\n");
17702                     fgWalkPre->printModified = true;
17703                 }
17704 #endif // DEBUG
17705                 return WALK_SKIP_SUBTREES;
17706             }
17707         }
17708     }
17709
17710     return WALK_CONTINUE;
17711 }
17712
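// fgMorphLocalField: tree walk callback that rewrites a GT_LCL_FLD on a promoted
// struct local into a GT_LCL_VAR of the matching field local, when a field with a
// compatible offset and size exists; otherwise the struct is forced to memory.
// A sketch (locals are hypothetical): GT_LCL_FLD V02 [+4]  ->  GT_LCL_VAR V08.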
17713 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
17714 {
17715     noway_assert(tree->OperGet() == GT_LCL_FLD);
17716
17717     unsigned   lclNum = tree->gtLclFld.gtLclNum;
17718     LclVarDsc* varDsc = &lvaTable[lclNum];
17719
17720     if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
17721     {
17722         // Promoted struct
17723         unsigned   fldOffset     = tree->gtLclFld.gtLclOffs;
17724         unsigned   fieldLclIndex = 0;
17725         LclVarDsc* fldVarDsc     = nullptr;
17726
17727         if (fldOffset != BAD_VAR_NUM)
17728         {
17729             fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17730             noway_assert(fieldLclIndex != BAD_VAR_NUM);
17731             fldVarDsc = &lvaTable[fieldLclIndex];
17732         }
17733
17734         if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
17735 #ifdef _TARGET_X86_
17736             && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
17737 #endif
17738                 )
17739         {
17740             // There is an existing sub-field we can use
17741             tree->gtLclFld.SetLclNum(fieldLclIndex);
17742
17743             // We need to keep the types 'compatible'. If the field type is register-sized, we can switch back to a GT_LCL_VAR.
17744             CLANG_FORMAT_COMMENT_ANCHOR;
17745
17746 #ifdef _TARGET_ARM_
17747             assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
17748 #else
17749             assert(varTypeIsIntegralOrI(tree->TypeGet()));
17750 #endif
17751             if (varTypeCanReg(fldVarDsc->TypeGet()))
17752             {
17753                 // If the type is integer-ish, then we can use it as-is
17754                 tree->ChangeOper(GT_LCL_VAR);
17755                 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
17756                 tree->gtType = fldVarDsc->TypeGet();
17757 #ifdef DEBUG
17758                 if (verbose)
17759                 {
17760                     printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
17761                     fgWalkPre->printModified = true;
17762                 }
17763 #endif // DEBUG
17764             }
17765
17766             GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17767             if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17768             {
17769                 tree->gtFlags |= GTF_VAR_DEF;
17770                 tree->gtFlags |= GTF_DONT_CSE;
17771             }
17772         }
17773         else
17774         {
17775             // There is no existing field that has all the parts that we need
17776             // So we must ensure that the struct lives in memory.
17777             lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
17778
17779 #ifdef DEBUG
17780             // We can't convert this local to a float because its
17781             // address really is taken.
17782             varDsc->lvKeepType = 1;
17783 #endif // DEBUG
17784         }
17785
17786         return WALK_SKIP_SUBTREES;
17787     }
17788
17789     return WALK_CONTINUE;
17790 }
17791
17792 //------------------------------------------------------------------------
17793 // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
17794 //                          i.e. which the ABI requires to be passed by making a copy in the caller and
17795 //                          passing its address to the callee.  Mark their `LclVarDsc`s such that
17796 //                          `lvaIsImplicitByRefLocal` will return true for them.
17797
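// For example (a Windows x64 sketch): a 16-byte struct argument is not 1, 2, 4,
// or 8 bytes, so the ABI makes the caller pass the address of a copy instead of
// the value; the check below (size > REGSIZE_BYTES, or size not a power of two)
// captures this.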
17798 void Compiler::fgMarkImplicitByRefArgs()
17799 {
17800 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
17801 #ifdef DEBUG
17802     if (verbose)
17803     {
17804         printf("\n*************** In fgMarkImplicitByRefArgs()\n");
17805     }
17806 #endif // DEBUG
17807
17808     for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17809     {
17810         LclVarDsc* varDsc = &lvaTable[lclNum];
17811
17812         if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
17813         {
17814             size_t size;
17815
17816             if (varDsc->lvSize() > REGSIZE_BYTES)
17817             {
17818                 size = varDsc->lvSize();
17819             }
17820             else
17821             {
17822                 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17823                 size                         = info.compCompHnd->getClassSize(typeHnd);
17824             }
17825
17826 #if defined(_TARGET_AMD64_)
17827             if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
17828 #elif defined(_TARGET_ARM64_)
17829             if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
17830 #endif
17831             {
17832                 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local,
17833                 // so I am now using that combination to indicate that this is one of the weird
17834                 // implicit byref locals.
17835                 // The address taken cleanup will look for references to locals marked like
17836                 // this, and transform them appropriately.
17837                 varDsc->lvIsTemp = 1;
17838
17839                 // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
17840                 // appearance of implicit-by-ref param so that call arg morphing can do an
17841                 // optimization for single-use implicit-by-ref params whose single use is as
17842                 // an outgoing call argument.
17843                 varDsc->lvRefCnt = 0;
17844             }
17845         }
17846     }
17847
17848 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
17849 }
17850
17851 //------------------------------------------------------------------------
17852 // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
17853 //                            struct to pointer).  Also choose (based on address-exposed analysis)
17854 //                            which struct promotions of implicit byrefs to keep or discard.
17855 //                            For those which are kept, insert the appropriate initialization code.
17856 //                            For those which are to be discarded, annotate the promoted field locals
17857 //                            so that fgMorphImplicitByRefArgs will know to rewrite their appearances
17858 //                            using indirections off the pointer parameters.
17859
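// A sketch of the retyping (locals are hypothetical): a promoted implicit-byref
// parameter V00 of type TYP_STRUCT becomes a TYP_BYREF pointer; if its promotion
// is kept, a new struct temp V09 takes over the field locals and is initialized
// at method entry with the equivalent of "V09 = *V00".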
17860 void Compiler::fgRetypeImplicitByRefArgs()
17861 {
17862 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
17863 #ifdef DEBUG
17864     if (verbose)
17865     {
17866         printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
17867     }
17868 #endif // DEBUG
17869
17870     for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17871     {
17872         LclVarDsc* varDsc = &lvaTable[lclNum];
17873
17874         if (lvaIsImplicitByRefLocal(lclNum))
17875         {
17876             size_t size;
17877
17878             if (varDsc->lvSize() > REGSIZE_BYTES)
17879             {
17880                 size = varDsc->lvSize();
17881             }
17882             else
17883             {
17884                 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17885                 size                         = info.compCompHnd->getClassSize(typeHnd);
17886             }
17887
17888             if (varDsc->lvPromoted)
17889             {
17890                 // This implicit-by-ref was promoted; create a new temp to represent the
17891                 // promoted struct before rewriting this parameter as a pointer.
17892                 unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
17893                 lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
17894                 // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
17895                 varDsc = &lvaTable[lclNum];
17896
17897                 // Copy the struct promotion annotations to the new temp.
17898                 LclVarDsc* newVarDsc       = &lvaTable[newLclNum];
17899                 newVarDsc->lvPromoted      = true;
17900                 newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
17901                 newVarDsc->lvFieldCnt      = varDsc->lvFieldCnt;
17902                 newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
17903                 newVarDsc->lvCustomLayout  = varDsc->lvCustomLayout;
17904 #ifdef DEBUG
17905                 newVarDsc->lvKeepType = true;
17906 #endif // DEBUG
17907
17908                 // Propagate address-taken-ness and do-not-enregister-ness.
17909                 newVarDsc->lvAddrExposed     = varDsc->lvAddrExposed;
17910                 newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
17911 #ifdef DEBUG
17912                 newVarDsc->lvLclBlockOpAddr   = varDsc->lvLclBlockOpAddr;
17913                 newVarDsc->lvLclFieldExpr     = varDsc->lvLclFieldExpr;
17914                 newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
17915                 newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
17916                 newVarDsc->lvLiveAcrossUCall  = varDsc->lvLiveAcrossUCall;
17917 #endif // DEBUG
17918
17919                 // If the promotion is dependent, the promoted temp would just be committed
17920                 // to memory anyway, so we'll rewrite its appearances to be indirections
17921                 // through the pointer parameter, the same as we'd do for this
17922                 // parameter if it weren't promoted at all (otherwise the initialization
17923                 // of the new temp would just be a needless memcpy at method entry).
17924                 bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
17925                                      (varDsc->lvRefCnt <= varDsc->lvFieldCnt);
17926
17927                 if (!undoPromotion)
17928                 {
17929                     // Insert IR that initializes the temp from the parameter.
17930                     // LHS is a simple reference to the temp.
17931                     fgEnsureFirstBBisScratch();
17932                     GenTreePtr lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
17933                     // RHS is an indirection (using GT_OBJ) off the parameter.
17934                     GenTreePtr addr   = gtNewLclvNode(lclNum, TYP_BYREF);
17935                     GenTreePtr rhs    = gtNewBlockVal(addr, (unsigned)size);
17936                     GenTreePtr assign = gtNewAssignNode(lhs, rhs);
17937                     fgInsertStmtAtBeg(fgFirstBB, assign);
17938                 }
17939
17940                 // Update the locals corresponding to the promoted fields.
17941                 unsigned fieldLclStart = varDsc->lvFieldLclStart;
17942                 unsigned fieldCount    = varDsc->lvFieldCnt;
17943                 unsigned fieldLclStop  = fieldLclStart + fieldCount;
17944
17945                 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
17946                 {
17947                     LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
17948
17949                     if (undoPromotion)
17950                     {
17951                         // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
17952                         // will know to rewrite appearances of this local.
17953                         assert(fieldVarDsc->lvParentLcl == lclNum);
17954                     }
17955                     else
17956                     {
17957                         // Set the new parent.
17958                         fieldVarDsc->lvParentLcl = newLclNum;
17959                         // Clear the ref count field; it is used to communicate the number of references
17960                         // to the implicit byref parameter when morphing calls that pass the implicit byref
17961                         // out as an outgoing argument value, but that doesn't pertain to this field local
17962                         // which is now a field of a non-arg local.
17963                         fieldVarDsc->lvRefCnt = 0;
17964                     }
17965
17966                     fieldVarDsc->lvIsParam = false;
17967                     // The fields shouldn't inherit any register preferences from
17968                     // the parameter which is really a pointer to the struct.
17969                     fieldVarDsc->lvIsRegArg      = false;
17970                     fieldVarDsc->lvIsMultiRegArg = false;
17971                     fieldVarDsc->lvSetIsHfaRegArg(false);
17972                     fieldVarDsc->lvArgReg = REG_NA;
17973 #if FEATURE_MULTIREG_ARGS
17974                     fieldVarDsc->lvOtherArgReg = REG_NA;
17975 #endif
17976                     fieldVarDsc->lvPrefReg = 0;
17977                 }
17978
17979                 // Hijack lvFieldLclStart to record the new temp number.
17980                 // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
17981                 varDsc->lvFieldLclStart = newLclNum;
17982                 // Go ahead and clear lvFieldCnt -- either we're promoting
17983                 // a replacement temp or we're not promoting this arg, and
17984                 // in either case the parameter is now a pointer that doesn't
17985                 // have these fields.
17986                 varDsc->lvFieldCnt = 0;
17987
17988                 // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
17989                 // whether references to the struct should be rewritten as
17990                 // indirections off the pointer (not promoted) or references
17991                 // to the new struct local (promoted).
17992                 varDsc->lvPromoted = !undoPromotion;
17993             }
17994             else
17995             {
17996                 // The "undo promotion" path above clears lvPromoted for args that struct
17997                 // promotion wanted to promote but that aren't considered profitable to
17998                 // rewrite.  It hijacks lvFieldLclStart to communicate to
17999                 // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
18000                 // on such args for fgMorphImplicitByRefArgs to consult in the interim.
18001                 // Here we have an arg that was simply never promoted, so make sure it doesn't
18002                 // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
18003                 // and fgMarkDemotedImplicitByRefArgs.
18004                 assert(varDsc->lvFieldLclStart == 0);
18005             }
18006
18007             // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
18008             varDsc->lvType = TYP_BYREF;
18009
18010             // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
18011             // make sure that the following flag is not set as these will force SSA to
18012             // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
18013             //
18014             varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
18015
18016 #ifdef DEBUG
18017             // This should not be converted to a double in stress mode,
18018             // because it is really a pointer
18019             varDsc->lvKeepType = 1;
18020
18021             // The struct parameter may have had its address taken, but the pointer parameter
18022             // cannot -- any uses of the struct parameter's address are uses of the pointer
18023             // parameter's value, and there's no way for the MSIL to reference the pointer
18024             // parameter's address.  So clear the address-taken bit for the parameter.
18025             varDsc->lvAddrExposed     = 0;
18026             varDsc->lvDoNotEnregister = 0;
18027
18028             if (verbose)
18029             {
18030                 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
18031             }
18032 #endif // DEBUG
18033         }
18034     }
18035
18036 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18037 }
18038
18039 //------------------------------------------------------------------------
18040 // fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
18041 //                                 asked to promote.  Appearances of these have now been rewritten
18042 //                                 (by fgMorphImplicitByRefArgs) using indirections from the pointer
18043 //                                 parameter or references to the promotion temp, as appropriate.
18044
18045 void Compiler::fgMarkDemotedImplicitByRefArgs()
18046 {
18047 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18048
18049     for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18050     {
18051         LclVarDsc* varDsc = &lvaTable[lclNum];
18052
18053         if (lvaIsImplicitByRefLocal(lclNum))
18054         {
18055             if (varDsc->lvPromoted)
18056             {
18057                 // The parameter is simply a pointer now, so clear lvPromoted.  It was left set
18058                 // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
18059                 // appearances of this arg needed to be rewritten to a new promoted struct local.
18060                 varDsc->lvPromoted = false;
18061
18062                 // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
18063                 // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
18064                 varDsc->lvFieldLclStart = 0;
18065             }
18066             else if (varDsc->lvFieldLclStart != 0)
18067             {
18068                 // We created new temps to represent a promoted struct corresponding to this
18069                 // parameter, but decided not to go through with the promotion and have
18070                 // rewritten all uses as indirections off the pointer parameter.
18071                 // We stashed the lclNum of the new struct temp in lvFieldLclStart; make
18072                 // note of it and clear the annotation.
18073                 unsigned structLclNum   = varDsc->lvFieldLclStart;
18074                 varDsc->lvFieldLclStart = 0;
18075
18076                 // Clear the arg's ref count; this was set during address-taken analysis so that
18077                 // call morphing could identify single-use implicit byrefs; we're done with
18078                 // that, and want it to be in its default state of zero when we go to set
18079                 // real ref counts for all variables.
18080                 varDsc->lvRefCnt = 0;
18081
18082                 // The temp struct is now unused; set flags appropriately so that we
18083                 // won't allocate space for it on the stack.
18084                 LclVarDsc* structVarDsc     = &lvaTable[structLclNum];
18085                 structVarDsc->lvRefCnt      = 0;
18086                 structVarDsc->lvAddrExposed = false;
18087 #ifdef DEBUG
18088                 structVarDsc->lvUnusedStruct = true;
18089 #endif // DEBUG
18090
18091                 unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
18092                 unsigned fieldCount    = structVarDsc->lvFieldCnt;
18093                 unsigned fieldLclStop  = fieldLclStart + fieldCount;
18094
18095                 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
18096                 {
18097                     // Fix the pointer to the parent local.
18098                     LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
18099                     assert(fieldVarDsc->lvParentLcl == lclNum);
18100                     fieldVarDsc->lvParentLcl = structLclNum;
18101
18102                     // The field local is now unused; set flags appropriately so that
18103                     // we won't allocate stack space for it.
18104                     fieldVarDsc->lvRefCnt      = 0;
18105                     fieldVarDsc->lvAddrExposed = false;
18106                 }
18107             }
18108         }
18109     }
18110
18111 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18112 }
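
// Illustrative sketch, not part of the JIT: the annotation protocol the three phases share,
// assuming a hypothetical implicit-byref arg V01 whose promotion temp is V05:
//
//     fgRetypeImplicitByRefArgs:      V01.lvType = TYP_BYREF; V01.lvPromoted stays set;
//                                     V01.lvFieldLclStart = 5;  // names the new struct temp
//     fgMorphImplicitByRefArgs:       rewrites appearances of V01 to V05 (or to indirections
//                                     off the V01 pointer if promotion was abandoned)
//     fgMarkDemotedImplicitByRefArgs: V01.lvPromoted = false; V01.lvFieldLclStart = 0;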
18113
18114 /*****************************************************************************
18115  *
18116  *  Morph irregular parameters
18117  *    for x64 and ARM64 this means turning them into byrefs and adding extra indirections.
18118  */
18119 bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree)
18120 {
18121 #if (!defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) && !defined(_TARGET_ARM64_)
18122
18123     return false;
18124
18125 #else  // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18126
18127     bool changed = false;
18128
18129     // Implicit byref morphing needs to know if the reference to the parameter is a
18130     // child of GT_ADDR or not, so this method looks one level down and does the
18131     // rewrite whenever a child is a reference to an implicit byref parameter.
18132     if (tree->gtOper == GT_ADDR)
18133     {
18134         if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
18135         {
18136             GenTreePtr morphedTree = fgMorphImplicitByRefArgs(tree, true);
18137             changed                = (morphedTree != nullptr);
18138             assert(!changed || (morphedTree == tree));
18139         }
18140     }
18141     else
18142     {
18143         for (GenTreePtr* pTree : tree->UseEdges())
18144         {
18145             GenTreePtr childTree = *pTree;
18146             if (childTree->gtOper == GT_LCL_VAR)
18147             {
18148                 GenTreePtr newChildTree = fgMorphImplicitByRefArgs(childTree, false);
18149                 if (newChildTree != nullptr)
18150                 {
18151                     changed = true;
18152                     *pTree  = newChildTree;
18153                 }
18154             }
18155         }
18156     }
18157
18158     return changed;
18159 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18160 }
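
// Illustrative sketch, not part of the JIT: given a tree such as (hypothetical locals)
//
//     GT_ASG
//     +--- GT_LCL_VAR V02 (TYP_STRUCT)
//     \--- GT_LCL_VAR V01 (TYP_STRUCT, implicit byref arg)
//
// the loop above visits each use edge, recognizes the V01 child, and writes the morphed
// node -- e.g. GT_OBJ of a TYP_BYREF V01 -- back through *pTree.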
18161
18162 GenTreePtr Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, bool isAddr)
18163 {
18164     assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
18165     assert(isAddr == (tree->gtOper == GT_ADDR));
18166
18167     GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
18168     unsigned   lclNum     = lclVarTree->gtLclVarCommon.gtLclNum;
18169     LclVarDsc* lclVarDsc  = &lvaTable[lclNum];
18170
18171     CORINFO_FIELD_HANDLE fieldHnd;
18172     unsigned             fieldOffset  = 0;
18173     var_types            fieldRefType = TYP_UNKNOWN;
18174
18175     if (lvaIsImplicitByRefLocal(lclNum))
18176     {
18177         // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
18178         // re-invoke the traversal to mark address-taken locals.
18179         // So, we may encounter a tree that has already been transformed to TYP_BYREF.
18180         // If we do, leave it as-is.
18181         if (!varTypeIsStruct(lclVarTree))
18182         {
18183             assert(lclVarTree->TypeGet() == TYP_BYREF);
18184
18185             return nullptr;
18186         }
18187         else if (lclVarDsc->lvPromoted)
18188         {
18189             // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
18190             // arg.  Rewrite this to refer to the new local.
18191             assert(lclVarDsc->lvFieldLclStart != 0);
18192             lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
18193             return tree;
18194         }
18195
18196         fieldHnd = nullptr;
18197     }
18198     else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
18199     {
18200         // This was a field reference to an implicit-by-reference struct parameter that was
18201         // dependently promoted; update it to a field reference off the pointer.
18202         // Grab the field handle from the struct field lclVar.
18203         fieldHnd    = lclVarDsc->lvFieldHnd;
18204         fieldOffset = lclVarDsc->lvFldOffset;
18205         assert(fieldHnd != nullptr);
18206         // Update lclNum/lclVarDsc to refer to the parameter
18207         lclNum       = lclVarDsc->lvParentLcl;
18208         lclVarDsc    = &lvaTable[lclNum];
18209         fieldRefType = lclVarTree->TypeGet();
18210     }
18211     else
18212     {
18213         // We only need to transform the 'marked' implicit byref parameters
18214         return nullptr;
18215     }
18216
18217     // This is no longer a def of the lclVar, even if it WAS a def of the struct.
18218     lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
18219
18220     if (isAddr)
18221     {
18222         if (fieldHnd == nullptr)
18223         {
18224             // change &X into just plain X
18225             tree->CopyFrom(lclVarTree, this);
18226             tree->gtType = TYP_BYREF;
18227         }
18228         else
18229         {
18230             // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
18231             // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
18232             lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
18233             lclVarTree->gtType = TYP_BYREF;
18234             tree->gtOp.gtOp1   = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
18235         }
18236
18237 #ifdef DEBUG
18238         if (verbose)
18239         {
18240             printf("Replacing address of implicit by ref struct parameter with byref:\n");
18241         }
18242 #endif // DEBUG
18243     }
18244     else
18245     {
18246         // Change X into OBJ(X) or FIELD(X, f)
18247         var_types structType = tree->gtType;
18248         tree->gtType         = TYP_BYREF;
18249
18250         if (fieldHnd)
18251         {
18252             tree->gtLclVarCommon.SetLclNum(lclNum);
18253             tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
18254         }
18255         else
18256         {
18257             tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
18258         }
18259
18260         if (structType == TYP_STRUCT)
18261         {
18262             gtSetObjGcInfo(tree->AsObj());
18263         }
18264
18265         // TODO-CQ: If the VM ever stops violating the ABI by passing heap references,
18266         // we could remove TGTANYWHERE.
18267         tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
18268
18269 #ifdef DEBUG
18270         if (verbose)
18271         {
18272             printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
18273         }
18274 #endif // DEBUG
18275     }
18276
18277 #ifdef DEBUG
18278     if (verbose)
18279     {
18280         gtDispTree(tree);
18281     }
18282 #endif // DEBUG
18283
18284     return tree;
18285 }
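
// Illustrative sketch, not part of the JIT: the rewrites performed above for a hypothetical
// implicit byref parameter V01 of struct type S with field f (dependently promoted to V03,
// or independently promoted to a new struct temp V05):
//
//     GT_ADDR(GT_LCL_VAR V01)      ==>  GT_LCL_VAR V01 (TYP_BYREF)     // "&X becomes X"
//     GT_LCL_VAR V01 (TYP_STRUCT)  ==>  GT_OBJ(GT_LCL_VAR V01)         // "X becomes OBJ(X)"
//     GT_LCL_VAR V03 (field S.f)   ==>  GT_FIELD(GT_LCL_VAR V01, f)    // field off the pointer
//     GT_LCL_VAR V01 (lvPromoted)  ==>  GT_LCL_VAR V05                 // the promotion temp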
18286
18287 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
18288 enum AddrExposedContext
18289 {
18290     AXC_None,     // None of the below seen yet.
18291     AXC_Ind,      // The address being computed is to be dereferenced.
18292     AXC_Addr,     // We're computing a raw address (not dereferenced, at least not immediately).
18293     AXC_IndWide,  // A block operation dereferenced an address covering more bytes than the addressed
18294                   // storage location -- if the address refers to a field of a struct local, we need to
18295                   // consider the entire local address-taken (not just the field).
18296     AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
18297                   // on more bytes than the width of the storage location addressed.  If this is a
18298                   // field of a promoted struct local, declare the entire struct local address-taken.
18299     AXC_IndAdd,   // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
18300                   // If one arg is a constant int, evaluate the other in an IND context.  Otherwise, none.
18301 };
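
// Illustrative sketch, not part of the JIT: for a statement such as *(&s.f) = 1 the
// pre-order callback below computes contexts roughly as follows:
//
//     GT_ASG      seen in AXC_None -> pushes AXC_None for its children
//       GT_IND    seen in AXC_None -> pushes AXC_Ind  (its child is a dereferenced address)
//         GT_ADDR seen in AXC_Ind  -> pushes AXC_None (the Ind cancels the Addr: *&x is x)
//
// Only an Addr context that reaches a local without being cancelled marks it address-exposed.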
18302
18303 typedef ArrayStack<AddrExposedContext> AXCStack;
18304
18305 // We use the pre/post callback pair to simulate passing an argument down the recursion, via a stack.
18306 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
18307 {
18308     AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
18309     (void)axcStack->Pop();
18310     return WALK_CONTINUE;
18311 }
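
// Illustrative sketch, not part of the JIT: the pre/post pair emulates passing the context
// as a recursion argument.  ComputeContext and Children below are hypothetical names:
//
//     void Walk(GenTree* tree, AddrExposedContext axc)
//     {
//         AddrExposedContext childAxc = ComputeContext(tree, axc); // PreCB pushes this
//         for (GenTree* child : Children(tree))
//         {
//             Walk(child, childAxc);
//         }
//         // PostCB pops childAxc here
//     }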
18312
18313 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
18314 {
18315     GenTreePtr         tree     = *pTree;
18316     Compiler*          comp     = fgWalkPre->compiler;
18317     AXCStack*          axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
18318     AddrExposedContext axc      = axcStack->Top();
18319
18320     // In some situations, we have to figure out what the effective context is in which to
18321     // evaluate the current tree, depending on which argument position it is in its parent.
18322
18323     switch (axc)
18324     {
18325
18326         case AXC_IndAdd:
18327         {
18328             GenTreePtr parent = fgWalkPre->parentStack->Index(1);
18329             assert(parent->OperGet() == GT_ADD);
18330             // Is one of the args a constant representing a field offset,
18331             // and is this the other?  If so, Ind context.
18332             if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
18333             {
18334                 axc = AXC_Ind;
18335             }
18336             else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
18337             {
18338                 axc = AXC_Ind;
18339             }
18340             else
18341             {
18342                 axc = AXC_None;
18343             }
18344         }
18345         break;
18346
18347         default:
18348             break;
18349     }
18350
18351     // Now recurse properly for the tree.
18352     switch (tree->gtOper)
18353     {
18354         case GT_IND:
18355             if (axc != AXC_Addr)
18356             {
18357                 axcStack->Push(AXC_Ind);
18358             }
18359             else
18360             {
18361                 axcStack->Push(AXC_None);
18362             }
18363             return WALK_CONTINUE;
18364
18365         case GT_BLK:
18366         case GT_OBJ:
18367             if (axc == AXC_Addr)
18368             {
18369                 axcStack->Push(AXC_None);
18370             }
18371             else if (tree->TypeGet() == TYP_STRUCT)
18372             {
18373                 // The block operation will dereference its argument(s) -- usually.  If the size of the initblk
18374                 // or copyblk exceeds the size of a storage location whose address is used as one of the
18375                 // arguments, then we have to consider that storage location (indeed, its underlying containing
18376                 // location) to be address taken.  So get the width of the initblk or copyblk.
18377
18378                 GenTreePtr  parent = fgWalkPre->parentStack->Index(1);
18379                 GenTreeBlk* blk    = tree->AsBlk();
18380                 unsigned    width  = blk->gtBlkSize;
18381                 noway_assert(width != 0);
18382                 axc           = AXC_Ind;
18383                 GenTree* addr = blk->Addr();
18384                 if (addr->OperGet() == GT_ADDR)
18385                 {
18386                     if (parent->gtOper == GT_ASG)
18387                     {
18388                         if ((tree == parent->gtOp.gtOp1) &&
18389                             ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
18390                         {
18391                             axc = AXC_IndWide;
18392                         }
18393                     }
18394                     else
18395                     {
18396                         assert(parent->gtOper == GT_CALL);
18397                     }
18398                 }
18399                 axcStack->Push(axc);
18400             }
18401             else
18402             {
18403                 // This is like a regular GT_IND.
18404                 axcStack->Push(AXC_Ind);
18405             }
18406             return WALK_CONTINUE;
18407
18408         case GT_DYN_BLK:
18409             // Assume maximal width.
18410             axcStack->Push(AXC_IndWide);
18411             return WALK_CONTINUE;
18412
18413         case GT_LIST:
18414         case GT_FIELD_LIST:
18415             axcStack->Push(AXC_None);
18416             return WALK_CONTINUE;
18417
18418         case GT_INDEX:
18419             // Taking the address of an array element never takes the address of a local.
18420             axcStack->Push(AXC_None);
18421             return WALK_CONTINUE;
18422
18423         case GT_ADDR:
18424 #ifdef FEATURE_SIMD
18425             if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
18426             {
18427                 axcStack->Push(AXC_None);
18428             }
18429             else
18430 #endif // FEATURE_SIMD
18431                 if (axc == AXC_Ind)
18432             {
18433                 axcStack->Push(AXC_None);
18434             }
18435             else if (axc == AXC_IndWide)
18436             {
18437                 axcStack->Push(AXC_AddrWide);
18438             }
18439             else
18440             {
18441                 assert(axc == AXC_None);
18442                 axcStack->Push(AXC_Addr);
18443             }
18444             return WALK_CONTINUE;
18445
18446         case GT_FIELD:
18447             // First, handle a couple of special cases: field of promoted struct local, field
18448             // of "normed" struct.
18449             if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
18450             {
18451                 // It (may have) replaced the field with a local var or local field.  If we're in an addr context,
18452                 // label it addr-taken.
18453                 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
18454                 {
18455                     unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18456                     comp->lvaSetVarAddrExposed(lclNum);
18457                     if (axc == AXC_AddrWide)
18458                     {
18459                         LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18460                         if (varDsc->lvIsStructField)
18461                         {
18462                             comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
18463                         }
18464                     }
18465                 }
18466                 // Push something to keep the PostCB, which will pop it, happy.
18467                 axcStack->Push(AXC_None);
18468                 return WALK_SKIP_SUBTREES;
18469             }
18470             else
18471             {
18472                 // GT_FIELD is an implicit deref.
18473                 if (axc == AXC_Addr)
18474                 {
18475                     axcStack->Push(AXC_None);
18476                 }
18477                 else if (axc == AXC_AddrWide)
18478                 {
18479                     axcStack->Push(AXC_IndWide);
18480                 }
18481                 else
18482                 {
18483                     axcStack->Push(AXC_Ind);
18484                 }
18485                 return WALK_CONTINUE;
18486             }
18487
18488         case GT_LCL_FLD:
18489         {
18490             assert(axc != AXC_Addr);
18491             unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18492             if (comp->lvaIsImplicitByRefLocal(lclNum))
18493             {
18494                 // Keep track of the number of appearances of each promoted implicit
18495                 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
18496                 // checks the ref counts for implicit byref params when deciding if it's legal
18497                 // to elide certain copies of them.
18498                 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18499                 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n", varDsc->lvRefCnt,
18500                         varDsc->lvRefCnt + 1, lclNum);
18501
18502                 varDsc->lvRefCnt++;
18503             }
18504             // fgMorphLocalField recognizes certain forms and does all the work for them; in that case it
18505             // returns WALK_SKIP_SUBTREES, else WALK_CONTINUE.  We return the same result here.
18506             fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
18507             if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
18508             {
18509                 comp->lvaSetVarAddrExposed(lclNum);
18510                 if (axc == AXC_AddrWide)
18511                 {
18512                     LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18513                     if (varDsc->lvIsStructField)
18514                     {
18515                         comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
18516                     }
18517                 }
18518             }
18519             // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
18520             // what, but something to be popped by the post callback.  If we're going
18521             // to analyze children, the LCL_FLD creates an Ind context, so use that.
18522             axcStack->Push(AXC_Ind);
18523             return res;
18524         }
18525
18526         case GT_LCL_VAR:
18527         {
18528             unsigned   lclNum = tree->gtLclVarCommon.gtLclNum;
18529             LclVarDsc* varDsc = &comp->lvaTable[lclNum];
18530
18531             if (comp->lvaIsImplicitByRefLocal(lclNum))
18532             {
18533                 // Keep track of the number of appearances of each promoted implicit
18534                 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
18535                 // checks the ref counts for implicit byref params when deciding if it's legal
18536                 // to elide certain copies of them.
18537                 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMarkAddrTakenLocalsPreCB\n", varDsc->lvRefCnt,
18538                         varDsc->lvRefCnt + 1, lclNum);
18539
18540                 varDsc->lvRefCnt++;
18541             }
18542
18543             if (axc == AXC_Addr || axc == AXC_AddrWide)
18544             {
18545                 comp->lvaSetVarAddrExposed(lclNum);
18546                 if (axc == AXC_AddrWide)
18547                 {
18548                     if (varDsc->lvIsStructField)
18549                     {
18550                         comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
18551                     }
18552                 }
18553
18554                 // We may need to quirk the storage size for this LCL_VAR:
18555                 // some PInvoke signatures incorrectly specify a byref to an INT32
18556                 // when they actually write a SIZE_T or INT64.
18557                 if (axc == AXC_Addr)
18558                 {
18559                     comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
18560                 }
18561             }
18562             // Push something to keep the PostCB, which will pop it, happy.
18563             axcStack->Push(AXC_None);
18564             // The tree is a leaf.
18565             return WALK_SKIP_SUBTREES;
18566         }
18567
18568         case GT_ADD:
18569             assert(axc != AXC_Addr);
18570             // See below about treating pointer operations as wider indirection.
18571             if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
18572             {
18573                 axcStack->Push(AXC_IndWide);
18574             }
18575             else if (axc == AXC_Ind)
18576             {
18577                 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
18578                 // If it's an add of a constant and an address, and the constant represents a field,
18579                 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
18580                 axcStack->Push(AXC_IndAdd);
18581             }
18582             else
18583             {
18584                 axcStack->Push(axc);
18585             }
18586             return WALK_CONTINUE;
18587
18588         // !!! Treat Pointer Operations as Wider Indirection
18589         //
18590         // If we are performing pointer operations, make sure we treat that as equivalent to a wider
18591         // indirection. This is because the pointers could be pointing to the address of struct fields
18592         // and could be used to perform operations on the whole struct or passed to another method.
18593         //
18594         // When visiting a node in this pre-order walk, we do not know if we would in the future
18595         // encounter a GT_ADDR of a GT_FIELD below.
18596         //
18597         // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
18598         // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
18599         // wider indirection context down the expr tree.
18600         //
18601         // For example, in unsafe code:
18602         //
18603         //   IL_000e  12 00             ldloca.s     0x0
18604         //   IL_0010  7c 02 00 00 04    ldflda       0x4000002
18605         //   IL_0015  12 00             ldloca.s     0x0
18606         //   IL_0017  7c 01 00 00 04    ldflda       0x4000001
18607         //   IL_001c  59                sub
18608         //
18609         // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then
18610         // consider GT_SUB to be equivalent of an AXC_IndWide.
18611         //
18612         // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
18613         // them as AXC_IndWide.
18614         //
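        //
        // Illustrative sketch, not part of the JIT: the IL above corresponds to unsafe
        // source along the lines of (hypothetical struct and fields assumed)
        //
        //     S s;
        //     ptrdiff_t diff = (char*)&s.f2 - (char*)&s.f1;  // GT_SUB over TYP_BYREF operands
        //
        // so the GT_SUB is treated as AXC_IndWide and all of 's' becomes address-exposed.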
18615
18616         // BINOP
18617         case GT_SUB:
18618         case GT_MUL:
18619         case GT_DIV:
18620         case GT_UDIV:
18621         case GT_OR:
18622         case GT_XOR:
18623         case GT_AND:
18624         case GT_LSH:
18625         case GT_RSH:
18626         case GT_RSZ:
18627         case GT_ROL:
18628         case GT_ROR:
18629         case GT_EQ:
18630         case GT_NE:
18631         case GT_LT:
18632         case GT_LE:
18633         case GT_GT:
18634         case GT_GE:
18635         // UNOP
18636         case GT_CAST:
18637             if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
18638                 (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
18639             {
18640                 axcStack->Push(AXC_IndWide);
18641                 return WALK_CONTINUE;
18642             }
18643             __fallthrough;
18644
18645         default:
18646             // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None".  We must
18647             // handle the "Ind" propagation explicitly above.
18648             if (axc == AXC_Addr || axc == AXC_AddrWide)
18649             {
18650                 axcStack->Push(axc);
18651             }
18652             else
18653             {
18654                 axcStack->Push(AXC_None);
18655             }
18656             return WALK_CONTINUE;
18657     }
18658 }
18659
18660 bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
18661 {
18662     if (tree->TypeGet() != TYP_STRUCT)
18663     {
18664         return width <= genTypeSize(tree->TypeGet());
18665     }
18666     else if (tree->OperGet() == GT_LCL_VAR)
18667     {
18668         assert(tree->TypeGet() == TYP_STRUCT);
18669         unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
18670         return width <= lvaTable[lclNum].lvExactSize;
18671     }
18672     else if (tree->OperGet() == GT_FIELD)
18673     {
18674         CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
18675         return width <= info.compCompHnd->getClassSize(fldClass);
18676     }
18677     else if (tree->OperGet() == GT_INDEX)
18678     {
18679         return width <= tree->gtIndex.gtIndElemSize;
18680     }
18681     else
18682     {
18683         return false;
18684     }
18685 }
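
// Illustrative sketch, not part of the JIT: fgFitsInOrNotLoc answers "does a block operation
// of 'width' bytes stay within the storage location denoted by 'tree'?" -- returning false
// both when it spills past the location and when the tree is not a recognizable location.
// Assuming a hypothetical struct local V02 with lvExactSize == 8:
//
//     fgFitsInOrNotLoc(lclVar<V02>, 8);   // true:  the block op touches only V02
//     fgFitsInOrNotLoc(lclVar<V02>, 16);  // false: it spills past V02, so the caller above
//                                         //        widens the context to AXC_IndWide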
18686
18687 void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
18688 {
18689     assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
18690
18691     switch (op1->OperGet())
18692     {
18693         case GT_ADDR:
18694             if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
18695             {
18696                 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
18697                 lclFld->gtFieldSeq    = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
18698             }
18699             break;
18700
18701         case GT_ADD:
18702             if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
18703             {
18704                 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
18705                 if (op1Fs != nullptr)
18706                 {
18707                     op1Fs                                = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18708                     op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
18709                 }
18710             }
18711             else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
18712             {
18713                 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
18714                 if (op2Fs != nullptr)
18715                 {
18716                     op2Fs                                = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
18717                     op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
18718                 }
18719             }
18720             break;
18721
18722         case GT_CNS_INT:
18723         {
18724             FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
18725             if (op1Fs != nullptr)
18726             {
18727                 op1Fs                    = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18728                 op1->gtIntCon.gtFieldSeq = op1Fs;
18729             }
18730         }
18731         break;
18732
18733         default:
18734             // Record in the general zero-offset map.
18735             GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
18736             break;
18737     }
18738 }
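
// Illustrative sketch, not part of the JIT: an access to a field at offset 0 produces no
// GT_ADD, so its field sequence must ride on whatever node produces the address.  For a
// hypothetical struct S { int a; int b; } addressed through 'addr':
//
//     IND(ADDR(LCL_FLD s[+0]))    // s.a: the sequence for 'a' is appended to the LCL_FLD
//     IND(ADD(addr, CNS_INT 4))   // s.b: the sequence rides on the CNS_INT 4
//     IND(addr)                   // s.a through a raw pointer: recorded in the side
//                                 // zero-offset map, keyed by 'addr'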
18739
18740 /*****************************************************************************
18741  *
18742  *  Mark address-taken locals.
18743  */
18744
18745 void Compiler::fgMarkAddressExposedLocals()
18746 {
18747 #ifdef DEBUG
18748     if (verbose)
18749     {
18750         printf("\n*************** In fgMarkAddressExposedLocals()\n");
18751     }
18752 #endif // DEBUG
18753
18754     BasicBlock* block = fgFirstBB;
18755     noway_assert(block);
18756
18757     do
18758     {
18759         /* Make the current basic block address available globally */
18760
18761         compCurBB = block;
18762
18763         GenTreePtr stmt;
18764
18765         for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
18766         {
18767             // Walk the tree, calling fgMarkAddrTakenLocalsPreCB/PostCB on each node
18768             AXCStack stk(this);
18769             stk.Push(AXC_None); // We start in neither an addr nor an ind context.
18770             fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
18771         }
18772
18773         block = block->bbNext;
18774
18775     } while (block);
18776 }
18777
18778 // fgNodesMayInterfere:
18779 //   return true if moving the nodes relative to each other can change the result of a computation
18780 //
18781 // args:
18782 //   write: a node which writes; read: a node which reads
18783 //
18784
18785 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
18786 {
18787     LclVarDsc* srcVar = nullptr;
18788
18789     bool readIsIndir  = read->OperIsIndir() || read->OperIsImplicitIndir();
18790     bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
18791
18792     if (read->OperIsLocal())
18793     {
18794         srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
18795     }
18796
18797     if (writeIsIndir)
18798     {
18799         if (srcVar && srcVar->lvAddrExposed)
18800         {
18801             return true;
18802         }
18803         else if (readIsIndir)
18804         {
18805             return true;
18806         }
18807         return false;
18808     }
18809     else if (write->OperIsLocal())
18810     {
18811         LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
18812         if (readIsIndir)
18813         {
18814             return dstVar->lvAddrExposed;
18815         }
18816         else if (read->OperIsLocal())
18817         {
18818             if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
18819             {
18820                 return true;
18821             }
18822             return false;
18823         }
18824         else
18825         {
18826             return false;
18827         }
18828     }
18829     else
18830     {
18831         return false;
18832     }
18833 }
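
// Illustrative sketch, not part of the JIT, of the cases handled above; V01 is assumed
// address-exposed, V02 is assumed not address-exposed:
//
//     write: IND(p),      read: LCL_VAR V01  -> true  (an indir may alias an exposed local)
//     write: IND(p),      read: LCL_VAR V02  -> false (V02's address is never taken)
//     write: IND(p),      read: IND(q)       -> true  (two indirs may alias each other)
//     write: LCL_VAR V02, read: IND(q)       -> false (q cannot point at V02)
//     write: LCL_VAR V02, read: LCL_VAR V02  -> true  (same local)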
18834
18835 /** This predicate decides whether we will fold a tree with the structure:
18836  *  x = x <op> y where x could be any arbitrary expression into
18837  *  x <op>= y.
18838  *
18839  *  This modification is only performed when the target architecture supports
18840  *  complex addressing modes.  In the case of ARM for example, this transformation
18841  *  yields no benefit.
18842  *
18843  *  In case this function decides we can proceed to fold into an assignment operator,
18844  *  we need to inspect whether the operator is commutative to tell fgMorph whether it needs
18845  *  to reverse the tree: we may have seen x = y <op> x, which we want to fold into
18846  *  x <op>= y -- legal only because the operator is commutative.
18847  */
18848 bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
18849 {
18850 #if CPU_LOAD_STORE_ARCH
18851     /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
18852     return false;
18853 #elif !defined(LEGACY_BACKEND)
18854     return false;
18855 #else  // defined(LEGACY_BACKEND)
18856
18857     GenTreePtr op1  = tree->gtOp.gtOp1;
18858     GenTreePtr op2  = tree->gtGetOp2();
18859     genTreeOps cmop = op2->OperGet();
18860
18861     /* Is the destination identical to the first RHS sub-operand? */
18862     if (GenTree::Compare(op1, op2->gtOp.gtOp1))
18863     {
18864         /*
18865         Do not transform the following tree
18866
18867         [0024CFA4] -----------               const     int    1
18868         [0024CFDC] ----G------               |         int
18869         [0024CF5C] -----------               lclVar    ubyte  V01 tmp0
18870         [0024D05C] -A--G------               =         ubyte
18871         [0024D014] D------N---               lclVar    ubyte  V01 tmp0
18872
18873         to
18874
18875         [0024CFA4] -----------               const     int    1
18876         [0024D05C] -A--G------               |=        ubyte
18877         [0024D014] U------N---               lclVar    ubyte  V01 tmp0
18878
18879         , when V01 is a struct field local.
18880         */
18881
18882         if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
18883         {
18884             unsigned   lclNum = op1->gtLclVarCommon.gtLclNum;
18885             LclVarDsc* varDsc = lvaTable + lclNum;
18886
18887             if (varDsc->lvIsStructField)
18888             {
18889                 return false;
18890             }
18891         }
18892
18893         *bReverse = false;
18894         return true;
18895     }
18896     else if (GenTree::OperIsCommutative(cmop))
18897     {
18898         /* For commutative ops only, check for "a = x <op> a" */
18899
18900         /* Should we be doing this at all? */
18901         if ((opts.compFlags & CLFLG_TREETRANS) == 0)
18902         {
18903             return false;
18904         }
18905
18906         /* Can we swap the operands to cmop ... */
18907         if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
18908         {
18909             // If both sides have side effects, we cannot swap them, so bail.
18910             return false;
18911         }
18912
18913         /* Is the destination identical to the second RHS sub-operand? */
18914         if (GenTree::Compare(op1, op2->gtOp.gtOp2))
18915         {
18916             *bReverse = true;
18917             return true;
18918         }
18919     }
18920     return false;
18921 #endif // defined(LEGACY_BACKEND)
18922 }
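
// Illustrative sketch, not part of the JIT (legacy backend only): the foldings the predicate
// above enables, as source-level pseudocode:
//
//     x = x + y;   // destination matches the first RHS operand:  fold to x += y, *bReverse = false
//     x = y + x;   // destination matches the second RHS operand: GT_ADD is commutative,
//                  // so fold to x += y with *bReverse = true
//     x = y - x;   // never folded this way: GT_SUB is not commutative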
18923
18924 #ifdef FEATURE_SIMD
18925
18926 //-----------------------------------------------------------------------------------
18927 // fgMorphCombineSIMDFieldAssignments:
18928 //  If the RHS of the input stmt is a read of a SIMD vector's X field, then this function
18929 //  will keep examining the next few stmts, based on the vector size (2, 3, or 4).
18930 //  If the LHSs of those stmts are located contiguously, and the RHSs are also located
18931 //  contiguously, then we replace those statements with a single copyblk.
18932 //
18933 // Arguments:
18934 //  block - BasicBlock*. The block that stmt belongs to.
18935 //  stmt  - GenTreeStmt*. The stmt node we want to check.
18936 //
18937 // Return value:
18938 //  true if this function successfully optimized the stmts; otherwise,
18939 //  false.
18940
18941 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
18942 {
18943
18944     noway_assert(stmt->gtOper == GT_STMT);
18945     GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
18946     assert(tree->OperGet() == GT_ASG);
18947
18948     GenTreePtr originalLHS    = tree->gtOp.gtOp1;
18949     GenTreePtr prevLHS        = tree->gtOp.gtOp1;
18950     GenTreePtr prevRHS        = tree->gtOp.gtOp2;
18951     unsigned   index          = 0;
18952     var_types  baseType       = TYP_UNKNOWN;
18953     unsigned   simdSize       = 0;
18954     GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
18955
18956     if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
18957     {
18958         // If the RHS is not a read of a SIMD vector's field X, then there is no need to check further.
18959         return false;
18960     }
18961
18962     var_types  simdType             = getSIMDTypeForSize(simdSize);
18963     int        assignmentsCount     = simdSize / genTypeSize(baseType) - 1;
18964     int        remainingAssignments = assignmentsCount;
18965     GenTreePtr curStmt              = stmt->gtNext;
18966     GenTreePtr lastStmt             = stmt;
18967
18968     while (curStmt != nullptr && remainingAssignments > 0)
18969     {
18970         GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
18971         if (exp->OperGet() != GT_ASG)
18972         {
18973             break;
18974         }
18975         GenTreePtr curLHS = exp->gtGetOp1();
18976         GenTreePtr curRHS = exp->gtGetOp2();
18977
18978         if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
18979         {
18980             break;
18981         }
18982
18983         remainingAssignments--;
18984         prevLHS = curLHS;
18985         prevRHS = curRHS;
18986
18987         lastStmt = curStmt;
18988         curStmt  = curStmt->gtNext;
18989     }
18990
18991     if (remainingAssignments > 0)
18992     {
18993         // If there are assignments remaining, then the stmts are not assigning
18994         // to contiguous memory locations from the same vector, so this
18995         // optimization does not apply.
18996         return false;
18997     }
18998 #ifdef DEBUG
18999     if (verbose)
19000     {
19001         printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
19002         printf("From BB%02u, stmt", block->bbNum);
19003         printTreeID(stmt);
19004         printf(" to stmt");
19005         printTreeID(lastStmt);
19006         printf("\n");
19007     }
19008 #endif
19009
19010     for (int i = 0; i < assignmentsCount; i++)
19011     {
19012         fgRemoveStmt(block, stmt->gtNext);
19013     }
19014
19015     GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
19016     if (simdStructNode->OperIsLocal())
19017     {
19018         setLclRelatedToSIMDIntrinsic(simdStructNode);
19019     }
19020     GenTree* copyBlkAddr = copyBlkDst;
19021     if (copyBlkAddr->gtOper == GT_LEA)
19022     {
19023         copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
19024     }
19025     GenTreeLclVarCommon* localDst = nullptr;
19026     if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
19027     {
19028         setLclRelatedToSIMDIntrinsic(localDst);
19029     }
19030
19031     GenTree* simdStructAddr;
19032     if (simdStructNode->TypeGet() == TYP_BYREF)
19033     {
19034         assert(simdStructNode->OperIsLocal());
19035         assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
19036         simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
19037     }
19038     else
19039     {
19040         assert(varTypeIsSIMD(simdStructNode));
19041     }
19042
19043 #ifdef DEBUG
19044     if (verbose)
19045     {
19046         printf("\nBB%02u stmt", block->bbNum);
19047         printTreeID(stmt);
19048         printf("(before)\n");
19049         gtDispTree(stmt);
19050     }
19051 #endif
19052
19053     // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
19054     GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
19055     blkNode->gtType  = simdType;
19056     tree             = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
19057                           false, // not volatile
19058                           true); // copyBlock
19059
19060     stmt->gtStmt.gtStmtExpr = tree;
19061
19062     // Since we generated a new address node which didn't exist before,
19063     // we should run the address-exposed marking over it manually here.
19064     AXCStack stk(this);
19065     stk.Push(AXC_None);
19066     fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
19067
19068 #ifdef DEBUG
19069     if (verbose)
19070     {
19071         printf("\nReplaced BB%02u stmt", block->bbNum);
19072         printTreeID(stmt);
19073         printf("(after)\n");
19074         gtDispTree(stmt);
19075     }
19076 #endif
19077     return true;
19078 }
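
// Illustrative sketch, not part of the JIT: assuming a hypothetical Vector4 local 'v' and a
// float array 'a', the four statements
//
//     a[0] = v.X;  a[1] = v.Y;  a[2] = v.Z;  a[3] = v.W;
//
// read contiguous vector fields and write contiguous memory, so the routine above collapses
// them into a single 16-byte copyblk from 'v' to '&a[0]'.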
19079
19080 #endif // FEATURE_SIMD