// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                               Morph                                       XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "allocacheck.h" // for alloca

// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for overflow exception;
// returns the morphed tree.
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
{
    GenTree* result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }

    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}
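
// Illustrative sketch (not part of the original source): with a constant operand,
//
//     GT_CAST(long <- double, GT_CNS_DBL 3.5)
//
// is folded by gtFoldExprConst above; a non-constant operand instead falls
// through and the cast node itself is rewritten into a helper call of the form
//
//     GT_CALL(helper, oper)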

/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtFlags |= GTF_CALL;
    if (args)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }
    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

#ifdef DEBUG
    // Helper calls are never candidates.
    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->Reset();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // _TARGET_XXX_

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}
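
// Hedged example of the in-place rewrite above: for
//
//     tree = GT_CAST(long <- double, x)
//     fgMorphIntoHelperCall(tree, CORINFO_HELP_DBL2LNG, gtNewArgList(x))
//
// the very same node (and its value number) now reads as
// GT_CALL(CT_HELPER CORINFO_HELP_DBL2LNG, x) and is then pushed through
// fgMorphArgs like any other call.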

/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */

bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else  // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}

/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();

    var_types srcType = genActualType(oper->TypeGet());
    unsigned  srcSize;

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);

    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types; everybody else can get straight there,
        // except for when using helpers.
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
            )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }

        // Do we need to do it in two steps: R -> I, then I -> smallType?
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
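
// Sketch of the two-step split above (illustrative only): on ARM64/AMD64
//
//     GT_CAST(byte <- double, d)
//
// becomes
//
//     GT_CAST(byte <- int, GT_CAST(int <- double, d))
//
// so the backend only sees float->int conversions of at least TYP_INT width;
// the final narrowing is an ordinary integer cast.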

        /* Note that if we need to use a helper call then we cannot morph oper */
        if (!tree->gtOverflow())
        {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
            goto OPTIMIZECAST;
#else
            switch (dstType)
            {
                case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
#ifdef LEGACY_BACKEND
                    // the RyuJIT backend does not use the x87 FPU and therefore
                    // does not support folding the cast conv.i4(round.d(d))
                    if ((oper->gtOper == GT_INTRINSIC) &&
                        (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                    {
                        /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                        oper->gtType = dstType;
                        return fgMorphTree(oper);
                    }
                    // if SSE2 is not enabled, we need the helper
                    else
#endif // LEGACY_BACKEND
                        if (!opts.compCanUseSSE2)
                    {
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                    }
                    else
#endif // _TARGET_X86_
                    {
                        goto OPTIMIZECAST;
                    }
                case TYP_UINT:
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                    goto OPTIMIZECAST;
#else  // _TARGET_ARM_
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_
                case TYP_LONG:
#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                    goto OPTIMIZECAST;
#else  //!_TARGET_AMD64_
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_
                case TYP_ULONG:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                default:
                    break;
            }
#endif // _TARGET_ARM64_
        }
        else
        {
            switch (dstType)
            {
                case TYP_INT:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                case TYP_UINT:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                case TYP_LONG:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                case TYP_ULONG:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                default:
                    noway_assert(!"Unexpected dstType");
            }
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_
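
// Illustrative example (not in the original source): on x86/ARM32
//
//     GT_CAST(short <- long, l)
//
// becomes
//
//     GT_CAST(short <- int, GT_CAST(int <- long, l))
//
// because these 32-bit backends only handle [u]long -> [u]int casts directly.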
#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // converts long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            //  - change the dsttype to double
            //  - insert a cast from double to float
            //  - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_
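
// Sketch of the ARM expansion above (illustrative): GT_CAST(float <- long, l)
// first becomes GT_CAST(float <- double, GT_CAST(double <- long, l)); the inner
// cast is then morphed into a call to CORINFO_HELP_LNG2DBL on the recursive
// fgMorphTree pass, since only a long -> double helper exists.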

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversion as one-step operation
    //  a) Long -> R4/R8
    //  b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using above.
    // U4 -> R4/8 = U4 -> Long -> R4/8
    // U8 -> R4   = U8 -> R8 -> R4
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_
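
// Worked examples of the two-step rules above (illustrative only):
//
//     U4 -> R8:  GT_CAST(double <- uint, u)  => GT_CAST(double <- long, GT_CAST(long <- uint, u))
//     U8 -> R4:  GT_CAST(float <- ulong, u)  => GT_CAST(float <- double, GT_CAST(double <- ulong, u))
//
// matching the one-step conversions (Long -> R4/R8 and U8 -> R8) that codegen
// handles directly.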

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif
#endif // _TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information. We would like to just change the type to int,
        // however this gives the emitter fits because it believes the variable is a GC
        // variable at the beginning of the instruction group, but it is not turned non-GC
        // by the code generator. We fix this by copying the GC pointer to a non-GC pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTreePtr asg  = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

        return fgMorphTree(oper);
    }
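
// Shape of the resulting tree (a sketch, with V01 standing for the grabbed temp):
//
//     GT_COMMA(GT_ASG(V01 TYP_I_IMPL, objRef), GT_CAST(dstType <- TYP_I_IMPL, GT_LCL_VAR V01))
//
// The GC pointer is copied into the non-GC temp by the assignment, and the cast
// then operates on the temp, so the emitter never sees a live GC register
// silently become a non-GC value.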

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depends
            // upon the lower 32 bits of the operands.
            //
            if ((oper->OperGet() == GT_ADD) || (oper->OperGet() == GT_MUL) || (oper->OperGet() == GT_AND) ||
                (oper->OperGet() == GT_OR) || (oper->OperGet() == GT_XOR))
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }
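
// Worked example of the push-down above (illustrative only):
//
//     GT_CAST(int <- long, GT_ADD(long, a, b))
//
// becomes
//
//     GT_ADD(int, GT_CAST(int <- long, a), GT_CAST(int <- long, b))
//
// which is legal because the low 32 bits of ADD/MUL/AND/OR/XOR depend only on
// the low 32 bits of the operands. Likewise, the AND special case above lets an
// overflow-checking cast of (x & 0xFF) to uint drop its overflow check, since
// the constant bounds the value below 2^32.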

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same sized casts with the same signs or non-overflow cast we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                    else
                    {
                        goto REMOVE_CAST;
                    }
                }
            }

            if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Casts from signed->unsigned can never overflow while widening
                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                }
            }
            else
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE,
                //       and do not narrow if the oper is marked as a CSE either.
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }
    }

    switch (oper->gtOper)
    {
        /* If the operand is a constant, we'll fold it */
        case GT_CNS_INT:
        case GT_CNS_LNG:
        case GT_CNS_DBL:
        case GT_CNS_STR:
        {
            GenTreePtr oldTree = tree;

            tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

            // Did we get a comma throw as a result of gtFoldExprConst?
            if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
            {
                noway_assert(fgIsCommaThrow(tree));
                tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                fgMorphTreeDone(tree);
                return tree;
            }
            else if (tree->gtOper != GT_CAST)
            {
                return tree;
            }

            noway_assert(tree->gtCast.CastOp() == oper); // unchanged
        }
        break;

        case GT_CAST:
            /* Check for two consecutive casts into the same dstType */
            if (!tree->gtOverflow())
            {
                var_types dstType2 = oper->CastToType();
                if (dstType == dstType2)
                {
                    goto REMOVE_CAST;
                }
            }
            break;

        /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
           so that the code generator will know not to convert the result
           of the idiv to a regpair */
        case GT_MOD:
            if (dstType == TYP_INT)
            {
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            }
            break;

        case GT_UMOD:
            if (dstType == TYP_UINT)
            {
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            }
            break;

        case GT_COMMA:
            // Check for cast of a GT_COMMA with a throw overflow
            // Bug 110829: Since this optimization will bash the types,
            // neither oper nor commaOp2 can be CSE candidates.
            if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper cannot be a CSE candidate
            {
                GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 cannot be a CSE candidate
                {
                    // need type of oper to be same as tree
                    if (tree->gtType == TYP_LONG)
                    {
                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);
                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        oper->gtType = commaOp2->gtType = TYP_LONG;
                    }
                    else if (varTypeIsFloating(tree->gtType))
                    {
                        commaOp2->ChangeOperConst(GT_CNS_DBL);
                        commaOp2->gtDblCon.gtDconVal = 0.0;
                        // Change the types of oper and commaOp2:
                        // X87 promotes everything to TYP_DOUBLE,
                        // but others are a little more precise.
                        const var_types newTyp
#if FEATURE_X87_DOUBLES
                            = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                            = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                        oper->gtType = commaOp2->gtType = newTyp;
                    }
                    else
                    {
                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        oper->gtType = commaOp2->gtType = TYP_INT;
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }
                }

                /* Return the GT_COMMA node as the new tree */
                return oper;
            }
            break;

        default:
            break;
    } /* end switch (oper->gtOper) */

    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:
    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object.
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTreePtr       addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //   Note that each dereference is a GC pointer.

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
    return objRef;
}
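
// Resulting tree shape (a sketch built from the two EE offsets above):
//
//     GT_IND(TYP_REF,
//         GT_ADD(GT_IND(TYP_REF,
//                    GT_ADD(this, offsetOfTransparentProxyRP)),
//                offsetOfRealProxyServer))
//
// Both GT_IND nodes carry GTF_IND_INVARIANT, so CSE is free to hoist them.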

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph-altering modifications such as copy / constant propagation.
 */

unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}
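
// Hedged usage note: after re-morphing rewrites an argument tree in place,
// calling UpdateGT_LISTFlags(call->gtCallArgs) walks the GT_LIST chain
// bottom-up, recomputing each list node's GTF_ALL_EFFECT summary from its
// op1/op2 so the list flags once again match the (possibly simplified)
// argument trees hanging off of it.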

#ifdef DEBUG
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif

fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
    }
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;

    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that includes the gtCallObjp, if that exists, as first argument,
    // so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace them.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallArgs;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = nullptr;
    GenTreeArgList*   oldParent   = nullptr;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntryPtr oldArgTabEntry = nullptr;
        fgArgTabEntryPtr newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                //  to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
                    scanRegArgs          = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }

        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }

    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = nullptr;
            fgArgTabEntryPtr newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}

void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}

fgArgTabEntryPtr fgArgInfo::AddRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}
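
// Illustrative use (hypothetical values, not from the original source):
//
//     fgArgTabEntryPtr entry = call->fgArgInfo->AddRegArg(0, thisPtr, nullptr, REG_ARG_0, 1, 1);
//
// records argument number 0 as a pure register argument: numRegs == 1,
// numSlots == 0, and regNum != REG_STK.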

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned        argNum,
                                      GenTreePtr      node,
                                      GenTreePtr      parent,
                                      regNumber       regNum,
                                      unsigned        numRegs,
                                      unsigned        alignment,
                                      const bool      isStruct,
                                      const regNumber otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is right, so
    // this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned alignment FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag. At creation time the state is right, so
    // this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}
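
// Worked slot arithmetic for the method above (a sketch): with
// nextSlotNum == 1 and alignment == 2, roundUp first pads nextSlotNum to 2,
// the new entry is created with slotNum == 2 and regNum == REG_STK, and
// nextSlotNum then advances to 2 + numSlots for the next stack argument.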

void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

fgArgTabEntry* fgArgInfo::RemorphRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    unsigned         regArgInx      = 0;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        bool       isRegArg;
        GenTreePtr argx;
        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
    {
        regNum = curArgTabEntry->regNum;
    }

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->regNum == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);

    if (curArgTabEntry->node != node)
    {
        GenTreePtr argx     = nullptr;
        unsigned   regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
            if (regIndex == regArgInx)
            {
                break;
            }
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}

void fgArgInfo::RemorphStkArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->slotNum == nextSlotNum);
    assert(curArgTabEntry->numSlots == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);
    assert(parent->OperIsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTreePtr argx     = nullptr;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                {
                    break;
                }
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif // FEATURE_FIXED_OUT_ARGS

    nextSlotNum += numSlots;
}

void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    curArgTabEntry->isSplit  = true;
    curArgTabEntry->numRegs  = numRegs;
    curArgTabEntry->numSlots = numSlots;

    nextSlotNum += numSlots;
}

void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}

void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != nullptr);
        GenTreePtr argx = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists some assignment someplace
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        //
        if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
           has to be kept in the right order since we will move the call to the first position)

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address taken LclVars.
         */

        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area,
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif
            }
        }

#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS,
        // so we skip this for ARM32 until it is ported to use the RyuJIT backend.
        //
        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);

        if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes.
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;

                        case 11:
                        case 13:
                        case 14:
                        case 15:
                            // Spill any GT_OBJ multireg structs that are difficult to extract.
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
            }
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }

    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmark's globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception.
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTreePtr argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
                    // Thus we can not reorder the argument after any stack based argument.
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    //  check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns true if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                    }
                }
            }
        }
    }

    argsComplete = true;
}

void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */

    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    int      regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields,
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }

    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table
        //
        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}
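
// Worked example of the resulting order (illustrative): for an argTable holding
// { c (GT_CNS_INT), v (GT_LCL_VAR), t (needTmp), k (GTF_CALL), e (other) },
// the passes above leave
//
//     argTable == [ k, t, e, v, c ]
//
// i.e. calls first, then temps, then the remaining args by decreasing gtCostEx,
// with local vars and constants evaluated last.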

//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    tmpVarNum  - the var num which we clone into the newly created temp var.
//
// Return Value:
//    the newly created temp var tree.

GenTreePtr Compiler::fgMakeTmpArgNode(
    unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTreePtr arg      = gtNewLclvNode(tmpVarNum, type);
    GenTreePtr addrNode = nullptr;

    if (varTypeIsStruct(type))
    {

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

        arg->gtFlags |= GTF_DONT_CSE;

#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // Can this type be passed in a single register?
        // If so, the following call will return the corresponding primitive type.
        // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
        bool                 passedInRegisters = false;
        structPassingKind    kind;
        CORINFO_CLASS_HANDLE clsHnd         = varDsc->lvVerTypeInfo.GetClassHandle();
        var_types            structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);

        if (structBaseType != TYP_UNKNOWN)
        {
            passedInRegisters = true;
            type              = structBaseType;
        }
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING

        // If it is passed in registers, don't get the address of the var. Make it a
        // field instead. It will be loaded in registers with putarg_reg tree in lower.
        if (passedInRegisters)
        {
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
        else
        {
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
            // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
            var_types addrType = type;
#else
            var_types addrType = TYP_BYREF;
#endif
            arg      = gtNewOperNode(GT_ADDR, addrType, arg);
            addrNode = arg;

#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
            assert(varTypeIsStruct(type));
            if (lvaIsMultiregStruct(varDsc))
            {
                // ToDo-ARM64: Consider using:  arg->ChangeOper(GT_LCL_FLD);
                // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
                // We will create a GT_OBJ for the argument below.
                // This will be passed by value in two registers.
                assert(addrNode != nullptr);

                // Create an Obj of the temp to use it as a call argument.
                arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);

                // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
                // this is only to preserve former behavior (though some CSE'ing of struct
                // values can be pessimizing, so enabling this may require some additional tuning).
                arg->gtFlags |= GTF_DONT_CSE;
            }
#endif // _TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
        }

#else // not (_TARGET_AMD64_ or _TARGET_ARM64_)

        // on other targets, we pass the struct by value
        assert(varTypeIsStruct(type));

        addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);

        // Get a new Obj node temp to use it as a call argument.
        // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
        arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);

#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)

    } // (varTypeIsStruct(type))

    if (addrNode != nullptr)
    {
        assert(addrNode->gtOper == GT_ADDR);

        // This will prevent this LclVar from being optimized away
        lvaSetVarAddrExposed(tmpVarNum);

        // the child of a GT_ADDR is required to have this flag set
        addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
    }

    return arg;
}
2121 void fgArgInfo::EvalArgsToTemps()
2123 assert(argsSorted == true);
2125 unsigned regArgInx = 0;
2126 // Now go through the argument table and perform the necessary evaluation into temps
2127 GenTreeArgList* tmpRegArgNext = nullptr;
2128 for (unsigned curInx = 0; curInx < argCount; curInx++)
2130 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2132 GenTreePtr argx = curArgTabEntry->node;
2133 GenTreePtr setupArg = nullptr;
2136 #if !FEATURE_FIXED_OUT_ARGS
2137 // Only ever set for FEATURE_FIXED_OUT_ARGS
2138 assert(curArgTabEntry->needPlace == false);
2140 // On x86 and other archs that use push instructions to pass arguments:
2141 // Only the register arguments need to be replaced with placeholder nodes.
2142 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2144 if (curArgTabEntry->regNum == REG_STK)
2148 if (curArgTabEntry->needTmp)
2152 if (curArgTabEntry->isTmp == true)
2154 // Create a copy of the temp to go into the late argument list
2155 tmpVarNum = curArgTabEntry->tmpNum;
2156 defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2157 argTable[curInx]->structDesc.passedInRegisters));
2159 // mark the original node as a late argument
2160 argx->gtFlags |= GTF_LATE_ARG;
2164 // Create a temp assignment for the argument
2165 // Put the temp in the gtCallLateArgs list
2166 CLANG_FORMAT_COMMENT_ANCHOR;
2169 if (compiler->verbose)
2171 printf("Argument with 'side effect'...\n");
2172 compiler->gtDispTree(argx);
2176 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2177 noway_assert(argx->gtType != TYP_STRUCT);
2180 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2181 if (argx->gtOper == GT_MKREFANY)
2183 // For GT_MKREFANY, typically the actual struct copying does
2184 // not have any side-effects and can be delayed. So instead
2185 // of using a temp for the whole struct, we can just use a temp
2186 // for the operand that has a side-effect
2188 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2190 operand = argx->gtOp.gtOp1;
2192 // In the early argument evaluation, place an assignment to the temp
2193 // from the source operand of the mkrefany
2194 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2196 // Replace the operand for the mkrefany with the new temp.
2197 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2199 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2201 operand = argx->gtOp.gtOp2;
2203 // In the early argument evaluation, place an assignment to the temp
2204 // from the source operand of the mkrefany
2205 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2207 // Replace the operand for the mkrefany with the new temp.
2208 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2212 if (setupArg != nullptr)
2214 // Now keep the mkrefany for the late argument list
2217 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2218 defArg->gtFlags &= ~GTF_ALL_EFFECT;
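// For illustration, when op1 of the GT_MKREFANY is the only operand with side effects:
//
//     early list (gtCallArgs):     GT_ASG(V##, op1)               // setupArg evaluates the side effect
//     late list (gtCallLateArgs):  GT_MKREFANY(LCL_VAR V##, op2)  // defArg, now side-effect free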
2222 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2224 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2226 #ifndef LEGACY_BACKEND
2227 if (compiler->fgOrder == Compiler::FGOrderLinear)
2229 // We'll reference this temporary variable just once
2230 // when we perform the function call after
2231 // setting up this argument.
2232 varDsc->lvRefCnt = 1;
2234 #endif // !LEGACY_BACKEND
2236 var_types lclVarType = genActualType(argx->gtType);
2237 var_types scalarType = TYP_UNKNOWN;
2239 if (setupArg->OperIsCopyBlkOp())
2241 setupArg = compiler->fgMorphCopyBlock(setupArg);
2242 #ifdef _TARGET_ARM64_
2243 // This scalar LclVar widening step is only performed for ARM64
2245 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2246 unsigned structSize = varDsc->lvExactSize;
2248 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2249 #endif // _TARGET_ARM64_
2252 // scalarType can be set to a wider type for ARM64: (3 => 4) or (5,6,7 => 8)
2253 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2255 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2256 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2260 // Create a copy of the temp to go to the late argument list
2261 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
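// For illustration (hypothetical 6-byte struct on ARM64): lclVarType is TYP_STRUCT and
// getPrimitiveTypeForStruct widens 6 bytes to TYP_LONG (5,6,7 => 8), so the late arg
// becomes GT_LCL_FLD long V## [+0], reading the whole temp with one 8-byte load.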
2264 curArgTabEntry->isTmp = true;
2265 curArgTabEntry->tmpNum = tmpVarNum;
2268 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2269 // might have left holes in the used registers (see
2270 // fgAddSkippedRegsInPromotedStructArg).
2271 // Too bad we're not that smart for these intermediate temps...
2272 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2274 regNumber argReg = curArgTabEntry->regNum;
2275 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2276 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2278 argReg = genRegArgNext(argReg);
2279 allUsedRegs |= genRegMask(argReg);
2281 #ifdef LEGACY_BACKEND
2282 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2283 #endif // LEGACY_BACKEND
2285 #endif // _TARGET_ARM_
2288 /* mark the assignment as a late argument */
2289 setupArg->gtFlags |= GTF_LATE_ARG;
2292 if (compiler->verbose)
2294 printf("\n Evaluate to a temp:\n");
2295 compiler->gtDispTree(setupArg);
2300 else // curArgTabEntry->needTmp == false
2303 // Only register args are replaced with placeholder nodes
2304 // and the stack-based arguments are evaluated and pushed in order.
2306 // On Arm/x64 - When needTmp is false and needPlace is false,
2307 // the non-register arguments are evaluated and stored in order.
2308 // When needPlace is true we have a nested call that comes after
2309 // this argument so we have to replace it in the gtCallArgs list
2310 // (the initial argument evaluation list) with a placeholder.
2312 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2317 /* No temp needed - move the whole node to the gtCallLateArgs list */
2319 /* The argument is deferred and put in the late argument list */
2323 // Create a placeholder node to take this argument's place in gtCallArgs; the argument itself moves to gtCallLateArgs.
2325 // For a struct type we also need to record the class handle of the arg.
2326 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2328 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2330 // All structs are either passed (and retyped) as integral types, OR they
2331 // are passed by reference.
2332 noway_assert(argx->gtType != TYP_STRUCT);
2334 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2336 if (varTypeIsStruct(defArg))
2338 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2339 GenTreePtr defArgTmp = defArg;
2341 // The GT_OBJ may be a child of a GT_COMMA.
2342 while (defArgTmp->gtOper == GT_COMMA)
2344 defArgTmp = defArgTmp->gtOp.gtOp2;
2346 assert(varTypeIsStruct(defArgTmp));
2348 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2349 if (defArgTmp->gtOper == GT_MKREFANY)
2351 clsHnd = compiler->impGetRefAnyClass();
2353 else if (defArgTmp->gtOper == GT_OBJ)
2355 clsHnd = defArgTmp->AsObj()->gtClass;
2359 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2363 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
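// For example, a struct arg whose setup required a comma could look like
// GT_COMMA(<setup>, GT_OBJ(struct)) here; the walk above follows gtOp2 down to the
// GT_OBJ (or GT_MKREFANY) to recover the class handle for the placeholder.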
2365 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2367 /* mark the placeholder node as a late argument */
2368 setupArg->gtFlags |= GTF_LATE_ARG;
2371 if (compiler->verbose)
2373 if (curArgTabEntry->regNum == REG_STK)
2375 printf("Deferred stack argument :\n");
2379 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2382 compiler->gtDispTree(argx);
2383 printf("Replaced with placeholder node:\n");
2384 compiler->gtDispTree(setupArg);
2389 if (setupArg != nullptr)
2391 if (curArgTabEntry->parent)
2393 GenTreePtr parent = curArgTabEntry->parent;
2394 /* a normal argument from the list */
2395 noway_assert(parent->OperIsList());
2396 noway_assert(parent->gtOp.gtOp1 == argx);
2398 parent->gtOp.gtOp1 = setupArg;
2402 /* must be the gtCallObjp */
2403 noway_assert(callTree->gtCall.gtCallObjp == argx);
2405 callTree->gtCall.gtCallObjp = setupArg;
2409 /* deferred arg goes into the late argument list */
2411 if (tmpRegArgNext == nullptr)
2413 tmpRegArgNext = compiler->gtNewArgList(defArg);
2414 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2418 noway_assert(tmpRegArgNext->OperIsList());
2419 noway_assert(tmpRegArgNext->Current());
2420 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2421 tmpRegArgNext = tmpRegArgNext->Rest();
2424 curArgTabEntry->node = defArg;
2425 curArgTabEntry->lateArgInx = regArgInx++;
2429 if (compiler->verbose)
2431 printf("\nShuffled argument table: ");
2432 for (unsigned curInx = 0; curInx < argCount; curInx++)
2434 fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
2436 if (curArgTabEntry->regNum != REG_STK)
2438 printf("%s ", getRegName(curArgTabEntry->regNum));
2446 // Get the late arg for arg at position argIndex.
2447 // argIndex - 0-based position to get late arg for.
2448 // Caller must ensure this position has a late arg.
2449 GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
2451 for (unsigned j = 0; j < this->ArgCount(); j++)
2453 if (this->ArgTable()[j]->argNum == argIndex)
2455 return this->ArgTable()[j]->node;
2458 // Caller must ensure late arg exists.
2462 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2464 assert(!IsUninitialized(stkLvl));
2465 this->stkLevel = stkLvl;
2468 unsigned fgArgInfo::RetrieveStkLevel()
2470 assert(!IsUninitialized(stkLevel));
2474 // Return a conservative estimate of the stack size in bytes.
2475 // It will be used only on the intercepted-for-host code path to copy the arguments.
2476 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2480 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2486 if (numArgs > MAX_REG_ARG)
2488 numStkArgs = numArgs - MAX_REG_ARG;
2495 return numStkArgs * REGSIZE_BYTES;
2498 //------------------------------------------------------------------------------
2499 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count
2500 // otherwise insert a comma form temp
2503 // pOp - a pointer to the child node we will be replacing with the comma expression that
2504 // evaluates pOp to a temp and returns the result
2507 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2510 // The result tree MUST be added to the tree structure since the ref counts are
2511 // already incremented.
2513 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2515 GenTree* tree = *pOp;
2516 if (tree->IsLocal())
2518 auto result = gtClone(tree);
2519 if (lvaLocalVarRefCounted)
2521 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2527 GenTree* result = fgInsertCommaFormTemp(pOp);
2529 // At this point, *pOp is GT_COMMA(GT_ASG(V01, <old *pOp>), V01) and result = V01.
2530 // Therefore V01's ref count must be incremented 3 times: for the assignment's destination,
2531 // for the comma's result, and for 'result' itself, which the caller will add to the tree.
2532 if (lvaLocalVarRefCounted)
2534 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2535 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2536 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
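// An illustrative use from a caller's perspective (names are hypothetical):
//
//     GenTree* op1Use2 = fgMakeMultiUse(&tree->gtOp.gtOp1);
//     // tree->gtOp.gtOp1 may now be GT_COMMA(GT_ASG(V01, <old op1>), V01), and op1Use2
//     // is a fresh LCL_VAR V01 that the caller must link into the tree, since its
//     // ref count has already been counted.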
2543 //------------------------------------------------------------------------------
2544 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2545 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2548 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2549 // evaluates ppTree to a temp and returns the result
2551 // structType - value type handle if the temp created is of TYP_STRUCT.
2554 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2557 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2559 GenTree* subTree = *ppTree;
2561 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2563 if (varTypeIsStruct(subTree))
2565 assert(structType != nullptr);
2566 lvaSetStruct(lclNum, structType, false);
2569 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2570 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() (not
2571 // asg->TypeGet()) when setting the type of the lcl var nodes we create here.
2572 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2574 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2576 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2580 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
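// For illustration, if *ppTree is initially a TYP_INT call:
//
//     before:  *ppTree = CALL int
//     after:   *ppTree = COMMA int
//                          +--- ASG(LCL_VAR V## int, CALL int)
//                          \--- LCL_VAR V## int
//
// and the node returned to the caller is a separate, as-yet-unused LCL_VAR V## int.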
2583 //------------------------------------------------------------------------
2584 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2587 // callNode - the call for which we are doing the argument morphing
2590 // Like most morph methods, this method returns the morphed node,
2591 // though in this case there are currently no scenarios where the
2592 // node itself is re-created.
2595 // This method is even less idempotent than most morph methods.
2596 // That is, it makes changes that should not be redone. It uses the existence
2597 // of gtCallLateArgs (the late arguments list) to determine if it has
2598 // already done that work.
2600 // The first time it is called (i.e. during global morphing), this method
2601 // computes the "late arguments". This is when it determines which arguments
2602 // need to be evaluated to temps prior to the main argument setup, and which
2603 // can be directly evaluated into the argument location. It also creates a
2604 // second argument list (gtCallLateArgs) that does the final placement of the
2605 // arguments, e.g. into registers or onto the stack.
2607 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
2608 // evaluation of the arguments that might have side-effects, such as embedded
2609 // assignments, calls or possible throws. In these cases, it and earlier
2610 // arguments must be evaluated to temps.
2612 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2613 // if we have any nested calls, we need to defer the copying of the argument
2614 // into the fixed argument area until after the call. If the argument did not
2615 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2616 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
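// For illustration (a hypothetical example, not from this code): consider Foo(x, Bar())
// on a FEATURE_FIXED_OUT_ARGS target, where 'x' would be a stack argument. Because the
// nested call Bar() comes after 'x', roughly the following lists are built (V01 is a
// temp for the nested call's result):
//
//     gtCallArgs:     PLACEHOLDER (for x), GT_ASG(V01, CALL Bar)   // in-order evaluation
//     gtCallLateArgs: x, LCL_VAR V01                               // final placement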
2619 #pragma warning(push)
2620 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2622 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2627 unsigned flagsSummary = 0;
2628 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2630 unsigned argIndex = 0;
2632 unsigned intArgRegNum = 0;
2633 unsigned fltArgRegNum = 0;
2636 regMaskTP argSkippedRegMask = RBM_NONE;
2637 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2638 #endif // _TARGET_ARM_
2640 #if defined(_TARGET_X86_)
2641 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2643 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2646 unsigned argSlots = 0;
2647 unsigned nonRegPassedStructSlots = 0;
2648 bool reMorphing = call->AreArgsComplete();
2649 bool callHasRetBuffArg = call->HasRetBufArg();
2651 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2652 bool callIsVararg = call->IsVarargs();
2655 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2656 // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2657 // to make sure to call EvalArgsToTemps. fgMakeOutgoingStructArgCopy just marks the argument
2658 // as needing a temp variable, and EvalArgsToTemps actually creates the temp variable node.
2659 bool hasStackArgCopy = false;
2662 #ifndef LEGACY_BACKEND
2663 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2664 // following the normal calling convention or in the normal argument registers. We either mark existing
2665 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2666 // non-standard arguments into the argument list, below.
2667 class NonStandardArgs
2669 struct NonStandardArg
2671 regNumber reg; // The register to be assigned to this non-standard argument.
2672 GenTree* node; // The tree node representing this non-standard argument.
2673 // Note that this must be updated if the tree node changes due to morphing!
2676 ArrayStack<NonStandardArg> args;
2679 NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2683 //-----------------------------------------------------------------------------
2684 // Add: add a non-standard argument to the table of non-standard arguments
2687 // node - a GenTree node that has a non-standard argument.
2688 // reg - the register to assign to this node.
2693 void Add(GenTree* node, regNumber reg)
2695 NonStandardArg nsa = {reg, node};
2699 //-----------------------------------------------------------------------------
2700 // Find: Look for a GenTree* in the set of non-standard args.
2703 // node - a GenTree node to look for
2706 // The index of the non-standard argument (a non-negative, unique, stable number).
2707 // If the node is not a non-standard argument, return -1.
2709 int Find(GenTree* node)
2711 for (int i = 0; i < args.Height(); i++)
2713 if (node == args.Index(i).node)
2721 //-----------------------------------------------------------------------------
2722 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2723 // set the register to use for the node.
2726 // node - a GenTree node to look for
2727 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2728 // 'node' is found in the non-standard argument set.
2731 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the register to use.
2733 // 'false' otherwise (in this case, *pReg is unmodified).
2735 bool FindReg(GenTree* node, regNumber* pReg)
2737 for (int i = 0; i < args.Height(); i++)
2739 NonStandardArg& nsa = args.IndexRef(i);
2740 if (node == nsa.node)
2749 //-----------------------------------------------------------------------------
2750 // Replace: Replace the non-standard argument node at a given index. This is done when
2751 // the original node was replaced via morphing, but we need to continue to assign a
2752 // particular non-standard arg to it.
2755 // index - the index of the non-standard arg. It must exist.
2756 // node - the new GenTree node.
2761 void Replace(int index, GenTree* node)
2763 args.IndexRef(index).node = node;
2766 } nonStandardArgs(this);
2767 #endif // !LEGACY_BACKEND
2769 // Count of args. On first morph, this is counted before we've filled in the arg table.
2770 // On remorph, we grab it from the arg table.
2771 unsigned numArgs = 0;
2773 // Process the late arguments (which were determined by a previous call to this method).
2774 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2775 // may need to refer to it.
2778 // We need to reMorph the gtCallLateArgs early since that is what triggers
2779 // the expression folding and we need to have the final folded gtCallLateArgs
2780 // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2781 // with the folded tree that represents the final optimized argument nodes.
2783 // However if a range-check needs to be generated for any of these late
2784 // arguments we also need to "know" what the stack depth will be when we generate
2785 // code to branch to the throw range check failure block as that is part of the
2786 // GC information contract for that block.
2788 // Since the late arguments are evaluated last we have pushed all of the
2789 // other arguments on the stack before we evaluate these late arguments,
2790 // so we record the stack depth on the first morph call when reMorphing
2791 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2793 if (call->gtCallLateArgs != nullptr)
2795 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2796 fgPtrArgCntCur += callStkLevel;
2797 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2798 flagsSummary |= call->gtCallLateArgs->gtFlags;
2799 fgPtrArgCntCur -= callStkLevel;
2801 assert(call->fgArgInfo != nullptr);
2802 call->fgArgInfo->RemorphReset();
2804 numArgs = call->fgArgInfo->ArgCount();
2808 // First we need to count the args
2809 if (call->gtCallObjp)
2813 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2818 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2819 // argument registers that don't follow the normal progression of argument registers in the calling
2820 // convention (such as for the ARM64 fixed return buffer argument x8).
2822 // *********** NOTE *************
2823 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2824 // in the implementation of fast tail call.
2825 // *********** END NOTE *********
2826 CLANG_FORMAT_COMMENT_ANCHOR;
2828 #if !defined(LEGACY_BACKEND)
2829 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2830 // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
2831 // Set the argument registers correctly here.
2832 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2834 GenTreeArgList* args = call->gtCallArgs;
2835 GenTree* arg1 = args->Current();
2836 assert(arg1 != nullptr);
2837 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2839 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2840 #if defined(_TARGET_X86_)
2841 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2842 // hi part to be in EDX. This sets the argument registers up correctly.
2843 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2844 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2846 GenTreeArgList* args = call->gtCallArgs;
2847 GenTree* arg1 = args->Current();
2848 assert(arg1 != nullptr);
2849 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2851 args = args->Rest();
2852 GenTree* arg2 = args->Current();
2853 assert(arg2 != nullptr);
2854 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2856 #else // !defined(_TARGET_X86_)
2857 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2858 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2859 // convention for x86/SSE.
2861 // If we have a Fixed Return Buffer argument register then we set up a non-standard argument for it.
2863 if (hasFixedRetBuffReg() && call->HasRetBufArg())
2865 args = call->gtCallArgs;
2866 assert(args != nullptr);
2867 assert(args->OperIsList());
2869 argx = call->gtCallArgs->Current();
2871 // We don't increment numArgs here, since we already counted this argument above.
2873 nonStandardArgs.Add(argx, theFixedRetBuffReg());
2876 // We are allowed to have a Fixed Return Buffer argument combined
2877 // with any of the remaining non-standard arguments
2879 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2881 assert(!call->gtCallCookie);
2882 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2883 // It will be used only on the intercepted-for-host code path to copy the arguments.
2885 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2886 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2889 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2891 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
2893 // Indirect VSD stubs need the base of the indirection cell to be
2894 // passed in addition. At this point that is the value in gtCallAddr.
2895 // The actual call target will be derived from gtCallAddr in call lowering.
2898 // If it is a VSD call getting dispatched via tail call helper,
2899 // fgMorphTailCall() would materialize stub addr as an additional
2900 // parameter added to the original arg list and hence no need to
2901 // add as a non-standard arg.
2903 GenTree* arg = call->gtCallAddr;
2904 if (arg->OperIsLocal())
2906 arg = gtClone(arg, true);
2910 call->gtCallAddr = fgInsertCommaFormTemp(&arg);
2911 call->gtFlags |= GTF_ASG;
2913 noway_assert(arg != nullptr);
2915 // And push the stub address onto the list of arguments
2916 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2919 nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
2922 #endif // defined(_TARGET_X86_)
2923 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2925 assert(!call->IsUnmanaged());
2927 GenTree* arg = call->gtCallCookie;
2928 noway_assert(arg != nullptr);
2929 call->gtCallCookie = nullptr;
2931 #if defined(_TARGET_X86_)
2932 // x86 passes the cookie on the stack as the final argument to the call.
2933 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2934 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2937 *insertionPoint = gtNewListNode(arg, nullptr);
2938 #else // !defined(_TARGET_X86_)
2939 // All other architectures pass the cookie in a register.
2940 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2941 #endif // defined(_TARGET_X86_)
2943 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2946 // put destination into R10/EAX
2947 arg = gtClone(call->gtCallAddr, true);
2948 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2951 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2953 // finally change this call to a helper call
2954 call->gtCallType = CT_HELPER;
2955 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2957 #endif // !defined(LEGACY_BACKEND)
2959 // Allocate the fgArgInfo for the call node;
2961 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2964 if (varTypeIsStruct(call))
2966 fgFixupStructReturn(call);
2969 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
2970 * During the first call to fgMorphArgs we also record the
2971 * information about late arguments we have in 'fgArgInfo'.
2972 * This information is used later to construct the gtCallLateArgs */
2974 /* Process the 'this' argument value, if present */
2976 argx = call->gtCallObjp;
2980 argx = fgMorphTree(argx);
2981 call->gtCallObjp = argx;
2982 flagsSummary |= argx->gtFlags;
2984 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
2986 assert(argIndex == 0);
2988 /* We must fill in or update the argInfo table */
2992 /* this is a register argument - possibly update it in the table */
2993 call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
2997 assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
2999 /* this is a register argument - put it in the table */
3000 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3001 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3003 false, REG_STK, nullptr
3004 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3007 // this can't be a struct.
3008 assert(argx->gtType != TYP_STRUCT);
3010 /* Increment the argument register count and argument index */
3011 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3014 #ifdef WINDOWS_AMD64_ABI
3015 // Whenever we pass an integer register argument
3016 // we skip the corresponding floating point register argument
3018 #endif // WINDOWS_AMD64_ABI
3022 noway_assert(!"the 'this' pointer can not be a floating point type");
3029 // Compute the maximum number of arguments that can be passed in registers.
3030 // For X86 we handle the varargs and unmanaged calling conventions
3032 if (call->gtFlags & GTF_CALL_POP_ARGS)
3034 noway_assert(intArgRegNum < MAX_REG_ARG);
3035 // No more register arguments for varargs (CALL_POP_ARGS)
3036 maxRegArgs = intArgRegNum;
3038 // Add in the ret buff arg
3039 if (callHasRetBuffArg)
3043 if (call->IsUnmanaged())
3045 noway_assert(intArgRegNum == 0);
3047 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3049 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3050 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3051 call->gtCallArgs->gtOp.gtOp1->gtOper ==
3052 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3060 // Add in the ret buff arg
3061 if (callHasRetBuffArg)
3064 #endif // _TARGET_X86_
3066 /* Morph the user arguments */
3067 CLANG_FORMAT_COMMENT_ANCHOR;
3069 #if defined(_TARGET_ARM_)
3071 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3072 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3073 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3074 // appear in a lower-numbered register than floating point argument N. That is, argument
3075 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3076 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3077 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3078 // only back-fill single registers, since there is no way with these types to create
3079 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3080 // available (with 16 FP argument registers). Consider this code:
3082 // struct HFA { float x, y, z; }; // a three element HFA
3083 // void bar(float a1, // passed in f0
3084 // double a2, // passed in f2/f3; skip f1 for alignment
3085 // HFA a3, // passed in f4/f5/f6
3086 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3087 // HFA a5, // passed in f10/f11/f12
3088 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
3090 // float a7, // passed in f1 (back-filled)
3091 // float a8, // passed in f7 (back-filled)
3092 // float a9, // passed in f13 (back-filled)
3093 // float a10) // passed on the stack in [OutArg+0]
3095 // Note that if we ever support FP types with larger alignment requirements, then there could
3096 // be more than single register back-fills.
3098 // Once we assign a floating-point argument to the stack, all subsequent floating-point arguments must go on the stack.
3099 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3100 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3101 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3102 // and prevent any additional floating-point arguments from going in registers.
3104 bool anyFloatStackArgs = false;
3106 #endif // _TARGET_ARM_
3108 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3109 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3110 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3112 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3113 bool hasMultiregStructArgs = false;
3114 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3116 GenTreePtr* parentArgx = &args->gtOp.gtOp1;
3118 #if FEATURE_MULTIREG_ARGS
3119 if (!hasStructArgument)
3121 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3123 #endif // FEATURE_MULTIREG_ARGS
3125 #ifndef LEGACY_BACKEND
3126 // Record the index of any nonStandard arg that we may be processing here, as we are
3127 // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3128 GenTreePtr orig_argx = *parentArgx;
3129 int nonStandard_index = nonStandardArgs.Find(orig_argx);
3130 #endif // !LEGACY_BACKEND
3132 argx = fgMorphTree(*parentArgx);
3134 flagsSummary |= argx->gtFlags;
3136 assert(args->OperIsList());
3137 assert(argx == args->Current());
3139 #ifndef LEGACY_BACKEND
3140 if ((nonStandard_index != -1) && (argx != orig_argx))
3142 // We need to update the node field for this nonStandard arg here
3143 // as it was changed by the call to fgMorphTree
3144 nonStandardArgs.Replace(nonStandard_index, argx);
3146 #endif // !LEGACY_BACKEND
3148 /* Change the node to TYP_I_IMPL so we don't report GC info
3149 * NOTE: We deferred this from the importer because of the inliner */
3151 if (argx->IsVarAddr())
3153 argx->gtType = TYP_I_IMPL;
3156 bool passUsingFloatRegs;
3157 unsigned argAlign = 1;
3158 // Setup any HFA information about 'argx'
3159 var_types hfaType = GetHfaType(argx);
3160 bool isHfaArg = varTypeIsFloating(hfaType);
3161 unsigned hfaSlots = 0;
3165 hfaSlots = GetHfaCount(argx);
3167 // If we have an HFA struct, it's possible we transition from a method that originally
3168 // only had integer types to now start having FP types. We have to communicate this
3169 // through this flag since LSRA later on will use this flag to determine whether
3170 // or not to track the FP register set.
3172 compFloatingPointUsed = true;
3176 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3177 bool isRegArg = false;
3178 bool isNonStandard = false;
3179 regNumber nonStdRegNum = REG_NA;
3181 fgArgTabEntryPtr argEntry = nullptr;
3185 argEntry = gtArgEntryByArgNum(call, argIndex);
3190 bool passUsingIntRegs;
3193 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3194 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3198 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3199 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3202 GenTreePtr curArg = argx;
3203 // If late args have already been computed, use the node in the argument table.
3204 if (argEntry != NULL && argEntry->isTmp)
3206 curArg = argEntry->node;
3209 // We don't use the "size" return value from InferOpSizeAlign().
3210 codeGen->InferOpSizeAlign(curArg, &argAlign);
3212 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3213 argAlign /= TARGET_POINTER_SIZE;
3217 if (passUsingFloatRegs)
3219 if (fltArgRegNum % 2 == 1)
3221 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3225 else if (passUsingIntRegs)
3227 if (intArgRegNum % 2 == 1)
3229 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3234 if (argSlots % 2 == 1)
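// For example (illustrative): for a signature (int a, long b), 'a' is passed in r0;
// 'b' requires 8-byte alignment, so r1 is skipped (recorded in argSkippedRegMask)
// and 'b' is passed in the r2/r3 pair.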
3240 #elif defined(_TARGET_ARM64_)
3244 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3248 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3251 #elif defined(_TARGET_AMD64_)
3254 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3258 passUsingFloatRegs = varTypeIsFloating(argx);
3260 #elif defined(_TARGET_X86_)
3262 passUsingFloatRegs = false;
3265 #error Unsupported or unset target architecture
3268 bool isBackFilled = false;
3269 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3270 var_types structBaseType = TYP_STRUCT;
3271 unsigned structSize = 0;
3273 bool isStructArg = varTypeIsStruct(argx);
3277 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3278 // Get the struct description for the already completed struct argument.
3279 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
3280 assert(fgEntryPtr != nullptr);
3282 // As described in a few other places, this can happen when the argx was morphed
3283 // into an arg setup node - COPYBLK. The COPYBLK always has a type of TYP_VOID.
3284 // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3285 // was a struct and the struct classification.
3286 isStructArg = fgEntryPtr->isStruct;
3290 structDesc.CopyFrom(fgEntryPtr->structDesc);
3292 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3294 assert(argEntry != nullptr);
3295 if (argEntry->IsBackFilled())
3298 size = argEntry->numRegs;
3299 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3301 isBackFilled = true;
3303 else if (argEntry->regNum == REG_STK)
3306 assert(argEntry->numRegs == 0);
3307 size = argEntry->numSlots;
3312 assert(argEntry->numRegs > 0);
3313 size = argEntry->numRegs + argEntry->numSlots;
3316 // This size has now been computed
3322 // Figure out the size of the argument. This is either in number of registers, or number of
3323 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and the stack.
3326 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3328 #if defined(_TARGET_AMD64_)
3329 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3332 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3336 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3337 TARGET_POINTER_SIZE)) /
3338 TARGET_POINTER_SIZE;
3339 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3342 hasMultiregStructArgs = true;
3345 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3346 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3347 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3348 #elif defined(_TARGET_ARM64_)
3353 size = GetHfaCount(argx);
3354 // HFA structs are passed by value in multiple registers
3355 hasMultiregStructArgs = true;
3359 // Structs are either passed in 1 or 2 (64-bit) slots
3360 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3361 TARGET_POINTER_SIZE)) /
3362 TARGET_POINTER_SIZE;
3366 // Structs that are the size of 2 pointers are passed by value in multiple registers
3367 hasMultiregStructArgs = true;
3371 size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3372 // reference (to a copy)
3375 // Note that there are some additional rules for multireg structs
3376 // (i.e. they cannot be split between registers and the stack).
3380 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3382 #elif defined(_TARGET_ARM_)
3385 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3386 TARGET_POINTER_SIZE)) /
3387 TARGET_POINTER_SIZE;
3392 size = genTypeStSz(argx->gtType);
3394 #elif defined(_TARGET_X86_)
3395 size = genTypeStSz(argx->gtType);
3397 #error Unsupported or unset target architecture
3398 #endif // _TARGET_XXX_
3403 size = GetHfaCount(argx);
3405 #endif // _TARGET_ARM_
3408 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3409 if (argx->gtOper == GT_MKREFANY)
3411 if (varTypeIsStruct(argx))
3415 #ifdef _TARGET_AMD64_
3416 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3417 if (varTypeIsStruct(argx))
3419 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3420 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3421 size = roundupSize / TARGET_POINTER_SIZE;
3422 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3425 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3433 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3435 GenTreePtr argObj = argx;
3436 GenTreePtr* parentOfArgObj = parentArgx;
3438 assert(args->OperIsList());
3439 assert(argx == args->Current());
3441 /* The GT_OBJ may be a child of a GT_COMMA */
3442 while (argObj->gtOper == GT_COMMA)
3444 parentOfArgObj = &argObj->gtOp.gtOp2;
3445 argObj = argObj->gtOp.gtOp2;
3448 // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3449 if (argObj->gtOper != GT_OBJ)
3451 BADCODE("illegal argument tree in fgMorphArgs");
3454 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3455 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3456 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3457 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3459 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3460 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3461 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3463 structSize = originalSize;
3465 structPassingKind howToPassStruct;
3466 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3468 #ifdef _TARGET_ARM64_
3469 if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3470 !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
3472 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3474 // On ARM64, for structs that are 3,5,6 or 7 bytes in size
3475 // we can read 4 or 8 bytes from the LclVar when passing this arg
3476 originalSize = genTypeSize(structBaseType);
3479 #endif // _TARGET_ARM64_
3481 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3482 // On System V OS-es a struct is never passed by reference.
3483 // It is either passed by value on the stack or in registers.
3484 bool passStructInRegisters = false;
3485 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3486 bool passStructByRef = false;
3487 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3489 // The following if-then-else needs to be carefully refactored.
3490 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3491 // into a GT_IND of the appropriate size.
3492 // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
3493 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3494 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3495 // It also can't do this if we have an HFA arg,
3496 // unless we have a 1-elem HFA in which case we want to do the optimization.
3497 CLANG_FORMAT_COMMENT_ANCHOR;
3499 #ifndef _TARGET_X86_
3500 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3501 // Check for a struct argument with size 1, 2, 4 or 8 bytes,
3502 // as we can optimize these by turning them into a GT_IND of the correct type
3504 // Check for cases that we cannot optimize:
3506 if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
3507 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3508 (isHfaArg && (hfaSlots != 1))) // it is an HFA with more than one element
3509 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3511 // Normalize 'size' to the number of pointer sized items
3512 // 'size' is the number of register slots that we will use to pass the argument
3513 size = roundupSize / TARGET_POINTER_SIZE;
3514 #if defined(_TARGET_AMD64_)
3515 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3516 size = 1; // This must be copied to a temp and passed by address
3517 passStructByRef = true;
3518 copyBlkClass = objClass;
3519 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3520 if (!structDesc.passedInRegisters)
3522 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3523 bool needCpyBlk = false;
3524 if (lclVar != nullptr)
3526 // If the struct is promoted to registers, it has to be materialized
3527 // on the stack. We may want to support promoted structures in
3528 // the codegen of putarg_stk instead of creating a copy here.
3529 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3530 needCpyBlk = varDsc->lvPromoted;
3534 // If simd16 comes from Vector<T>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3535 // sets structDesc.passedInRegisters to false.
3537 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3538 // by rationalizer. For now we will let SIMD struct arg to be copied to
3539 // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
3542 // | \--* addr byref
3543 // | | /--* lclVar simd16 V05 loc4
3544 // | \--* simd simd16 int -
3545 // | \--* lclVar simd16 V08 tmp1
3547 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3548 // so that we don't need to generate a copy here.
3549 GenTree* addr = argObj->gtOp.gtOp1;
3550 if (addr->OperGet() == GT_ADDR)
3552 GenTree* addrChild = addr->gtOp.gtOp1;
3553 if (addrChild->OperGet() == GT_SIMD)
3559 passStructInRegisters = false;
3562 copyBlkClass = objClass;
3566 copyBlkClass = NO_CLASS_HANDLE;
3571 // The objClass is used to materialize the struct on the stack.
3572 // For SystemV, the code below generates copies for struct arguments classified
3573 // as register arguments.
3574 // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3575 // can be passed in registers or can be copied directly to the outgoing area.
3576 passStructInRegisters = true;
3577 copyBlkClass = objClass;
3580 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3581 #elif defined(_TARGET_ARM64_)
3582 if ((size > 2) && !isHfaArg)
3584 size = 1; // This must be copied to a temp and passed by address
3585 passStructByRef = true;
3586 copyBlkClass = objClass;
3591 // If we're passing a promoted struct local var,
3592 // we may need to skip some registers due to alignment; record those.
3593 GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3596 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3597 if (varDsc->lvPromoted)
3599 assert(argObj->OperGet() == GT_OBJ);
3600 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3602 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3606 #endif // _TARGET_ARM_
3608 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3609 // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3610 // generated for structs of size 1, 2, 4, or 8.
3611 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3613 // change our GT_OBJ into a GT_IND of the correct type.
3614 // We've already ensured above that size is a power of 2, and less than or equal to pointer size.
3617 assert(howToPassStruct == SPK_PrimitiveType);
3619 // ToDo: remove this block as getArgTypeForStruct properly handles turning one-element HFAs into their base type
3623 // If we reach here with an HFA arg it has to be a one element HFA
3624 assert(hfaSlots == 1);
3625 structBaseType = hfaType; // change the indirection type to a floating point type
3628 noway_assert(structBaseType != TYP_UNKNOWN);
3630 argObj->ChangeOper(GT_IND);
3632 // Now see if we can fold *(&X) into X
3633 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3635 GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3637 // Keep the DONT_CSE flag in sync
3638 // (as the addr always marks it for its op1)
3639 temp->gtFlags &= ~GTF_DONT_CSE;
3640 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3641 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3642 DEBUG_DESTROY_NODE(argObj); // GT_IND
3645 *parentOfArgObj = temp;
3647 // If the OBJ had been the top level node, we've now changed argx.
3648 if (parentOfArgObj == parentArgx)
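// For example: OBJ(ADDR(LCL_VAR V02)) was changed to IND(ADDR(LCL_VAR V02)) above,
// and folds here to just LCL_VAR V02 (retyped below if necessary).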
3653 if (argObj->gtOper == GT_LCL_VAR)
3655 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3656 LclVarDsc* varDsc = &lvaTable[lclNum];
3658 if (varDsc->lvPromoted)
3660 if (varDsc->lvFieldCnt == 1)
3662 // get the first and only promoted field
3663 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3664 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3666 // we will use the first and only promoted field
3667 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3669 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3670 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3672 // Just use the existing field's type
3673 argObj->gtType = fieldVarDsc->TypeGet();
3677 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle it to a new type
3679 argObj->ChangeOper(GT_LCL_FLD);
3680 argObj->gtType = structBaseType;
3682 assert(varTypeCanReg(argObj->TypeGet()));
3683 assert(copyBlkClass == NO_CLASS_HANDLE);
3687 // use GT_LCL_FLD to swizzle the single field struct to a new type
3688 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3689 argObj->ChangeOper(GT_LCL_FLD);
3690 argObj->gtType = structBaseType;
3695 // The struct fits into a single register, but it has been promoted into its
3696 // constituent fields, and so we have to re-assemble it
3697 copyBlkClass = objClass;
3699 // Alignment constraints may cause us not to use (to "skip") some argument
3700 // registers. Add those, if any, to the skipped (int) arg reg mask.
3701 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3702 #endif // _TARGET_ARM_
3705 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3707 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3708 argObj->ChangeOper(GT_LCL_FLD);
3709 argObj->gtType = structBaseType;
3714 // Not a GT_LCL_VAR, so we can just change the type on the node
3715 argObj->gtType = structBaseType;
3717 assert(varTypeCanReg(argObj->TypeGet()) ||
3718 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3722 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3724 #endif // not _TARGET_X86_
3725 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3726 if ((structBaseType == TYP_STRUCT) &&
3727 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3728 !passStructInRegisters
3729 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3731 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3734 if (isHfaArg && passUsingFloatRegs)
3736 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3740 // If the valuetype size is not a multiple of sizeof(void*),
3741 // we must copyblk to a temp before doing the obj to avoid
3742 // the obj reading memory past the end of the valuetype
3743 CLANG_FORMAT_COMMENT_ANCHOR;
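// For example (hypothetical): a 12-byte struct rounds up to 16 bytes (two slots);
// reading 16 bytes directly from the original location could touch memory past the
// end of the struct, so we copy it into a padded temp first (unless one of the
// special cases below lets us omit the copy).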
3745 if (roundupSize > originalSize)
3747 copyBlkClass = objClass;
3749 // There are a few special cases where we can omit using a CopyBlk
3750 // where we normally would need to use one.
3752 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3754 copyBlkClass = NO_CLASS_HANDLE;
3758 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3763 #ifndef _TARGET_X86_
3764 // TODO-Arm: Does this apply for _TARGET_ARM_, where structs passed by value can be split between
3765 // registers and stack?
3768 hasMultiregStructArgs = true;
3770 #endif // !_TARGET_X86_
3773 // The 'size' value must have been set by now (the original value of zero is an invalid value).
3777 // Figure out if the argument will be passed in a register.
3780 if (isRegParamType(genActualType(argx->TypeGet()))
3781 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3782 && (!isStructArg || structDesc.passedInRegisters)
3787 if (passUsingFloatRegs)
3789 // First, see if it can be back-filled
3790 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3791 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3792 (size == 1)) // The size to back-fill is one float register
3794 // Back-fill the register.
3795 isBackFilled = true;
3796 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3797 fltArgSkippedRegMask &=
3798 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3799 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3800 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3803 // Does the entire float, double, or HFA fit in the FP arg registers?
3804 // Check if the last register needed is still in the argument register range.
3805 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3809 anyFloatStackArgs = true;
3814 isRegArg = intArgRegNum < MAX_REG_ARG;
3816 #elif defined(_TARGET_ARM64_)
3817 if (passUsingFloatRegs)
3819 // Check if the last register needed is still in the fp argument register range.
3820 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3822 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3823 if (isHfaArg && !isRegArg)
3825 // recompute the 'size' so that it represents the number of stack slots rather than the number of registers
3828 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3829 size = roundupSize / TARGET_POINTER_SIZE;
3831 // We also must update fltArgRegNum so that we no longer try to
3832 // allocate any new floating point registers for args
3833 // This prevents us from backfilling a subsequent arg into d7
3835 fltArgRegNum = MAX_FLOAT_REG_ARG;
3840 // Check if the last register needed is still in the int argument register range.
3841 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3843 // Did we run out of registers when we had a 16-byte struct (size == 2)?
3844 // (i.e we only have one register remaining but we needed two registers to pass this arg)
3845 // This prevents us from backfilling a subsequent arg into x7
3847 if (!isRegArg && (size > 1))
3849 // We also must update intArgRegNum so that we no longer try to
3850 // allocate any new general purpose registers for args
3852 intArgRegNum = maxRegArgs;
3855 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3857 #if defined(UNIX_AMD64_ABI)
3859 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3860 // Here a struct can be passed in registers following the classification of its members and size.
3861 // Now make sure there are actually enough registers to do so.
3864 unsigned int structFloatRegs = 0;
3865 unsigned int structIntRegs = 0;
3866 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3868 if (structDesc.IsIntegralSlot(i))
3872 else if (structDesc.IsSseSlot(i))
3878 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3879 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3882 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3884 if (passUsingFloatRegs)
3886 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3890 isRegArg = intArgRegNum < MAX_REG_ARG;
3893 #else // !defined(UNIX_AMD64_ABI)
3894 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3895 #endif // !defined(UNIX_AMD64_ABI)
3896 #endif // _TARGET_ARM_
3903 #ifndef LEGACY_BACKEND
3904 // If there are nonstandard args (outside the calling convention) they were inserted above
3905 // and noted in a table so we can recognize them here and build their argInfo.
3907 // They should not affect the placement of any other args or stack space required.
3908 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3909 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3910 if (isNonStandard && (nonStdRegNum == REG_STK))
3914 #if defined(_TARGET_X86_)
3915 else if (call->IsTailCallViaHelper())
3917 // We have already (before calling fgMorphArgs()) appended the 4 special args
3918 // required by the x86 tailcall helper. These args are required to go on the
3919 // stack. Force them to the stack here.
3920 assert(numArgs >= 4);
3921 if (argIndex >= numArgs - 4)
3926 #endif // defined(_TARGET_X86_)
3927 #endif // !LEGACY_BACKEND
3928 } // end !reMorphing
3931 // Now we know if the argument goes in registers or not and how big it is;
3932 // either we just computed this, or this is a re-morph call and we looked it up in the table.
3934 CLANG_FORMAT_COMMENT_ANCHOR;
3937 // If we ever allocate a floating point argument to the stack, then all
3938 // subsequent HFA/float/double arguments go on the stack.
3939 if (!isRegArg && passUsingFloatRegs)
3941 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3943 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3947 // If we think we're going to split a struct between integer registers and the stack, check to
3948 // see if we've already assigned a floating-point arg to the stack.
3949 if (isRegArg && // We decided above to use a register for the argument
3950 !passUsingFloatRegs && // We're using integer registers
3951 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3952 anyFloatStackArgs) // We've already used the stack for a floating-point argument
3954 isRegArg = false; // Change our mind; don't pass this struct partially in registers
3956 // Skip the rest of the integer argument registers
3957 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
3959 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3963 #endif // _TARGET_ARM_
3967 regNumber nextRegNum = REG_STK;
3968 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3969 regNumber nextOtherRegNum = REG_STK;
3970 unsigned int structFloatRegs = 0;
3971 unsigned int structIntRegs = 0;
3973 if (isStructArg && structDesc.passedInRegisters)
3975 // It is a struct passed in registers. Assign the next available register.
3976 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
3977 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
3978 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3980 if (structDesc.IsIntegralSlot(i))
3982 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
3985 else if (structDesc.IsSseSlot(i))
3987 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
3993 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3995 // fill in or update the argInfo table
3996 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
3997 : genMapIntRegArgNumToRegNum(intArgRegNum);
4000 #ifdef _TARGET_AMD64_
4001 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4006 fgArgTabEntryPtr newArgEntry;
4009 // This is a register argument - possibly update it in the table
4010 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4016 nextRegNum = nonStdRegNum;
4019 // This is a register argument - put it in the table
4020 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4021 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4023 isStructArg, nextOtherRegNum, &structDesc
4024 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4027 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
isHfaArg); // Note that on Arm32 an HFA is passed in int regs for varargs
4029 newArgEntry->SetIsBackFilled(isBackFilled);
4030 newArgEntry->isNonStandard = isNonStandard;
4033 if (newArgEntry->isNonStandard)
4038 // Set up the next intArgRegNum and fltArgRegNum values.
4041 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4044 intArgRegNum += structIntRegs;
4045 fltArgRegNum += structFloatRegs;
4048 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4050 if (passUsingFloatRegs)
4052 fltArgRegNum += size;
4054 #ifdef WINDOWS_AMD64_ABI
4055 // Whenever we pass an integer register argument
4056 // we skip the corresponding floating point register argument
4057 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4058 #endif // WINDOWS_AMD64_ABI
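// For illustration (Windows x64 convention): for a call such as f(int, double),
// the int goes in the first integer argument register and the double goes in the
// second floating-point register (xmm1, not xmm0), because integer and floating
// point argument registers share position numbers under this ABI.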
4060 if (fltArgRegNum > MAX_FLOAT_REG_ARG)
4062 // This indicates a partial enregistration of a struct type
4063 assert(varTypeIsStruct(argx));
4064 unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
4065 assert((unsigned char)numRegsPartial == numRegsPartial);
4066 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4067 fltArgRegNum = MAX_FLOAT_REG_ARG;
4069 #endif // _TARGET_ARM_
4073 if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4075 // we are setting up the fixed return buffer register argument
4076 // so don't increment intArgRegNum
4081 // Increment intArgRegNum by 'size' registers
4082 intArgRegNum += size;
4085 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4086 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4087 #endif // _TARGET_AMD64_
4089 if (intArgRegNum > MAX_REG_ARG)
4091 // This indicates a partial enregistration of a struct type
4092 assert((isStructArg) || argx->OperIsCopyBlkOp() ||
4093 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4094 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4095 assert((unsigned char)numRegsPartial == numRegsPartial);
4096 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4097 intArgRegNum = MAX_REG_ARG;
4098 fgPtrArgCntCur += size - numRegsPartial;
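// For illustration (hypothetical ARM values): if intArgRegNum was 2 before a
// 4-slot struct, it becomes 6 after adding size, so numRegsPartial is
// 4 - (6 - 4) = 2: two slots go in r2/r3 and the remaining two slots go on
// the stack, growing fgPtrArgCntCur by 2.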
4100 #endif // _TARGET_ARM_
4105 else // We have an argument that is not passed in a register
4107 fgPtrArgCntCur += size;
4109 // If the register arguments have not been determined then we must fill in the argInfo
4113 // This is a stack argument - possibly update it in the table
4114 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4118 // This is a stack argument - put it in the table
4119 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4120 argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4124 if (copyBlkClass != NO_CLASS_HANDLE)
4126 noway_assert(!reMorphing);
4127 fgMakeOutgoingStructArgCopy(call, args, argIndex,
4128 copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4130 // This can cause a GTF_EXCEPT flag to be set.
4131 // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
4132 // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
4133 // there are no register arguments. Then reMorphing is never true, so we keep re-copying
4134 // any struct arguments.
// i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0));
4136 flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
4138 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4139 hasStackArgCopy = true;
4143 #ifndef LEGACY_BACKEND
4144 if (argx->gtOper == GT_MKREFANY)
4146 // 'Lower' the MKREFANY tree and insert it.
4147 noway_assert(!reMorphing);
4151 // Build the mkrefany as a GT_FIELD_LIST
4152 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4153 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4154 (void)new (this, GT_FIELD_LIST)
4155 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4156 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4157 fp->node = fieldList;
4158 args->gtOp.gtOp1 = fieldList;
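// For illustration, the resulting tree for the mkrefany argument looks roughly like:
//     FIELD_LIST(dataPtr (TYP_BYREF) at offsetof(CORINFO_RefAny, dataPtr),
//                FIELD_LIST(type (TYP_I_IMPL) at offsetof(CORINFO_RefAny, type), nullptr))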
4160 #else // !_TARGET_X86_
// Here we don't need an unsafe value class check, since the address of the temp is used only in the mkrefany.
4164 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4165 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4167 // Build the mkrefany as a comma node:
4168 // (tmp.ptr=argx),(tmp.type=handle)
4169 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4170 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4171 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4172 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4173 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4174 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4176 GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4177 GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4178 GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4180 // Change the expression to "(tmp=val)"
4181 args->gtOp.gtOp1 = asg;
4183 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4184 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4185 lvaSetVarAddrExposed(tmp);
4186 #endif // !_TARGET_X86_
4188 #endif // !LEGACY_BACKEND
4190 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4193 GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4194 if ((lclNode != nullptr) &&
4195 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4197 // Make a GT_FIELD_LIST of the field lclVars.
4198 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4199 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4200 GenTreeFieldList* fieldList = nullptr;
4201 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4202 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4204 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4205 if (fieldList == nullptr)
4207 lcl->SetLclNum(fieldLclNum);
4208 lcl->ChangeOper(GT_LCL_VAR);
4209 lcl->gtType = fieldVarDsc->lvType;
4210 fieldList = new (this, GT_FIELD_LIST)
4211 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4212 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
4213 fp->node = fieldList;
4214 args->gtOp.gtOp1 = fieldList;
4218 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4219 fieldList = new (this, GT_FIELD_LIST)
4220 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
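// For illustration (a hypothetical x86 example): an independently promoted
//     struct { int a; int b; }   // fields V03 and V04
// is rewritten into
//     FIELD_LIST(V03 (TYP_INT) at offset 0, FIELD_LIST(V04 (TYP_INT) at offset 4, nullptr))
// so each field can be handled as its own piece of the outgoing argument.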
4225 #endif // defined (_TARGET_X86_) && !defined(LEGACY_BACKEND)
4227 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4228 if (isStructArg && !isRegArg)
4230 nonRegPassedStructSlots += size;
4233 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4237 } // end foreach argument loop
4241 call->fgArgInfo->ArgsComplete();
4243 #ifdef LEGACY_BACKEND
4244 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4245 #if defined(_TARGET_ARM_)
4246 call->gtCallRegUsedMask &= ~argSkippedRegMask;
4248 if (fltArgRegNum > 0)
4250 #if defined(_TARGET_ARM_)
4251 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4254 #endif // LEGACY_BACKEND
4257 if (call->gtCallArgs)
4259 UpdateGT_LISTFlags(call->gtCallArgs);
4262 /* Process the function address, if indirect call */
4264 if (call->gtCallType == CT_INDIRECT)
4266 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4269 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4271 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4276 /* Remember the maximum value we ever see */
4278 if (fgPtrArgCntMax < fgPtrArgCntCur)
4280 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4281 fgPtrArgCntMax = fgPtrArgCntCur;
4284 assert(fgPtrArgCntCur >= genPtrArgCntSav);
4285 call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4287 /* The call will pop all the arguments we pushed */
4289 fgPtrArgCntCur = genPtrArgCntSav;
4291 #if FEATURE_FIXED_OUT_ARGS
4293 // Record the outgoing argument size. If the call is a fast tail
4294 // call, it will setup its arguments in incoming arg area instead
4295 // of the out-going arg area, so we don't need to track the
4296 // outgoing arg size.
4297 if (!call->IsFastTailCall())
4299 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4301 #if defined(UNIX_AMD64_ABI)
4302 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
// ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4306 // First slots go in registers only, no stack needed.
4307 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4308 // and ignores floating point args (it is overly conservative in that case).
4309 preallocatedArgCount = nonRegPassedStructSlots;
4310 if (argSlots > MAX_REG_ARG)
4312 preallocatedArgCount += argSlots - MAX_REG_ARG;
4314 #endif // UNIX_AMD64_ABI
4316 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4317 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
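// For illustration (hypothetical values on a 64-bit target): 5 outgoing stack
// slots give 5 * 8 = 40 bytes; with only 2 slots, the 16 bytes computed here
// would be raised to MIN_ARG_AREA_FOR_CALL by the max() above.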
4322 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4323 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4327 #endif // FEATURE_FIXED_OUT_ARGS
4329 /* Update the 'side effect' flags value for the call */
4331 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4333 // If the register arguments have already been determined
4334 // or we have no register arguments then we don't need to
4335 // call SortArgs() and EvalArgsToTemps()
// For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
// all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
// is added to make sure EvalArgsToTemps is called in those cases.
4340 if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4341 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4343 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4346 // This is the first time that we morph this call AND it has register arguments.
4347 // Follow into the code below and do the 'defer or eval to temp' analysis.
4349 call->fgArgInfo->SortArgs();
4351 call->fgArgInfo->EvalArgsToTemps();
4353 // We may have updated the arguments
4354 if (call->gtCallArgs)
4356 UpdateGT_LISTFlags(call->gtCallArgs);
4360 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4362 // Rewrite the struct args to be passed by value on stack or in registers.
4363 fgMorphSystemVStructArgs(call, hasStructArgument);
4365 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4367 #ifndef LEGACY_BACKEND
4368 // In the future we can migrate UNIX_AMD64 to use this
4369 // method instead of fgMorphSystemVStructArgs
4371 // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
4372 if (hasMultiregStructArgs)
4374 fgMorphMultiregStructArgs(call);
4376 #endif // LEGACY_BACKEND
4378 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4383 fgArgInfoPtr argInfo = call->fgArgInfo;
4384 for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
4386 fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
4387 curArgEntry->Dump();
4395 #pragma warning(pop)
4398 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4399 // fgMorphSystemVStructArgs:
4400 // Rewrite the struct args to be passed by value on stack or in registers.
4403 // call: The call whose arguments need to be morphed.
4404 // hasStructArgument: Whether this call has struct arguments.
4406 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4408 unsigned flagsSummary = 0;
4412 if (hasStructArgument)
4414 fgArgInfoPtr allArgInfo = call->fgArgInfo;
4416 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4418 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
// For such late args the gtCallArgList contains the setup arg node (which evaluates the arg).
// The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
// between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the arg itself;
// otherwise it points to the node in the late args list.
4423 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4424 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4425 assert(fgEntryPtr != nullptr);
4426 GenTreePtr argx = fgEntryPtr->node;
4427 GenTreePtr lateList = nullptr;
4428 GenTreePtr lateNode = nullptr;
4432 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4434 assert(list->OperIsList());
4436 GenTreePtr argNode = list->Current();
4437 if (argx == argNode)
4444 assert(lateList != nullptr && lateNode != nullptr);
4446 GenTreePtr arg = argx;
4447 bool argListCreated = false;
4449 var_types type = arg->TypeGet();
4451 if (varTypeIsStruct(type))
4453 var_types originalType = type;
4454 // If we have already processed the arg...
4455 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
// If it is already an OBJ, it is set up properly already.
4461 if (arg->OperGet() == GT_OBJ)
4463 assert(!fgEntryPtr->structDesc.passedInRegisters);
4467 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4468 (arg->OperGet() == GT_ADDR &&
4469 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4471 GenTreeLclVarCommon* lclCommon =
4472 arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4473 if (fgEntryPtr->structDesc.passedInRegisters)
4475 if (fgEntryPtr->structDesc.eightByteCount == 1)
// Change the type here; the code below will change the LclVar to a LCL_FLD.
4478 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4479 fgEntryPtr->structDesc.eightByteSizes[0]);
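// For illustration (SysV classification): a struct { float x; float y; } forms a
// single SSE eightbyte of size 8, so 'type' becomes TYP_DOUBLE and the struct
// travels in one XMM register; a struct { int a; int b; } would instead classify
// as a single integer eightbyte and become TYP_LONG.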
4481 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4483 // Create LCL_FLD for each eightbyte.
4484 argListCreated = true;
4487 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4489 GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4490 fgEntryPtr->structDesc.eightByteSizes[0]);
4491 GenTreeFieldList* fieldList =
4492 new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4493 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4496 // Second eightbyte.
4497 GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4498 GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4499 .eightByteClassifications[1],
4500 fgEntryPtr->structDesc.eightByteSizes[1]),
4501 lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4503 fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4504 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4505 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
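// For illustration (a hypothetical SysV example): for a struct { long p; double d; },
// the first eightbyte classifies as integer and the second as SSE, so the argument
// becomes a two-entry GT_FIELD_LIST of LCL_FLDs (a TYP_LONG at offset 0 and a
// TYP_DOUBLE at offset 8), landing in one integer register and one XMM register.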
4509 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
// If we didn't change the type of the struct, its classification doesn't
// allow it to be passed directly in registers, so we need to pass a pointer
// to the destination where we copied the struct.
4518 if (!argListCreated)
4520 if (fgEntryPtr->structDesc.passedInRegisters)
4526 // Make sure this is an addr node.
4527 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4529 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4532 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4534 // Create an Obj of the temp to use it as a call argument.
4535 arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4542 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4543 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4544 assert(fgEntryPtr != nullptr);
4545 GenTreePtr argx = fgEntryPtr->node;
4546 GenTreePtr lateList = nullptr;
4547 GenTreePtr lateNode = nullptr;
4550 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4552 assert(list->OperIsList());
4554 GenTreePtr argNode = list->Current();
4555 if (argx == argNode)
4562 assert(lateList != nullptr && lateNode != nullptr);
4565 fgEntryPtr->node = arg;
4568 lateList->gtOp.gtOp1 = arg;
4572 args->gtOp.gtOp1 = arg;
4579 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4581 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4583 //-----------------------------------------------------------------------------
4584 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4585 // call fgMorphMultiregStructArg on each of them.
4588 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
4591 // We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
4592 // The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
4593 // which is only used for struct arguments.
4594 // If this method fails to find any TYP_STRUCT arguments it will assert.
4596 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4600 bool foundStructArg = false;
4601 unsigned initialFlags = call->gtFlags;
4602 unsigned flagsSummary = 0;
4603 fgArgInfoPtr allArgInfo = call->fgArgInfo;
// Currently only ARM64 uses this method to morph the MultiReg struct args;
// in the future AMD64_UNIX and (for HFAs) ARM32 will also use this method.
4608 CLANG_FORMAT_COMMENT_ANCHOR;
4611 NYI_ARM("fgMorphMultiregStructArgs");
4614 assert(!"Logic error: no MultiregStructArgs for X86");
4616 #ifdef _TARGET_AMD64_
4617 #if defined(UNIX_AMD64_ABI)
4618 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4619 #else // WINDOWS_AMD64_ABI
4620 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4621 #endif // !UNIX_AMD64_ABI
4624 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4626 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
// For such late args the gtCallArgList contains the setup arg node (which evaluates the arg).
// The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
// between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the arg itself;
// otherwise it points to the node in the late args list.
4631 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4632 fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4633 assert(fgEntryPtr != nullptr);
4634 GenTreePtr argx = fgEntryPtr->node;
4635 GenTreePtr lateList = nullptr;
4636 GenTreePtr lateNode = nullptr;
4640 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
4642 assert(list->OperIsList());
4644 GenTreePtr argNode = list->Current();
4645 if (argx == argNode)
4652 assert(lateList != nullptr && lateNode != nullptr);
4655 GenTreePtr arg = argx;
4657 if (arg->TypeGet() == TYP_STRUCT)
4659 foundStructArg = true;
4661 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4663 // Did we replace 'argx' with a new tree?
4666 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4668 // link the new arg node into either the late arg list or the gtCallArgs list
4671 lateList->gtOp.gtOp1 = arg;
4675 args->gtOp.gtOp1 = arg;
4681 // We should only call this method when we actually have one or more multireg struct args
4682 assert(foundStructArg);
4685 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4688 //-----------------------------------------------------------------------------
4689 // fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
4690 // Morph the argument into a set of GT_FIELD_LIST nodes.
4693 // arg - A GenTree node containing a TYP_STRUCT arg that
4694 // is to be passed in multiple registers
4695 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4698 // arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
4699 // for passing in multiple registers.
4700 // If arg is a LclVar we check if it is struct promoted and has the right number of fields
// and if they are at the appropriate offsets we will use the struct promoted fields
4702 // in the GT_FIELD_LIST nodes that we create.
// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
// we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
// this also forces the struct to be stack allocated into the local frame.
// For the GT_OBJ case we will clone the address expression and generate two (or more) indirect loads.
4708 // Currently the implementation only handles ARM64 and will NYI for other architectures.
4710 GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
4712 assert(arg->TypeGet() == TYP_STRUCT);
4714 #ifndef _TARGET_ARM64_
4715 NYI("fgMorphMultiregStructArg requires implementation for this target");
4718 #if FEATURE_MULTIREG_ARGS
4719 // Examine 'arg' and setup argValue objClass and structSize
4721 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4722 GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
4723 unsigned structSize = 0;
4725 if (arg->OperGet() == GT_OBJ)
4727 GenTreeObj* argObj = arg->AsObj();
4728 objClass = argObj->gtClass;
4729 structSize = info.compCompHnd->getClassSize(objClass);
4731 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
4733 if (argObj->gtOp1->OperGet() == GT_ADDR)
4735 argValue = argObj->gtOp1->gtOp.gtOp1;
4738 else if (arg->OperGet() == GT_LCL_VAR)
4740 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4741 unsigned varNum = varNode->gtLclNum;
4742 assert(varNum < lvaCount);
4743 LclVarDsc* varDsc = &lvaTable[varNum];
4745 objClass = lvaGetStruct(varNum);
4746 structSize = varDsc->lvExactSize;
4748 noway_assert(objClass != nullptr);
4750 var_types hfaType = TYP_UNDEF;
4751 var_types elemType = TYP_UNDEF;
4752 unsigned elemCount = 0;
4753 unsigned elemSize = 0;
4754 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4756 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4757 if (varTypeIsFloating(hfaType))
4760 elemSize = genTypeSize(elemType);
4761 elemCount = structSize / elemSize;
4762 assert(elemSize * elemCount == structSize);
4763 for (unsigned inx = 0; inx < elemCount; inx++)
4765 type[inx] = elemType;
4770 assert(structSize <= 2 * TARGET_POINTER_SIZE);
4771 BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
4772 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4774 type[0] = getJitGCType(gcPtrs[0]);
4775 type[1] = getJitGCType(gcPtrs[1]);
4777 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4779 // We can safely widen this to 16 bytes since we are loading from
4780 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4781 // lives in the stack frame or will be a promoted field.
4783 elemSize = TARGET_POINTER_SIZE;
4784 structSize = 2 * TARGET_POINTER_SIZE;
4786 else // we must have a GT_OBJ
4788 assert(argValue->OperGet() == GT_OBJ);
4790 // We need to load the struct from an arbitrary address
4791 // and we can't read past the end of the structSize
4792 // We adjust the second load type here
4794 if (structSize < 2 * TARGET_POINTER_SIZE)
4796 switch (structSize - TARGET_POINTER_SIZE)
4802 type[1] = TYP_SHORT;
4808 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
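// For illustration: a 12-byte struct loads its tail as a TYP_INT (remainder 4)
// and a 10-byte struct as a TYP_SHORT (remainder 2), so the second GT_IND never
// reads past the end of the struct.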
4814 // We should still have a TYP_STRUCT
4815 assert(argValue->TypeGet() == TYP_STRUCT);
4817 GenTreeFieldList* newArg = nullptr;
4819 // Are we passing a struct LclVar?
4821 if (argValue->OperGet() == GT_LCL_VAR)
4823 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4824 unsigned varNum = varNode->gtLclNum;
4825 assert(varNum < lvaCount);
4826 LclVarDsc* varDsc = &lvaTable[varNum];
4828 // At this point any TYP_STRUCT LclVar must be a 16-byte struct
// or an HFA struct, both of which are passed by value.
4831 assert((varDsc->lvSize() == 2 * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4833 varDsc->lvIsMultiRegArg = true;
JITDUMP("Multireg struct argument V%02u : ", varNum);
4843 // This local variable must match the layout of the 'objClass' type exactly
4844 if (varDsc->lvIsHfa())
4846 // We have a HFA struct
4847 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4848 noway_assert(elemSize == genTypeSize(elemType));
4849 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4850 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4852 for (unsigned inx = 0; (inx < elemCount); inx++)
4854 noway_assert(type[inx] == elemType);
4859 // We must have a 16-byte struct (non-HFA)
4860 noway_assert(elemCount == 2);
4862 for (unsigned inx = 0; inx < elemCount; inx++)
4864 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
// We set up the type[inx] value above using the GC info from 'objClass'.
// This GT_LCL_VAR must have the same GC layout info.
4869 if (currentGcLayoutType != TYPE_GC_NONE)
4871 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
// We may have used a small type when we set up the type[inx] values above.
// We can safely widen this to TYP_I_IMPL.
4877 type[inx] = TYP_I_IMPL;
4882 // Is this LclVar a promoted struct with exactly 2 fields?
4883 // TODO-ARM64-CQ: Support struct promoted HFA types here
4884 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
// See if we have two promoted fields that start at offsets 0 and TARGET_POINTER_SIZE.
4887 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4888 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4890 // Did we find the promoted fields at the necessary offsets?
4891 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4893 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4894 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4896 var_types loType = loVarDsc->lvType;
4897 var_types hiType = hiVarDsc->lvType;
4899 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4901 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4902 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", varNum);
4907 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4912 // We can use the struct promoted field as the two arguments
4914 GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
4915 GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
4917 // Create a new tree for 'arg'
4918 // replace the existing LDOBJ(ADDR(LCLVAR))
4919 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
4921 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
4922 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
// We will create a list of GT_LCL_FLD nodes to pass this struct.
4931 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
// If we didn't set newArg to a new FIELD_LIST node tree
4937 if (newArg == nullptr)
4939 if (fgEntryPtr->regNum == REG_STK)
4941 // We leave this stack passed argument alone
// Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
// A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
4948 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4950 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4951 unsigned varNum = varNode->gtLclNum;
4952 assert(varNum < lvaCount);
4953 LclVarDsc* varDsc = &lvaTable[varNum];
4955 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
4956 unsigned lastOffset = baseOffset + (elemCount * elemSize);
4958 // The allocated size of our LocalVar must be at least as big as lastOffset
4959 assert(varDsc->lvSize() >= lastOffset);
4961 if (varDsc->lvStructGcCount > 0)
4963 // alignment of the baseOffset is required
4964 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
4965 noway_assert(elemSize == TARGET_POINTER_SIZE);
4966 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
4967 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
4968 for (unsigned inx = 0; (inx < elemCount); inx++)
// The GC information must match what we set up using 'objClass'
4971 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
4974 else // this varDsc contains no GC pointers
4976 for (unsigned inx = 0; inx < elemCount; inx++)
// The GC information must match what we set up using 'objClass'
4979 noway_assert(!varTypeIsGC(type[inx]));
// We create a list of GT_LCL_FLD nodes to pass this struct.
4986 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4988 // Create a new tree for 'arg'
4989 // replace the existing LDOBJ(ADDR(LCLVAR))
4990 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
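// For illustration (a hypothetical ARM64 HFA of three floats):
//     FIELD_LIST(LCL_FLD float @0,
//                FIELD_LIST(LCL_FLD float @4,
//                           FIELD_LIST(LCL_FLD float @8, nullptr)))
// one entry per register-sized piece of the struct.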
4992 unsigned offset = baseOffset;
4993 GenTreeFieldList* listEntry = nullptr;
4994 for (unsigned inx = 0; inx < elemCount; inx++)
4996 elemSize = genTypeSize(type[inx]);
4997 GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
4998 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
4999 if (newArg == nullptr)
5006 // Are we passing a GT_OBJ struct?
5008 else if (argValue->OperGet() == GT_OBJ)
5010 GenTreeObj* argObj = argValue->AsObj();
5011 GenTreePtr baseAddr = argObj->gtOp1;
5012 var_types addrType = baseAddr->TypeGet();
5014 // Create a new tree for 'arg'
5015 // replace the existing LDOBJ(EXPR)
5016 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5019 unsigned offset = 0;
5020 GenTreeFieldList* listEntry = nullptr;
5021 for (unsigned inx = 0; inx < elemCount; inx++)
5023 elemSize = genTypeSize(type[inx]);
5024 GenTreePtr curAddr = baseAddr;
5027 GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
5028 noway_assert(baseAddrDup != nullptr);
5029 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5035 GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
5037 // For safety all GT_IND should have at least GT_GLOB_REF set.
5038 curItem->gtFlags |= GTF_GLOB_REF;
5039 if (fgAddrCouldBeNull(curItem))
5041 // This indirection can cause a GPF if the address could be null.
5042 curItem->gtFlags |= GTF_EXCEPT;
5045 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5046 if (newArg == nullptr)
5056 // If we reach here we should have set newArg to something
5057 if (newArg == nullptr)
5059 gtDispTree(argValue);
5060 assert(!"Missing case in fgMorphMultiregStructArg");
5065 printf("fgMorphMultiregStructArg created tree:\n");
5070 arg = newArg; // consider calling fgMorphTree(newArg);
5072 #endif // FEATURE_MULTIREG_ARGS
// Make a copy of a struct variable if necessary, to pass to a callee.
// The rewritten argument is stored back into 'args' and the call's fgArgInfo.
5079 void Compiler::fgMakeOutgoingStructArgCopy(
5083 CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5084 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5086 GenTree* argx = args->Current();
5087 noway_assert(argx->gtOper != GT_MKREFANY);
5088 // See if we need to insert a copy at all
5089 // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
5090 // but if there is only one use and no loops, the use must be last.
5091 GenTreeLclVarCommon* lcl = nullptr;
5092 if (argx->OperIsLocal())
5094 lcl = argx->AsLclVarCommon();
5096 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5098 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5102 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5103 if (lvaIsImplicitByRefLocal(varNum))
5105 LclVarDsc* varDsc = &lvaTable[varNum];
5106 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
// on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5108 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5109 // struct parameters if they are passed as arguments to a tail call.
5110 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5112 varDsc->lvRefCnt = 0;
5113 args->gtOp.gtOp1 = lcl;
5114 fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
5117 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5123 if (fgOutgoingArgTemps == nullptr)
5125 fgOutgoingArgTemps = hashBv::Create(this);
5131 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5132 // We do not reuse within a statement.
5133 if (!opts.MinOpts())
5136 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5138 LclVarDsc* varDsc = &lvaTable[lclNum];
5139 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5140 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5142 tmp = (unsigned)lclNum;
5144 JITDUMP("reusing outgoing struct arg");
5151 // Create the CopyBlk tree and insert it.
// Here we don't need an unsafe value class check, since the address of this temp is used only in the copyblk.
5156 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5157 lvaSetStruct(tmp, copyBlkClass, false);
5158 fgOutgoingArgTemps->setBit(tmp);
5161 fgCurrentlyInUseArgTemps->setBit(tmp);
// TYP_SIMD structs should not be enregistered, since the ABI requires them to be
// allocated on the stack and their address to be passed.
5165 if (lclVarIsSIMDType(tmp))
5167 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5170 // Create a reference to the temp
5171 GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5172 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5174 // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5175 // to ref counting of the lclVars.
5176 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5179 if (argx->gtOper == GT_OBJ)
5181 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5185 argx->gtFlags |= GTF_DONT_CSE;
5188 // Copy the valuetype to the temp
5189 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5190 GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5191 copyBlk = fgMorphCopyBlock(copyBlk);
5193 #if FEATURE_FIXED_OUT_ARGS
// Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
// On Unix, create a LCL_FLD for structs passed in more than one register; see fgMakeTmpArgNode.
5197 GenTreePtr arg = copyBlk;
5199 #else // FEATURE_FIXED_OUT_ARGS
// Structs are always on the stack, and thus never need temps,
// so we have to put the copy and temp all into one expression.
5203 GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5205 // Change the expression to "(tmp=val),tmp"
5206 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5208 #endif // FEATURE_FIXED_OUT_ARGS
5210 args->gtOp.gtOp1 = arg;
5211 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5217 // See declaration for specification comment.
5218 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5219 unsigned firstArgRegNum,
5220 regMaskTP* pArgSkippedRegMask)
5222 assert(varDsc->lvPromoted);
5223 // There's no way to do these calculations without breaking abstraction and assuming that
5224 // integer register arguments are consecutive ints. They are on ARM.
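// For illustration (a hypothetical ARM layout): a promoted struct with an int at
// offset 0 and a double at offset 8, starting at r0, places the int in r0 and the
// double in r2/r3; this routine would then add r1 to *pArgSkippedRegMask.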
5226 // To start, figure out what register contains the last byte of the first argument.
5227 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5228 unsigned lastFldRegOfLastByte =
5229 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5232 // Now we're keeping track of the register that the last field ended in; see what registers
5233 // subsequent fields start in, and whether any are skipped.
5234 // (We assume here the invariant that the fields are sorted in offset order.)
5235 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5237 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5238 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5239 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5240 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
// This loop enumerates the offsets of any registers skipped: find which register
// holds the last byte of the previous field, start at the first register after that,
// and walk up to (but not including) the first register of the current field.
5244 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5245 skippedRegOffsets++)
5247 // If the register number would not be an arg reg, we're done.
5248 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5250 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5252 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5256 #endif // _TARGET_ARM_
5258 //****************************************************************************
5259 // fgFixupStructReturn:
// The companion to impFixupCallStructReturn. Now that the importer is done,
// change the gtType to the precomputed native return type.
// Requires that callNode currently has a struct type.
5264 void Compiler::fgFixupStructReturn(GenTreePtr callNode)
5266 assert(varTypeIsStruct(callNode));
5268 GenTreeCall* call = callNode->AsCall();
5269 bool callHasRetBuffArg = call->HasRetBufArg();
5270 bool isHelperCall = call->IsHelperCall();
5272 // Decide on the proper return type for this call that currently returns a struct
5274 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5275 Compiler::structPassingKind howToReturnStruct;
5276 var_types returnType;
5278 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5279 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5281 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5282 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5283 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5287 assert(!callHasRetBuffArg);
5288 assert(retClsHnd == NO_CLASS_HANDLE);
5290 // Now that we are past the importer, re-type this node
5291 howToReturnStruct = SPK_PrimitiveType;
5292 returnType = (var_types)call->gtReturnType;
5296 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5299 if (howToReturnStruct == SPK_ByReference)
5301 assert(returnType == TYP_UNKNOWN);
5302 assert(callHasRetBuffArg);
5306 assert(returnType != TYP_UNKNOWN);
5308 if (returnType != TYP_STRUCT)
5310 // Widen the primitive type if necessary
5311 returnType = genActualType(returnType);
5313 call->gtType = returnType;
5316 #if FEATURE_MULTIREG_RET
5317 // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
5318 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5319 #else // !FEATURE_MULTIREG_RET
5320 // No more struct returns
5321 assert(call->TypeGet() != TYP_STRUCT);
5324 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5325 // If it was a struct return, it has been transformed into a call
5326 // with a return buffer (that returns TYP_VOID) or into a return
5327 // of a primitive/enregisterable type
5328 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5332 /*****************************************************************************
5334 * A little helper used to rearrange nested commutative operations. The
5335 * effect is that nested associative, commutative operations are transformed
5336 * into a 'left-deep' tree, i.e. into something like this:
5338 * (((a op b) op c) op d) op...
5343 void Compiler::fgMoveOpsLeft(GenTreePtr tree)
5351 op1 = tree->gtOp.gtOp1;
5352 op2 = tree->gtOp.gtOp2;
5353 oper = tree->OperGet();
5355 noway_assert(GenTree::OperIsCommutative(oper));
5356 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5357 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5358 noway_assert(oper == op2->gtOper);
5360 // Commutativity doesn't hold if overflow checks are needed
5362 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5367 if (gtIsActiveCSE_Candidate(op2))
5369 // If we have marked op2 as a CSE candidate,
5370 // we can't perform a commutative reordering
5371 // because any value numbers that we computed for op2
5372 // will be incorrect after performing a commutative reordering
5377 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5382 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5383 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5388 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5390 // We could deal with this, but we were always broken and just hit the assert
// below regarding flags, which means it's not frequent, so we will just bail out.
5396 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5398 GenTreePtr ad1 = op2->gtOp.gtOp1;
5399 GenTreePtr ad2 = op2->gtOp.gtOp2;
// Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT.
// We cannot reorder such GT_OR trees.
5404 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5409 /* Change "(x op (y op z))" to "(x op y) op z" */
5410 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5412 GenTreePtr new_op1 = op2;
5414 new_op1->gtOp.gtOp1 = op1;
5415 new_op1->gtOp.gtOp2 = ad1;
5417 /* Change the flags. */
// Make sure we aren't throwing away any flags
5420 noway_assert((new_op1->gtFlags &
5421 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5422 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5423 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5426 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5427 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
/* Retype new_op1 if it has become (or ceased to be) a GC ptr. */
5431 if (varTypeIsGC(op1->TypeGet()))
5433 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5434 oper == GT_ADD) || // byref(ref + (int+int))
5435 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5436 oper == GT_OR)); // int(gcref | int(gcref|intval))
5438 new_op1->gtType = tree->gtType;
5440 else if (varTypeIsGC(ad2->TypeGet()))
// Neither ad1 nor op1 is GC, so new_op1 isn't either.
5443 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5444 new_op1->gtType = TYP_I_IMPL;
// If new_op1 is a new expression, assign it a new unique value number.
// (vnStore is null before the ValueNumber phase has run.)
5449 if (vnStore != nullptr)
5451 // We can only keep the old value number on new_op1 if both op1 and ad2
5452 // have the same non-NoVN value numbers. Since op is commutative, comparing
5453 // only ad2 and op1 is enough.
5454 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5455 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5456 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5458 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5462 tree->gtOp.gtOp1 = new_op1;
5463 tree->gtOp.gtOp2 = ad2;
5465 /* If 'new_op1' is now the same nested op, process it recursively */
5467 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5469 fgMoveOpsLeft(new_op1);
5472 /* If 'ad2' is now the same nested op, process it
5473 * Instead of recursion, we set up op1 and op2 for the next loop.
5478 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5485 /*****************************************************************************/
5487 void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
5489 GenTreeBoundsChk* bndsChk = nullptr;
5490 SpecialCodeKind kind = SCK_RNGCHK_FAIL;
5493 if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
5494 #else // FEATURE_SIMD
5495 if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
5496 #endif // FEATURE_SIMD
5498 bndsChk = tree->AsBoundsChk();
5499 kind = tree->gtBoundsChk.gtThrowKind;
5503 noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
5507 unsigned callStkDepth = fgPtrArgCntCur;
5509 // only x86 pushes args
5510 const unsigned callStkDepth = 0;
5517 // we need to initialize this field
5518 if (fgGlobalMorph && bndsChk != nullptr)
5520 bndsChk->gtStkDepth = callStkDepth;
5524 if (!opts.compDbgCode)
5526 if (delay || compIsForInlining())
5528 /* We delay this until after loop-oriented range check
5529 analysis. For now we merely store the current stack
5530 level in the tree node.
5532 if (bndsChk != nullptr)
5534 noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
5535 bndsChk->gtStkDepth = callStkDepth;
5540 /* Create/find the appropriate "range-fail" label */
5542 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5543 noway_assert((bndsChk != nullptr) || fgGlobalMorph);
5545 unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
5547 BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
5549 /* Add the label to the indirection node */
5551 if (bndsChk != nullptr)
5553 bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
5559 /*****************************************************************************
5561 * Expand a GT_INDEX node and fully morph the child operands
* The original GT_INDEX node is bashed into the GT_IND node that accesses
5564 * the array element. We expand the GT_INDEX node into a larger tree that
5565 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5566 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
5567 * For complex array or index expressions one or more GT_COMMA assignments
5568 * are inserted so that we only evaluate the array or index expressions once.
5570 * The fully expanded tree is then morphed. This causes gtFoldExpr to
* perform local constant prop, reorder the constants in the tree, and fold them.
5574 * We then parse the resulting array element expression in order to locate
5575 * and label the constants and variables that occur in the tree.
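*
* For illustration (a rough sketch that elides the temp assignments), an int
* array access "a[i]" expands to approximately:
*
*    COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
*          IND(ADD(a, ADD(MUL(i, 4), firstElemOffs))))
*
* where firstElemOffs is the offset of the first array element.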
5578 const int MAX_ARR_COMPLEXITY = 4;
5579 const int MAX_INDEX_COMPLEXITY = 4;
5581 GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
5583 noway_assert(tree->gtOper == GT_INDEX);
5584 GenTreeIndex* asIndex = tree->AsIndex();
5586 var_types elemTyp = tree->TypeGet();
5587 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5588 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5590 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5593 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
5595 // If this is a SIMD type, this is the point at which we lose the type information,
5596 // so we need to set the correct type on the GT_IND.
5597 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5598 unsigned simdElemSize = 0;
5599 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5601 assert(simdElemSize == elemSize);
5602 elemTyp = getSIMDTypeForSize(elemSize);
5603 // This is the new type of the node.
5604 tree->gtType = elemTyp;
5605 // Now set elemStructType to null so that we don't confuse value numbering.
5606 elemStructType = nullptr;
5609 #endif // FEATURE_SIMD
5611 GenTreePtr arrRef = asIndex->Arr();
5612 GenTreePtr index = asIndex->Index();
// Set up the array length's offset into lenOffs
// and the first element's offset into elemOffs
5618 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5620 lenOffs = offsetof(CORINFO_String, stringLen);
5621 elemOffs = offsetof(CORINFO_String, chars);
5622 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5624 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5626 lenOffs = offsetof(CORINFO_RefArray, length);
5627 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5629 else // We have a standard array
5631 lenOffs = offsetof(CORINFO_Array, length);
5632 elemOffs = offsetof(CORINFO_Array, u1Elems);
5635 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5636 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5638 GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5639 GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5640 GenTreePtr bndsChk = nullptr;
5642 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5645 GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
5646 GenTreePtr index2 = nullptr;
5648 // If the arrRef expression involves an assignment, a call or reads from global memory,
5649 // then we *must* allocate a temporary in which to "localize" those values,
// to ensure that the same values are used in the bounds check and the actual dereference.
5652 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5653 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5654 // complexity is not exposed. (Without that condition there are cases of local struct
5655 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5656 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5658 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5659 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5661 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5662 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5663 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5664 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5668 arrRef2 = gtCloneExpr(arrRef);
5669 noway_assert(arrRef2 != nullptr);
5672 // If the index expression involves an assignment, a call or reads from global memory,
5673 // we *must* allocate a temporary in which to "localize" those values,
// to ensure that the same values are used in the bounds check and the actual dereference.
5676 // Also we allocate the temporary when the index is sufficiently complex/expensive.
5678 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
5679 (arrRef->OperGet() == GT_FIELD))
5681 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5682 indexDefn = gtNewTempAssign(indexTmpNum, index);
5683 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5684 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5688 index2 = gtCloneExpr(index);
5689 noway_assert(index2 != nullptr);
5692 // Next introduce a GT_ARR_BOUNDS_CHECK node
5693 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5695 #ifdef _TARGET_64BIT_
5696 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
5697 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
// the comparison will have to be widened to 64 bits.
5699 if (index->TypeGet() == TYP_I_IMPL)
5701 bndsChkType = TYP_I_IMPL;
5703 #endif // _TARGET_64BIT_
5705 GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
5707 if (bndsChkType != TYP_INT)
5709 arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
5712 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5713 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5715 bndsChk = arrBndsChk;
5717 // Make sure to increment ref-counts if already ref-counted.
5718 if (lvaLocalVarRefCounted)
5720 lvaRecursiveIncRefCounts(index);
5721 lvaRecursiveIncRefCounts(arrRef);
5724 // Now we'll switch to using the second copies for arrRef and index
5725 // to compute the address expression
5731 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5735 #ifdef _TARGET_64BIT_
5736 // Widen 'index' on 64-bit targets
5737 if (index->TypeGet() != TYP_I_IMPL)
5739 if (index->OperGet() == GT_CNS_INT)
5741 index->gtType = TYP_I_IMPL;
5745 index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
5748 #endif // _TARGET_64BIT_
5750 /* Scale the index value if necessary */
5753 GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
5755 // Fix 392756 WP7 Crossgen
5757 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5758 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5759 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5761 size->gtFlags |= GTF_DONT_CSE;
5763 /* Multiply by the array element size */
5764 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5771 /* Add the object ref to the element's offset */
5773 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5775 /* Add the first element's offset */
5777 GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5779 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
5781 #if SMALL_TREE_NODES
5782 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
// Change the original GT_INDEX node into a GT_IND node
5786 tree->SetOper(GT_IND);
5788 // If the index node is a floating-point type, notify the compiler
5789 // we'll potentially use floating point registers at the time of codegen.
5790 if (varTypeIsFloating(tree->gtType))
5792 this->compFloatingPointUsed = true;
5795 // We've now consumed the GTF_INX_RNGCHK, and the node
5796 // is no longer a GT_INDEX node.
5797 tree->gtFlags &= ~GTF_INX_RNGCHK;
5799 tree->gtOp.gtOp1 = addr;
5801 // This is an array index expression.
5802 tree->gtFlags |= GTF_IND_ARR_INDEX;
5804 /* An indirection will cause a GPF if the address is null */
5805 tree->gtFlags |= GTF_EXCEPT;
5809 tree->gtFlags |= GTF_DONT_CSE;
5812 // Store information about it.
5813 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5815 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5817 GenTreePtr indTree = tree;
5819 // Did we create a bndsChk tree?
5822 // Use a GT_COMMA node to prepend the array bound check
5824 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5826 /* Mark the indirection node as needing a range check */
5827 fgSetRngChkTarget(bndsChk);
5830 if (indexDefn != nullptr)
5832 // Use a GT_COMMA node to prepend the index assignment
5834 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5836 if (arrRefDefn != nullptr)
// Use a GT_COMMA node to prepend the arrRef assignment
5840 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5843 // Currently we morph the tree to perform some folding operations prior
5844 // to attaching fieldSeq info and labeling constant array index contributions
5848 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5849 // constant array index contributions, but the morphing operation may have changed
5850 // the 'tree' into something that now unconditionally throws an exception.
// In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
5853 // or it could be left unchanged. If it is unchanged then we should not return,
5854 // instead we should proceed to attaching fieldSeq info, etc...
5856 GenTreePtr arrElem = tree->gtEffectiveVal();
5858 if (fgIsCommaThrow(tree))
5860 if ((arrElem != indTree) || // A new tree node may have been created
5861 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
return tree; // Just return the Comma-Throw; don't try to attach the fieldSeq info, etc.
5867 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
5869 addr = arrElem->gtOp.gtOp1;
5871 assert(addr->TypeGet() == TYP_BYREF);
5873 GenTreePtr cnsOff = nullptr;
5874 if (addr->OperGet() == GT_ADD)
5876 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
5878 cnsOff = addr->gtOp.gtOp2;
5879 addr = addr->gtOp.gtOp1;
5882 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
5884 assert(addr->TypeGet() == TYP_BYREF);
5885 GenTreePtr index = addr->gtOp.gtOp2;
5887 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
5888 index->LabelIndex(this);
5890 addr = addr->gtOp.gtOp1;
5892 assert(addr->TypeGet() == TYP_REF);
5894 else if (addr->OperGet() == GT_CNS_INT)
5899 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
5901 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
5903 // Assign it the [#FirstElem] field sequence
5905 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
5907 else // We have folded the first element's offset with the index expression
5909 // Build the [#ConstantIndex, #FirstElem] field sequence
5911 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
5912 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
5914 if (cnsOff == nullptr) // It must have folded into a zero offset
5916 // Record in the general zero-offset map.
5917 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
5921 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
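// For illustration (hypothetical "a[2]" with elemOffs == 16 and elemSize == 4):
// if folding collapses the offset into the single constant 16 + 2*4 == 24, that
// constant no longer equals elemOffs, so it receives the combined
// [#ConstantIndex, #FirstElem] sequence (or the zero-offset map entry if even the
// constant folded away); an unfolded elemOffs constant gets just [#FirstElem].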
5929 /*****************************************************************************
 * For varargs functions, wrap accesses to the fixed stack arguments so that they go
 * through the varargs cookie, except for the cookie itself.
5934 * Non-x86 platforms are allowed to access all arguments directly
5935 * so we don't need this code.
5938 GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
/* For the fixed stack arguments of a varargs function, we need to go
   through the varargs cookie to access them, except for the
   cookie itself */
5944 LclVarDsc* varDsc = &lvaTable[lclNum];
5946 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
// Create a node representing the local pointing to the base of the args
GenTreePtr ptrArg =
    gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
                  gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
                                lclOffs));
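// For illustration, symbolically the address computed above is
//
//     ptrArg = baseOfStkArgs - (lvStkOffs - rsCalleeRegArgCount * sizeof(void*) + lclOffs)
//
// where baseOfStkArgs is the lvaVarargsBaseOfStkArgs local derived from the varargs
// cookie, so the fixed stack args are addressed at a fixed distance below that base
// instead of through their own lclNum.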
5954 // Access the argument through the local
5956 if (varType == TYP_STRUCT)
5958 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
5962 tree = gtNewOperNode(GT_IND, varType, ptrArg);
5964 tree->gtFlags |= GTF_IND_TGTANYWHERE;
5966 if (varDsc->lvAddrExposed)
5968 tree->gtFlags |= GTF_GLOB_REF;
5971 return fgMorphTree(tree);
5978 /*****************************************************************************
5980 * Transform the given GT_LCL_VAR tree for code generation.
5983 GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
5985 noway_assert(tree->gtOper == GT_LCL_VAR);
5987 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
5988 var_types varType = lvaGetRealType(lclNum);
5989 LclVarDsc* varDsc = &lvaTable[lclNum];
5991 if (varDsc->lvAddrExposed)
5993 tree->gtFlags |= GTF_GLOB_REF;
#ifdef _TARGET_X86_
    if (info.compIsVarArgs)
5999 GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6000 if (newTree != nullptr)
if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
{
    fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
}
return newTree;
6009 #endif // _TARGET_X86_
6011 /* If not during the global morphing phase bail */
6018 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6020 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6022 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6024 #if LOCAL_ASSERTION_PROP
6025 /* Assertion prop can tell us to omit adding a cast here */
6026 if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
/* Small-typed arguments and aliased locals are normalized on load;
   other small-typed locals are normalized on store. Loads are also
   normalized under the debugger, since the debugger could write to the variable.
   If this is one of the former, insert a narrowing cast on the load,
   i.e. convert: var-short --> cast-short(var-int) */
6037 tree->gtType = TYP_INT;
6038 fgMorphTreeDone(tree);
6039 tree = gtNewCastNode(TYP_INT, tree, varType);
6040 fgMorphTreeDone(tree);
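// For illustration: a load of a hypothetical TYP_SHORT parameter V02 becomes
//
//     GT_CAST(TYP_SHORT <- GT_LCL_VAR(TYP_INT) V02)
//
// The lclVar itself is retyped to TYP_INT above, and the narrowing cast
// re-establishes the small-type value on every load.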
6047 /*****************************************************************************
Grab a temp for big offset morphing.
This method will grab a new temp if no temp of this "type" has been created yet,
or return the same cached one if it has.
6052 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6054 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6056 if (lclNum == BAD_VAR_NUM)
6058 // We haven't created a temp for this kind of type. Create one now.
6059 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6060 fgBigOffsetMorphingTemps[type] = lclNum;
6064 // We better get the right type.
6065 noway_assert(lvaTable[lclNum].TypeGet() == type);
6068 noway_assert(lclNum != BAD_VAR_NUM);
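// For illustration: two big-offset field morphs that both need a TYP_REF object
// temp in the same method will share a single lclVar, while a morph needing a
// TYP_BYREF temp gets a different one, since the cache is keyed by type.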
6072 /*****************************************************************************
6074 * Transform the given GT_FIELD tree for code generation.
6077 GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
6079 assert(tree->gtOper == GT_FIELD);
6081 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6082 unsigned fldOffset = tree->gtField.gtFldOffset;
6083 GenTreePtr objRef = tree->gtField.gtFldObj;
6084 bool fieldMayOverlap = false;
6085 bool objIsLocal = false;
6087 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6088 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6090 if (tree->gtField.gtFldMayOverlap)
6092 fieldMayOverlap = true;
6093 // Reset the flag because we may reuse the node.
6094 tree->gtField.gtFldMayOverlap = false;
// If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6101 GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6102 if (newTree != tree)
6104 newTree = fgMorphSmpOp(newTree);
6108 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6110 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6113 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6118 /* Is this an instance data member? */
6123 objIsLocal = objRef->IsLocal();
6125 if (tree->gtFlags & GTF_IND_TLS_REF)
6127 NO_WAY("instance field can not be a TLS ref.");
6130 /* We'll create the expression "*(objRef + mem_offs)" */
6132 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6134 // An optimization for Contextful classes:
6135 // we unwrap the proxy when we have a 'this reference'
6136 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6138 objRef = fgUnwrapProxy(objRef);
/*
   Now we have a tree like this:

                              +--------------------+
                              |      GT_FIELD      |   tree
                              +----------+---------+
                                         |
                          +--------------+-------------+
                          |   tree->gtField.gtFldObj   |
                          +--------------+-------------+

   We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                              +--------------------+
                              |   GT_IND/GT_OBJ    |   tree
                              +---------+----------+
                                        |
                              +---------+----------+
                              |       GT_ADD       |   addr
                              +---------+----------+
                                        |
                                      /   \
                     +-------------------+  +----------------------+
                     |       objRef      |  |      fldOffset       |
                     |                   |  | (when fldOffset !=0) |
                     +-------------------+  +----------------------+

   or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                              +--------------------+
                              |   GT_IND/GT_OBJ    |   tree
                              +----------+---------+
                                         |
                              +----------+---------+
                              |      GT_COMMA      |   comma2
                              +----------+---------+
                                         |
                                       /   \
              +---------+----------+        +---------+----------+
        comma |      GT_COMMA      |        | "+" (i.e. GT_ADD)  |   addr
              +---------+----------+        +---------+----------+
                        |                             |
                      /   \                         /   \
           +-----+-----+  +-----+-----+     +---------+  +-----------+
       asg |  GT_ASG   |  |  GT_IND   | ind |  tmpLcl |  | fldOffset |
           +-----+-----+  +-----+-----+     +---------+  +-----------+
                 |              |
               /   \            |
     +-----------+  +-----------+  +-----------+
     |  tmpLcl   |  |  objRef   |  |  tmpLcl   |
     +-----------+  +-----------+  +-----------+
*/
6209 var_types objRefType = objRef->TypeGet();
6211 GenTreePtr comma = nullptr;
6213 bool addedExplicitNullCheck = false;
6215 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6216 // and thus is equivalent to a MACK_Ind with zero offset.
6217 MorphAddrContext defMAC(MACK_Ind);
6223 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6224 // This means that we insert an explicit null check whenever we create byref by adding a
6225 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6226 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6227 // small offsets); in this plan, we would transfer some null-checking responsibility to
// callees of methods taking byref parameters. They would have to add explicit null checks
6229 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6230 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6231 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6232 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6233 // This is left here to point out how to implement it.
6234 CLANG_FORMAT_COMMENT_ANCHOR;
6236 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
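// For illustration: for IL that takes a field's address without dereferencing it
// (e.g. "ldflda" feeding a byref call argument), the byref "objRef + fldOffset" is
// created in a MACK_Addr context. Under the conservative scheme selected above we
// insert an explicit null check on objRef, since no indirection would otherwise
// fault here if objRef were null.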
6238 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6239 // whose address is being taken is either a local or static variable, whose address is necessarily
6240 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6241 if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
6242 (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
                                      || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
#else
                                      || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
                                          (mac->m_totalOffset + fldOffset > 0))
#endif
                                          )))
6254 printf("Before explicit null check morphing:\n");
6260 // Create the "comma" subtree
6262 GenTreePtr asg = nullptr;
6267 if (objRef->gtOper != GT_LCL_VAR)
6269 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6271 // Create the "asg" node
6272 asg = gtNewTempAssign(lclNum, objRef);
6276 lclNum = objRef->gtLclVarCommon.gtLclNum;
6279 // Create the "nullchk" node.
// Make it TYP_BYTE so we only dereference it for 1 byte.
6281 GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
6282 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6284 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6286 // An indirection will cause a GPF if the address is null.
6287 nullchk->gtFlags |= GTF_EXCEPT;
6289 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6290 optMethodFlags |= OMF_HAS_NULLCHECK;
if (asg != nullptr)
{
    // Create the "comma" node.
    comma = gtNewOperNode(GT_COMMA,
                          TYP_VOID, // We don't want to return anything from this "comma" node.
                                    // Set the type to TYP_VOID, so we can select "cmp" instruction
                                    // instead of "mov" instruction later on.
                          asg, nullchk);
}
else
{
    comma = nullchk;
}

addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6308 addedExplicitNullCheck = true;
else if (fldOffset == 0)
{
    // Generate the "addr" node.
    addr = objRef;
    FieldSeqNode* fieldSeq =
        fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
    GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
}
else
{
    addr = objRef;
}
6323 #ifdef FEATURE_READYTORUN_COMPILER
6324 if (tree->gtField.gtFieldLookup.addr != nullptr)
6326 GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
6328 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6330 baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
    addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
                         baseOffset);
}
#endif

if (fldOffset != 0)
{
    // Generate the "addr" node.
    /* Add the member offset to the object's address */
    FieldSeqNode* fieldSeq =
        fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
    addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
                         gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
}
6347 // Now let's set the "tree" as a GT_IND tree.
6349 tree->SetOper(GT_IND);
6350 tree->gtOp.gtOp1 = addr;
6352 if (fgAddrCouldBeNull(addr))
6354 // This indirection can cause a GPF if the address could be null.
6355 tree->gtFlags |= GTF_EXCEPT;
6358 if (addedExplicitNullCheck)
6361 // Create "comma2" node and link it to "tree".
6364 comma2 = gtNewOperNode(GT_COMMA,
                       addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
                       comma, addr);
6367 tree->gtOp.gtOp1 = comma2;
6373 if (addedExplicitNullCheck)
6375 printf("After adding explicit null check:\n");
6381 else /* This is a static data member */
6383 if (tree->gtFlags & GTF_IND_TLS_REF)
6385 // Thread Local Storage static field reference
6387 // Field ref is a TLS 'Thread-Local-Storage' reference
// Build this tree:  IND(*) #
//                    |
//                   ADD(I_IMPL)
//                   / \
//                  /  CNS(fldOffset)
//                 /
//             IND(I_IMPL) == [Base of this DLL's TLS]
//              |
//             ADD(I_IMPL)
//             / \
//            /   CNS(IdValue*4) or MUL
//           /                     / \
//          IND(I_IMPL)           /  CNS(4)
//           |                   /
//          CNS(TLS_HDL,0x2C)  IND
//                              |
//                             CNS(pIdAddr)
//
//                # Denotes the original node
6411 void** pIdAddr = nullptr;
6412 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
// If we can access the TLS DLL index ID value directly,
// then pIdAddr will be NULL and
// IdValue will be the actual TLS DLL index ID.
6419 GenTreePtr dllRef = nullptr;
6420 if (pIdAddr == nullptr)
6424 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6429 dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
6430 dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
6431 dllRef->gtFlags |= GTF_IND_INVARIANT;
6435 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6438 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6440 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6442 GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6444 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6446 if (dllRef != nullptr)
6448 /* Add the dllRef */
6449 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6452 /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
6453 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6457 FieldSeqNode* fieldSeq =
6458 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6459 GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6461 /* Add the TLS static field offset to the address */
6463 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6466 // Final indirect to get to actual value of TLS static field
6468 tree->SetOper(GT_IND);
6469 tree->gtOp.gtOp1 = tlsRef;
6471 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
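// For illustration, the address fed into the final indirection above is:
//
//     *(FS:[0x2C] + IdValue*4) + fldOffset        (direct TLS index case)
//     *(FS:[0x2C] + (*pIdAddr)*4) + fldOffset     (indirect TLS index case)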
6475 // Normal static field reference
// If we can access the static's address directly,
// then pFldAddr will be NULL and
// fldAddr will be the actual address of the static field.
6482 void** pFldAddr = nullptr;
6483 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6485 if (pFldAddr == nullptr)
6487 #ifdef _TARGET_64BIT_
6488 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
// The address is not directly addressable, so force it into a
// constant, so we handle it properly
6493 GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6494 addr->gtType = TYP_I_IMPL;
6495 FieldSeqNode* fieldSeq =
6496 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6497 addr->gtIntCon.gtFieldSeq = fieldSeq;
6499 tree->SetOper(GT_IND);
6500 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6501 // We must clear it when we transform the node.
6502 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6503 // that the logic above does its own checking to determine whether a nullcheck is needed.
6504 tree->gtFlags &= ~GTF_IND_ARR_LEN;
6505 tree->gtOp.gtOp1 = addr;
6507 return fgMorphSmpOp(tree);
6510 #endif // _TARGET_64BIT_
// Only the volatile flag could be set, and it maps directly onto GTF_IND_VOLATILE
6513 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
6514 noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
6515 tree->SetOper(GT_CLS_VAR);
6516 tree->gtClsVar.gtClsVarHnd = symHnd;
6517 FieldSeqNode* fieldSeq =
6518 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6519 tree->gtClsVar.gtFieldSeq = fieldSeq;
6526 GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
// There are two cases here: either the static is RVA-based,
// in which case the type of the FIELD node is not a GC type
// and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
// a GC type and the handle to it is a TYP_BYREF into the GC heap,
// because handles to statics now go into the large object heap.
6534 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6535 GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
6536 op1->gtFlags |= GTF_IND_INVARIANT;
6538 tree->SetOper(GT_IND);
6539 tree->gtOp.gtOp1 = op1;
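// For illustration, the tree is now a double indirection:
//
//     tree = GT_IND(GT_IND(CNS(pFldAddr)))
//
// The inner, invariant load fetches the static's address out of pFldAddr;
// the outer load reads the static itself.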
6543 noway_assert(tree->gtOper == GT_IND);
6544 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
6545 // We must clear it when we transform the node.
6546 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
6547 // that the logic above does its own checking to determine whether a nullcheck is needed.
6548 tree->gtFlags &= ~GTF_IND_ARR_LEN;
6550 GenTreePtr res = fgMorphSmpOp(tree);
6552 // If we have a struct type, this node would previously have been under a GT_ADDR,
6553 // and therefore would have been marked GTF_DONT_CSE.
6554 // TODO-1stClassStructs: revisit this.
6555 if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
6557 res->gtFlags |= GTF_DONT_CSE;
6560 if (fldOffset == 0 && res->OperGet() == GT_IND)
6562 GenTreePtr addr = res->gtOp.gtOp1;
6563 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6564 FieldSeqNode* fieldSeq =
6565 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6566 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6572 //------------------------------------------------------------------------------
6573 // fgMorphCallInline: attempt to inline a call
6576 // call - call expression to inline, inline candidate
6577 // inlineResult - result tracking and reporting
6580 // Attempts to inline the call.
6582 // If successful, callee's IR is inserted in place of the call, and
6583 // is marked with an InlineContext.
// If unsuccessful, the transformations done in anticipation of a
// possible inline are undone, and the candidate flag on the call
// is cleared.
6589 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
// The call must be a candidate for inlining.
6592 assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
6594 // Attempt the inline
6595 fgMorphCallInlineHelper(call, inlineResult);
6597 // We should have made up our minds one way or another....
6598 assert(inlineResult->IsDecided());
6600 // If we failed to inline, we have a bit of work to do to cleanup
6601 if (inlineResult->IsFailure())
6606 // Before we do any cleanup, create a failing InlineContext to
6607 // capture details of the inlining attempt.
6608 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6612 // It was an inline candidate, but we haven't expanded it.
6613 if (call->gtCall.gtReturnType != TYP_VOID)
// Detach the GT_CALL tree from the original statement by
// replacing it with a "nothing" node. Later the "nothing" node will be removed
// and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6619 noway_assert(fgMorphStmt->gtStmtExpr == call);
6620 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
6623 // Clear the Inline Candidate flag so we can ensure later we tried
6624 // inlining all candidates.
6626 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6630 /*****************************************************************************
6631 * Helper to attempt to inline a call
6632 * Sets success/failure in inline result
6633 * If success, modifies current method's IR with inlinee's IR
6634 * If failed, undoes any speculative modifications to current method
6637 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6639 // Don't expect any surprises here.
6640 assert(result->IsCandidate());
6642 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6644 // For now, attributing this to call site, though it's really
6645 // more of a budget issue (lvaCount currently includes all
6646 // caller and prospective callee locals). We still might be
6647 // able to inline other callees into this caller, or inline
6648 // this callee in other callers.
6649 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6653 if (call->IsVirtual())
6655 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6659 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6660 // and recursive tail calls as inline candidates.
6661 noway_assert(!call->IsTailPrefixedCall());
6662 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6664 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6665 Although we have checked this in impCanInline, it is possible that later IL instructions
6666 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6669 if (opts.compNeedSecurityCheck)
6671 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6676 // Calling inlinee's compiler to inline the method.
6679 unsigned startVars = lvaCount;
6684 printf("Expanding INLINE_CANDIDATE in statement ");
6685 printTreeID(fgMorphStmt);
6686 printf(" in BB%02u:\n", compCurBB->bbNum);
6687 gtDispTree(fgMorphStmt);
6688 if (call->IsImplicitTailCall())
6690 printf("Note: candidate is implicit tail call\n");
6695 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6698 // Invoke the compiler to inline the call.
6701 fgInvokeInlineeCompiler(call, result);
6703 if (result->IsFailure())
6705 // Undo some changes made in anticipation of inlining...
6707 // Zero out the used locals
6708 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6709 for (unsigned i = startVars; i < lvaCount; i++)
6711 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
6714 lvaCount = startVars;
6719 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6729 // printf("After inlining lvaCount=%d.\n", lvaCount);
6734 /*****************************************************************************
6736 * Performs checks to see if this tail call can be optimized as epilog+jmp.
6738 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6740 #if FEATURE_FASTTAILCALL
// Reaching here means that the return types of the caller and callee are tail-call compatible.
// In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
//
// In an implicit tail call case, callSig may not be available, but it is guaranteed to be available
// for explicit tail call cases. The reason callSig may not be available for an implicit tail call is
// that the call node might be marked as an inline candidate and fail to be inlined, in which case
// fgInline() replaces the return value placeholder with the call node using gtCloneExpr(), which
// currently does not copy/set callSig.
6749 CLANG_FORMAT_COMMENT_ANCHOR;
6752 if (callee->IsTailPrefixedCall())
6754 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6755 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
// Note on vararg methods:
// If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
// But we can be sure that the incoming arg area of the vararg caller is sufficient to hold its
// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
// outgoing area required for the callee is bounded by the caller's fixed argument space.
//
// Note that the callee being a vararg method is not a problem, since we can account for the params being passed.
6767 // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
6768 unsigned nCallerArgs = info.compArgsCount;
6770 // Count the callee args including implicit and hidden.
6771 // Note that GenericContext and VarargCookie are added by importer while
6772 // importing the call to gtCallArgs list along with explicit user args.
6773 unsigned nCalleeArgs = 0;
if (callee->gtCallObjp) // thisPtr
{
    nCalleeArgs++;
}

if (callee->HasRetBufArg()) // RetBuf
{
    nCalleeArgs++;

    // If the callee has a RetBuf param, the caller must have one too.
    // Otherwise go the slow route.
    if (info.compRetBuffArg == BAD_VAR_NUM)
    {
        return false;
    }
}
// Count user args while tracking whether any of them is a multi-byte param
6792 // that cannot be passed in a register. Note that we don't need to count
6793 // non-standard and secret params passed in registers (e.g. R10, R11) since
6794 // these won't contribute to out-going arg size.
6795 bool hasMultiByteArgs = false;
6796 for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
6800 assert(args->OperIsList());
6801 GenTreePtr argx = args->gtOp.gtOp1;
6803 if (varTypeIsStruct(argx))
6805 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
6806 while (argx->gtOper == GT_COMMA)
6808 argx = argx->gtOp.gtOp2;
6811 // Get the size of the struct and see if it is register passable.
6812 CORINFO_CLASS_HANDLE objClass = nullptr;
6814 if (argx->OperGet() == GT_OBJ)
6816 objClass = argx->AsObj()->gtClass;
6818 else if (argx->IsLocal())
6820 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
6822 if (objClass != nullptr)
6824 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
6826 unsigned typeSize = 0;
6827 hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
6829 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
// On System V and arm64 the arg could be a two-eightbyte struct that is passed in two registers.
6831 // Account for the second eightbyte in the nCalleeArgs.
6832 // https://github.com/dotnet/coreclr/issues/2666
// TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 and 16 bytes are conservatively estimated
//                           as two args, since they need two registers whereas nCallerArgs
//                           counts such an arg as one. This means we will not optimize
//                           certain calls even though it is technically possible.
6838 if (typeSize > TARGET_POINTER_SIZE)
6840 unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
6841 nCalleeArgs += extraArgRegsToAdd;
6843 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
6846 assert(!"Target platform ABI rules regarding passing struct type args in registers");
6848 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
6852 hasMultiByteArgs = true;
// Go the slow route if the callee has multi-byte params.
if (hasMultiByteArgs)
{
    return false;
}
// Reaching here means that the callee has only argument types that can be passed in a
// register, and each arg, if passed on the stack, will occupy exactly one stack slot in the
// outgoing arg area. If we are passing args on the stack for the callee and it has more
// stack args than the caller, then a fast tail call cannot be performed.
6868 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
6869 // as non-interruptible for fast tail calls.
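// For illustration (hypothetical counts on Windows x64, where MAX_REG_ARG == 4):
// a callee with nCalleeArgs == 6 needs two outgoing stack slots; if the caller has
// nCallerArgs == 5 (only one incoming stack slot), the check below rejects the
// fast tail call, because the outgoing args would not fit in the incoming arg area.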
    if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
    {
        return false;
    }

    return true;
#else // FEATURE_FASTTAILCALL
    return false;
#endif
}
6881 /*****************************************************************************
6883 * Transform the given GT_CALL tree for tail call code generation.
6885 void Compiler::fgMorphTailCall(GenTreeCall* call)
6887 JITDUMP("fgMorphTailCall (before):\n");
6890 #if defined(_TARGET_ARM_)
6891 // For the helper-assisted tail calls, we need to push all the arguments
6892 // into a single list, and then add a few extra at the beginning
6894 // Check for PInvoke call types that we don't handle in codegen yet.
6895 assert(!call->IsUnmanaged());
6896 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
6898 // First move the this pointer (if any) onto the regular arg list
6899 GenTreePtr thisPtr = NULL;
6900 if (call->gtCallObjp)
6902 GenTreePtr objp = call->gtCallObjp;
6903 call->gtCallObjp = NULL;
6905 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
6907 thisPtr = gtClone(objp, true);
6908 var_types vt = objp->TypeGet();
6909 if (thisPtr == NULL)
6911 // Too complex, so use a temp
6912 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
6913 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
6914 if (!call->IsVirtualVtable())
6916 // Add an indirection to get the nullcheck
6917 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
6918 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
6919 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
6921 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
6922 thisPtr = gtNewLclvNode(lclNum, vt);
6924 else if (!call->IsVirtualVtable())
6926 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
6927 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
6928 thisPtr = gtClone(thisPtr, true);
6931 call->gtFlags &= ~GTF_CALL_NULLCHECK;
6934 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
6937 // Add the extra VSD parameter if needed
6938 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
6939 if (call->IsVirtualStub())
6941 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
GenTreePtr arg;
if (call->gtCallType == CT_INDIRECT)
6946 arg = gtClone(call->gtCallAddr, true);
6947 noway_assert(arg != NULL);
6951 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
6952 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
6953 arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6955 // Change the call type, so we can add the extra indirection here, rather than in codegen
6956 call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
6957 call->gtStubCallStubAddr = NULL;
6958 call->gtCallType = CT_INDIRECT;
6960 // Add the extra indirection to generate the real target
6961 call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
6962 call->gtFlags |= GTF_EXCEPT;
6964 // And push the stub address onto the list of arguments
6965 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
6967 else if (call->IsVirtualVtable())
6969 // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
6971 noway_assert(thisPtr != NULL);
6973 GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
6974 GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6975 vtbl->gtFlags |= GTF_EXCEPT;
6977 unsigned vtabOffsOfIndirection;
6978 unsigned vtabOffsAfterIndirection;
6979 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
6981 /* Get the appropriate vtable chunk */
6983 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
6984 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
6986 /* Now the appropriate vtable slot */
6988 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
6989 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
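// For illustration, the call target computed above is the triple indirection
//
//     *( *( *(thisPtr + VPTR_OFFS) + vtabOffsOfIndirection ) + vtabOffsAfterIndirection )
//
// i.e. method table, then vtable chunk, then the slot holding the code address.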
6991 // Switch this to a plain indirect call
6992 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
6993 assert(!call->IsVirtual());
6994 call->gtCallType = CT_INDIRECT;
6996 call->gtCallAddr = vtbl;
6997 call->gtCallCookie = NULL;
6998 call->gtFlags |= GTF_EXCEPT;
// Now inject a placeholder for the real call target that codegen will generate
7003 GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7004 codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
7005 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7007 // Lastly inject the pointer for the copy routine
7008 noway_assert(call->callSig != NULL);
7009 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7010 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7011 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7013 // It is now a varargs tail call
7014 call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7015 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7017 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7019 // x86 classic codegen doesn't require any morphing
7021 // For the helper-assisted tail calls, we need to push all the arguments
7022 // into a single list, and then add a few extra at the beginning or end.
7024 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7026 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7028 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7029 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7030 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7032 // For x86, the tailcall helper is defined as:
// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags,
//              void* callTarget)
7037 // Note that the special arguments are on the stack, whereas the function arguments follow
7038 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7039 // look like (highest address at the top):
7040 // first normal stack argument
7042 // last normal stack argument
//      numberOfOldStackArgs
//      numberOfNewStackArgs
//      flags
//      callTarget
7048 // Each special arg is 4 bytes.
7050 // 'flags' is a bitmask where:
7051 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7052 // callee-saved registers for tailcall functions. Note that the helper assumes
7053 // that the callee-saved registers live immediately below EBP, and must have been
7054 // pushed in this order: EDI, ESI, EBX.
7055 // 2 == call target is a virtual stub dispatch.
7057 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7058 // on the custom calling convention.
7060 // Check for PInvoke call types that we don't handle in codegen yet.
7061 assert(!call->IsUnmanaged());
7062 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7064 // Don't support tail calling helper methods
7065 assert(call->gtCallType != CT_HELPER);
// We come down this route only for tail prefixed calls that cannot be dispatched as
// fast tail calls.
7069 assert(!call->IsImplicitTailCall());
7070 assert(!fgCanFastTailCall(call));
7072 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7073 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7074 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7075 // addition, for all platforms, we are going to change the call into a helper call. Our code
7076 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7077 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7078 // since special 'this' pointer handling will no longer kick in.
7080 // Some call types, such as virtual vtable calls, require creating a call address expression
7081 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7082 // to create a temporary that is assigned to the "this" pointer expression, and then use
7083 // that temp to create the call address expression. This temp creation embedded statement
7084 // will occur immediately before the "this" pointer argument, and then will be used for both
7085 // the "this" pointer argument as well as the call address expression. In the normal ordering,
7086 // the embedded statement establishing the "this" pointer temp will execute before both uses
7087 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7088 // normal call argument list, and insert a placeholder which will hold the call address
7089 // expression. For non-x86, things are ok, because the order of execution of these is not
7090 // altered. However, for x86, the call address expression is inserted as the *last* argument
7091 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7092 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7093 // for those cases where call lowering creates an embedded form temp of "this", we will
7094 // create a temp here, early, that will later get morphed correctly.
7096 if (call->gtCallObjp)
7098 GenTreePtr thisPtr = nullptr;
7099 GenTreePtr objp = call->gtCallObjp;
7100 call->gtCallObjp = nullptr;
#if defined(_TARGET_X86_)
    if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7106 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7107 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7109 // COMMA(tmp = "this", tmp)
7110 var_types vt = objp->TypeGet();
7111 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7112 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7116 #endif // _TARGET_X86_
7118 #if defined(_TARGET_X86_)
// When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
7120 // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
7121 // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
    if (call->NeedsNullCheck() || call->IsVirtualStub())
#else
    if (call->NeedsNullCheck())
7125 #endif // defined(_TARGET_X86_)
7127 // clone "this" if "this" has no side effects.
7128 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7130 thisPtr = gtClone(objp, true);
7133 var_types vt = objp->TypeGet();
7134 if (thisPtr == nullptr)
7136 // create a temp if either "this" has side effects or "this" is too complex to clone.
7139 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7140 GenTreePtr asg = gtNewTempAssign(lclNum, objp);
7142 // COMMA(tmp = "this", deref(tmp))
7143 GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
7144 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7145 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7147 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7148 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7152 // thisPtr = COMMA(deref("this"), "this")
7153 GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7154 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7157 call->gtFlags &= ~GTF_CALL_NULLCHECK;
// During rationalization, tmp="this" and the null check will
// materialize as embedded stmts in the right execution order.
7166 assert(thisPtr != nullptr);
7167 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7170 #if defined(_TARGET_AMD64_)
7172 // Add the extra VSD parameter to arg list in case of VSD calls.
7173 // Tail call arg copying thunk will move this extra VSD parameter
7174 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7175 // in Stublinkerx86.cpp for more details.
7176 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7177 if (call->IsVirtualStub())
7179 GenTreePtr stubAddrArg;
7181 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7183 if (call->gtCallType == CT_INDIRECT)
7185 stubAddrArg = gtClone(call->gtCallAddr, true);
7186 noway_assert(stubAddrArg != nullptr);
7190 noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
7192 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7193 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7196 // Push the stub address onto the list of arguments
7197 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7200 // Now inject a placeholder for the real call target that Lower phase will generate.
7201 GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
7202 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7204 // Inject the pointer for the copy routine to be used for struct copying
7205 noway_assert(call->callSig != nullptr);
7206 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7207 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7208 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7210 #else // !_TARGET_AMD64_
7212 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7213 // append to the list.
7214 GenTreeArgList** ppArg = &call->gtCallArgs;
7215 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7217 ppArg = (GenTreeArgList**)&args->gtOp2;
7219 assert(ppArg != nullptr);
7220 assert(*ppArg == nullptr);
7222 unsigned nOldStkArgsWords =
7223 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7224 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7225 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7226 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7228 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7229 // The constant will be replaced.
7230 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7231 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7232 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7234 // Inject a placeholder for the flags.
7235 // The constant will be replaced.
7236 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7237 *ppArg = gtNewListNode(arg1, nullptr);
7238 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7240 // Inject a placeholder for the real call target that the Lowering phase will generate.
7241 // The constant will be replaced.
7242 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7243 *ppArg = gtNewListNode(arg0, nullptr);
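// For illustration, after the injections above the x86 argument list ends with:
//
//     <user args>, numberOfOldStackArgs, numberOfNewStackArgs, flags, callTarget
//
// where the last three are the placeholder constants 9, 8 and 7 created above;
// the Lowering phase later replaces them with the real values.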
7245 #endif // !_TARGET_AMD64_
7247 // It is now a varargs tail call dispatched via helper.
7248 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7249 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7253 JITDUMP("fgMorphTailCall (after):\n");
7257 //------------------------------------------------------------------------------
7258 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7262 // block - basic block ending with a recursive fast tail call
7263 // recursiveTailCall - recursive tail call to transform
7266 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
7268 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7270 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7271 GenTreePtr last = block->lastStmt();
7272 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7274 // Transform recursive tail call into a loop.
7276 GenTreePtr earlyArgInsertionPoint = last;
7277 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
7279 // Hoist arg setup statement for the 'this' argument.
7280 GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
7281 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7283 GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
7284 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7287 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7288 // then the temps need to be assigned to the method parameters. This is done so that the caller
7289 // parameters are not re-assigned before call arguments depending on them are evaluated.
7290 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7291 // where the next temp or parameter assignment should be inserted.
7293 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7294 // while the second call argument (const 1) doesn't.
7295 // Basic block before tail recursion elimination:
7296 // ***** BB04, stmt 1 (top level)
7297 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
7298 // [000033] --C - G------ - \--* call void RecursiveMethod
7299 // [000030] ------------ | / --* const int - 1
7300 // [000031] ------------arg0 in rcx + --* +int
7301 // [000029] ------------ | \--* lclVar int V00 arg1
7302 // [000032] ------------arg1 in rdx \--* const int 1
7305 // Basic block after tail recursion elimination :
7306 // ***** BB04, stmt 1 (top level)
7307 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7308 // [000030] ------------ | / --* const int - 1
7309 // [000031] ------------ | / --* +int
7310 // [000029] ------------ | | \--* lclVar int V00 arg1
7311 // [000050] - A---------- \--* = int
7312 // [000049] D------N---- \--* lclVar int V02 tmp0
7314 // ***** BB04, stmt 2 (top level)
7315 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7316 // [000052] ------------ | / --* lclVar int V02 tmp0
7317 // [000054] - A---------- \--* = int
7318 // [000053] D------N---- \--* lclVar int V00 arg0
7320 // ***** BB04, stmt 3 (top level)
7321 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7322 // [000032] ------------ | / --* const int 1
7323 // [000057] - A---------- \--* = int
7324 // [000056] D------N---- \--* lclVar int V01 arg1
7326 GenTreePtr tmpAssignmentInsertionPoint = last;
7327 GenTreePtr paramAssignmentInsertionPoint = last;
7329 // Process early args. They may contain both setup statements for late args and actual args.
7330 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7331 // below has the correct second argument.
7332 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7333 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7334 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7336 GenTreePtr earlyArg = earlyArgs->Current();
7337 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7339 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7341 // This is a setup node so we need to hoist it.
7342 GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7343 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7347 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7348 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7349 GenTreePtr paramAssignStmt =
7350 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7351 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7352 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7354 // All temp assignments will happen before the first param assignment.
7355 tmpAssignmentInsertionPoint = paramAssignStmt;
7361 // Process late args.
7362 int lateArgIndex = 0;
7363 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7364 (lateArgIndex++, lateArgs = lateArgs->Rest()))
7366 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7367 GenTreePtr lateArg = lateArgs->Current();
7368 fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7369 GenTreePtr paramAssignStmt =
7370 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7371 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7373 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7375 // All temp assignments will happen before the first param assignment.
7376 tmpAssignmentInsertionPoint = paramAssignStmt;
7380 // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
7381 // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7382 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7383 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7385 var_types thisType = lvaTable[info.compThisArg].TypeGet();
7386 GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
7387 GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7388 GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7389 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7393 fgRemoveStmt(block, last);
7395 // Set the loop edge.
7396 block->bbJumpKind = BBJ_ALWAYS;
7397 block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
7398 fgAddRefPred(block->bbJumpDest, block);
7399 block->bbFlags &= ~BBF_HAS_JMP;
7402 //------------------------------------------------------------------------------
7403 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7407 // arg - argument to assign
7408 // argTabEntry - argument table entry corresponding to arg
// block - basic block the call is in
7410 // callILOffset - IL offset of the call
7411 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
7412 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
7415 // parameter assignment statement if one was inserted; nullptr otherwise.
7417 GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
7418 fgArgTabEntryPtr argTabEntry,
7420 IL_OFFSETX callILOffset,
7421 GenTreePtr tmpAssignmentInsertionPoint,
7422 GenTreePtr paramAssignmentInsertionPoint)
7424 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7425 // some argument trees may reference parameters directly.
7427 GenTreePtr argInTemp = nullptr;
7428 unsigned originalArgNum = argTabEntry->argNum;
7429 bool needToAssignParameter = true;
7431 // TODO-CQ: enable calls with struct arguments passed in registers.
7432 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7434 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
// The argument is already assigned to a temp or is a const.
argInTemp = arg;
7439 else if (arg->OperGet() == GT_LCL_VAR)
7441 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7442 LclVarDsc* varDsc = &lvaTable[lclNum];
7443 if (!varDsc->lvIsParam)
// The argument is a non-parameter local so it doesn't need to be assigned to a temp.
argInTemp = arg;
7448 else if (lclNum == originalArgNum)
7450 // The argument is the same parameter local that we were about to assign so
7451 // we can skip the assignment.
7452 needToAssignParameter = false;
7456 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7457 // any caller parameters. Some common cases are handled above but we may be able to eliminate
7458 // more temp assignments.
7460 GenTreePtr paramAssignStmt = nullptr;
7461 if (needToAssignParameter)
7463 if (argInTemp == nullptr)
7465 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7466 // TODO: we can avoid a temp assignment if we can prove that the argument tree
7467 // doesn't involve any caller parameters.
7468 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
7469 GenTreePtr tempSrc = arg;
7470 GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
7471 GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7472 GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7473 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7474 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7477 // Now assign the temp to the parameter.
7478 LclVarDsc* paramDsc = lvaTable + originalArgNum;
7479 assert(paramDsc->lvIsParam);
7480 GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7481 GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7482 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
7484 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7486 return paramAssignStmt;
7489 /*****************************************************************************
7491 * Transform the given GT_CALL tree for code generation.
7494 GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
7496 if (call->CanTailCall())
7498 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7499 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7501 // It cannot be an inline candidate
7502 assert(!call->IsInlineCandidate());
7504 const char* szFailReason = nullptr;
7505 bool hasStructParam = false;
7506 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7508 szFailReason = "Might turn into an intrinsic";
7511 if (opts.compNeedSecurityCheck)
7513 szFailReason = "Needs security check";
7515 else if (compLocallocUsed)
7517 szFailReason = "Localloc used";
7519 #ifdef _TARGET_AMD64_
7520 // Needed for Jit64 compat.
// In the future, enabling tail calls from methods that need a GS cookie check
// would require codegen-side work to emit the GS cookie check before a tail
// call.
7524 else if (getNeedsGSSecurityCookie())
7526 szFailReason = "GS Security cookie check";
7530 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
7531 else if (opts.compGcChecks)
7533 szFailReason = "GcChecks";
7536 #if FEATURE_TAILCALL_OPT
// We are still not sure whether it can be a tail call, because when converting
// a call to an implicit tail call we must check that there are no locals with
// their address taken. If there are, we have to assume that the address
// has been leaked and the current stack frame must live until after the final
// call.
// Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
// that lvHasLdAddrOp is much more conservative; we cannot just base the check on
// lvAddrExposed alone, since it is not guaranteed to be set on all VarDscs
// during the morph stage. The reason for also checking lvAddrExposed is that in case
// of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
// The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
// never to be incorrect.
7553 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
7554 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
7555 // is set. This avoids the need for iterating through all lcl vars of the current
7556 // method. Right now throughout the code base we are not consistently using 'set'
7557 // method to set lvHasLdAddrOp and lvAddrExposed flags.
7560 bool hasAddrExposedVars = false;
7561 bool hasStructPromotedParam = false;
7562 bool hasPinnedVars = false;
7564 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7566 // If the method is marked as an explicit tail call we will skip the
7567 // following three hazard checks.
7568 // We still must check for any struct parameters and set 'hasStructParam'
7569 // so that we won't transform the recursive tail call into a loop.
7571 if (call->IsImplicitTailCall())
7573 if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
7575 hasAddrExposedVars = true;
7578 if (varDsc->lvPromoted && varDsc->lvIsParam)
7580 hasStructPromotedParam = true;
7583 if (varDsc->lvPinned)
7585 // A tail call removes the method from the stack, which means the pinning
7586 // goes away for the callee. We can't allow that.
7587 hasPinnedVars = true;
7591 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
7593 hasStructParam = true;
7594 // This prevents transforming a recursive tail call into a loop
7595 // but doesn't prevent tail call optimization so we need to
7596 // look at the rest of parameters.
7601 if (hasAddrExposedVars)
7603 szFailReason = "Local address taken";
7605 if (hasStructPromotedParam)
7607 szFailReason = "Has Struct Promoted Param";
7611 szFailReason = "Has Pinned Vars";
7614 #endif // FEATURE_TAILCALL_OPT
7616 if (varTypeIsStruct(call))
7618 fgFixupStructReturn(call);
7621 var_types callType = call->TypeGet();
7623 // We have to ensure that we pass the incoming retValBuf as the
7624 // outgoing one. Using a temp will not do as this function will
7625 // not regain control to do the copy.
7627 if (info.compRetBuffArg != BAD_VAR_NUM)
7629 noway_assert(callType == TYP_VOID);
7630 GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
7631 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
7633 szFailReason = "Need to copy return buffer";
7637 // If this is an opportunistic tail call and cannot be dispatched as a
7638 // fast tail call, go the non-tail call route. This is done for perf reasons.
7641 // Avoid the cost of determining whether it can be dispatched as a fast tail
7642 // call if we already know that the tail call cannot be honored for other reasons.
7644 bool canFastTailCall = false;
7645 if (szFailReason == nullptr)
7647 canFastTailCall = fgCanFastTailCall(call);
7648 if (!canFastTailCall)
7650 // Implicit or opportunistic tail calls are always dispatched via fast tail call
7651 // mechanism and never via tail call helper for perf.
7652 if (call->IsImplicitTailCall())
7654 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
7656 #ifndef LEGACY_BACKEND
7657 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
7659 // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be
7660 // dispatched as a fast tail call.
7662 // Methods with non-standard args will have indirection cell or cookie param passed
7663 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
7664 // tail calling the target method and hence ".tail" prefix on such calls needs to be
7667 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
7668 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
7669 // This is done by adding stubAddr as an additional arg before the original list of
7670 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
7671 // in Stublinkerx86.cpp.
7672 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
7673 "called via helper";
7675 #ifdef _TARGET_ARM64_
7678 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
7679 // So, bail out if we can't make fast tail call.
7680 szFailReason = "Non-qualified fast tail call";
7683 #endif // LEGACY_BACKEND
7687 // Clear these flags before calling fgMorphCall() to avoid recursion.
7688 bool isTailPrefixed = call->IsTailPrefixedCall();
7689 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
7691 #if FEATURE_TAILCALL_OPT
7692 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
7696 if (!canFastTailCall && szFailReason == nullptr)
7698 szFailReason = "Non fast tail calls disabled for PAL based systems.";
7700 #endif // FEATURE_PAL
7702 if (szFailReason != nullptr)
7707 printf("\nRejecting tail call late for call ");
7709 printf(": %s\n", szFailReason);
7713 // for non user funcs, we have no handles to report
7714 info.compCompHnd->reportTailCallDecision(nullptr,
7715 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7716 isTailPrefixed, TAILCALL_FAIL, szFailReason);
7721 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
7722 // We enable shared-ret tail call optimization for recursive calls even if
7723 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
7724 if (gtIsRecursiveCall(call))
7727 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
7728 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
7729 if (compCurBB->bbJumpKind != BBJ_RETURN)
7731 compCurBB->bbJumpKind = BBJ_RETURN;
7735 // Set this flag before calling fgMorphCall() to prevent inlining this call.
7736 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
7738 bool fastTailCallToLoop = false;
7739 #if FEATURE_TAILCALL_OPT
7740 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
7741 // or return type is a struct that can be passed in a register.
7743 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
7744 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
7745 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
7746 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
7747 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
7748 // generic type parameters of both caller and callee generic method are the same.
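// For illustration, a recursive tail call such as
//     static int Sum(int n, int acc) { ... return Sum(n - 1, acc + n); }
// can be turned into a loop here: store the outgoing arguments back into the
// parameter locals and branch to the start of the method instead of calling.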
7749 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
7750 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
7752 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
7753 fastTailCallToLoop = true;
7757 // Do some target-specific transformations (before we process the args, etc.)
7758 // This is needed only for tail prefixed calls that cannot be dispatched as fast tail calls.
7760 if (!canFastTailCall)
7762 fgMorphTailCall(call);
7765 // Implementation note : If we optimize tailcall to do a direct jump
7766 // to the target function (after stomping on the return address, etc),
7767 // without using CORINFO_HELP_TAILCALL, we have to make certain that
7768 // we don't starve the hijacking logic (by stomping on the hijacked
7769 // return address etc).
7771 // At this point, we are committed to do the tailcall.
7772 compTailCallUsed = true;
7774 CorInfoTailCall tailCallResult;
7776 if (fastTailCallToLoop)
7778 tailCallResult = TAILCALL_RECURSIVE;
7780 else if (canFastTailCall)
7782 tailCallResult = TAILCALL_OPTIMIZED;
7786 tailCallResult = TAILCALL_HELPER;
7789 // for non user funcs, we have no handles to report
7790 info.compCompHnd->reportTailCallDecision(nullptr,
7791 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
7792 isTailPrefixed, tailCallResult, nullptr);
7794 // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
7795 // to avoid doing any extra work for the return value.
7796 call->gtType = TYP_VOID;
7801 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
7804 if (fastTailCallToLoop)
7806 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
7813 GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
7816 // Tail call needs to be in one of the following IR forms
7817 // Either a call stmt or
7818 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
7819 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
7820 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
7822 // GT_CASTS may be nested.
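// For illustration, C# such as
//     static byte M(int x) { return (byte)Callee(x); }
// reaches this point as GT_RETURN(GT_CAST(GT_CALL(..))), the second form above.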
7823 genTreeOps stmtOper = stmtExpr->gtOper;
7824 if (stmtOper == GT_CALL)
7826 noway_assert(stmtExpr == call);
7830 noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
7831 GenTreePtr treeWithCall;
7832 if (stmtOper == GT_RETURN)
7834 treeWithCall = stmtExpr->gtGetOp1();
7836 else if (stmtOper == GT_COMMA)
7838 // Second operation must be nop.
7839 noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
7840 treeWithCall = stmtExpr->gtGetOp1();
7844 treeWithCall = stmtExpr->gtGetOp2();
7848 while (treeWithCall->gtOper == GT_CAST)
7850 noway_assert(!treeWithCall->gtOverflow());
7851 treeWithCall = treeWithCall->gtGetOp1();
7854 noway_assert(treeWithCall == call);
7858 // For void calls, we would have created a GT_CALL in the stmt list.
7859 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
7860 // For calls returning structs, we would have a void call, followed by a void return.
7861 // For debuggable code, it would be an assignment of the call to a temp.
7862 // We want to get rid of any of these extra trees, and just leave the call.
7864 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
7866 #ifdef _TARGET_AMD64_
7867 // Legacy Jit64 Compat:
7868 // There could be any number of GT_NOPs between tail call and GT_RETURN.
7869 // That is, the tail call pattern could be one of the following:
7870 // 1) tail.call, nop*, ret
7871 // 2) tail.call, nop*, pop, nop*, ret
7872 // 3) var=tail.call, nop*, ret(var)
7873 // 4) var=tail.call, nop*, pop, ret
7874 // 5) comma(tail.call, nop), nop*, ret
7876 // See impIsTailCallILPattern() for details on tail call IL patterns
7877 // that are supported.
7878 if (stmtExpr->gtOper != GT_RETURN)
7880 // First delete all GT_NOPs after the call
7881 GenTreeStmt* morphStmtToRemove = nullptr;
7882 while (nextMorphStmt != nullptr)
7884 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
7885 if (!nextStmtExpr->IsNothingNode())
7890 morphStmtToRemove = nextMorphStmt;
7891 nextMorphStmt = morphStmtToRemove->gtNextStmt;
7892 fgRemoveStmt(compCurBB, morphStmtToRemove);
7895 // Check to see if there is a pop.
7896 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
7897 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
7899 // Note that pop opcode may or may not result in a new stmt (for details see
7900 // impImportBlockCode()). Hence, it is not possible to assert about the IR
7901 // form generated by pop, but the pop tree must be side-effect free so that we can
7902 // delete it safely.
7903 GenTreeStmt* popStmt = nextMorphStmt;
7904 nextMorphStmt = nextMorphStmt->gtNextStmt;
7906 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
7907 // the constituent nodes.
7908 GenTreePtr popExpr = popStmt->gtStmtExpr;
7909 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
7910 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
7912 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
7913 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
7915 noway_assert(isSideEffectFree);
7916 fgRemoveStmt(compCurBB, popStmt);
7919 // Next delete any GT_NOP nodes after pop
7920 while (nextMorphStmt != nullptr)
7922 GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
7923 if (!nextStmtExpr->IsNothingNode())
7928 morphStmtToRemove = nextMorphStmt;
7929 nextMorphStmt = morphStmtToRemove->gtNextStmt;
7930 fgRemoveStmt(compCurBB, morphStmtToRemove);
7933 #endif // _TARGET_AMD64_
7935 // Delete GT_RETURN if any
7936 if (nextMorphStmt != nullptr)
7938 GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
7939 noway_assert(retExpr->gtOper == GT_RETURN);
7941 // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
7942 // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
7943 if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
7945 noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
7946 noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
7947 retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
7950 fgRemoveStmt(compCurBB, nextMorphStmt);
7953 fgMorphStmt->gtStmtExpr = call;
7955 // Tail call via helper: The VM can't use return address hijacking if we're
7956 // not going to return and the helper doesn't have enough info to safely poll,
7957 // so we poll before the tail call, if the block isn't already safe. Since
7958 // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
7959 // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
7960 // size increase if almost all methods are expected to be tail calls (e.g. F#).
7962 // Note that we can avoid emitting GC-poll if we know that the current BB is
7963 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
7964 // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
7965 // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
7966 // now it is not clear whether optimizing slow tail calls is worth the effort. As a
7967 // low cost check, we check whether the first and current basic blocks are GC-safe points.
7970 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
7971 // is going to mark the method as fully interruptible if the block containing this tail
7972 // call is reachable without executing any call.
7973 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
7974 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
7976 // We didn't insert a poll block, so we need to morph the call now
7977 // (Normally it will get morphed when we get to the split poll block)
7978 GenTreePtr temp = fgMorphCall(call);
7979 noway_assert(temp == call);
7982 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
7983 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
7985 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
7986 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
7987 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
7989 if (canFastTailCall)
7991 compCurBB->bbFlags |= BBF_HAS_JMP;
7995 compCurBB->bbJumpKind = BBJ_THROW;
7998 // For non-void calls, we return a place holder which will be
7999 // used by the parent GT_RETURN node of this call.
8001 GenTree* result = call;
8002 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8005 // Return a dummy node, as the return is already removed.
8006 if (callType == TYP_STRUCT)
8008 // This is a HFA, use float 0.
8009 callType = TYP_FLOAT;
8011 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8012 // Return a dummy node, as the return is already removed.
8013 if (varTypeIsStruct(callType))
8015 // This is a register-returned struct. Return a 0.
8016 // The actual return registers are hacked in lower and the register allocator.
8021 // Return a dummy node, as the return is already removed.
8022 if (varTypeIsSIMD(callType))
8024 callType = TYP_DOUBLE;
8027 result = gtNewZeroConNode(genActualType(callType));
8028 result = fgMorphTree(result);
8036 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8037 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8038 #ifdef FEATURE_READYTORUN_COMPILER
8039 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8042 (call == fgMorphStmt->gtStmtExpr))
8044 // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
8045 // Transform it into a null check.
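// For illustration, the rewrite is roughly:
//     CALL CORINFO_HELP_VIRTUAL_FUNC_PTR(thisPtr, ...)   // result unused
// becomes
//     GT_IND<TYP_I_IMPL>(thisPtr)                        // faults if thisPtr is null
// preserving only the null-check side effect of the original call.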
8047 GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
8049 GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8050 nullCheck->gtFlags |= GTF_EXCEPT;
8052 return fgMorphTree(nullCheck);
8055 noway_assert(call->gtOper == GT_CALL);
8058 // Only count calls once (only in the global morph phase)
8062 if (call->gtCallType == CT_INDIRECT)
8065 optIndirectCallCount++;
8067 else if (call->gtCallType == CT_USER_FUNC)
8070 if (call->IsVirtual())
8072 optIndirectCallCount++;
8077 // Couldn't inline - remember that this BB contains method calls
8079 // If this is a 'regular' call, mark the basic block as
8080 // having a call (for computing full interruptibility).
8081 CLANG_FORMAT_COMMENT_ANCHOR;
8083 #ifdef _TARGET_AMD64_
8084 // Amd64 note: If this is a fast tail call then don't count it as a call
8085 // since we don't insert GC-polls but instead make the method fully GC
8087 if (!call->IsFastTailCall())
8090 if (call->gtCallType == CT_INDIRECT)
8092 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8094 else if (call->gtCallType == CT_USER_FUNC)
8096 if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
8098 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8101 // otherwise we have a CT_HELPER
8104 // Morph Type.op_Equality and Type.op_Inequality
8105 // We need to do this before the arguments are morphed
8106 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8108 CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
8110 genTreeOps simpleOp = GT_CALL;
8111 if (methodID == CORINFO_INTRINSIC_TypeEQ)
8115 else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
8120 if (simpleOp == GT_EQ || simpleOp == GT_NE)
8122 noway_assert(call->TypeGet() == TYP_INT);
8124 // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType
8125 // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
8126 // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
8127 // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
8128 // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
8129 // optimization below.
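// For illustration, C# such as
//     bool same = obj.GetType() == typeof(string);
// imports as a call to Type.op_Equality; given the invariant above it can be
// rewritten as a GT_EQ comparing the two RuntimeType references directly.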
8131 GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
8132 GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
8134 if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
8136 GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
8138 // fgMorphSmpOp will further optimize the following patterns:
8139 // 1. typeof(...) == typeof(...)
8140 // 2. typeof(...) == obj.GetType()
8141 return fgMorphTree(compare);
8146 // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
8147 GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require copy-back).
8149 unsigned retValTmpNum = BAD_VAR_NUM;
8150 CORINFO_CLASS_HANDLE structHnd = nullptr;
8151 if (call->HasRetBufArg() &&
8152 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8154 // We're enforcing the invariant that return buffers pointers (at least for
8155 // struct return types containing GC pointers) are never pointers into the heap.
8156 // The large majority of cases are address of local variables, which are OK.
8157 // Otherwise, allocate a local of the given struct type, pass its address,
8158 // then assign from that into the proper destination. (We don't need to do this
8159 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8160 // will maintain the same invariant.)
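// For illustration (hypothetical destination), a call whose ret buff arg
// points into the heap, roughly
//     CALL foo(&someObj.field)
// is rewritten below as
//     CALL foo(&retValTmp); copyblk(&someObj.field, &retValTmp)
// so the callee only ever writes into a stack-allocated buffer.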
8162 GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
8163 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8164 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8166 // We'll exempt helper calls from this, assuming that the helper implementation
8167 // follows the old convention, and does whatever barrier is required.
8168 if (call->gtCallType != CT_HELPER)
8170 structHnd = call->gtRetClsHnd;
8171 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8172 !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
8173 dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8177 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8178 lvaSetStruct(retValTmpNum, structHnd, true);
8179 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8184 call->gtCallArgs->gtOp.gtOp1 = dest;
8187 /* Process the "normal" argument list */
8188 call = fgMorphArgs(call);
8189 noway_assert(call->gtOper == GT_CALL);
8191 // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
8192 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
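// For illustration, C# "arr[i] = null;" imports as
//     CALL CORINFO_HELP_ARRADDR_ST(arr, i, null)
// and, once the value is known to be a zero constant, can be rewritten as
//     GT_ASG(GT_INDEX(arr, i), null)
// since storing null needs neither a covariance check nor a write barrier.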
8193 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8195 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8196 if (value->IsIntegralConst(0))
8198 assert(value->OperGet() == GT_CNS_INT);
8200 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
8201 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8203 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8204 // the spill trees as well if necessary.
8205 GenTreeOp* argSetup = nullptr;
8206 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8208 GenTree* const arg = earlyArgs->Current();
8209 if (arg->OperGet() != GT_ASG)
8215 assert(arg != index);
8217 arg->gtFlags &= ~GTF_LATE_ARG;
8219 GenTree* op1 = argSetup;
8222 op1 = gtNewNothingNode();
8224 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8228 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8231 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8236 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8237 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8238 return WALK_CONTINUE;
8241 fgWalkTreePost(&arr, resetMorphedFlag);
8242 fgWalkTreePost(&index, resetMorphedFlag);
8243 fgWalkTreePost(&value, resetMorphedFlag);
8246 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8247 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8248 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
8249 arrStore->gtFlags |= GTF_ASG;
8251 GenTree* result = fgMorphTree(arrStore);
8252 if (argSetup != nullptr)
8254 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8256 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8264 // Optimize get_ManagedThreadId(get_CurrentThread)
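// For illustration, C# "Thread.CurrentThread.ManagedThreadId" imports as the
// nested intrinsic calls matched below and is folded into a single call to
// CORINFO_HELP_GETCURRENTMANAGEDTHREADID, never materializing the Thread object.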
8265 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8266 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8268 noway_assert(origDest == nullptr);
8269 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8271 GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8273 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8274 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8275 CORINFO_INTRINSIC_GetCurrentManagedThread)
8277 // substitute expression with call to helper
8278 GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
8279 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8280 return fgMorphTree(newCall);
8284 if (origDest != nullptr)
8286 GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8287 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8288 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
8289 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
8291 if (origDest->OperGet() == GT_ASG)
8293 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8295 GenTreePtr var = origDest->gtOp.gtOp1;
8296 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8297 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8300 GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8301 copyBlk = fgMorphTree(copyBlk);
8302 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8304 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8309 if (call->IsNoReturn())
8312 // If we know that the call does not return then we can set fgRemoveRestOfBlock
8313 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8314 // As a result the compiler won't need to preserve live registers across the call.
8316 // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
8317 // Besides, the tail call code is part of the epilog and converting the block to
8318 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8319 // only for BBJ_RETURN blocks.
8321 // Currently this doesn't work for non-void callees. Some of the code that handles
8322 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8323 // do not have this flag by default. We could add the flag here but the proper solution
8324 // would be to replace the return expression with a local var node during inlining
8325 // so the rest of the call tree stays in a separate statement. That statement can then
8326 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8329 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8331 fgRemoveRestOfBlock = true;
8338 /*****************************************************************************
8340 * Transform the given GTK_CONST tree for code generation.
8343 GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
8345 noway_assert(tree->OperKind() & GTK_CONST);
8347 /* Clear any exception flags or other unnecessary flags
8348 * that may have been set before folding this node to a constant */
8350 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8352 if (tree->OperGet() != GT_CNS_STR)
8357 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8358 // guarantee slow performance for that block. Instead cache the return value
8359 // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
8361 if (compCurBB->bbJumpKind == BBJ_THROW)
8363 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8364 if (helper != CORINFO_HELP_UNDEF)
8366 // For un-important blocks, we want to construct the string lazily
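// For illustration, a literal that is only referenced on a throw path, e.g.
//     throw new Exception("boom");
// gets a helper call here that constructs the string on first use instead of
// paying for eager construction of a literal that may never be needed.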
8368 GenTreeArgList* args;
8369 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8371 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8375 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8376 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8379 tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
8380 return fgMorphTree(tree);
8384 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8387 InfoAccessType iat =
8388 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8390 tree = gtNewStringLiteralNode(iat, pValue);
8392 return fgMorphTree(tree);
8395 /*****************************************************************************
8397 * Transform the given GTK_LEAF tree for code generation.
8400 GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
8402 noway_assert(tree->OperKind() & GTK_LEAF);
8404 if (tree->gtOper == GT_LCL_VAR)
8406 return fgMorphLocalVar(tree);
8409 else if (tree->gtOper == GT_LCL_FLD)
8411 if (info.compIsVarArgs)
8413 GenTreePtr newTree =
8414 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8415 if (newTree != nullptr)
8417 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8419 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8425 #endif // _TARGET_X86_
8426 else if (tree->gtOper == GT_FTN_ADDR)
8428 CORINFO_CONST_LOOKUP addrInfo;
8430 #ifdef FEATURE_READYTORUN_COMPILER
8431 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8433 addrInfo = tree->gtFptrVal.gtEntryPoint;
8438 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8441 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8443 tree->SetOper(GT_CNS_INT);
8444 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8445 tree->gtFlags |= GTF_ICON_FTN_ADDR;
8447 switch (addrInfo.accessType)
8450 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8451 tree->gtFlags |= GTF_IND_INVARIANT;
8456 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8460 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8464 noway_assert(!"Unknown addrInfo.accessType");
8467 return fgMorphTree(tree);
8473 void Compiler::fgAssignSetVarDef(GenTreePtr tree)
8475 GenTreeLclVarCommon* lclVarCmnTree;
8476 bool isEntire = false;
8477 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
8481 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8485 // We consider partial definitions to be modeled as uses followed by definitions.
8486 // This captures the idea that preceding defs are not necessarily made redundant
8487 // by this definition.
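// For illustration, with a struct local "struct S { int a; int b; } s;", the
// store "s.a = 0" is a partial def of 's' (GTF_VAR_DEF | GTF_VAR_USEASG),
// while "s = t" defines the whole local and is marked with GTF_VAR_DEF alone.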
8488 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8493 //------------------------------------------------------------------------
8494 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8497 // tree - The block assignment to be possibly morphed
8500 // The modified tree if successful, nullptr otherwise.
8503 // 'tree' must be a block assignment.
8506 // If successful, this method always returns the incoming tree, modifying only
8509 GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
8511 // This must be a block assignment.
8512 noway_assert(tree->OperIsBlkOp());
8513 var_types asgType = tree->TypeGet();
8515 GenTreePtr asg = tree;
8516 GenTreePtr dest = asg->gtGetOp1();
8517 GenTreePtr src = asg->gtGetOp2();
8518 unsigned destVarNum = BAD_VAR_NUM;
8519 LclVarDsc* destVarDsc = nullptr;
8520 GenTreePtr lclVarTree = nullptr;
8521 bool isCopyBlock = asg->OperIsCopyBlkOp();
8522 bool isInitBlock = !isCopyBlock;
8525 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8527 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
8528 // The SIMD type in question could be Vector2f which is 8-bytes in size.
8529 // The below check is to make sure that we don't turn that copyblk
8530 // into an assignment, since rationalizer logic will transform the
8531 // copyblk appropriately. Otherwise, the transformation made in this
8532 // routine will prevent rationalizer logic and we might end up with
8533 // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
8535 // TODO-1stClassStructs: This is here to preserve old behavior.
8536 // It should be eliminated.
8537 if (src->OperGet() == GT_SIMD)
8543 if (dest->gtEffectiveVal()->OperIsBlk())
8545 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8546 size = lhsBlk->Size();
8547 if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
8549 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8550 destVarDsc = &(lvaTable[destVarNum]);
8552 if (lhsBlk->OperGet() == GT_OBJ)
8554 clsHnd = lhsBlk->AsObj()->gtClass;
8559 // Is this an enregisterable struct that is already a simple assignment?
8560 // This can happen if we are re-morphing.
8561 if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8565 noway_assert(dest->OperIsLocal());
8567 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
8568 destVarDsc = &(lvaTable[destVarNum]);
8571 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8572 size = info.compCompHnd->getClassSize(clsHnd);
8576 size = destVarDsc->lvExactSize;
8581 // See if we can do a simple transformation:
8583 //     GT_ASG <TYP_size>
8585 //     with a GT_IND as the dest and a GT_IND or CNS_INT as the src
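// For illustration, on a 64-bit target an 8-byte struct copy, roughly
//     copyblk(&d, &s, 8)
// can be retyped as the scalar assignment
//     *(long*)&d = *(long*)&s
// (or a TYP_REF move when the GC layout says the single slot holds a pointer).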
8590 if (size == REGSIZE_BYTES)
8592 if (clsHnd == NO_CLASS_HANDLE)
8594 // A register-sized cpblk can be treated as an integer assignment.
8595 asgType = TYP_I_IMPL;
8600 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
8601 asgType = getJitGCType(gcPtr);
8612 asgType = TYP_SHORT;
8615 #ifdef _TARGET_64BIT_
8619 #endif // _TARGET_64BIT_
8623 // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
8624 if (!varTypeIsStruct(asgType))
8626 // For initBlk, a non-constant source is not going to allow us to fiddle
8627 // with the bits to create a single assignment.
8628 noway_assert(size <= REGSIZE_BYTES);
8630 if (isInitBlock && !src->IsConstInitVal())
8635 if (destVarDsc != nullptr)
8637 #if LOCAL_ASSERTION_PROP
8638 // Kill everything about dest
8639 if (optLocalAssertionProp)
8641 if (optAssertionCount > 0)
8643 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
8646 #endif // LOCAL_ASSERTION_PROP
8648 // A previous incarnation of this code also required the local not to be
8649 // address-exposed (i.e. taken). That seems orthogonal to the decision of whether
8650 // to do field-wise assignments: being address-exposed will cause it to be
8651 // "dependently" promoted, so it will be in the right memory location. One possible
8652 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
8653 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
8654 // concern, then we could compromise, and say that being address-exposed plus having fields that do not
8655 // completely cover the memory of the struct prevents field-wise assignments. The same situation exists for the "src" decision.
8656 if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
8658 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
8661 else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
8663 // Use the dest local var directly, as well as its type.
8665 asgType = destVarDsc->lvType;
8667 // If the block operation had been a write to a local var of a small int type,
8668 // of the exact size of the small int type, and the var is NormalizeOnStore,
8669 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
8670 // have done that normalization. If we're now making it into an assignment,
8671 // the NormalizeOnStore will work, and it can be a full def.
8672 if (destVarDsc->lvNormalizeOnStore())
8674 dest->gtFlags &= (~GTF_VAR_USEASG);
8679 // Could be a non-promoted struct, or a floating point type local, or
8680 // an int subject to a partial write. Don't enregister.
8681 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
8683 // Mark the local var tree as a definition point of the local.
8684 lclVarTree->gtFlags |= GTF_VAR_DEF;
8685 if (size < destVarDsc->lvExactSize)
8686 { // If it's not a full-width assignment....
8687 lclVarTree->gtFlags |= GTF_VAR_USEASG;
8690 if (dest == lclVarTree)
8692 dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
8697 // Check to ensure we don't have a reducible *(& ... )
8698 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
8700 GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
8701 // Ignore reinterpret casts between int/gc
8702 if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
8705 asgType = addrOp->TypeGet();
8709 if (dest->gtEffectiveVal()->OperIsIndir())
8711 // If we have no information about the destination, we have to assume it could
8712 // live anywhere (not just in the GC heap).
8713 // Mark the GT_IND node so that we use the correct write barrier helper in case
8714 // the field is a GC ref.
8716 if (!fgIsIndirOfAddrOfLocal(dest))
8718 dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8719 tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8723 LclVarDsc* srcVarDsc = nullptr;
8726 if (src->OperGet() == GT_LCL_VAR)
8729 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
8731 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
8733 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
8735 if (srcVarDsc != nullptr)
8737 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
8739 // Let fgMorphCopyBlock handle it.
8742 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
8743 size == genTypeSize(genActualType(lclVarTree->TypeGet())))
8745 // Use the src local var directly.
8750 #ifndef LEGACY_BACKEND
8752 // The source argument of the copyblk can potentially
8753 // be accessed only through indir(addr(lclVar))
8754 // or indir(lclVarAddr) in rational form and liveness
8755 // won't account for these uses. That said,
8756 // we have to mark this local as address exposed so
8757 // we don't delete it as a dead store later on.
8758 unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
8759 lvaTable[lclVarNum].lvAddrExposed = true;
8760 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
8762 #else // LEGACY_BACKEND
8763 lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
8764 #endif // LEGACY_BACKEND
8766 if (src == lclVarTree)
8768 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
8769 src = gtNewOperNode(GT_IND, asgType, srcAddr);
8773 assert(src->OperIsIndir());
8777 // If we have no information about the src, we have to assume it could
8778 // live anywhere (not just in the GC heap).
8779 // Mark the GT_IND node so that we use the correct write barrier helper in case
8780 // the field is a GC ref.
8782 if (!fgIsIndirOfAddrOfLocal(src))
8784 src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
8791 if (varTypeIsSIMD(asgType))
8793 assert(!isCopyBlock); // Else we would have returned the tree above.
8794 noway_assert(src->IsIntegralConst(0));
8795 noway_assert(destVarDsc != nullptr);
8797 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
8798 tree->gtOp.gtOp2 = src;
8804 if (src->OperIsInitVal())
8806 src = src->gtGetOp1();
8808 assert(src->IsCnsIntOrI());
8809 // This will mutate the integer constant, in place, to be the correct
8810 // value for the type we are using in the assignment.
8811 src->AsIntCon()->FixupInitBlkValue(asgType);
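// For illustration, an initblk fill byte of 0x2A being assigned as a TYP_INT
// becomes the constant 0x2A2A2A2A: the fill byte replicated across the width.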
8815 // Ensure that the dest is setup appropriately.
8816 if (dest->gtEffectiveVal()->OperIsIndir())
8818 dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
8821 // Ensure that the rhs is setup appropriately.
8824 src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
8827 // Set the lhs and rhs on the assignment.
8828 if (dest != tree->gtOp.gtOp1)
8830 asg->gtOp.gtOp1 = dest;
8832 if (src != asg->gtOp.gtOp2)
8834 asg->gtOp.gtOp2 = src;
8837 asg->ChangeType(asgType);
8838 dest->gtFlags |= GTF_DONT_CSE;
8839 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
8840 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
8841 asg->gtFlags &= ~GTF_REVERSE_OPS;
8846 printf("fgMorphOneAsgBlock (after):\n");
8856 //------------------------------------------------------------------------
8857 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
8860 // tree - a tree node with a gtOper of GT_INITBLK
8861 // the child nodes for tree have already been Morphed
8864 // We can return the original GT_INITBLK unmodified (least desirable, but always correct)
8865 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
8866 // If we have performed struct promotion of the Dest() then we will try to
8867 // perform a field by field assignment for each of the promoted struct fields
8870 // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
8871 // if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
8872 // cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
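// For illustration, with a promoted "struct S { int a; int b; } s;", an
//     initblk(&s, 0, 8)
// is expanded below, field by field, into roughly
//     COMMA(ASG(s.a, 0), ASG(s.b, 0))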
8874 GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
8876 // We must have the GT_ASG form of InitBlkOp.
8877 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
8879 bool morphed = false;
8882 GenTree* asg = tree;
8883 GenTree* src = tree->gtGetOp2();
8884 GenTree* origDest = tree->gtGetOp1();
8886 GenTree* dest = fgMorphBlkNode(origDest, true);
8887 if (dest != origDest)
8889 tree->gtOp.gtOp1 = dest;
8891 tree->gtType = dest->TypeGet();
8892 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
8893 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
8894 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
8896 src->gtType = TYP_INT;
8898 JITDUMP("\nfgMorphInitBlock:");
8900 GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
8903 JITDUMP(" using oneAsgTree.\n");
8908 GenTree* destAddr = nullptr;
8909 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
8910 GenTree* blockSize = nullptr;
8911 unsigned blockWidth = 0;
8912 FieldSeqNode* destFldSeq = nullptr;
8913 LclVarDsc* destLclVar = nullptr;
8914 bool destDoFldAsg = false;
8915 unsigned destLclNum = BAD_VAR_NUM;
8916 bool blockWidthIsConst = false;
8917 GenTreeLclVarCommon* lclVarTree = nullptr;
8918 if (dest->IsLocal())
8920 lclVarTree = dest->AsLclVarCommon();
8924 if (dest->OperIsBlk())
8926 destAddr = dest->AsBlk()->Addr();
8927 blockWidth = dest->AsBlk()->gtBlkSize;
8931 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
8932 destAddr = dest->gtGetOp1();
8933 blockWidth = genTypeSize(dest->TypeGet());
8936 if (lclVarTree != nullptr)
8938 destLclNum = lclVarTree->gtLclNum;
8939 destLclVar = &lvaTable[destLclNum];
8940 blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
8941 blockWidthIsConst = true;
8945 if (dest->gtOper == GT_DYN_BLK)
8947 // The size must be an integer type
8948 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
8949 assert(varTypeIsIntegral(blockSize->gtType));
8953 assert(blockWidth != 0);
8954 blockWidthIsConst = true;
8957 if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
8959 destLclNum = lclVarTree->gtLclNum;
8960 destLclVar = &lvaTable[destLclNum];
8963 if (destLclNum != BAD_VAR_NUM)
8965 #if LOCAL_ASSERTION_PROP
8966 // Kill everything about destLclNum (and its field locals)
8967 if (optLocalAssertionProp)
8969 if (optAssertionCount > 0)
8971 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
8974 #endif // LOCAL_ASSERTION_PROP
8976 if (destLclVar->lvPromoted && blockWidthIsConst)
8978 assert(initVal->OperGet() == GT_CNS_INT);
8979 noway_assert(varTypeIsStruct(destLclVar));
8980 noway_assert(!opts.MinOpts());
8981 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
8983 JITDUMP(" dest is address exposed");
8987 if (blockWidth == destLclVar->lvExactSize)
8989 JITDUMP(" (destDoFldAsg=true)");
8990 // We may decide later that a copyblk is required when this struct has holes
8991 destDoFldAsg = true;
8995 JITDUMP(" with mismatched size");
9001 // Can we use field by field assignment for the dest?
9002 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9004 JITDUMP(" dest contains holes");
9005 destDoFldAsg = false;
9008 JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9010 // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9011 // we need to change it back.
9012 if (!destDoFldAsg && !dest->OperIsBlk())
9014 noway_assert(blockWidth != 0);
9015 tree->gtOp.gtOp1 = origDest;
9016 tree->gtType = origDest->gtType;
9019 if (!destDoFldAsg && (destLclVar != nullptr))
9021 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9022 if (!destLclVar->lvRegStruct)
9024 // Mark it as DoNotEnregister.
9025 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9029 // Mark the dest struct as DoNotEnreg
9030 // when they are LclVar structs and we are using a CopyBlock
9031 // or the struct is not promoted
9035 #if CPU_USES_BLOCK_MOVE
9036 compBlkOpUsed = true;
9038 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9039 tree->gtOp.gtOp1 = dest;
9040 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9044 // The initVal must be a constant of TYP_INT
9045 noway_assert(initVal->OperGet() == GT_CNS_INT);
9046 noway_assert(genActualType(initVal->gtType) == TYP_INT);
9048 // The dest must be of a struct type.
9049 noway_assert(varTypeIsStruct(destLclVar));
9052 // Now, convert InitBlock to individual assignments
9056 INDEBUG(morphed = true);
9060 unsigned fieldLclNum;
9061 unsigned fieldCnt = destLclVar->lvFieldCnt;
9063 for (unsigned i = 0; i < fieldCnt; ++i)
9065 fieldLclNum = destLclVar->lvFieldLclStart + i;
9066 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9068 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9069 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9070 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9072 srcCopy = gtCloneExpr(initVal);
9073 noway_assert(srcCopy != nullptr);
9075 // need type of oper to be same as tree
9076 if (dest->gtType == TYP_LONG)
9078 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9079 // copy and extend the value
9080 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9081 /* Change the types of srcCopy to TYP_LONG */
9082 srcCopy->gtType = TYP_LONG;
9084 else if (varTypeIsFloating(dest->gtType))
9086 srcCopy->ChangeOperConst(GT_CNS_DBL);
9087 // setup the bit pattern
9088 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9089 sizeof(srcCopy->gtDblCon.gtDconVal));
9090 /* Change the types of srcCopy to TYP_DOUBLE */
9091 srcCopy->gtType = TYP_DOUBLE;
9095 noway_assert(srcCopy->gtOper == GT_CNS_INT);
9096 noway_assert(srcCopy->TypeGet() == TYP_INT);
9097 // setup the bit pattern
9098 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9099 sizeof(srcCopy->gtIntCon.gtIconVal));
9102 srcCopy->gtType = dest->TypeGet();
9104 asg = gtNewAssignNode(dest, srcCopy);
9106 #if LOCAL_ASSERTION_PROP
9107 if (optLocalAssertionProp)
9109 optAssertionGen(asg);
9111 #endif // LOCAL_ASSERTION_PROP
9115 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9128 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9132 printf("fgMorphInitBlock (after):\n");
9141 //------------------------------------------------------------------------
9142 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9145 // tree - the node to be modified.
9146 // type - the type of indirection to change it to.
9149 // Returns the node, modified in place.
9152 // This doesn't really warrant a separate method, but is here to abstract
9153 // the fact that these nodes can be modified in-place.
9155 GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9157 tree->SetOper(GT_IND);
9158 tree->gtType = type;
9162 //------------------------------------------------------------------------
9163 // fgMorphGetStructAddr: Gets the address of a struct object
9166 // pTree - the parent's pointer to the struct object node
9167 // clsHnd - the class handle for the struct type
9168 // isRValue - true if this is a source (not dest)
9171 // Returns the address of the struct value, possibly modifying the existing tree to
9172 // sink the address below any comma nodes (this is to canonicalize for value numbering).
9173 // If this is a source, it will morph it to an GT_IND before taking its address,
9174 // since it may not be remorphed (and we don't want blk nodes as rvalues).
9176 GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9179 GenTree* tree = *pTree;
9180 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9181 // need to hang onto that for the purposes of value numbering.
9182 if (tree->OperIsIndir())
9184 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9186 addr = tree->gtOp.gtOp1;
9190 if (isRValue && tree->OperIsBlk())
9192 tree->ChangeOper(GT_IND);
9194 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9197 else if (tree->gtOper == GT_COMMA)
9199 // If this is a comma, we're going to "sink" the GT_ADDR below it.
9200 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9201 tree->gtType = TYP_BYREF;
9206 switch (tree->gtOper)
9213 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9217 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9218 // not going to use "temp"
9219 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9220 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9229 //------------------------------------------------------------------------
9230 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9233 // tree - The struct type node
9234 // isDest - True if this is the destination of the assignment
9237 // Returns the possibly-morphed node. The caller is responsible for updating
9238 // the parent of this node.
9240 GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
9242 if (tree->gtOper == GT_COMMA)
9244 GenTree* effectiveVal = tree->gtEffectiveVal();
9245 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9247 addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9249 // In order to CSE and value number array index expressions and bounds checks,
9250 // the commas in which they are contained need to match.
9251 // The pattern is that the COMMA should be the address expression.
9252 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9253 // TODO-1stClassStructs: Consider whether this can be improved.
9254 // Also consider whether some of this can be included in gtNewBlockVal (though note
9255 // that doing so may cause us to query the type system before we otherwise would).
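// For illustration, the reshaping is roughly:
//     COMMA(rangeCheck, struct value)
// becomes
//     OBJ(COMMA(rangeCheck, ADDR(struct value)))
// so the COMMA itself becomes the (TYP_BYREF) address expression.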
9256 GenTree* lastComma = nullptr;
9257 for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
9259 next->gtType = TYP_BYREF;
9262 if (lastComma != nullptr)
9264 noway_assert(lastComma->gtGetOp2() == effectiveVal);
9265 lastComma->gtOp.gtOp2 = addr;
9268 var_types structType = effectiveVal->TypeGet();
9269 if (structType == TYP_STRUCT)
9271 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
9272 if (structHnd == NO_CLASS_HANDLE)
9274 tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
9278 tree = gtNewObjNode(structHnd, addr);
9279 if (tree->OperGet() == GT_OBJ)
9281 gtSetObjGcInfo(tree->AsObj());
9287 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9290 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9294 if (!tree->OperIsBlk())
9298 GenTreeBlk* blkNode = tree->AsBlk();
9299 if (blkNode->OperGet() == GT_DYN_BLK)
9301 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9303 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9304 // A GT_BLK with size of zero is not supported,
9305 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9308 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9309 blkNode->ChangeOper(GT_BLK);
9310 blkNode->gtBlkSize = size;
9322 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9323 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9325 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9326 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9328 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
9335 //------------------------------------------------------------------------
9336 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9339 // tree - The block operand
9340 // asgType - The type of the assignment
9341 // blockWidth - The size of the block
9342 // isDest - true iff this is the destination of the assignment
9345 // Returns the morphed block operand
9348 // This does the following:
9349 // - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
9350 // - Ensures that any COMMAs are above ADDR nodes.
9351 // Although 'tree' WAS an operand of a block assignment, the assignment
9352 // may have been retyped to be a scalar assignment.
9354 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9356 GenTree* effectiveVal = tree->gtEffectiveVal();
9358 if (!varTypeIsStruct(asgType))
9360 if (effectiveVal->OperIsIndir())
9362 GenTree* addr = effectiveVal->AsIndir()->Addr();
9363 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9365 effectiveVal = addr->gtGetOp1();
9367 else if (effectiveVal->OperIsBlk())
9369 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9373 effectiveVal->gtType = asgType;
9376 else if (effectiveVal->TypeGet() != asgType)
9378 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9379 effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
9384 GenTreeIndir* indirTree = nullptr;
9385 GenTreeLclVarCommon* lclNode = nullptr;
9386 bool needsIndirection = true;
9388 if (effectiveVal->OperIsIndir())
9390 indirTree = effectiveVal->AsIndir();
9391 GenTree* addr = effectiveVal->AsIndir()->Addr();
9392 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9394 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9397 else if (effectiveVal->OperGet() == GT_LCL_VAR)
9399 lclNode = effectiveVal->AsLclVarCommon();
9402 if (varTypeIsSIMD(asgType))
9404 if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9405 (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
9408 needsIndirection = false;
9409 effectiveVal = indirTree->Addr()->gtGetOp1();
9411 if (effectiveVal->OperIsSIMD())
9413 needsIndirection = false;
9416 #endif // FEATURE_SIMD
9417 if (lclNode != nullptr)
9419 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
9420 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
9422 #ifndef LEGACY_BACKEND
9423 effectiveVal = lclNode;
9424 needsIndirection = false;
9425 #endif // !LEGACY_BACKEND
9429 // This may be a lclVar that was determined to be address-exposed.
9430 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9433 if (needsIndirection)
9435 if (indirTree != nullptr)
9437 // We should never find a struct indirection on the lhs of an assignment.
9438 assert(!isDest || indirTree->OperIsBlk());
9439 if (!isDest && indirTree->OperIsBlk())
9441 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9447 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9450 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9451 if (clsHnd == NO_CLASS_HANDLE)
9453 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9457 newTree = gtNewObjNode(clsHnd, addr);
9458 if (isDest && (newTree->OperGet() == GT_OBJ))
9460 gtSetObjGcInfo(newTree->AsObj());
9462 if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9464 // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9465 // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9466 // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9467 // separately now to avoid excess diffs.
9468 newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9474 newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
9476 effectiveVal = newTree;
9480 tree = effectiveVal;
9484 //------------------------------------------------------------------------
9485 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9488 // dest - the GT_OBJ or GT_STORE_OBJ
9491 // The destination must be known (by the caller) to be on the stack.
9494 // If we have a CopyObj with a dest on the stack, and its size is small enough
9495 // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9496 // GC Unsafe CopyBlk that is non-interruptible.
9497 // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
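// For illustration, a 32-byte struct copy containing GC refs whose destination
// is known to be on the stack can be unrolled into plain moves with no write
// barriers, provided the unrolled sequence is emitted as non-interruptible.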
//
void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
{
#if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
    assert(dest->gtGcPtrCount != 0);
    unsigned blockWidth = dest->AsBlk()->gtBlkSize;
#ifdef DEBUG
    bool     destOnStack = false;
    GenTree* destAddr    = dest->Addr();
    assert(destAddr->IsLocalAddrExpr() != nullptr);
#endif
    if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
    {
        genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
        dest->SetOper(newOper);
        dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
    }
#endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
}
//------------------------------------------------------------------------
// fgMorphCopyBlock: Perform the Morphing of a block copy
//
// Arguments:
//    tree - a block copy (i.e. an assignment with a block op on the lhs).
//
// Return Value:
//    We can return the original block copy unmodified (least desirable, but always correct)
//    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
//    If we have performed struct promotion of the Source() or the Dest() then we will try to
//    perform a field by field assignment for each of the promoted struct fields.
//
// Assumptions:
//    The child nodes for tree have already been Morphed.
//
// Notes:
//    If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
//    When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes,
//    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
//    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
//    cannot use a field by field assignment and must leave the original block copy unmodified.
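//
//    For illustration only (hypothetical C# source, not from the original sources): for
//        struct Pair { int a; int b; }    ...    p1 = p2;
//    when both locals are promoted, the block copy becomes one assignment per promoted
//    field (V03.a = V04.a; V03.b = V04.b;), chained together with GT_COMMA nodes.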
//
GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
{
    noway_assert(tree->OperIsCopyBlkOp());

    JITDUMP("\nfgMorphCopyBlock:");

    bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;

    GenTree* asg  = tree;
    GenTree* rhs  = asg->gtGetOp2();
    GenTree* dest = asg->gtGetOp1();

#if FEATURE_MULTIREG_RET
    // If this is a multi-reg return, we will not do any morphing of this node.
    if (rhs->IsMultiRegCall())
    {
        assert(dest->OperGet() == GT_LCL_VAR);
        JITDUMP(" not morphing a multireg call return\n");
        return tree;
    }
#endif // FEATURE_MULTIREG_RET

    // If we have an array index on the lhs, we need to create an obj node.

    dest = fgMorphBlkNode(dest, true);
    if (dest != asg->gtGetOp1())
    {
        asg->gtOp.gtOp1 = dest;
        if (dest->IsLocal())
        {
            dest->gtFlags |= GTF_VAR_DEF;
        }
    }
    asg->gtType = dest->TypeGet();
    rhs         = fgMorphBlkNode(rhs, false);

    asg->gtOp.gtOp2 = rhs;

    GenTreePtr oldTree    = tree;
    GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);

    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        unsigned             blockWidth;
        bool                 blockWidthIsConst = false;
        GenTreeLclVarCommon* lclVarTree        = nullptr;
        GenTreeLclVarCommon* srcLclVarTree     = nullptr;
        unsigned             destLclNum        = BAD_VAR_NUM;
        LclVarDsc*           destLclVar        = nullptr;
        FieldSeqNode*        destFldSeq        = nullptr;
        bool                 destDoFldAsg      = false;
        GenTreePtr           destAddr          = nullptr;
        GenTreePtr           srcAddr           = nullptr;
        bool                 destOnStack       = false;
        bool                 hasGCPtrs         = false;

        JITDUMP("block assignment to morph:\n");

        if (dest->IsLocal())
        {
            blockWidthIsConst = true;
            destOnStack       = true;
            if (dest->gtOper == GT_LCL_VAR)
            {
                lclVarTree = dest->AsLclVarCommon();
                destLclNum = lclVarTree->gtLclNum;
                destLclVar = &lvaTable[destLclNum];
                if (destLclVar->lvType == TYP_STRUCT)
                {
                    // It would be nice if lvExactSize always corresponded to the size of the struct,
                    // but it doesn't always for the temps that the importer creates when it spills side
                    // effects.
                    // TODO-Cleanup: Determine when this happens, and whether it can be changed.
                    blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
                }
                else
                {
                    blockWidth = genTypeSize(destLclVar->lvType);
                }
                hasGCPtrs = destLclVar->lvStructGcCount != 0;
            }
            else
            {
                assert(dest->TypeGet() != TYP_STRUCT);
                assert(dest->gtOper == GT_LCL_FLD);
                blockWidth = genTypeSize(dest->TypeGet());
                destAddr   = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
                destFldSeq = dest->AsLclFld()->gtFieldSeq;
            }
        }
        else
        {
            GenTree* effectiveDest = dest->gtEffectiveVal();
            if (effectiveDest->OperGet() == GT_IND)
            {
                assert(dest->TypeGet() != TYP_STRUCT);
                blockWidth        = genTypeSize(effectiveDest->TypeGet());
                blockWidthIsConst = true;
                if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
                {
                    destAddr = dest->gtGetOp1();
                }
            }
            else
            {
                assert(effectiveDest->OperIsBlk());
                GenTreeBlk* blk = effectiveDest->AsBlk();

                blockWidth        = blk->gtBlkSize;
                blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
                if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
                {
                    destAddr = blk->Addr();
                }
            }
            if (destAddr != nullptr)
            {
                noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
                if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
                {
                    destOnStack = true;
                    destLclNum  = lclVarTree->gtLclNum;
                    destLclVar  = &lvaTable[destLclNum];
                }
            }
        }

        if (destLclVar != nullptr)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(varTypeIsStruct(destLclVar));
                noway_assert(!opts.MinOpts());

                if (blockWidth == destLclVar->lvExactSize)
                {
                    JITDUMP(" (destDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    destDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched dest size");
                }
            }
        }
        FieldSeqNode* srcFldSeq   = nullptr;
        unsigned      srcLclNum   = BAD_VAR_NUM;
        LclVarDsc*    srcLclVar   = nullptr;
        bool          srcDoFldAsg = false;

        if (rhs->IsLocal())
        {
            srcLclVarTree = rhs->AsLclVarCommon();
            srcLclNum     = srcLclVarTree->gtLclNum;
            if (rhs->OperGet() == GT_LCL_FLD)
            {
                srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
            }
        }
        else if (rhs->OperIsIndir())
        {
            if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
            {
                srcLclNum = srcLclVarTree->gtLclNum;
            }
            else
            {
                srcAddr = rhs->gtOp.gtOp1;
            }
        }

        if (srcLclNum != BAD_VAR_NUM)
        {
            srcLclVar = &lvaTable[srcLclNum];

            if (srcLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(varTypeIsStruct(srcLclVar));
                noway_assert(!opts.MinOpts());

                if (blockWidth == srcLclVar->lvExactSize)
                {
                    JITDUMP(" (srcDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    srcDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched src size");
                }
            }
        }
        // Check to see if we are required to do a copy block because the struct contains holes
        // and either the src or dest is externally visible
        //
        bool requiresCopyBlock   = false;
        bool srcSingleLclVarAsg  = false;
        bool destSingleLclVarAsg = false;

        if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
        {
            // Self-assign; no effect.
            GenTree* nop = gtNewNothingNode();
            INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
            return nop;
        }

        // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
        if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
        {
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the src?
        if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
        {
            JITDUMP(" src contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
        {
            requiresCopyBlock = true;
        }

        // Can't use field by field assignment if the src is a call.
        if (rhs->OperGet() == GT_CALL)
        {
            JITDUMP(" src is a call");
            requiresCopyBlock = true;
        }
        // If we passed the above checks, then we will check these two
        if (!requiresCopyBlock)
        {
            // Are both dest and src promoted structs?
            if (destDoFldAsg && srcDoFldAsg)
            {
                // Both structs should be of the same type, or each have a single field of the same type.
                // If not we will use a copy block.
                if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
                    lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
                {
                    unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
                    unsigned srcFieldNum  = lvaTable[srcLclNum].lvFieldLclStart;
                    if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
                        (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
                    {
                        requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
                        JITDUMP(" with mismatched types");
                    }
                }
            }
            // Are neither dest nor src promoted structs?
            else if (!destDoFldAsg && !srcDoFldAsg)
            {
                requiresCopyBlock = true; // Leave as a CopyBlock
                JITDUMP(" with no promoted structs");
            }
            else if (destDoFldAsg)
            {
                // Match the following kinds of trees:
                //  fgMorphTree BB01, stmt 9 (before)
                //   [000052] ------------        const     int    8
                //   [000053] -A--G-------     copyBlk   void
                //   [000051] ------------           addr      byref
                //   [000050] ------------              lclVar    long   V07 loc5
                //   [000054] --------R---        <list>    void
                //   [000049] ------------           addr      byref
                //   [000048] ------------              lclVar    struct(P) V06 loc4
                //                                              long V06.h (offs=0x00) -> V17 tmp9
                //  Yields this transformation
                //  fgMorphCopyBlock (after):
                //   [000050] ------------        lclVar    long   V07 loc5
                //   [000085] -A----------     =         long
                //   [000083] D------N----        lclVar    long   V17 tmp9
                //
                if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
                    (blockWidth == genTypeSize(srcLclVar->TypeGet())))
                {
                    // Reject the following tree:
                    //  - seen on x86chk    jit\jit64\hfa\main\hfa_sf3E_r.exe
                    //
                    //  fgMorphTree BB01, stmt 6 (before)
                    //   [000038] -------------        const     int    4
                    //   [000039] -A--G--------     copyBlk   void
                    //   [000037] -------------           addr      byref
                    //   [000036] -------------              lclVar    int    V05 loc3
                    //   [000040] --------R----        <list>    void
                    //   [000035] -------------           addr      byref
                    //   [000034] -------------              lclVar    struct(P) V04 loc2
                    //                                              float V04.f1 (offs=0x00) -> V13 tmp6
                    //  As this would transform into
                    //    float V13 = int V05
                    //
                    unsigned  fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
                    var_types destType    = lvaTable[fieldLclNum].TypeGet();
                    if (srcLclVar->TypeGet() == destType)
                    {
                        srcSingleLclVarAsg = true;
                    }
                }
            }
            else
            {
                assert(srcDoFldAsg);
                // Check for the symmetric case (which happens for the _pointer field of promoted spans):
                //
                //               [000240] -----+------             /--*  lclVar    struct(P) V18 tmp9
                //                                                 /--*    byref  V18._value (offs=0x00) -> V30 tmp21
                //               [000245] -A------R---             *  =         struct (copy)
                //               [000244] -----+------             \--*  obj(8)    struct
                //               [000243] -----+------                \--*  addr      byref
                //               [000242] D----+-N----                   \--*  lclVar    byref  V28 tmp19
                //
                if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
                    (blockWidth == genTypeSize(destLclVar->TypeGet())))
                {
                    // Check for type agreement
                    unsigned  fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
                    var_types srcType     = lvaTable[fieldLclNum].TypeGet();
                    if (destLclVar->TypeGet() == srcType)
                    {
                        destSingleLclVarAsg = true;
                    }
                }
            }
        }
        // If we require a copy block, then set both of the field assign bools to false.
        if (requiresCopyBlock)
        {
            // If a copy block is required then we won't do field by field assignments
            destDoFldAsg = false;
            srcDoFldAsg  = false;
        }

        JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");

        // Mark the dest/src structs as DoNotEnreg when they are not reg-sized
        // non-field-addressed structs and we are using a CopyBlock,
        // or the struct is not promoted.
        //
        if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
        {
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
            }
        }

        if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
        {
            if (!srcLclVar->lvRegStruct)
            {
                lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
            }
        }
        if (requiresCopyBlock)
        {
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            var_types asgType = dest->TypeGet();
            dest              = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
            asg->gtOp.gtOp1   = dest;
            asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);

            // Note that the unrolling of CopyBlk is only implemented on some platforms.
            // Currently that includes x64 and ARM but not x86: the code generation for this
            // construct requires the ability to mark certain regions of the generated code
            // as non-interruptible, and the GC encoding for the latter platform does not
            // have this capability.

            // If we have a CopyObj with a dest on the stack
            // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
            // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
            // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
            //
            if (destOnStack && (dest->OperGet() == GT_OBJ))
            {
                fgMorphUnsafeBlk(dest->AsObj());
            }

            // Eliminate the "OBJ or BLK" node on the rhs.
            rhs             = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
            asg->gtOp.gtOp2 = rhs;

#ifdef LEGACY_BACKEND
            if (!rhs->OperIsIndir())
            {
                noway_assert(rhs->gtOper == GT_LCL_VAR);
                GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
                rhs              = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
            }
#endif // LEGACY_BACKEND

            // Formerly, liveness did not consider copyblk arguments of simple types as being
            // a use or def, so these variables were marked as address-exposed.
            // TODO-1stClassStructs: This should no longer be needed.
            if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
            {
                JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
                lvaTable[srcLclNum].lvAddrExposed = true;
            }

            if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
            {
                JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
                lvaTable[destLclNum].lvAddrExposed = true;
            }

            goto _Done;
        }
        // Otherwise we convert this CopyBlock into individual field by field assignments
        //----------------------------------------------------------------------

        tree = nullptr;

        GenTreePtr addrSpill            = nullptr;
        unsigned   addrSpillTemp        = BAD_VAR_NUM;
        bool       addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame

        unsigned fieldCnt = DUMMY_INIT(0);

        if (destDoFldAsg && srcDoFldAsg)
        {
            // To do fieldwise assignments for both sides, they'd better be the same struct type!
            // All of these conditions were checked above...
            assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
            assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);

            fieldCnt = destLclVar->lvFieldCnt;
            goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
                                // by field assignments.
        }
        else if (destDoFldAsg)
        {
            fieldCnt = destLclVar->lvFieldCnt;
            rhs      = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
            if (srcAddr == nullptr)
            {
                srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
            }
        }
        else
        {
            assert(srcDoFldAsg);
            fieldCnt = srcLclVar->lvFieldCnt;
            dest     = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
            if (dest->OperIsBlk())
            {
                (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
            }
            destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
        }

        if (destDoFldAsg)
        {
            noway_assert(!srcDoFldAsg);
            if (gtClone(srcAddr))
            {
                // srcAddr is simple expression. No need to spill.
                noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // srcAddr is complex expression. Clone and spill it (unless the destination is
                // a struct local that only has one field, in which case we'd only use the
                // address value once...)
                if (destLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
                    noway_assert(addrSpill != nullptr);
                }
            }
        }

        if (srcDoFldAsg)
        {
            noway_assert(!destDoFldAsg);

            // If we're doing field-wise stores, to an address within a local, and we copy
            // the address into "addrSpill", do *not* declare the original local var node in the
            // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
            // field-wise assignments as an "indirect" assignment to the local.
            // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
            // we clone it.)
            if (lclVarTree != nullptr)
            {
                lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
            }

            if (gtClone(destAddr))
            {
                // destAddr is simple expression. No need to spill
                noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // destAddr is complex expression. Clone and spill it (unless
                // the source is a struct local that only has one field, in which case we'd only
                // use the address value once...)
                if (srcLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
                    noway_assert(addrSpill != nullptr);
                }
            }
        }

        // TODO-CQ: this should be based on a more general
        // "BaseAddress" method, that handles fields of structs, before or after
        // morphing.
        if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
        {
            if (addrSpill->gtOp.gtOp1->IsLocal())
            {
                // We will *not* consider this to define the local, but rather have each individual field assign
                // be a definition.
                addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
                assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
                       PROMOTION_TYPE_INDEPENDENT);
                addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
                                             // local stack frame
            }
        }

        if (addrSpill != nullptr)
        {
            // Spill the (complex) address to a BYREF temp.
            // Note, at most one address may need to be spilled.
            addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));

            lvaTable[addrSpillTemp].lvType = TYP_BYREF;

            if (addrSpillIsStackDest)
            {
                lvaTable[addrSpillTemp].lvStackByref = true;
            }

            tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);

#ifndef LEGACY_BACKEND
            // If we are assigning the address of a LclVar here,
            // liveness does not account for this kind of address-taken use.
            //
            // We have to mark this local as address exposed so
            // that we don't delete the definition for this LclVar
            // as a dead store later on.
            //
            if (addrSpill->OperGet() == GT_ADDR)
            {
                GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
                if (addrOp->IsLocal())
                {
                    unsigned lclVarNum                = addrOp->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
                }
            }
#endif // !LEGACY_BACKEND
        }

    _AssignFields:
        for (unsigned i = 0; i < fieldCnt; ++i)
        {
            FieldSeqNode* curFieldSeq = nullptr;
            if (destDoFldAsg)
            {
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
                dest                 = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
                if (destAddr != nullptr)
                {
                    noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
                    dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
                }
                else
                {
                    noway_assert(lclVarTree != nullptr);
                    dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
                }
                // Don't CSE the lhs of an assignment.
                dest->gtFlags |= GTF_DONT_CSE;
            }
            else
            {
                noway_assert(srcDoFldAsg);
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;

                if (destSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(destLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        dest = gtCloneExpr(destAddr);
                        noway_assert(dest != nullptr);

                        // Is the address of a local?
                        GenTreeLclVarCommon* lclVarTree = nullptr;
                        bool                 isEntire   = false;
                        bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
                        if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
                        {
                            lclVarTree->gtFlags |= GTF_VAR_DEF;
                            if (!isEntire)
                            {
                                lclVarTree->gtFlags |= GTF_VAR_USEASG;
                            }
                        }
                    }

                    GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
                    // Have to set the field sequence -- which means we need the field handle.
                    CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd =
                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq                          = GetFieldSeqStore()->CreateSingleton(fieldHnd);
                    fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;

                    dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);

                    dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);

                    // !!! The destination could be on stack. !!!
                    // This flag will let us choose the correct write barrier.
                    dest->gtFlags |= GTF_IND_TGTANYWHERE;
                }
            }

            GenTreePtr src;
            if (srcDoFldAsg)
            {
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
                src                  = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(srcLclVarTree != nullptr);
                src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
                // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
                // but they are when they are under a GT_ADDR.
                src->gtFlags |= GTF_DONT_CSE;
            }
            else
            {
                noway_assert(destDoFldAsg);
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;

                if (srcSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(srcLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        src = gtCloneExpr(srcAddr);
                        noway_assert(src != nullptr);
                    }

                    CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd =
                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);

                    src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
                                        new (this, GT_CNS_INT)
                                            GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));

                    src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
                }
            }

            noway_assert(dest->TypeGet() == src->TypeGet());

            asg = gtNewAssignNode(dest, src);

            // If we spilled the address, and we didn't do individual field assignments to promoted fields,
            // and it was of a local, record the assignment as an indirect update of a local.
            if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
            {
                curFieldSeq   = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
                bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
                IndirectAssignmentAnnotation* pIndirAnnot =
                    new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
                GetIndirAssignMap()->Set(asg, pIndirAnnot);
            }

#if LOCAL_ASSERTION_PROP
            if (optLocalAssertionProp)
            {
                optAssertionGen(asg);
            }
#endif // LOCAL_ASSERTION_PROP
            if (tree)
            {
                tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
            }
            else
            {
                tree = asg;
            }
        }
    }

_Done:

    if (isLateArg)
    {
        tree->gtFlags |= GTF_LATE_ARG;
    }

#ifdef DEBUG
    if (tree != oldTree)
    {
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
    }

    if (verbose)
    {
        printf("\nfgMorphCopyBlock (after):\n");
        gtDispTree(tree);
    }
#endif

    return tree;
}
// Insert any conversions needed to normalize the tree so that it is
// amenable to code generation on register-based FP architectures.
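//
// For illustration only (hypothetical C# source, not from the original sources): for
//     double d; float f; ... d = d + f;
// the float operand gets wrapped in a cast so that both operands of the GT_ADD are
// TYP_DOUBLE, i.e. the tree computes d + (double)f.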
GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
{
    if (tree->OperIsArithmetic())
    {
        if (varTypeIsFloating(tree))
        {
            GenTreePtr op1 = tree->gtOp.gtOp1;
            GenTreePtr op2 = tree->gtGetOp2();

            if (op1->TypeGet() != tree->TypeGet())
            {
                tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
            }
            if (op2->TypeGet() != tree->TypeGet())
            {
                tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
            }
        }
    }
    else if (tree->OperIsCompare())
    {
        GenTreePtr op1 = tree->gtOp.gtOp1;

        if (varTypeIsFloating(op1))
        {
            GenTreePtr op2 = tree->gtGetOp2();
            assert(varTypeIsFloating(op2));

            if (op1->TypeGet() != op2->TypeGet())
            {
                // Both had better be floating; just one bigger than the other.
                if (op1->TypeGet() == TYP_FLOAT)
                {
                    assert(op2->TypeGet() == TYP_DOUBLE);
                    tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
                }
                else if (op2->TypeGet() == TYP_FLOAT)
                {
                    assert(op1->TypeGet() == TYP_DOUBLE);
                    tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
                }
            }
        }
    }

    return tree;
}
GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
{
    GenTree*     op1 = compare->gtOp.gtOp1;
    GenTree*     op2 = compare->gtOp.gtOp2;
    GenTree*     opCns;
    GenTreeCall* opCall;
    // recognize this pattern:
    //
    // stmtExpr  void  (IL 0x000...  ???)
    //     return    int
    //             const     ref    null
    //         ==        int
    //             call help ref    HELPER.CORINFO_HELP_BOX_NULLABLE
    //                 const(h)  long   0x7fed96836c8 class
    //                 addr      byref
    //                     ld.lclVar struct V00 arg0
    //
    // which comes from this code (reported by customer as being slow):
    //
    // private static bool IsNull<T>(T arg)
    // {
    //     return arg == null;
    // }
    //
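    // For illustration only (not from the original sources): after the rewrite below, the
    // comparison reads the Nullable's 'hasValue' field (at offset zero) directly, which is
    // morally equivalent to:
    //
    //     return !arg.HasValue;   // no CORINFO_HELP_BOX_NULLABLE allocation
    //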
    if (op1->IsCnsIntOrI() && op2->IsHelperCall())
    {
        opCns  = op1;
        opCall = op2->AsCall();
    }
    else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
    {
        opCns  = op2;
        opCall = op1->AsCall();
    }
    else
    {
        return compare;
    }

    if (!opCns->IsIntegralConst(0))
    {
        return compare;
    }

    if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
    {
        return compare;
    }

    // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
    GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);

    if (opCall == op1)
    {
        compare->gtOp.gtOp1 = newOp;
    }
    else
    {
        compare->gtOp.gtOp2 = newOp;
    }

    return compare;
}
#ifdef FEATURE_SIMD

//--------------------------------------------------------------------------------------------------------------
// getSIMDStructFromField:
//   Check whether the field belongs to a simd struct or not. If it does, return the GenTreePtr for
//   the struct node, along with the base type, field index and simd size. If it does not, just return nullptr.
//   Usually, if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we
//   should return nullptr, since in this case we should treat the SIMD struct as a regular struct.
//   However, if you want the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
//   to true. Then there will be no IsUsedInSIMDIntrinsic check, and the function will return the SIMD
//   struct node if the struct is a SIMD struct.
//
// Arguments:
//   tree - GenTreePtr. This node will be checked to see whether it is a field that belongs to a simd
//          struct used for a simd intrinsic or not.
//   pBaseTypeOut - var_types pointer. If the tree node is the tree we want, we set *pBaseTypeOut
//          to the simd lclvar's base type.
//   indexOut - unsigned pointer. If the tree is used for a simd intrinsic, we will set *indexOut
//          equal to the index number of this field.
//   simdSizeOut - unsigned pointer. If the tree is used for a simd intrinsic, we set *simdSizeOut
//          equal to the simd struct size which this tree belongs to.
//   ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
//          the UsedInSIMDIntrinsic check.
//
// Return Value:
//   A GenTreePtr which points to the simd lclvar tree that the field belongs to. If the tree is not a simd
//   intrinsic related field, return nullptr.
//
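//   For illustration only (hypothetical C# source, not from the original sources): for
//       Vector3 v; ... float y = v.Y;
//   the GT_FIELD for 'Y' yields the lclVar node for 'v', with *pBaseTypeOut = TYP_FLOAT,
//   *indexOut = 1 (field offset 4 / sizeof(float)), and *simdSizeOut = 12.
//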
GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
                                            var_types* pBaseTypeOut,
                                            unsigned*  indexOut,
                                            unsigned*  simdSizeOut,
                                            bool       ignoreUsedInSIMDIntrinsic /*false*/)
{
    GenTreePtr ret = nullptr;
    if (tree->OperGet() == GT_FIELD)
    {
        GenTreePtr objRef = tree->gtField.gtFldObj;
        if (objRef != nullptr)
        {
            GenTreePtr obj = nullptr;
            if (objRef->gtOper == GT_ADDR)
            {
                obj = objRef->gtOp.gtOp1;
            }
            else if (ignoreUsedInSIMDIntrinsic)
            {
                obj = objRef;
            }
            else
            {
                return nullptr;
            }

            if (isSIMDTypeLocal(obj))
            {
                unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[lclNum];
                if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
                {
                    *simdSizeOut  = varDsc->lvExactSize;
                    *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
                    ret           = obj;
                }
            }
            else if (obj->OperGet() == GT_SIMD)
            {
                ret                   = obj;
                GenTreeSIMD* simdNode = obj->AsSIMD();
                *simdSizeOut          = simdNode->gtSIMDSize;
                *pBaseTypeOut         = simdNode->gtSIMDBaseType;
            }
        }
    }
    if (ret != nullptr)
    {
        unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
        *indexOut             = tree->gtField.gtFldOffset / BaseTypeSize;
    }
    return ret;
}
/*****************************************************************************
 *  If a read operation tries to access a simd struct field, then transform the
 *  operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
 *  Otherwise, return the old tree.
 *  Argument:
 *   tree - GenTreePtr. If this pointer points to a simd struct which is used for simd
 *          intrinsics, we will morph it as the simd intrinsic SIMDIntrinsicGetItem.
 *  Return:
 *   A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic,
 *   return the old tree.
 */
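/* For illustration only (hypothetical C# source, not from the original sources):
 *     Vector4 v; ... float x = v.X;
 * the field read v.X becomes SIMDIntrinsicGetItem(v, 0), i.e. a lane extract.
 */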
GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
{
    unsigned   index          = 0;
    var_types  baseType       = TYP_UNKNOWN;
    unsigned   simdSize       = 0;
    GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
    if (simdStructNode != nullptr)
    {
        assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
        GenTree* op2 = gtNewIconNode(index);
        tree         = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
#ifdef DEBUG
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
    }
    return tree;
}
/*****************************************************************************
 *  Transform an assignment of a SIMD struct field to SIMD intrinsic
 *  SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
 *  then return the old tree.
 *  Argument:
 *   tree - GenTreePtr. If this pointer points to a simd struct which is used for simd
 *          intrinsics, we will morph it as a simd intrinsic set.
 *  Return:
 *   A GenTreePtr which points to the new tree. If the tree is not for a simd intrinsic,
 *   return the old tree.
 */
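/* For illustration only (hypothetical C# source, not from the original sources):
 *     Vector4 v; ... v.Y = f;
 * becomes v = SIMDIntrinsicSetY(v, f), i.e. the vector is rebuilt with lane 1 replaced.
 */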
GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
{
    assert(tree->OperGet() == GT_ASG);
    GenTreePtr op1 = tree->gtGetOp1();
    GenTreePtr op2 = tree->gtGetOp2();

    unsigned   index         = 0;
    var_types  baseType      = TYP_UNKNOWN;
    unsigned   simdSize      = 0;
    GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
    if (simdOp1Struct != nullptr)
    {
        // Generate the simd set intrinsic
        assert(simdSize >= ((index + 1) * genTypeSize(baseType)));

        SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
        switch (index)
        {
            case 0:
                simdIntrinsicID = SIMDIntrinsicSetX;
                break;
            case 1:
                simdIntrinsicID = SIMDIntrinsicSetY;
                break;
            case 2:
                simdIntrinsicID = SIMDIntrinsicSetZ;
                break;
            case 3:
                simdIntrinsicID = SIMDIntrinsicSetW;
                break;
            default:
                noway_assert(!"There is no set intrinsic for index bigger than 3");
        }

        GenTreePtr target = gtClone(simdOp1Struct);
        assert(target != nullptr);
        GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
        tree->gtOp.gtOp1    = target;
        tree->gtOp.gtOp2    = simdTree;
#ifdef DEBUG
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
    }

    return tree;
}

#endif // FEATURE_SIMD
/*****************************************************************************
 *
 *  Transform the given GTK_SMPOP tree for code generation.
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
{
    // this extra scope is a workaround for a gcc bug
    // the inline destructor for ALLOCA_CHECK confuses the control
    // flow and gcc thinks that the function never returns
    {
        ALLOCA_CHECK();
        noway_assert(tree->OperKind() & GTK_SMPOP);

        /* The steps in this function are :
           o Perform required preorder processing
           o Process the first, then second operand, if any
           o Perform required postorder morphing
           o Perform optional postorder morphing if optimizing
         */

        bool isQmarkColon = false;

#if LOCAL_ASSERTION_PROP
        AssertionIndex origAssertionCount = DUMMY_INIT(0);
        AssertionDsc*  origAssertionTab   = DUMMY_INIT(NULL);

        AssertionIndex thenAssertionCount = DUMMY_INIT(0);
        AssertionDsc*  thenAssertionTab   = DUMMY_INIT(NULL);
#endif

        if (fgGlobalMorph)
        {
#if !FEATURE_STACK_FP_X87
            tree = fgMorphForRegisterFP(tree);
#endif
        }

        genTreeOps oper = tree->OperGet();
        var_types  typ  = tree->TypeGet();
        GenTreePtr op1  = tree->gtOp.gtOp1;
        GenTreePtr op2  = tree->gtGetOp2IfPresent();
        /*-------------------------------------------------------------------------
         * First do any PRE-ORDER processing
         */

        switch (oper)
        {
            // Some arithmetic operators need to use a helper call to the EE
            int helper;

            case GT_ASG:
                tree = fgDoNormalizeOnStore(tree);
                /* fgDoNormalizeOnStore can change op2 */
                noway_assert(op1 == tree->gtOp.gtOp1);
                op2 = tree->gtOp.gtOp2;

#ifdef FEATURE_SIMD
                {
                    // We should check whether op2 should be assigned to a SIMD field or not.
                    // If it is, we should translate the tree to a simd intrinsic.
                    assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
                    GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
                    typ                = tree->TypeGet();
                    op1                = tree->gtGetOp1();
                    op2                = tree->gtGetOp2();
#ifdef DEBUG
                    assert((tree == newTree) && (tree->OperGet() == oper));
                    if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
                    {
                        tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
                    }
#endif // DEBUG
                }
#endif

                __fallthrough;

            case GT_ASG_ADD:
            case GT_ASG_SUB:
            case GT_ASG_MUL:
            case GT_ASG_DIV:
            case GT_ASG_MOD:
            case GT_ASG_UDIV:
            case GT_ASG_UMOD:
            case GT_ASG_OR:
            case GT_ASG_XOR:
            case GT_ASG_AND:
            case GT_ASG_LSH:
            case GT_ASG_RSH:
            case GT_ASG_RSZ:
            case GT_CHS:

                // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
                // Previously, the "lhs" (addr) of a block op was CSE'd.  So, to duplicate the former
                // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type).
                // TODO-1stClassStructs: improve this.
                if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
                {
                    op1->gtFlags |= GTF_DONT_CSE;
                }
                break;

            case GT_ADDR:

                /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
                op1->gtFlags |= GTF_DONT_CSE;
                break;

            case GT_QMARK:
            case GT_JTRUE:

                noway_assert(op1);

                if (op1->OperKind() & GTK_RELOP)
                {
                    noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
                    /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
                       not need to materialize the result as a 0 or 1. */

                    /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
                    op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);

                    // Request that the codegen for op1 sets the condition flags
                    // when it generates the code for op1.
                    //
                    // Codegen for op1 must set the condition flags if
                    // this method returns true.
                    //
                    op1->gtRequestSetFlags();
                }
                else
                {
                    GenTreePtr effOp1 = op1->gtEffectiveVal();
                    noway_assert((effOp1->gtOper == GT_CNS_INT) &&
                                 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
                }
                break;

            case GT_COLON:
#if LOCAL_ASSERTION_PROP
                if (optLocalAssertionProp)
#endif
                {
                    isQmarkColon = true;
                }
                break;

            case GT_INDEX:
                return fgMorphArrayIndex(tree);

            case GT_CAST:
                return fgMorphCast(tree);
            case GT_MUL:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    /* For (long)int1 * (long)int2, we don't actually do the
                       casts, and just multiply the 32 bit values, which will
                       give us the 64 bit result in edx:eax */
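
                    /* For illustration only (not from the original sources): on x86,
                           long r = (long)i1 * (long)i2;   // i1, i2 are ints
                       is flagged GTF_MUL_64RSLT and emitted as one 32x32->64 multiply
                       instead of a CORINFO_HELP_LMUL helper call. */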
                    noway_assert(op2);
                    if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
                         genActualType(op1->CastFromType()) == TYP_INT &&
                         genActualType(op2->CastFromType()) == TYP_INT) &&
                        !op1->gtOverflow() && !op2->gtOverflow())
                    {
                        // The casts have to be of the same signedness.
                        if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
                        {
                            // We see if we can force an int constant to change its signedness
                            GenTreePtr constOp;
                            if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
                                constOp = op1;
                            else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
                                constOp = op2;
                            else
                                goto NO_MUL_64RSLT;

                            if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
                                constOp->gtFlags ^= GTF_UNSIGNED;
                            else
                                goto NO_MUL_64RSLT;
                        }

                        // The only combination that can overflow
                        if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
                            goto NO_MUL_64RSLT;

                        /* Remaining combinations can never overflow during long mul. */

                        tree->gtFlags &= ~GTF_OVERFLOW;

                        /* Do unsigned mul only if the casts were unsigned */

                        tree->gtFlags &= ~GTF_UNSIGNED;
                        tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;

                        /* Since we are committing to GTF_MUL_64RSLT, we don't want
                           the casts to be folded away. So morph the castees directly */

                        op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
                        op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);

                        // Propagate side effect flags up the tree
                        op1->gtFlags &= ~GTF_ALL_EFFECT;
                        op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
                        op2->gtFlags &= ~GTF_ALL_EFFECT;
                        op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

                        // If the GT_MUL can be altogether folded away, we should do that.

                        if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
                            opts.OptEnabled(CLFLG_CONSTANTFOLD))
                        {
                            tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
                            tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
                            noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
                            tree = gtFoldExprConst(tree);
                            noway_assert(tree->OperIsConst());
                            return tree;
                        }

                        tree->gtFlags |= GTF_MUL_64RSLT;

                        // If op1 and op2 are unsigned casts, we need to do an unsigned mult
                        tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);

                        // Insert GT_NOP nodes for the cast operands so that they do not get folded
                        // And propagate the new flags. We don't want to CSE the casts because
                        // codegen expects GTF_MUL_64RSLT muls to have a certain layout.

                        if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
                        {
                            op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
                            op1->gtFlags &= ~GTF_ALL_EFFECT;
                            op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
                        }

                        if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
                        {
                            op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
                            op2->gtFlags &= ~GTF_ALL_EFFECT;
                            op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
                        }

                        op1->gtFlags |= GTF_DONT_CSE;
                        op2->gtFlags |= GTF_DONT_CSE;

                        tree->gtFlags &= ~GTF_ALL_EFFECT;
                        tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);

                        goto DONE_MORPHING_CHILDREN;
                    }
                    else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
                    {
                    NO_MUL_64RSLT:
                        if (tree->gtOverflow())
                            helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
                        else
                            helper = CORINFO_HELP_LMUL;

                        goto USE_HELPER_FOR_ARITH;
                    }
                    else
                    {
                        /* We are seeing this node again. We have decided to use
                           GTF_MUL_64RSLT, so leave it alone. */

                        assert(tree->gtIsValid64RsltMul());
                    }
                }
#endif // !_TARGET_64BIT_
                break;

            case GT_DIV:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    helper = CORINFO_HELP_LDIV;
                    goto USE_HELPER_FOR_ARITH;
                }

#if USE_HELPERS_FOR_INT_DIV
                if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
                {
                    helper = CORINFO_HELP_DIV;
                    goto USE_HELPER_FOR_ARITH;
                }
#endif
#endif // !_TARGET_64BIT_

#ifndef LEGACY_BACKEND
                if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
                {
                    op2 = gtFoldExprConst(op2);
                }
#endif // !LEGACY_BACKEND
                break;

            case GT_UDIV:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    helper = CORINFO_HELP_ULDIV;
                    goto USE_HELPER_FOR_ARITH;
                }
#if USE_HELPERS_FOR_INT_DIV
                if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
                {
                    helper = CORINFO_HELP_UDIV;
                    goto USE_HELPER_FOR_ARITH;
                }
#endif
#endif // _TARGET_64BIT_
                break;

            case GT_MOD:

                if (varTypeIsFloating(typ))
                {
                    helper = CORINFO_HELP_DBLREM;
                    noway_assert(op2);
                    if (op1->TypeGet() == TYP_FLOAT)
                    {
                        if (op2->TypeGet() == TYP_FLOAT)
                        {
                            helper = CORINFO_HELP_FLTREM;
                        }
                        else
                        {
                            tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
                        }
                    }
                    else if (op2->TypeGet() == TYP_FLOAT)
                    {
                        tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
                    }
                    goto USE_HELPER_FOR_ARITH;
                }

                // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
                // A similar optimization for signed mod will not work for a negative perfectly divisible
                // HI-word. To make it correct, we would need to divide without the sign and then flip the
                // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline.
                goto ASSIGN_HELPER_FOR_MOD;

            case GT_UMOD:

#ifdef _TARGET_ARMARCH_
//
// Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
//
#else  // _TARGET_XARCH
                /* If this is an unsigned long mod with op2 which is a cast to long from a
                   constant int, then don't morph to a call to the helper.  This can be done
                   faster inline using idiv.
                */
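
                /* For illustration only (not from the original sources): for
                       ulong r = x % 10UL;   // constant divisor in [2..0x3fffffff]
                   keeping the GT_UMOD node lets codegen emit an inline div/idiv
                   sequence rather than calling the CORINFO_HELP_ULMOD helper. */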
                noway_assert(op2);
                if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
                    ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
                    ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
                {
                    if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
                        op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
                        op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
                        (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
                    {
                        tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
                        noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
                    }

                    if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
                        op2->gtIntConCommon.LngValue() <= 0x3fffffff)
                    {
                        tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
                        noway_assert(op1->TypeGet() == TYP_LONG);

                        // Update flags for op1 morph
                        tree->gtFlags &= ~GTF_ALL_EFFECT;

                        tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant

                        // If op1 is a constant, then do constant folding of the division operator
                        if (op1->gtOper == GT_CNS_NATIVELONG)
                        {
                            tree = gtFoldExpr(tree);
                        }
                        return tree;
                    }
                }
#endif // _TARGET_XARCH

            ASSIGN_HELPER_FOR_MOD:

                // For "val % 1", return 0 if op1 doesn't have any side effects.
                // (If we are in the CSE phase, we cannot discard 'tree' because
                // it may contain CSE expressions that we haven't yet examined.)
                //
                if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
                {
                    if (op2->IsIntegralConst(1))
                    {
                        GenTreePtr zeroNode = gtNewZeroConNode(typ);
#ifdef DEBUG
                        zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                        DEBUG_DESTROY_NODE(tree);
                        return zeroNode;
                    }
                }

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
                    goto USE_HELPER_FOR_ARITH;
                }

#if USE_HELPERS_FOR_INT_DIV
                if (typ == TYP_INT)
                {
                    if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
                    {
                        helper = CORINFO_HELP_UMOD;
                        goto USE_HELPER_FOR_ARITH;
                    }
                    else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
                    {
                        helper = CORINFO_HELP_MOD;
                        goto USE_HELPER_FOR_ARITH;
                    }
                }
#endif
#endif // !_TARGET_64BIT_

#ifndef LEGACY_BACKEND
                if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
                {
                    op2 = gtFoldExprConst(op2);
                }

#ifdef _TARGET_ARM64_

                // For ARM64 we don't have a remainder instruction,
                // so the architecture manual suggests the following transformation to
                // generate code for such an operator:
                //
                //    a % b = a - (a / b) * b;
                //
                // NOTE: we should never need to perform this transformation when remorphing, since global morphing
                //       should already have done so and we do not introduce new modulus nodes in later phases.
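                //
                // For illustration only (not from the original sources): with a == 17 and b == 5,
                // 17 - (17 / 5) * 5 == 17 - 3 * 5 == 2; ARM64 codegen can emit this as an
                // sdiv/udiv followed by a multiply-subtract (msub).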
                assert(!optValnumCSE_phase);
                tree = fgMorphModToSubMulDiv(tree->AsOp());
                op1  = tree->gtOp.gtOp1;
                op2  = tree->gtOp.gtOp2;
#else  //_TARGET_ARM64_
                // If b is not a power of 2 constant then lowering replaces a % b
                // with a - (a / b) * b and applies magic division optimization to
                // a / b. The code may already contain an a / b expression (e.g.
                // x = a / 10; y = a % 10;) and then we end up with redundant code.
                // If we convert % to / here we give CSE the opportunity to eliminate
                // the redundant division. If there's no redundant division then
                // nothing is lost, lowering would have done this transform anyway.

                if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
                {
                    ssize_t divisorValue    = op2->AsIntCon()->IconValue();
                    size_t  absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
                                                                           : static_cast<size_t>(abs(divisorValue));

                    if (!isPow2(absDivisorValue))
                    {
                        tree = fgMorphModToSubMulDiv(tree->AsOp());
                        op1  = tree->gtOp.gtOp1;
                        op2  = tree->gtOp.gtOp2;
                    }
                }
#endif //_TARGET_ARM64_
#endif // !LEGACY_BACKEND
                break;

            USE_HELPER_FOR_ARITH:
            {
                /* We have to morph these arithmetic operations into helper calls
                   before morphing the arguments (preorder), else the arguments
                   won't get correct values of fgPtrArgCntCur.
                   However, try to fold the tree first in case we end up with a
                   simple node which won't need a helper call at all */

                noway_assert(tree->OperIsBinary());

                GenTreePtr oldTree = tree;

                tree = gtFoldExpr(tree);

                // Were we able to fold it ?
                // Note that gtFoldExpr may return a non-leaf even if successful
                // e.g. for something like "expr / 1" - see also bug #290853
                if (tree->OperIsLeaf() || (oldTree != tree))
                {
                    return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
                }

                // Did we fold it into a comma node with throw?
                if (tree->gtOper == GT_COMMA)
                {
                    noway_assert(fgIsCommaThrow(tree));
                    return fgMorphTree(tree);
                }
            }
                return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));

            case GT_RETURN:
                // normalize small integer return values
                if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
                    (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
                {
                    // Small-typed return values are normalized by the callee
                    op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);

                    // Propagate GTF_COLON_COND
                    op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);

                    tree->gtOp.gtOp1 = fgMorphCast(op1);

                    // Propagate side effect flags
                    tree->gtFlags &= ~GTF_ALL_EFFECT;
                    tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

                    return tree;
                }
                break;

            case GT_EQ:
            case GT_NE:

                // Check for typeof(...) == obj.GetType()
                // Also check for typeof(...) == typeof(...)
                // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
                // type handles and instances of System.Type
                // If this invariant is ever broken, the optimization will need updating
                CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef LEGACY_BACKEND
                if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
                    ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
                     (op1->gtCall.gtCallType == CT_HELPER)) &&
                    ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
                     (op2->gtCall.gtCallType == CT_HELPER)))
#else
                if ((((op1->gtOper == GT_INTRINSIC) &&
                      (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
                     ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
                    (((op2->gtOper == GT_INTRINSIC) &&
                      (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
                     ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
#endif
                {
                    GenTreePtr pGetClassFromHandle;
                    GenTreePtr pGetType;

#ifdef LEGACY_BACKEND
                    bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
                    bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
#else
                    bool bOp1ClassFromHandle =
                        op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
                    bool bOp2ClassFromHandle =
                        op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
#endif

                    // Optimize typeof(...) == typeof(...)
                    // Typically this occurs in generic code that attempts a type switch
                    // e.g. typeof(T) == typeof(int)

                    if (bOp1ClassFromHandle && bOp2ClassFromHandle)
                    {
                        GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
                        GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;

                        GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);

                        compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);

                        // Morph and return
                        return fgMorphTree(compare);
                    }
                    else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
                    {
                        //
                        // Now check for GetClassFromHandle(handle) == obj.GetType()
                        //

                        if (bOp1ClassFromHandle)
                        {
                            pGetClassFromHandle = tree->gtOp.gtOp1;
                            pGetType            = op2;
                        }
                        else
                        {
                            pGetClassFromHandle = tree->gtOp.gtOp2;
                            pGetType            = op1;
                        }

                        GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
                        GenTreePtr pConstLiteral               = pGetClassFromHandleArgument;

                        // Unwrap GT_NOP node used to prevent constant folding
                        if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
                        {
                            pConstLiteral = pConstLiteral->gtOp.gtOp1;
                        }

                        // In the ngen case, we have to go thru an indirection to get the right handle.
                        if (pConstLiteral->gtOper == GT_IND)
                        {
                            pConstLiteral = pConstLiteral->gtOp.gtOp1;
                        }
#ifdef LEGACY_BACKEND

                        if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
                            info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
                                CORINFO_INTRINSIC_Object_GetType &&
#else
                        if ((pGetType->gtOper == GT_INTRINSIC) &&
                            (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
#endif
                            pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
                        {
                            CORINFO_CLASS_HANDLE clsHnd =
                                CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);

                            if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
                            {
                                // Method Table tree
                                CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef LEGACY_BACKEND
                                GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
#else
                                GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
#endif
                                objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
                                compCurBB->bbFlags |= BBF_HAS_VTABREF;
                                optMethodFlags |= OMF_HAS_VTABLEREF;

                                // Method table constant
                                GenTreePtr cnsMT = pGetClassFromHandleArgument;

                                GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);

                                compare->gtFlags |=
                                    tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);

                                // Morph and return
                                return fgMorphTree(compare);
                            }
                        }
                    }
                }
                fgMorphRecognizeBoxNullable(tree);
                op1 = tree->gtOp.gtOp1;
                op2 = tree->gtGetOp2IfPresent();

                break;

#ifdef _TARGET_ARM_
            case GT_INTRINSIC:
                if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
                {
                    switch (tree->TypeGet())
                    {
                        case TYP_DOUBLE:
                            return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
                        case TYP_FLOAT:
                            return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
                        default:
                            unreached();
                    }
                }
                break;
#endif

            default:
                break;
        }

#if !CPU_HAS_FP_SUPPORT
        tree = fgMorphToEmulatedFP(tree);
#endif

        /* Could this operator throw an exception? */
        if (fgGlobalMorph && tree->OperMayThrow())
        {
            if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
            {
                /* Mark the tree node as potentially throwing an exception */
                tree->gtFlags |= GTF_EXCEPT;
            }
        }

        /*-------------------------------------------------------------------------
         * Process the first operand, if any
         */

        if (op1)
        {
#if LOCAL_ASSERTION_PROP
            // If we are entering the "then" part of a Qmark-Colon we must
            // save the state of the current copy assignment table
            // so that we can restore this state when entering the "else" part
            if (isQmarkColon)
            {
                noway_assert(optLocalAssertionProp);
                if (optAssertionCount)
                {
                    noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
                    unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
                    origAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
                    origAssertionCount = optAssertionCount;
                    memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
                }
                else
                {
                    origAssertionCount = 0;
                    origAssertionTab   = nullptr;
                }
            }
#endif // LOCAL_ASSERTION_PROP

            // We might need a new MorphAddressContext context.  (These are used to convey
            // parent context about how addresses being calculated will be used; see the
            // specification comment for MorphAddrContext for full details.)
            // Assume it's an Ind context to start.
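            //
            // For illustration only (not from the original sources): when morphing the address
            // computation of something like *(p + 8), the GT_ADD is visited in an Ind context
            // whose m_totalOffset accumulates the constant 8, so the consumer can decide whether
            // the small, constant offset still requires an explicit null check.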
            MorphAddrContext  subIndMac1(MACK_Ind);
            MorphAddrContext* subMac1 = mac;
            if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
            {
                switch (tree->gtOper)
                {
                    case GT_ADDR:
                        if (subMac1 == nullptr)
                        {
                            subMac1         = &subIndMac1;
                            subMac1->m_kind = MACK_Addr;
                        }
                        break;
                    case GT_COMMA:
                        // In a comma, the incoming context only applies to the rightmost arg of the
                        // comma list.  The left arg (op1) gets a fresh context.
                        subMac1 = nullptr;
                        break;
                    case GT_OBJ:
                    case GT_BLK:
                    case GT_DYN_BLK:
                    case GT_IND:
                        subMac1 = &subIndMac1;
                        break;
                    default:
                        break;
                }
            }

            // For additions, if we're in an IND context keep track of whether
            // all offsets added to the address are constant, and their sum.
            if (tree->gtOper == GT_ADD && subMac1 != nullptr)
            {
                assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
                GenTreePtr otherOp = tree->gtOp.gtOp2;
                // Is the other operator a constant?
                if (otherOp->IsCnsIntOrI())
                {
                    ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
                    totalOffset += otherOp->gtIntConCommon.IconValue();
                    if (totalOffset.IsOverflow())
                    {
                        // We will consider an offset so large as to overflow as "not a constant" --
                        // we will do a null check.
                        subMac1->m_allConstantOffsets = false;
                    }
                    else
                    {
                        subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
                    }
                }
                else
                {
                    subMac1->m_allConstantOffsets = false;
                }
            }

            // If gtOp1 is a GT_FIELD, we need to pass down the mac if
            // its parent is GT_ADDR, since the address of the field
            // is part of an ongoing address computation. Otherwise
            // op1 represents the value of the field and so any address
            // calculations it does are in a new context.
            if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
            {
                subMac1 = nullptr;

                // The impact of this field's value to any ongoing
                // address computation is handled below when looking
                // at the children.
            }

            tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);

#if LOCAL_ASSERTION_PROP
            // If we are exiting the "then" part of a Qmark-Colon we must
            // save the state of the current copy assignment table
            // so that we can merge this state with the "else" part exit
            if (isQmarkColon)
            {
                noway_assert(optLocalAssertionProp);
                if (optAssertionCount)
                {
                    noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
                    unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
                    thenAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
                    thenAssertionCount = optAssertionCount;
                    memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
                }
                else
                {
                    thenAssertionCount = 0;
                    thenAssertionTab   = nullptr;
                }
            }
#endif // LOCAL_ASSERTION_PROP

            /* Morphing along with folding and inlining may have changed the
             * side effect flags, so we have to reset them
             *
             * NOTE: Don't reset the exception flags on nodes that may throw */

            noway_assert(tree->gtOper != GT_CALL);

            if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
            {
                tree->gtFlags &= ~GTF_CALL;
            }

            if (!tree->OperMayThrow())
            {
                tree->gtFlags &= ~GTF_EXCEPT;
            }

            /* Propagate the new flags */
            tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);

            // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar itself does.
            // Similarly for clsVar
            if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
            {
                tree->gtFlags &= ~GTF_GLOB_REF;
            }
        }
11487 /*-------------------------------------------------------------------------
11488 * Process the second operand, if any
11494 #if LOCAL_ASSERTION_PROP
11495 // If we are entering the "else" part of a Qmark-Colon we must
11496 // reset the state of the current copy assignment table
11499 noway_assert(optLocalAssertionProp);
11500 optAssertionReset(0);
11501 if (origAssertionCount)
11503 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11504 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11505 optAssertionReset(origAssertionCount);
11508 #endif // LOCAL_ASSERTION_PROP
11510 // We might need a new MorphAddrContext to use in evaluating op2.
11511 // (These are used to convey parent context about how addresses being calculated
11512 // will be used; see the specification comment for MorphAddrContext for full details.)
11513 // Assume it's an Ind context to start.
11514 switch (tree->gtOper)
11517 if (mac != nullptr && mac->m_kind == MACK_Ind)
11519 GenTreePtr otherOp = tree->gtOp.gtOp1;
11520 // Is the other operand a constant?
11521 if (otherOp->IsCnsIntOrI())
11523 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11527 mac->m_allConstantOffsets = false;
11535 // If gtOp2 is a GT_FIELD, we must be taking its value,
11536 // so it should evaluate its address in a new context.
11537 if (op2->gtOper == GT_FIELD)
11539 // The impact of this field's value on any ongoing
11540 // address computation is handled above when looking at op1.
11545 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11547 /* Propagate the side effect flags from op2 */
11549 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11551 #if LOCAL_ASSERTION_PROP
11552 // If we are exiting the "else" part of a Qmark-Colon we must
11553 // merge the state of the current copy assignment table with
11554 // that of the exit of the "then" part.
11557 noway_assert(optLocalAssertionProp);
11558 // If either exit table has zero entries then
11559 // the merged table also has zero entries
11560 if (optAssertionCount == 0 || thenAssertionCount == 0)
11562 optAssertionReset(0);
11566 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11567 if ((optAssertionCount != thenAssertionCount) ||
11568 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11570 // Yes, they are different, so we have to find the merged set.
11571 // Iterate over the copy asgn table removing any entries
11572 // that do not have an exact match in the thenAssertionTab.
11573 AssertionIndex index = 1;
11574 while (index <= optAssertionCount)
11576 AssertionDsc* curAssertion = optGetAssertion(index);
11578 for (unsigned j = 0; j < thenAssertionCount; j++)
11580 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11582 // Do the left sides match?
11583 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11584 (curAssertion->assertionKind == thenAssertion->assertionKind))
11586 // Do the right sides match?
11587 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
11588 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
11599 // If we fall out of the loop above then we didn't find
11600 // any matching entry in the thenAssertionTab, so it must
11601 // have been killed on that path; we remove it here.
11604 // The data at optAssertionTabPrivate[index] is to be removed
11605 CLANG_FORMAT_COMMENT_ANCHOR;
11609 printf("The QMARK-COLON ");
11611 printf(" removes assertion candidate #%d\n", index);
11614 optAssertionRemove(index);
11617 // The data at optAssertionTabPrivate[index] is to be kept
11623 #endif // LOCAL_ASSERTION_PROP
11626 DONE_MORPHING_CHILDREN:
11628 /*-------------------------------------------------------------------------
11629 * Now do POST-ORDER processing
11632 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
11633 // Variable shifts of a long end up being helper calls, so mark the tree as such. This
11634 // is potentially too conservative, since they'll get treated as having side effects.
11635 // It is important to mark them as calls so if they are part of an argument list,
11636 // they will get sorted and processed properly (for example, it is important to handle
11637 // all nested calls before putting struct arguments in the argument registers). We
11638 // could mark the trees just before argument processing, but it would require a full
11639 // tree walk of the argument tree, so we just do it here, instead, even though we'll
11640 // mark non-argument trees (that will still get converted to calls, anyway).
11641 if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
11643 tree->gtFlags |= GTF_CALL;
11645 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
11647 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
11648 (op2 && !varTypeIsGC(op2->TypeGet())))
11650 // The tree is really not GC but was marked as such. Now that the
11651 // children have been unmarked, unmark the tree too.
11653 // Remember that GT_COMMA inherits its type only from op2
11654 if (tree->gtOper == GT_COMMA)
11656 tree->gtType = genActualType(op2->TypeGet());
11660 tree->gtType = genActualType(op1->TypeGet());
11664 GenTreePtr oldTree = tree;
11666 GenTreePtr qmarkOp1 = nullptr;
11667 GenTreePtr qmarkOp2 = nullptr;
11669 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
11671 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
11672 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
11675 // Try to fold it; maybe we get lucky.
11676 tree = gtFoldExpr(tree);
11678 if (oldTree != tree)
11680 /* if gtFoldExpr returned op1 or op2 then we are done */
11681 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
11686 /* If we created a comma-throw tree then we need to morph op1 */
11687 if (fgIsCommaThrow(tree))
11689 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
11690 fgMorphTreeDone(tree);
11696 else if (tree->OperKind() & GTK_CONST)
11701 /* gtFoldExpr could have used setOper to change the oper */
11702 oper = tree->OperGet();
11703 typ = tree->TypeGet();
11705 /* gtFoldExpr could have changed op1 and op2 */
11706 op1 = tree->gtOp.gtOp1;
11707 op2 = tree->gtGetOp2IfPresent();
11709 // Do we have an integer compare operation?
11711 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
11713 // Are we comparing against zero?
11715 if (op2->IsIntegralConst(0))
11717 // Request that the codegen for op1 sets the condition flags
11718 // when it generates the code for op1.
11720 // Codegen for op1 must set the condition flags if
11721 // this method returns true.
11723 op1->gtRequestSetFlags();
11726 /*-------------------------------------------------------------------------
11727 * Perform the required oper-specific postorder morphing
11731 GenTreePtr cns1, cns2;
11732 GenTreePtr thenNode;
11733 GenTreePtr elseNode;
11734 size_t ival1, ival2;
11735 GenTreePtr lclVarTree;
11736 GenTreeLclVarCommon* lclVarCmnTree;
11737 FieldSeqNode* fieldSeq = nullptr;
11743 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
11744 if (lclVarTree != nullptr)
11746 lclVarTree->gtFlags |= GTF_VAR_DEF;
11749 if (op1->gtEffectiveVal()->OperIsConst())
11751 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
11752 tree->gtOp.gtOp1 = op1;
11755 /* If we are storing a small type, we might be able to omit a cast */
11756 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
11758 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
11760 var_types castType = op2->CastToType();
11762 // If we are performing a narrowing cast and
11763 // castType is larger or the same as op1's type
11764 // then we can discard the cast.
11766 if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
11768 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
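// For example (illustrative): in "IND(byte) = CAST(short <- x)" the cast to
// short can be dropped, since the byte-sized store truncates more than the
// cast does.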
11771 else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
11773 /* We don't need to zero extend the setcc instruction */
11774 op2->gtType = TYP_BYTE;
11777 // If we introduced a CSE we may need to undo the optimization above
11778 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
11779 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
11780 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
11782 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11783 LclVarDsc* varDsc = &lvaTable[varNum];
11785 /* We again need to zero extend the setcc instruction */
11786 op2->gtType = varDsc->TypeGet();
11788 fgAssignSetVarDef(tree);
11806 /* We can't CSE the LHS of an assignment */
11807 /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
11808 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
11810 op1->gtFlags |= GTF_DONT_CSE;
11817 /* Make sure we're allowed to do this */
11819 if (optValnumCSE_phase)
11821 // It is not safe to reorder/delete CSE's
11827 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
11829 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
11831 op1 = tree->gtOp.gtOp1;
11833 /* Since this can occur repeatedly we use a while loop */
11835 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
11836 (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
11837 (op1->gtOverflow() == false))
11839 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
11841 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
11842 ival2 = cns2->gtIntCon.gtIconVal;
11844 if (op1->gtOper == GT_ADD)
11852 cns2->gtIntCon.gtIconVal = ival2;
11854 #ifdef _TARGET_64BIT_
11855 // we need to properly re-sign-extend or truncate as needed.
11856 cns2->AsIntCon()->TruncateOrSignExtend32();
11857 #endif // _TARGET_64BIT_
11859 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
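// For example (illustrative): "(x + 10) == 25" becomes "x == 15", and
// "(x - 10) == 25" becomes "x == 35".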
11864 // Here we look for the following tree
11865 //
11866 //                        EQ/NE
11867 //                        /  \
11868 //                     op1   CNS 0/1
11869 //
11870 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
11872 // cast to unsigned allows test for both 0 and 1
11873 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
11875 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
11877 else // cast to UINT64 allows test for both 0 and 1
11878 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
11880 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
11883 if (ival2 != INT_MAX)
11885 // If we don't have a comma and relop, we can't do this optimization
11887 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
11889 // Here we look for the following transformation
11890 //
11891 //                  EQ/NE                      Possible REVERSE(RELOP)
11892 //                  /  \                           /      \
11893 //               COMMA  CNS 0/1      ->        COMMA   relop_op2
11894 //               /   \                         /    \
11895 //              x   RELOP                    x     relop_op1
11896 //                  /    \
11897 //           relop_op1  relop_op2
11901 GenTreePtr comma = op1;
11902 GenTreePtr relop = comma->gtOp.gtOp2;
11904 GenTreePtr relop_op1 = relop->gtOp.gtOp1;
11906 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
11910 gtReverseCond(relop);
11913 relop->gtOp.gtOp1 = comma;
11914 comma->gtOp.gtOp2 = relop_op1;
11916 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
11917 comma->gtFlags &= ~GTF_ALL_EFFECT;
11918 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
11919 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
11921 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
11922 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
11924 relop->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
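// For example (illustrative, integer operands): "COMMA(stmt, a < b) != 0"
// becomes "COMMA(stmt, a) < b", while "COMMA(stmt, a < b) == 0" becomes the
// reversed relop "COMMA(stmt, a) >= b".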
11929 if (op1->gtOper == GT_COMMA)
11931 // Here we look for the following tree
11932 // and when the LCL_VAR is a temp we can fold the tree:
11933 //
11934 //                        EQ/NE                  EQ/NE
11935 //                        /  \                   /  \
11936 //                     COMMA  CNS 0/1  ->     RELOP CNS 0/1
11937 //                     /   \                   / \
11938 //                   ASG  LCL_VAR
11939 //                  /  \
11940 //           LCL_VAR   RELOP
11941 //                      / \
11944 GenTreePtr asg = op1->gtOp.gtOp1;
11945 GenTreePtr lcl = op1->gtOp.gtOp2;
11947 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
11948 if (asg->gtOper != GT_ASG)
11953 /* The right side of the comma must be a LCL_VAR temp */
11954 if (lcl->gtOper != GT_LCL_VAR)
11959 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
11960 noway_assert(lclNum < lvaCount);
11962 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
11963 if (!lvaTable[lclNum].lvIsTemp)
11969 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
11970 // Fix 383856 X86/ARM ILGEN
11971 if (lclNumIsCSE(lclNum))
11977 /* We also must be assigning the result of a RELOP */
11978 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
11983 /* Both of the LCL_VAR must match */
11984 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
11989 /* If right side of asg is not a RELOP then skip */
11990 if (!asg->gtOp.gtOp2->OperIsCompare())
11995 LclVarDsc* varDsc = lvaTable + lclNum;
11997 /* Set op1 to the right side of asg, (i.e. the RELOP) */
11998 op1 = asg->gtOp.gtOp2;
12000 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12001 DEBUG_DESTROY_NODE(lcl);
12003 /* This local variable should never be used again */
12005 // VSW 184221: Set RefCnt to zero to indicate that this local var
12006 // is not used any more. (Keep the lvType as is.)
12007 // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
12008 // And then emitter::emitEndCodeGen will assert in the following line:
12009 // noway_assert( dsc->lvTracked);
12011 noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
12012 varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
12013 // and it only shows up twice.
12015 lvaTable[lclNum].lvRefCnt = 0;
12016 lvaTable[lclNum].lvaResetSortAgainFlag(this);
12019 if (op1->OperIsCompare())
12021 // Here we look for the following tree
12022 //
12023 //                        EQ/NE           ->      RELOP/!RELOP
12024 //                        /  \                       /    \
12025 //                     RELOP  CNS 0/1
12026 //                     /   \
12027 //
12028 // Note that we will remove/destroy the EQ/NE node and move
12029 // the RELOP up into its location.
12031 /* Here we reverse the RELOP if necessary */
12033 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12037 gtReverseCond(op1);
12040 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12041 op1->gtType = tree->gtType;
12043 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12044 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12046 DEBUG_DESTROY_NODE(tree);
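// For example (illustrative): "COMMA(tmp = (a < b), tmp) != 0" folds to just
// "a < b" once the single-def temp is eliminated.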
12051 // Now we check for a compare with the result of an '&' operator
12052 //
12053 // Here we look for the following transformation:
12054 //
12055 //                        EQ/NE                  EQ/NE
12056 //                        /  \                   /  \
12057 //                      AND   CNS 0/1  ->      AND   CNS 0
12058 //                     /   \                  /   \
12059 //                  RSZ/RSH   CNS 1          x     CNS (1 << y)
12060 //                  /  \
12061 //                 x   CNS_INT +y
12063 if (op1->gtOper == GT_AND)
12065 GenTreePtr andOp = op1;
12066 GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
12068 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12073 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12078 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12080 if (shiftAmount < 0)
12085 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12090 if (andOp->gtType == TYP_INT)
12092 if (shiftAmount > 31)
12097 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12099 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12101 // Reverse the cond if necessary
12104 gtReverseCond(tree);
12105 cns2->gtIntCon.gtIconVal = 0;
12106 oper = tree->gtOper;
12109 else if (andOp->gtType == TYP_LONG)
12111 if (shiftAmount > 63)
12116 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12118 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12120 // Reverse the cond if necessary
12123 gtReverseCond(tree);
12124 cns2->gtIntConCommon.SetLngValue(0);
12125 oper = tree->gtOper;
12129 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12131 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12132 DEBUG_DESTROY_NODE(rshiftOp);
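// For example (illustrative): "((x >> 5) & 1) != 0" becomes "(x & 0x20) != 0",
// testing the bit in place instead of shifting it down first.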
12134 } // END if (ival2 != INT_MAX)
12137 /* Now check for compares with small constant longs that can be cast to int */
12139 if (!cns2->OperIsConst())
12144 if (cns2->TypeGet() != TYP_LONG)
12149 /* Is the constant 31 bits or smaller? */
12151 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12156 /* Is the first comparand a mask operation of type long? */
12158 if (op1->gtOper != GT_AND)
12160 /* Another interesting case: cast from int */
12162 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12163 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12164 !op1->gtOverflow()) // cannot be an overflow checking cast
12166 /* Simply make this into an integer comparison */
12168 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12169 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
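// For example (illustrative): "((long)i) == 255L", where i is an int, becomes
// the integer comparison "i == 255".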
12175 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12177 /* Is the result of the mask effectively an INT ? */
12179 GenTreePtr andMask;
12180 andMask = op1->gtOp.gtOp2;
12181 if (andMask->gtOper != GT_CNS_NATIVELONG)
12185 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12190 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12192 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
12194 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12196 noway_assert(andMask == op1->gtOp.gtOp2);
12198 ival1 = (int)andMask->gtIntConCommon.LngValue();
12199 andMask->SetOper(GT_CNS_INT);
12200 andMask->gtType = TYP_INT;
12201 andMask->gtIntCon.gtIconVal = ival1;
12203 /* now change the type of the AND node */
12205 op1->gtType = TYP_INT;
12207 /* finally we replace the comparand */
12209 ival2 = (int)cns2->gtIntConCommon.LngValue();
12210 cns2->SetOper(GT_CNS_INT);
12211 cns2->gtType = TYP_INT;
12213 noway_assert(cns2 == op2);
12214 cns2->gtIntCon.gtIconVal = ival2;
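// For example (illustrative): "(x & 0xFFL) == 16L", where x is a long, becomes
// "((int)x & 0xFF) == 16"; the 32-bit mask guarantees no information is lost.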
12223 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12225 if (op2->gtOper == GT_CNS_INT)
12228 /* Check for "expr relop 1" */
12229 if (cns2->IsIntegralConst(1))
12231 /* Check for "expr >= 1" */
12234 /* Change to "expr > 0" */
12238 /* Check for "expr < 1" */
12239 else if (oper == GT_LT)
12241 /* Change to "expr <= 0" */
12246 /* Check for "expr relop -1" */
12247 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12249 /* Check for "expr <= -1" */
12252 /* Change to "expr < 0" */
12256 /* Check for "expr > -1" */
12257 else if (oper == GT_GT)
12259 /* Change to "expr >= 0" */
12263 // If we get here we should be changing 'oper'
12264 assert(tree->OperGet() != oper);
12266 // Keep the old ValueNumber for 'tree' as the new expr
12267 // will still compute the same value as before
12268 tree->SetOper(oper, GenTree::PRESERVE_VN);
12269 cns2->gtIntCon.gtIconVal = 0;
12271 // vnStore is null before the ValueNumber phase has run
12272 if (vnStore != nullptr)
12274 // Update the ValueNumber for 'cns2', as we just changed it to 0
12275 fgValueNumberTreeConst(cns2);
12278 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12283 else // we have an unsigned comparison
12285 if (op2->IsIntegralConst(0))
12287 if ((oper == GT_GT) || (oper == GT_LE))
12289 // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12290 // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
12291 // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12292 // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12293 // occurs as a result of branch inversion.
12294 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12295 tree->SetOper(oper, GenTree::PRESERVE_VN);
12296 tree->gtFlags &= ~GTF_UNSIGNED;
12303 noway_assert(tree->OperKind() & GTK_RELOP);
12305 /* Check if the result of the comparison is used for a jump.
12306 * If not then only the int (i.e. 32 bit) case is handled in
12307 * the code generator through the (x86) "set" instructions.
12308 * For the rest of the cases, the simplest way is to
12309 * "simulate" the comparison with ?:
12311 * On ARM, we previously used the IT instruction, but the IT instructions
12312 * have mostly been declared obsolete and off-limits, so all cases on ARM
12313 * get converted to ?: */
12315 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
12317 /* We convert it to "(CMP_TRUE) ? (1):(0)" */
12320 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12321 op1->gtRequestSetFlags();
12323 op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
12324 op2 = fgMorphTree(op2);
12326 tree = gtNewQmarkNode(TYP_INT, op1, op2);
12328 fgMorphTreeDone(tree);
12336 /* If op1 is a comma throw node then we won't be keeping op2 */
12337 if (fgIsCommaThrow(op1))
12342 /* Get hold of the two branches */
12344 noway_assert(op2->OperGet() == GT_COLON);
12345 elseNode = op2->AsColon()->ElseNode();
12346 thenNode = op2->AsColon()->ThenNode();
12348 /* Try to hoist assignments out of qmark colon constructs.
12349 i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
12351 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
12352 thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
12353 thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
12355 noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
12357 GenTreePtr asg = thenNode;
12358 GenTreePtr colon = op2;
12359 colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
12360 colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
12361 tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
12362 asg->gtOp.gtOp2 = tree;
12364 // Asg will have all the flags that the QMARK had
12365 asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
12367 // The colon node won't have the flags that x had.
12368 colon->gtFlags &= ~GTF_ALL_EFFECT;
12369 colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12371 DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
12372 DEBUG_DESTROY_NODE(elseNode);
12377 /* If the 'else' branch is empty swap the two branches and reverse the condition */
12379 if (elseNode->IsNothingNode())
12381 /* This can only happen for VOID ?: */
12382 noway_assert(op2->gtType == TYP_VOID);
12384 /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
12385 if (thenNode->IsNothingNode())
12387 // We may be able to throw away op1 (unless it has side-effects)
12389 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12391 /* Just return a Nop Node */
12396 /* Just return the relop, but clear the special flags. Note
12397 that we can't do that for longs and floats (see code under
12398 COMPARE label above) */
12400 if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
12402 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12409 GenTreePtr tmp = elseNode;
12411 op2->AsColon()->ElseNode() = elseNode = thenNode;
12412 op2->AsColon()->ThenNode() = thenNode = tmp;
12413 gtReverseCond(op1);
12417 #if !defined(_TARGET_ARM_)
12418 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
12420 // Don't do this optimization for ARM: we always require assignment
12421 // to boolean to remain ?:, since we don't have any way to generate
12422 // this with straight-line code, like x86 does using setcc (at least
12423 // after the IT instruction is deprecated).
12425 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
12426 thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
12428 ival1 = thenNode->gtIntCon.gtIconVal;
12429 ival2 = elseNode->gtIntCon.gtIconVal;
12431 // Is one constant 0 and the other 1?
12432 if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
12434 // If the constants are {1, 0}, reverse the condition
12437 gtReverseCond(op1);
12440 // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
12441 // needs to materialize the result as a 0 or 1.
12442 noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
12443 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
12445 DEBUG_DESTROY_NODE(tree);
12446 DEBUG_DESTROY_NODE(op2);
12451 #endif // !_TARGET_ARM_
12453 break; // end case GT_QMARK
12457 #ifndef _TARGET_64BIT_
12458 if (typ == TYP_LONG)
12460 // This must be GTF_MUL_64RSLT
12461 assert(tree->gtIsValid64RsltMul());
12464 #endif // _TARGET_64BIT_
12469 if (tree->gtOverflow())
12474 // TODO #4104: there are a lot of other places where
12475 // this condition is not checked before transformations.
12478 /* Check for "op1 - cns2"; we change it to "op1 + (-cns2)" */
12481 if (op2->IsCnsIntOrI())
12483 /* Negate the constant and change the node to be "+" */
12485 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12487 tree->ChangeOper(oper);
12491 /* Check for "cns1 - op2"; we change it to "(cns1 + (-op2))" */
12494 if (op1->IsCnsIntOrI())
12496 noway_assert(varTypeIsIntOrI(tree));
12498 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
12499 // node should be the same
12500 // as the type of the tree, i.e. tree->gtType.
12501 fgMorphTreeDone(op2);
12504 tree->ChangeOper(oper);
12508 /* No match - exit */
12512 #ifdef _TARGET_ARM64_
12514 if (!varTypeIsFloating(tree->gtType))
12516 // Codegen for this instruction needs to be able to throw two exceptions:
12517 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12518 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12522 // Codegen for this instruction needs to be able to throw one exception:
12523 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
12530 if (tree->gtOverflow())
12532 tree->gtRequestSetFlags();
12534 // Add the exception-throwing basic block to jump to on overflow
12536 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
12538 // We can't do any commutative morphing for overflow instructions
12549 /* Commute any non-REF constants to the right */
12552 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12554 // TODO-Review: We used to assert here that
12555 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12556 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12557 // and would sometimes hit this assertion. This may indicate a missed "remorph".
12558 // Task is to re-enable this assertion and investigate.
12560 /* Swap the operands */
12561 tree->gtOp.gtOp1 = op2;
12562 tree->gtOp.gtOp2 = op1;
12565 op2 = tree->gtOp.gtOp2;
12568 /* See if we can fold GT_ADD nodes. */
12570 if (oper == GT_ADD)
12572 /* Fold "((x+icon1)+(y+icon2))" to "((x+y)+(icon1+icon2))" */
12574 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12575 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12576 !op1->gtOverflow() && !op2->gtOverflow())
12578 cns1 = op1->gtOp.gtOp2;
12579 cns2 = op2->gtOp.gtOp2;
12580 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12581 #ifdef _TARGET_64BIT_
12582 if (cns1->TypeGet() == TYP_INT)
12584 // we need to properly re-sign-extend or truncate after adding two int constants above
12585 cns1->AsIntCon()->TruncateOrSignExtend32();
12587 #endif //_TARGET_64BIT_
12589 tree->gtOp.gtOp2 = cns1;
12590 DEBUG_DESTROY_NODE(cns2);
12592 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
12593 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
12594 DEBUG_DESTROY_NODE(op2);
12595 op2 = tree->gtOp.gtOp2;
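// For example (illustrative): "((x + 3) + (y + 4))" becomes "((x + y) + 7)".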
12598 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
12600 /* Fold "((x+icon1)+icon2)" to "(x+(icon1+icon2))" */
12602 if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
12603 !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
12605 cns1 = op1->gtOp.gtOp2;
12606 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
12607 op2->gtIntConCommon.IconValue());
12608 #ifdef _TARGET_64BIT_
12609 if (op2->TypeGet() == TYP_INT)
12611 // we need to properly re-sign-extend or truncate after adding two int constants above
12612 op2->AsIntCon()->TruncateOrSignExtend32();
12614 #endif //_TARGET_64BIT_
12616 if (cns1->OperGet() == GT_CNS_INT)
12618 op2->gtIntCon.gtFieldSeq =
12619 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
12621 DEBUG_DESTROY_NODE(cns1);
12623 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12624 DEBUG_DESTROY_NODE(op1);
12625 op1 = tree->gtOp.gtOp1;
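// For example (illustrative): "((x + 3) + 4)" becomes "(x + 7)".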
12630 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
12633 // If this addition is adding an offset to a null pointer,
12634 // avoid the work and yield the null pointer immediately.
12635 // Dereferencing the pointer in either case will have the same effect.
12638 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
12639 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
12641 op2->gtType = tree->gtType;
12642 DEBUG_DESTROY_NODE(op1);
12643 DEBUG_DESTROY_NODE(tree);
12647 // Remove the addition iff it won't change the tree type to TYP_REF.
12650 if (!gtIsActiveCSE_Candidate(op2) &&
12651 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
12653 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
12654 (op2->gtIntCon.gtFieldSeq != nullptr) &&
12655 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
12657 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
12660 DEBUG_DESTROY_NODE(op2);
12661 DEBUG_DESTROY_NODE(tree);
12668 /* See if we can fold GT_MUL by const nodes */
12669 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
12671 #ifndef _TARGET_64BIT_
12672 noway_assert(typ <= TYP_UINT);
12673 #endif // _TARGET_64BIT_
12674 noway_assert(!tree->gtOverflow());
12676 ssize_t mult = op2->gtIntConCommon.IconValue();
12677 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12678 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
12680 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
12684 // We may be able to throw away op1 (unless it has side-effects)
12686 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12688 DEBUG_DESTROY_NODE(op1);
12689 DEBUG_DESTROY_NODE(tree);
12690 return op2; // Just return the "0" node
12693 // We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
12696 tree->ChangeOper(GT_COMMA);
12700 size_t abs_mult = (mult >= 0) ? mult : -mult;
12701 size_t lowestBit = genFindLowestBit(abs_mult);
12702 bool changeToShift = false;
12704 // is it a power of two? (positive or negative)
12705 if (abs_mult == lowestBit)
12707 // If negative, negate (min-int does not need negation)
12708 if (mult < 0 && mult != SSIZE_T_MIN)
12710 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12711 fgMorphTreeDone(op1);
12714 // If "op2" is a constant array index, the other multiplicand must be a constant.
12715 // Transfer the annotation to the other one.
12716 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12717 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
12719 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
12720 GenTreePtr otherOp = op1;
12721 if (otherOp->OperGet() == GT_NEG)
12723 otherOp = otherOp->gtOp.gtOp1;
12725 assert(otherOp->OperGet() == GT_CNS_INT);
12726 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
12727 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
12732 DEBUG_DESTROY_NODE(op2);
12733 DEBUG_DESTROY_NODE(tree);
12737 /* Change the multiplication into a shift by log2(val) bits */
12738 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
12739 changeToShift = true;
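// For example (illustrative): "x * 8" becomes "x << 3", and "x * -8" becomes
// "(-x) << 3".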
12742 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
12744 int shift = genLog2(lowestBit);
12745 ssize_t factor = abs_mult >> shift;
12747 if (factor == 3 || factor == 5 || factor == 9)
12749 // If negative, negate (min-int does not need negation)
12750 if (mult < 0 && mult != SSIZE_T_MIN)
12752 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
12753 fgMorphTreeDone(op1);
12756 GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
12757 if (op2IsConstIndex)
12759 factorIcon->AsIntCon()->gtFieldSeq =
12760 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
12763 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
12764 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
12765 fgMorphTreeDone(op1);
12767 op2->gtIntConCommon.SetIconValue(shift);
12768 changeToShift = true;
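// For example (illustrative): "x * 40" becomes "(x * 5) << 3"; the residual
// multiply by 3, 5 or 9 can then be emitted as a single scaled LEA.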
12771 #endif // LEA_AVAILABLE
12774 // vnStore is null before the ValueNumber phase has run
12775 if (vnStore != nullptr)
12777 // Update the ValueNumber for 'op2', as we just changed the constant
12778 fgValueNumberTreeConst(op2);
12781 // Keep the old ValueNumber for 'tree' as the new expr
12782 // will still compute the same value as before
12783 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
12785 goto DONE_MORPHING_CHILDREN;
12788 else if (fgOperIsBitwiseRotationRoot(oper))
12790 tree = fgRecognizeAndMorphBitwiseRotation(tree);
12792 // fgRecognizeAndMorphBitwiseRotation may return a new tree
12793 oper = tree->OperGet();
12794 typ = tree->TypeGet();
12795 op1 = tree->gtOp.gtOp1;
12796 op2 = tree->gtOp.gtOp2;
12805 /* Any constant cases should have been folded earlier */
12806 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
12811 noway_assert(varTypeIsFloating(op1->TypeGet()));
12813 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
12817 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
12818 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
12819 // is a local or clsVar, even if it has been address-exposed.
12820 if (op1->OperGet() == GT_ADDR)
12822 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
12828 // Can not remove a GT_IND if it is currently a CSE candidate.
12829 if (gtIsActiveCSE_Candidate(tree))
12834 bool foldAndReturnTemp;
12835 foldAndReturnTemp = false;
12839 /* Try to Fold *(&X) into X */
12840 if (op1->gtOper == GT_ADDR)
12842 // Can not remove a GT_ADDR if it is currently a CSE candidate.
12843 if (gtIsActiveCSE_Candidate(op1))
12848 temp = op1->gtOp.gtOp1; // X
12850 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
12851 // they are the *same* struct type. In fact, they almost certainly aren't. If the
12852 // address has an associated field sequence, that identifies this case; go through
12853 // the "lcl_fld" path rather than this one.
12854 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
12855 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
12857 foldAndReturnTemp = true;
12859 else if (temp->OperIsLocal())
12861 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
12862 LclVarDsc* varDsc = &lvaTable[lclNum];
12864 // We will try to optimize when we have a promoted struct with a zero lvFldOffset
12865 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
12867 noway_assert(varTypeIsStruct(varDsc));
12869 // We will try to optimize when we have a single field struct that is being struct promoted
12870 if (varDsc->lvFieldCnt == 1)
12872 unsigned lclNumFld = varDsc->lvFieldLclStart;
12873 // just grab the promoted field
12874 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
12876 // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset is zero
12878 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
12880 // We can just use the existing promoted field LclNum
12881 temp->gtLclVarCommon.SetLclNum(lclNumFld);
12882 temp->gtType = fieldVarDsc->TypeGet();
12884 foldAndReturnTemp = true;
12888 // If the type of the IND (typ) is a "small int", and the type of the local has the
12889 // same width, then we can reduce to just the local variable -- it will be
12890 // correctly normalized, and signed/unsigned differences won't matter.
12892 // The below transformation cannot be applied if the local var needs to be normalized on load.
12893 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
12894 !lvaTable[lclNum].lvNormalizeOnLoad())
12896 tree->gtType = typ = temp->TypeGet();
12897 foldAndReturnTemp = true;
12901 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. nullptr)
12903 assert(fieldSeq == nullptr);
12904 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
12905 assert(b || fieldSeq == nullptr);
12907 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
12909 // Append the field sequence, change the type.
12910 temp->AsLclFld()->gtFieldSeq =
12911 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
12912 temp->gtType = typ;
12914 foldAndReturnTemp = true;
12917 // Otherwise we will fold this into a GT_LCL_FLD below
12918 // where we check (temp != nullptr)
12920 else // !temp->OperIsLocal()
12922 // We don't try to fold away the GT_IND/GT_ADDR for this case
12926 else if (op1->OperGet() == GT_ADD)
12928 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
12930 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
12931 (!(opts.MinOpts() || opts.compDbgCode)))
12933 // No overflow arithmetic with pointers
12934 noway_assert(!op1->gtOverflow());
12936 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
12937 if (!temp->OperIsLocal())
12943 // Can not remove the GT_ADDR if it is currently a CSE candidate.
12944 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
12949 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12950 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
12952 // Does the address have an associated zero-offset field sequence?
12953 FieldSeqNode* addrFieldSeq = nullptr;
12954 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
12956 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
12959 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
12961 noway_assert(!varTypeIsGC(temp->TypeGet()));
12962 foldAndReturnTemp = true;
12966 // The emitter can't handle large offsets
12967 if (ival1 != (unsigned short)ival1)
12972 // The emitter can get confused by invalid offsets
12973 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
12978 #ifdef _TARGET_ARM_
12979 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
12981 if (varTypeIsFloating(typ))
12983 if ((ival1 % emitTypeSize(typ)) != 0)
12985 tree->gtFlags |= GTF_IND_UNALIGNED;
12991 // Now we can fold this into a GT_LCL_FLD below
12992 // where we check (temp != nullptr)
12996 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
12997 // - We may have a load of a local where the load has a different type than the local
12998 // - We may have a load of a local plus an offset
13000 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13001 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13002 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13003 // out-of-bounds w.r.t. the local).
13004 if ((temp != nullptr) && !foldAndReturnTemp)
13006 assert(temp->OperIsLocal());
13008 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13009 LclVarDsc* const varDsc = &lvaTable[lclNum];
13011 const var_types tempTyp = temp->TypeGet();
13012 const bool useExactSize =
13013 varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13014 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13016 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13017 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13018 // lclVar and must not extend beyond the end of the lclVar.
13019 if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize))
13021 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13025 // Make sure we don't separately promote the fields of this struct.
13026 if (varDsc->lvRegStruct)
13028 // We can enregister, but can't promote.
13029 varDsc->lvPromoted = false;
13033 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13036 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival'
13037 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
13038 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13040 if (temp->OperGet() == GT_LCL_FLD)
13042 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
13043 temp->AsLclFld()->gtFieldSeq =
13044 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13048 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
13049 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
13050 if (fieldSeq != nullptr)
13051 { // If it does represent a field, note that.
13052 temp->AsLclFld()->gtFieldSeq = fieldSeq;
13055 temp->gtType = tree->gtType;
13056 foldAndReturnTemp = true;
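// For example (illustrative): "IND(int)(ADD(ADDR(V01), 4))", where V01 is a
// local struct, folds to "LCL_FLD int V01 [+4]", so &V01 never has to be
// materialized.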
13060 if (foldAndReturnTemp)
13062 assert(temp != nullptr);
13063 assert(temp->TypeGet() == typ);
13064 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13066 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
13067 // 'temp' because a GT_ADDR always marks it for its operand.
13068 temp->gtFlags &= ~GTF_DONT_CSE;
13069 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13071 if (op1->OperGet() == GT_ADD)
13073 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13074 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13076 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
13077 DEBUG_DESTROY_NODE(tree); // GT_IND
13082 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13083 // could result in an invalid value number for the newly generated GT_IND node.
13084 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13086 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13087 // TBD: this transformation is currently necessary for correctness -- it might
13088 // be good to analyze the failures that result if we don't do this, and fix them
13089 // in other ways. Ideally, this should be optional.
13090 GenTreePtr commaNode = op1;
13091 unsigned treeFlags = tree->gtFlags;
13092 commaNode->gtType = typ;
13093 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13094 // dangerous; clear the GTF_REVERSE_OPS at least.
13097 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13099 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13101 commaNode = commaNode->gtOp.gtOp2;
13102 commaNode->gtType = typ;
13103 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13104 // dangerous; clear the GTF_REVERSE_OPS at least.
13107 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13110 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13114 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13116 GetArrayInfoMap()->Remove(tree);
13119 op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
13120 op1->gtFlags = treeFlags;
13123 GetArrayInfoMap()->Set(op1, arrInfo);
13126 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13128 commaNode->gtOp.gtOp2 = op1;
13136 // Can not remove op1 if it is currently a CSE candidate.
13137 if (gtIsActiveCSE_Candidate(op1))
13142 if (op1->OperGet() == GT_IND)
13144 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13146 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13147 if (gtIsActiveCSE_Candidate(tree))
13152 // Perform the transform ADDR(IND(...)) == (...).
13153 GenTreePtr addr = op1->gtOp.gtOp1;
13155 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13157 DEBUG_DESTROY_NODE(op1);
13158 DEBUG_DESTROY_NODE(tree);
13163 else if (op1->OperGet() == GT_OBJ)
13165 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13166 if (gtIsActiveCSE_Candidate(tree))
13171 // Perform the transform ADDR(OBJ(...)) == (...).
13172 GenTreePtr addr = op1->AsObj()->Addr();
13174 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13176 DEBUG_DESTROY_NODE(op1);
13177 DEBUG_DESTROY_NODE(tree);
13181 else if (op1->gtOper == GT_CAST)
13183 GenTreePtr casting = op1->gtCast.CastOp();
13184 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13186 DEBUG_DESTROY_NODE(op1);
13187 tree->gtOp.gtOp1 = op1 = casting;
13190 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13192 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13193 // (Be sure to mark "z" as an l-value...)
13194 GenTreePtr commaNode = op1;
13195 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13197 commaNode = commaNode->gtOp.gtOp2;
13199 // The top-level addr might be annotated with a zeroOffset field.
13200 FieldSeqNode* zeroFieldSeq = nullptr;
13201 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13203 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13205 // If the node we're about to put under a GT_ADDR is an indirection, it
13206 // doesn't need to be materialized, since we only want the addressing mode. Because
13207 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13208 // as a side effect.
13209 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13210 if (commaOp2->OperIsBlk())
13212 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13214 if (commaOp2->gtOper == GT_IND)
13216 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13219 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13223 // Transfer the annotation to the new GT_ADDR node.
13224 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
13226 commaNode->gtOp.gtOp2 = op1;
13227 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
13228 // might give op1 a type different from byref (like, say, native int). So now go back and give
13229 // all the comma nodes the type of op1.
13230 // TODO: the comma flag update below is conservative and can be improved.
13231 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13232 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13234 while (commaNode->gtOper == GT_COMMA)
13236 commaNode->gtType = op1->gtType;
13237 commaNode->gtFlags |= op1->gtFlags;
13239 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13241 commaNode = commaNode->gtOp.gtOp2;
13247 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13248 op1->gtFlags |= GTF_DONT_CSE;
13254 /* Mark the nodes that are conditionally executed */
13255 fgWalkTreePre(&tree, gtMarkColonCond);
13257 /* Since we're doing this postorder we clear this if it got set by a child */
13258 fgRemoveRestOfBlock = false;
13263 /* Special case: trees that don't produce a value */
13264 if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
13267 typ = tree->gtType = TYP_VOID;
13270 // If we are in the Valuenum CSE phase then don't morph away anything as these
13271 // nodes may have CSE defs/uses in them.
13273 if (!optValnumCSE_phase)
13275 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is all we need.
13278 GenTreePtr op1SideEffects = nullptr;
13279 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13280 // hoisted expressions in loops.
13281 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13282 if (op1SideEffects)
13284 // Replace the left hand side with the side effect list.
13285 tree->gtOp.gtOp1 = op1SideEffects;
13286 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
13290 /* The left operand is worthless, throw it away */
13291 if (lvaLocalVarRefCounted)
13293 lvaRecursiveDecRefCounts(op1);
13295 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13296 DEBUG_DESTROY_NODE(tree);
13297 DEBUG_DESTROY_NODE(op1);
13301 /* If the right operand is just a void nop node, throw it away */
13302 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13304 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13305 DEBUG_DESTROY_NODE(tree);
13306 DEBUG_DESTROY_NODE(op2);
13315 /* Special case if fgRemoveRestOfBlock is set to true */
13316 if (fgRemoveRestOfBlock)
13318 if (fgIsCommaThrow(op1, true))
13320 GenTreePtr throwNode = op1->gtOp.gtOp1;
13321 noway_assert(throwNode->gtType == TYP_VOID);
13326 noway_assert(op1->OperKind() & GTK_RELOP);
13327 noway_assert(op1->gtFlags & GTF_EXCEPT);
13329 // We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
13332 tree->ChangeOper(GT_COMMA);
13333 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13335 // Additionally since we're eliminating the JTRUE
13336 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
13337 // So we change it into a GT_COMMA as well.
13338 op1->ChangeOper(GT_COMMA);
13339 op1->gtType = op1->gtOp.gtOp1->gtType;
13348 noway_assert(oper == tree->gtOper);
13350 // If we are in the Valuenum CSE phase then don't morph away anything as these
13351 // nodes may have CSE defs/uses in them.
13353 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13355 /* Check for op1 as a GT_COMMA with an unconditional throw node */
13356 if (op1 && fgIsCommaThrow(op1, true))
13358 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13360 /* We can safely throw out the rest of the statements */
13361 fgRemoveRestOfBlock = true;
13364 GenTreePtr throwNode = op1->gtOp.gtOp1;
13365 noway_assert(throwNode->gtType == TYP_VOID);
13367 if (oper == GT_COMMA)
13369 /* Both tree and op1 are GT_COMMA nodes */
13370 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13371 tree->gtOp.gtOp1 = throwNode;
13374 else if (oper != GT_NOP)
13376 if (genActualType(typ) == genActualType(op1->gtType))
13378 /* The types match so, return the comma throw node as the new tree */
13383 if (typ == TYP_VOID)
13385 // Return the throw node
13390 GenTreePtr commaOp2 = op1->gtOp.gtOp2;
13392 // need type of oper to be same as tree
13393 if (typ == TYP_LONG)
13395 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13396 commaOp2->gtIntConCommon.SetLngValue(0);
13397 /* Change the types of oper and commaOp2 to TYP_LONG */
13398 op1->gtType = commaOp2->gtType = TYP_LONG;
13400 else if (varTypeIsFloating(typ))
13402 commaOp2->ChangeOperConst(GT_CNS_DBL);
13403 commaOp2->gtDblCon.gtDconVal = 0.0;
13404 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13405 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13409 commaOp2->ChangeOperConst(GT_CNS_INT);
13410 commaOp2->gtIntConCommon.SetIconValue(0);
13411 /* Change the types of oper and commaOp2 to TYP_INT */
13412 op1->gtType = commaOp2->gtType = TYP_INT;
13415 /* Return the GT_COMMA node as the new tree */
13422 /* Check for op2 as a GT_COMMA with an unconditional throw */
13424 if (op2 && fgIsCommaThrow(op2, true))
13426 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13428 /* We can safely throw out the rest of the statements */
13429 fgRemoveRestOfBlock = true;
13432 // If op1 has no side-effects
13433 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13435 // If tree is an asg node
13436 if (tree->OperIsAssignment())
13438 /* Return the throw node as the new tree */
13439 return op2->gtOp.gtOp1;
13442 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13444 /* Return the throw node as the new tree */
13445 return op2->gtOp.gtOp1;
13448 // If tree is a comma node
13449 if (tree->OperGet() == GT_COMMA)
13451 /* Return the throw node as the new tree */
13452 return op2->gtOp.gtOp1;
13455 /* for the shift nodes the type of op2 can differ from the tree type */
13456 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13458 noway_assert(GenTree::OperIsShiftOrRotate(oper));
13460 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13462 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13463 commaOp2->gtIntConCommon.SetLngValue(0);
13465 /* Change the types of oper and commaOp2 to TYP_LONG */
13466 op2->gtType = commaOp2->gtType = TYP_LONG;
13469 if ((genActualType(typ) == TYP_INT) &&
13470 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13472 // An example case is comparison (say GT_GT) of two longs or floating point values.
13474 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13476 commaOp2->ChangeOperConst(GT_CNS_INT);
13477 commaOp2->gtIntCon.gtIconVal = 0;
13478 /* Change the types of oper and commaOp2 to TYP_INT */
13479 op2->gtType = commaOp2->gtType = TYP_INT;
13482 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13484 noway_assert(tree->OperGet() == GT_ADD);
13486 GenTreePtr commaOp2 = op2->gtOp.gtOp2;
13488 commaOp2->ChangeOperConst(GT_CNS_INT);
13489 commaOp2->gtIntCon.gtIconVal = 0;
13490 /* Change the types of oper and commaOp2 to TYP_BYREF */
13491 op2->gtType = commaOp2->gtType = TYP_BYREF;
13494 /* types should now match */
13495 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13497 /* Return the GT_COMMA node as the new tree */
13503 /*-------------------------------------------------------------------------
13504 * Optional morphing is done if tree transformations are permitted
13507 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13512 tree = fgMorphSmpOpOptional(tree->AsOp());
13514 } // extra scope for gcc workaround
13518 #pragma warning(pop)
13521 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13523 genTreeOps oper = tree->gtOper;
13524 GenTree* op1 = tree->gtOp1;
13525 GenTree* op2 = tree->gtOp2;
13526 var_types typ = tree->TypeGet();
13528 if (GenTree::OperIsCommutative(oper))
13530 /* Swap the operands so that the more expensive one is 'op1' */
13532 if (tree->gtFlags & GTF_REVERSE_OPS)
13540 tree->gtFlags &= ~GTF_REVERSE_OPS;
13543 if (oper == op2->gtOper)
13545 /* Reorder nested operators at the same precedence level to be
13546 left-recursive. For example, change "(a+(b+c))" to the
13547 equivalent expression "((a+b)+c)".
13550 /* Things are handled differently for floating-point operators */
13552 if (!varTypeIsFloating(tree->TypeGet()))
13554 fgMoveOpsLeft(tree);
13563 /* Change "((x+icon)+y)" to "((x+y)+icon)"
13564 Don't reorder floating-point operations */
13566 if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13567 varTypeIsIntegralOrI(typ))
13569 GenTreePtr ad2 = op1->gtOp.gtOp2;
13571 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
13583 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
13584 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
13585 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same type as (tree).
13588 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is necessary.
13591 if (varTypeIsGC(op2->TypeGet()))
13593 noway_assert(varTypeIsGC(typ));
13598 op1->gtOp.gtOp2 = op2;
13599 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13607 /*-------------------------------------------------------------------------
13608 * Perform optional oper-specific postorder morphing
13614 bool dstIsSafeLclVar;
13617 /* We'll convert "a = a <op> x" into "a <op>= x" */
13618 /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
13619 CLANG_FORMAT_COMMENT_ANCHOR;
13621 if (typ == TYP_LONG)
13626 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13628 if (tree->OperIsCopyBlkOp())
13630 return fgMorphCopyBlock(tree);
13634 return fgMorphInitBlock(tree);
13638 /* Make sure we're allowed to do this */
13640 if (optValnumCSE_phase)
13642 // It is not safe to reorder/delete CSE's
13646 /* Are we assigning to a GT_LCL_VAR ? */
13648 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
13650 /* If we have a GT_LCL_VAR, then is the address taken? */
13651 if (dstIsSafeLclVar)
13653 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
13654 LclVarDsc* varDsc = lvaTable + lclNum;
13656 noway_assert(lclNum < lvaCount);
13658 /* Is the address taken? */
13659 if (varDsc->lvAddrExposed)
13661 dstIsSafeLclVar = false;
13663 else if (op2->gtFlags & GTF_ASG)
13669 if (!dstIsSafeLclVar)
13671 if (op2->gtFlags & GTF_ASG)
13676 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13682 /* Special case: a cast that can be thrown away */
13684 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13690 srct = op2->gtCast.CastOp()->TypeGet();
13691 cast = (var_types)op2->CastToType();
13692 dstt = op1->TypeGet();
13694 /* Make sure these are all ints and precision is not lost */
13696 if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
13698 op2 = tree->gtOp2 = op2->gtCast.CastOp();
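// For example (illustrative): in "IND(short) = CAST(int <- x)" the widening
// cast is redundant because only 16 bits are stored anyway.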
13702 /* Make sure we have the operator range right */
13704 noway_assert(GT_SUB == GT_ADD + 1);
13705 noway_assert(GT_MUL == GT_ADD + 2);
13706 noway_assert(GT_DIV == GT_ADD + 3);
13707 noway_assert(GT_MOD == GT_ADD + 4);
13708 noway_assert(GT_UDIV == GT_ADD + 5);
13709 noway_assert(GT_UMOD == GT_ADD + 6);
13711 noway_assert(GT_OR == GT_ADD + 7);
13712 noway_assert(GT_XOR == GT_ADD + 8);
13713 noway_assert(GT_AND == GT_ADD + 9);
13715 noway_assert(GT_LSH == GT_ADD + 10);
13716 noway_assert(GT_RSH == GT_ADD + 11);
13717 noway_assert(GT_RSZ == GT_ADD + 12);
13719 /* Check for a suitable operator on the RHS */
13721 cmop = op2->OperGet();
13726 // GT_CHS only supported for integer types
13727 if (varTypeIsFloating(tree->TypeGet()))
13735 // GT_ASG_MUL only supported for floating point types
13736 if (!varTypeIsFloating(tree->TypeGet()))
13745 if (op2->gtOverflow())
/* Disable folding into "<op>=" if the result can be
   visible to anyone as <op> may throw an exception and
   the assignment should not proceed.
   We are safe with an assignment to a local variable. */
if (ehBlockHasExnFlowDsc(compCurBB))
if (!dstIsSafeLclVar)
#ifndef _TARGET_AMD64_
// This is hard for byte-operations as we need to make
// sure both operands are in RBM_BYTE_REGS.
if (varTypeIsByte(op2->TypeGet()))
#endif // _TARGET_AMD64_
// GT_ASG_DIV only supported for floating point types
if (!varTypeIsFloating(tree->TypeGet()))
bool bReverse = false;
bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
if (bAsgOpFoldable)
// We will transform this from "a = x <op> a" to "a <op>= x"
// so we can now destroy the duplicate "a"
DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
/* Special case: "x |= -1" and "x &= 0" */
if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
    ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
/* Simply change to an assignment */
tree->gtOp2 = op2->gtOp.gtOp2;
if (cmop == GT_NEG)
/* This is "x = -x;", use the flipsign operator */
tree->ChangeOper(GT_CHS);
if (op1->gtOper == GT_LCL_VAR)
op1->gtFlags |= GTF_VAR_USEASG;
tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
// Changing from x = x op y to x op= y when x is a small integer type
// makes the op size smaller (originally the op size was 32 bits, after
// sign or zero extension of x, and there is an implicit truncation in the
// assignment).
// This is ok in most cases because the upper bits were
// lost when assigning the op result to a small type var,
// but it may not be ok for the right shift operation where the higher bits
// could be shifted into the lower bits and preserved.
// Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
// (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
// (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
// wrong result:
// e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
// but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
// The result becomes correct if we use >>unsigned instead of >>signed.
noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
/* Replace with an assignment operator */
noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
tree->gtOp2 = op2->gtOp.gtOp2;
/* Propagate GTF_OVERFLOW */
if (op2->gtOverflowEx())
tree->gtType = op2->gtType;
tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
#if FEATURE_SET_FLAGS
/* Propagate GTF_SET_FLAGS */
if (op2->gtSetFlags())
tree->gtRequestSetFlags();
#endif // FEATURE_SET_FLAGS
DEBUG_DESTROY_NODE(op2);
/* The target is used as well as being defined */
if (op1->OperIsLocal())
op1->gtFlags &= ~GTF_VAR_USEDEF;
op1->gtFlags |= GTF_VAR_USEASG;
#if CPU_HAS_FP_SUPPORT
/* Check for the special case "x += y * x;" */
// GT_ASG_MUL only supported for floating point types
if (cmop != GT_ADD && cmop != GT_SUB)
if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
if (GenTree::Compare(op1, op2->gtOp.gtOp1))
/* Change "x += x * y" into "x *= (y + 1)" */
op2 = op2->gtOp.gtOp2;
else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
/* Change "x += y * x" into "x *= (y + 1)" */
op2 = op2->gtOp.gtOp1;
op1 = gtNewDconNode(1.0);
/* Now make the "*=" node */
if (cmop == GT_ADD)
/* Change "x += x * y" into "x *= (y + 1)" */
tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
/* Change "x -= x * y" into "x *= (1 - y)" */
noway_assert(cmop == GT_SUB);
tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
tree->ChangeOper(GT_ASG_MUL);
#endif // CPU_HAS_FP_SUPPORT
/* Is the destination identical to the first RHS sub-operand? */
if (GenTree::Compare(op1, op2->gtOp.gtOp1))
/* This is "x = ~x" which is the same as "x ^= -1"
 * Transform the node into a GT_ASG_XOR */
noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
/* Check for the case "(val + icon) * icon" */
if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
GenTreePtr add = op1->gtOp.gtOp2;
if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
if (tree->gtOverflow() || op1->gtOverflow())
ssize_t imul = op2->gtIntCon.gtIconVal;
ssize_t iadd = add->gtIntCon.gtIconVal;
/* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
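/* For instance, with iadd == 2 and imul == 4:
   "(val + 2) * 4" becomes "(val * 4) + 8" */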
tree->ChangeOper(oper);
op2->gtIntCon.gtIconVal = iadd * imul;
op1->ChangeOper(GT_MUL);
add->gtIntCon.gtIconVal = imul;
#ifdef _TARGET_64BIT_
if (add->gtType == TYP_INT)
// we need to properly re-sign-extend or truncate after multiplying two int constants above
add->AsIntCon()->TruncateOrSignExtend32();
#endif //_TARGET_64BIT_
/* For "val / 1", just return "val" */
if (op2->IsIntegralConst(1))
DEBUG_DESTROY_NODE(tree);
/* Check for the case "(val + icon) << icon" */
if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
GenTreePtr cns = op1->gtOp.gtOp2;
if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
ssize_t ishf = op2->gtIntConCommon.IconValue();
ssize_t iadd = cns->gtIntConCommon.IconValue();
// printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
/* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
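/* For instance, with iadd == 2 and ishf == 3:
   "(val + 2) << 3" becomes "(val << 3) + 16" */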
tree->ChangeOper(GT_ADD);
ssize_t result = iadd << ishf;
op2->gtIntConCommon.SetIconValue(result);
#ifdef _TARGET_64BIT_
if (op1->gtType == TYP_INT)
op2->AsIntCon()->TruncateOrSignExtend32();
#endif // _TARGET_64BIT_
// we are reusing the shift amount node here, but the type we want is that of the shift result
op2->gtType = op1->gtType;
if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
    cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
op1->ChangeOper(GT_LSH);
cns->gtIntConCommon.SetIconValue(ishf);
if (!optValnumCSE_phase)
/* "x ^ -1" is "~x" */
if (op2->IsIntegralConst(-1))
tree->ChangeOper(GT_NOT);
tree->gtOp2 = nullptr;
DEBUG_DESTROY_NODE(op2);
else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
/* "binaryVal ^ 1" is "!binaryVal" */
gtReverseCond(op1);
DEBUG_DESTROY_NODE(op2);
DEBUG_DESTROY_NODE(tree);
// Initialization values for initBlk have special semantics - their lower
// byte is used to fill the struct. However, we allow 0 as a "bare" value,
// which enables them to get a VNForZero, and be propagated.
if (op1->IsIntegralConst(0))
//------------------------------------------------------------------------
// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
// (see ECMA III 3.55 and III.3.56).
//
// Arguments:
//    tree - The GT_MOD/GT_UMOD tree to morph
//
// Returns:
//    The morphed tree
//
// Notes:
//    For ARM64 we don't have a remainder instruction so this transform is
//    always done. For XARCH this transform is done if we know that magic
//    division will be used, in that case this transform allows CSE to
//    eliminate the redundant div from code like "x = a / 3; y = a % 3;".
//
//    This method will produce the above expression if 'a' and 'b' are
//    leaf nodes; otherwise, if any of them is not a leaf it will spill
//    its value into a temporary variable, an example:
//    (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
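//
//    As a sanity check of the identity, take a == 7 and b == 3:
//    a - (a / b) * b == 7 - (7 / 3) * 3 == 7 - 6 == 1, which is 7 % 3.
//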
GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
if (tree->OperGet() == GT_MOD)
tree->SetOper(GT_DIV);
else if (tree->OperGet() == GT_UMOD)
tree->SetOper(GT_UDIV);
noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
var_types type = tree->gtType;
GenTree* denominator = tree->gtOp2;
GenTree* numerator = tree->gtOp1;
if (!numerator->OperIsLeaf())
numerator = fgMakeMultiUse(&tree->gtOp1);
else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
// Morphing introduces new lclVar references. Increase ref counts
lvaIncRefCnts(numerator);
if (!denominator->OperIsLeaf())
denominator = fgMakeMultiUse(&tree->gtOp2);
else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
// Morphing introduces new lclVar references. Increase ref counts
lvaIncRefCnts(denominator);
// The numerator and denominator may have been assigned to temps, in which case
// their defining assignments are in the current tree. Therefore, we need to
// set the execution order accordingly on the nodes we create.
// That is, the "mul" will be evaluated in "normal" order, and the "sub" must
// be set to be evaluated in reverse order.
GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
assert(!mul->IsReverseOp());
GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
sub->gtFlags |= GTF_REVERSE_OPS;
sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
//------------------------------------------------------------------------------
// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
//
// Arguments:
//    oper - Operation to check
//
// Returns:
//    True if the operation can be a root of a bitwise rotation tree; false otherwise.
//
bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
return (oper == GT_OR) || (oper == GT_XOR);
//------------------------------------------------------------------------------
// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
//
// Arguments:
//    tree - tree to check for a rotation pattern
//
// Returns:
//    An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
//
// Assumption:
//    The input is a GT_OR or a GT_XOR tree.
//
GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
#ifndef LEGACY_BACKEND
// Check for a rotation pattern, e.g.,
// The patterns recognized:
// (x << (y & M)) op (x >>> ((-y + N) & M))
// (x >>> ((-y + N) & M)) op (x << (y & M))
// (x << y) op (x >>> (-y + N))
// (x >>> (-y + N)) op (x << y)
// (x >>> (y & M)) op (x << ((-y + N) & M))
// (x << ((-y + N) & M)) op (x >>> (y & M))
// (x >>> y) op (x << (-y + N))
// (x << (-y + N)) op (x >>> y)
// (x << c1) op (x >>> c2)
// (x >>> c1) op (x << c2)
// c1 and c2 are const
// c1 + c2 == bitsize(x)
// M & (N - 1) == N - 1
// op is either | or ^
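//
// For example, with a 32-bit x, "(x << 3) | (x >>> 29)" matches the constant
// pattern above (3 + 29 == 32 == bitsize(x)) and is morphed into a rotate-left
// of x by 3.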
if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
// We can't do anything if the tree has assignments, calls, or volatile
// reads. Note that we allow GTF_EXCEPT side effect since any exceptions
// thrown by the original tree will be thrown by the transformed tree as well.
genTreeOps oper = tree->OperGet();
assert(fgOperIsBitwiseRotationRoot(oper));
// Check if we have an LSH on one side of the OR and an RSZ on the other side.
GenTreePtr op1 = tree->gtGetOp1();
GenTreePtr op2 = tree->gtGetOp2();
GenTreePtr leftShiftTree = nullptr;
GenTreePtr rightShiftTree = nullptr;
if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
leftShiftTree = op1;
rightShiftTree = op2;
else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
leftShiftTree = op2;
rightShiftTree = op1;
// Check if the trees representing the value to shift are identical.
// We already checked that there are no side effects above.
if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
// The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
// shouldn't be masked for the transformation to be valid. If additional
// higher bits are not masked, the transformation is still valid since the result
// of MSIL shift instructions is unspecified if the shift amount is greater or equal
// than the width of the value being shifted.
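// For example, for a 32-bit rotation an index of the form "(y & 31)" keeps all
// five significant bits of the rotate amount, and a wider mask such as "(y & 63)"
// is also acceptable because the extra bits cannot change the result.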
ssize_t minimalMask = rotatedValueBitSize - 1;
ssize_t leftShiftMask = -1;
ssize_t rightShiftMask = -1;
if (leftShiftIndex->OperGet() == GT_AND)
if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
leftShiftIndex = leftShiftIndex->gtGetOp1();
if (rightShiftIndex->OperGet() == GT_AND)
if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
rightShiftIndex = rightShiftIndex->gtGetOp1();
if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
// The shift index is overmasked, e.g., we have
// something like (x << (y & 15)) or
// (x >>> ((32 - y) & 15)) with 32 bit x.
// The transformation is not valid.
GenTreePtr shiftIndexWithAdd = nullptr;
GenTreePtr shiftIndexWithoutAdd = nullptr;
genTreeOps rotateOp = GT_NONE;
GenTreePtr rotateIndex = nullptr;
if (leftShiftIndex->OperGet() == GT_ADD)
shiftIndexWithAdd = leftShiftIndex;
shiftIndexWithoutAdd = rightShiftIndex;
else if (rightShiftIndex->OperGet() == GT_ADD)
shiftIndexWithAdd = rightShiftIndex;
shiftIndexWithoutAdd = leftShiftIndex;
if (shiftIndexWithAdd != nullptr)
if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
// We found one of these patterns:
// (x << (y & M)) | (x >>> ((-y + N) & M))
// (x << y) | (x >>> (-y + N))
// (x >>> (y & M)) | (x << ((-y + N) & M))
// (x >>> y) | (x << (-y + N))
// where N == bitsize(x), M is const, and
// M & (N - 1) == N - 1
CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
// TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
// GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
// to add helpers for GT_ROL and GT_ROR.
rotateIndex = shiftIndexWithoutAdd;
else if (leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI())
if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
// We found this pattern:
// (x << c1) | (x >>> c2)
// where c1 and c2 are const and c1 + c2 == bitsize(x)
rotateIndex = leftShiftIndex;
if (rotateIndex != nullptr)
noway_assert(GenTree::OperIsRotate(rotateOp));
unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
// We can use the same tree only during global morph; reusing the tree in a later morph
// may invalidate value numbers.
tree->gtOp.gtOp1 = rotatedValue;
tree->gtOp.gtOp2 = rotateIndex;
tree->ChangeOper(rotateOp);
unsigned childFlags = 0;
for (GenTree* op : tree->Operands())
childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
// The parent's flags should be a superset of its operands' flags
noway_assert((inputTreeEffects & childFlags) == childFlags);
tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
#endif // LEGACY_BACKEND
#if !CPU_HAS_FP_SUPPORT
GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
genTreeOps oper = tree->OperGet();
var_types typ = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
GenTreePtr op2 = tree->gtGetOp2IfPresent();
/*
    We have to use helper calls for all FP operations:

        FP operators that operate on FP values
        casts to and from FP
        comparisons of FP values
 */
if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
size_t argc = genTypeStSz(typ);
/* Not all FP operations need helper calls */
/* If the result isn't FP, it better be a compare or cast */
if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
/* Keep track of how many arguments we're passing */
fgPtrArgCntCur += argc;
/* Is this a binary operator? */
/* Add the second operand to the argument count */
fgPtrArgCntCur += argc;
/* What kind of an operator do we have? */
helper = CPX_R4_ADD;
helper = CPX_R4_SUB;
helper = CPX_R4_MUL;
helper = CPX_R4_DIV;
// case GT_MOD: helper = CPX_R4_REM; break;
helper = CPX_R4_EQ;
helper = CPX_R4_NE;
helper = CPX_R4_LT;
helper = CPX_R4_LE;
helper = CPX_R4_GE;
helper = CPX_R4_GT;
noway_assert(!"unexpected FP binary op");
args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
noway_assert(!"FP cast");
helper = CPX_R4_NEG;
noway_assert(!"unexpected FP unary op");
args = gtNewArgList(tree->gtOp.gtOp1);
/* If we have double result/operands, modify the helper */
if (typ == TYP_DOUBLE)
noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
noway_assert(tree->OperIsCompare());
noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
tree = fgMorphIntoHelperCall(tree, helper, args);
if (fgPtrArgCntMax < fgPtrArgCntCur)
JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
fgPtrArgCntMax = fgPtrArgCntCur;
fgPtrArgCntCur -= argc;
if (compCurBB == genReturnBB)
/* This is the 'exitCrit' call at the exit label */
noway_assert(op1->gtType == TYP_VOID);
noway_assert(op2 == nullptr);
tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
/* This is a (real) return value -- check its type */
CLANG_FORMAT_COMMENT_ANCHOR;
if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
bool allowMismatch = false;
// Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
    (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
allowMismatch = true;
if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
allowMismatch = true;
if (!allowMismatch)
NO_WAY("Return type mismatch");
/*****************************************************************************
 *
 *  Transform the given tree for code generation and return an equivalent tree.
 */
GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
noway_assert(tree);
noway_assert(tree->gtOper != GT_STMT);
if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
noway_assert(!"JitBreakMorphTree hit");
int thisMorphNum = 0;
if (verbose && treesBeforeAfterMorph)
thisMorphNum = morphNum++;
printf("\nfgMorphTree (before %d):\n", thisMorphNum);
/*-------------------------------------------------------------------------
 * fgMorphTree() can potentially replace a tree with another, and the
 * caller has to store the return value correctly.
 * Turn this on to always make a copy of "tree" here to shake out
 * hidden/unupdated references.
 */
if (compStressCompile(STRESS_GENERIC_CHECK, 0))
#ifdef SMALL_TREE_NODES
if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
copy->CopyFrom(tree, this);
#if defined(LATE_DISASM)
// GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
DEBUG_DESTROY_NODE(tree);
/* Ensure that we haven't morphed this node already */
assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
#if LOCAL_ASSERTION_PROP
/* Before morphing the tree, we try to propagate any active assertions */
if (optLocalAssertionProp)
/* Do we have any active assertions? */
if (optAssertionCount > 0)
GenTreePtr newTree = tree;
while (newTree != nullptr)
/* newTree is non-Null if we propagated an assertion */
newTree = optAssertionProp(apFull, tree, nullptr);
noway_assert(tree != nullptr);
PREFAST_ASSUME(tree != nullptr);
/* Save the original un-morphed tree for fgMorphTreeDone */
GenTreePtr oldTree = tree;
/* Figure out what kind of a node we have */
unsigned kind = tree->OperKind();
/* Is this a constant node? */
if (kind & GTK_CONST)
tree = fgMorphConst(tree);
/* Is this a leaf node? */
if (kind & GTK_LEAF)
tree = fgMorphLeaf(tree);
/* Is it a 'simple' unary/binary operator? */
if (kind & GTK_SMPOP)
tree = fgMorphSmpOp(tree, mac);
/* See what kind of a special operator we have here */
switch (tree->OperGet())
tree = fgMorphField(tree, mac);
tree = fgMorphCall(tree->AsCall());
case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
#endif // FEATURE_SIMD
fgSetRngChkTarget(tree);
GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
// If the index is a comma(throw, x), just return that.
if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
tree = bndsChk->gtIndex;
// Propagate effects flags upwards
bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
// Otherwise, we don't change the tree.
tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
fgSetRngChkTarget(tree, false);
case GT_ARR_OFFSET:
tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
fgSetRngChkTarget(tree, false);
tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
case GT_STORE_DYN_BLK:
tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
noway_assert(!"unexpected operator");
fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
#if LOCAL_ASSERTION_PROP
//------------------------------------------------------------------------
// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
//
// Arguments:
//    lclNum - The varNum of the lclVar for which we're killing assertions.
//    tree   - (DEBUG only) the tree responsible for killing its assertions.
//
void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
/* All dependent assertions are killed here */
ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
AssertionIndex index = optAssertionCount;
while (killed && (index > 0))
if (BitVecOps::IsMember(apTraits, killed, index - 1))
AssertionDsc* curAssertion = optGetAssertion(index);
noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
             ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
printf("\nThe assignment ");
printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
optPrintAssertion(curAssertion);
// Remove this bit from the killed mask
BitVecOps::RemoveElemD(apTraits, killed, index - 1);
optAssertionRemove(index);
// killed mask should now be zero
noway_assert(BitVecOps::IsEmpty(apTraits, killed));
//------------------------------------------------------------------------
// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
//
// Arguments:
//    lclNum - The varNum of the lclVar for which we're killing assertions.
//    tree   - (DEBUG only) the tree responsible for killing its assertions.
//
// Notes:
//    For structs and struct fields, it will invalidate the children and parent
//    respectively.
//    Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
//
void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
LclVarDsc* varDsc = &lvaTable[lclNum];
if (varDsc->lvPromoted)
noway_assert(varTypeIsStruct(varDsc));
// Kill the field locals.
for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
fgKillDependentAssertionsSingle(i DEBUGARG(tree));
// Kill the struct local itself.
fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
else if (varDsc->lvIsStructField)
// Kill the field local.
fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
// Kill the parent struct.
fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
#endif // LOCAL_ASSERTION_PROP
/*****************************************************************************
 *
 *  This function is called to complete the morphing of a tree node.
 *  It should only be called once for each node.
 *  If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
 *  to enforce the invariant that each node is only morphed once.
 *  If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
 *  by an equivalent tree.
 *
 */
void Compiler::fgMorphTreeDone(GenTreePtr tree,
                               GenTreePtr oldTree /* == NULL */
                               DEBUGARG(int morphNum))
if (verbose && treesBeforeAfterMorph)
printf("\nfgMorphTree (after %d):\n", morphNum);
printf(""); // in our logic this causes a flush
if (!fgGlobalMorph)
if ((oldTree != nullptr) && (oldTree != tree))
/* Ensure that we have morphed this node */
assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
TransferTestDataToNode(oldTree, tree);
// Ensure that we haven't morphed this node already
assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
if (tree->OperKind() & GTK_CONST)
#if LOCAL_ASSERTION_PROP
if (!optLocalAssertionProp)
/* Do we have any active assertions? */
if (optAssertionCount > 0)
/* Is this an assignment to a local variable */
GenTreeLclVarCommon* lclVarTree = nullptr;
if (tree->DefinesLocal(this, &lclVarTree))
unsigned lclNum = lclVarTree->gtLclNum;
noway_assert(lclNum < lvaCount);
fgKillDependentAssertions(lclNum DEBUGARG(tree));
/* If this tree makes a new assertion - make it available */
optAssertionGen(tree);
#endif // LOCAL_ASSERTION_PROP
/* Mark this node as being morphed */
tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
/*****************************************************************************
 *
 *  Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
 *  Returns true if we modified the flow graph
 */
bool Compiler::fgFoldConditional(BasicBlock* block)
bool result = false;
// We don't want to make any code unreachable
if (opts.compDbgCode || opts.MinOpts())
if (block->bbJumpKind == BBJ_COND)
noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
GenTreePtr stmt = block->bbTreeList->gtPrev;
noway_assert(stmt->gtNext == nullptr);
if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
noway_assert(fgRemoveRestOfBlock);
/* Unconditional throw - transform the basic block into a BBJ_THROW */
fgConvertBBToThrowBB(block);
/* Remove 'block' from the predecessor list of 'block->bbNext' */
fgRemoveRefPred(block->bbNext, block);
/* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
fgRemoveRefPred(block->bbJumpDest, block);
printf("\nConditional folded at BB%02u\n", block->bbNum);
printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
/* Did we fold the conditional */
noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
if (cond->OperKind() & GTK_CONST)
/* Yupee - we folded the conditional!
 * Remove the conditional statement */
noway_assert(cond->gtOper == GT_CNS_INT);
noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
/* remove the statement from bbTreelist - No need to update
 * the reference counts since there are no lcl vars */
fgRemoveStmt(block, stmt);
// block is a BBJ_COND that we are folding the conditional for
// bTaken is the path that will always be taken from block
// bNotTaken is the path that will never be taken from block
BasicBlock* bTaken;
BasicBlock* bNotTaken;
if (cond->gtIntCon.gtIconVal != 0)
/* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
block->bbJumpKind = BBJ_ALWAYS;
bTaken = block->bbJumpDest;
bNotTaken = block->bbNext;
/* Unmark the loop if we are removing a backwards branch */
/* dest block must also be marked as a loop head and */
/* We must be able to reach the backedge block */
if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
    fgReachable(block->bbJumpDest, block))
optUnmarkLoopBlocks(block->bbJumpDest, block);
/* JTRUE 0 - transform the basic block into a BBJ_NONE */
block->bbJumpKind = BBJ_NONE;
noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
bTaken = block->bbNext;
bNotTaken = block->bbJumpDest;
if (fgHaveValidEdgeWeights)
// We are removing an edge from block to bNotTaken
// and we have already computed the edge weights, so
// we will try to adjust some of the weights
flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
// We examine the taken edge (block -> bTaken)
// if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
// else if bTaken has valid profile weight and block does not we try to adjust block's weight
// We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
if (block->hasProfileWeight())
// The edge weights for (block -> bTaken) are 100% of block's weight
edgeTaken->flEdgeWeightMin = block->bbWeight;
edgeTaken->flEdgeWeightMax = block->bbWeight;
if (!bTaken->hasProfileWeight())
if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
// Update the weight of bTaken
bTaken->inheritWeight(block);
else if (bTaken->hasProfileWeight())
if (bTaken->countOfInEdges() == 1)
// There is only one in edge to bTaken
edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
// Update the weight of block
block->inheritWeight(bTaken);
if (bUpdated != nullptr)
// Now fix the weights of the edges out of 'bUpdated'
switch (bUpdated->bbJumpKind)
edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
edge->flEdgeWeightMax = bUpdated->bbWeight;
edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
edge->flEdgeWeightMax = bUpdated->bbWeight;
edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
edge->flEdgeWeightMax = bUpdated->bbWeight;
// We don't handle BBJ_SWITCH
/* modify the flow graph */
/* Remove 'block' from the predecessor list of 'bNotTaken' */
fgRemoveRefPred(bNotTaken, block);
printf("\nConditional folded at BB%02u\n", block->bbNum);
printf("BB%02u becomes a %s", block->bbNum,
       block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
if (block->bbJumpKind == BBJ_ALWAYS)
printf(" to BB%02u", block->bbJumpDest->bbNum);
/* if the block was a loop condition we may have to modify
 * the loop table */
for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
/* Some loops may have been already removed by
 * loop unrolling or conditional folding */
if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
/* We are only interested in the loop bottom */
if (optLoopTable[loopNum].lpBottom == block)
if (cond->gtIntCon.gtIconVal == 0)
/* This was a bogus loop (condition always false)
 * Remove the loop from the table */
optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
       optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
else if (block->bbJumpKind == BBJ_SWITCH)
noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
GenTreePtr stmt = block->bbTreeList->gtPrev;
noway_assert(stmt->gtNext == nullptr);
if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
noway_assert(fgRemoveRestOfBlock);
/* Unconditional throw - transform the basic block into a BBJ_THROW */
fgConvertBBToThrowBB(block);
/* update the flow graph */
unsigned jumpCnt = block->bbJumpSwt->bbsCount;
BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
BasicBlock* curJump = *jumpTab;
/* Remove 'block' from the predecessor list of 'curJump' */
fgRemoveRefPred(curJump, block);
printf("\nConditional folded at BB%02u\n", block->bbNum);
printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
/* Did we fold the conditional */
noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
if (cond->OperKind() & GTK_CONST)
/* Yupee - we folded the conditional!
 * Remove the conditional statement */
noway_assert(cond->gtOper == GT_CNS_INT);
/* remove the statement from bbTreelist - No need to update
 * the reference counts since there are no lcl vars */
fgRemoveStmt(block, stmt);
/* modify the flow graph */
/* Find the actual jump target */
unsigned switchVal;
switchVal = (unsigned)cond->gtIntCon.gtIconVal;
jumpCnt = block->bbJumpSwt->bbsCount;
BasicBlock** jumpTab;
jumpTab = block->bbJumpSwt->bbsDstTab;
for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
BasicBlock* curJump = *jumpTab;
assert(curJump->countOfInEdges() > 0);
// If val matches switchVal or we are at the last entry and
// we never found the switch value then set the new jump dest
if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
if (curJump != block->bbNext)
/* transform the basic block into a BBJ_ALWAYS */
block->bbJumpKind = BBJ_ALWAYS;
block->bbJumpDest = curJump;
// if we are jumping backwards, make sure we have a GC Poll.
if (curJump->bbNum > block->bbNum)
block->bbFlags &= ~BBF_NEEDS_GCPOLL;
/* transform the basic block into a BBJ_NONE */
block->bbJumpKind = BBJ_NONE;
block->bbFlags &= ~BBF_NEEDS_GCPOLL;
/* Remove 'block' from the predecessor list of 'curJump' */
fgRemoveRefPred(curJump, block);
printf("\nConditional folded at BB%02u\n", block->bbNum);
printf("BB%02u becomes a %s", block->bbNum,
       block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
if (block->bbJumpKind == BBJ_ALWAYS)
printf(" to BB%02u", block->bbJumpDest->bbNum);
//*****************************************************************************
//
// Morphs a single statement in a block.
// Can be called anytime, unlike fgMorphStmts() which should only be called once.
//
// Returns true  if 'stmt' was removed from the block.
// Returns false if 'stmt' is still in the block (even if other statements were removed).
//
bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
assert(block != nullptr);
assert(stmt != nullptr);
compCurStmt = stmt;
GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
// Bug 1106830 - During the CSE phase we can't just remove
// morph->gtOp.gtOp2 as it could contain CSE expressions.
// This leads to a noway_assert in OptCSE.cpp when
// searching for the removed CSE ref. (using gtFindLink)
if (!optValnumCSE_phase)
// Check for morph as a GT_COMMA with an unconditional throw
if (fgIsCommaThrow(morph, true))
printf("Folding a top-level fgIsCommaThrow stmt\n");
printf("Removing op2 as unreachable:\n");
gtDispTree(morph->gtOp.gtOp2);
// Use the call as the new stmt
morph = morph->gtOp.gtOp1;
noway_assert(morph->gtOper == GT_CALL);
// we can get a throw as a statement root
if (fgIsThrow(morph))
printf("We have a top-level fgIsThrow stmt\n");
printf("Removing the rest of block as unreachable:\n");
noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
fgRemoveRestOfBlock = true;
stmt->gtStmtExpr = morph;
if (lvaLocalVarRefCounted)
// fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
// Can the entire tree be removed?
bool removedStmt = fgCheckRemoveStmt(block, stmt);
// Or this is the last statement of a conditional branch that was just folded?
if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
if (fgFoldConditional(block))
if (block->bbJumpKind != BBJ_THROW)
removedStmt = true;
// Have to re-do the evaluation order, since for example some later code does not expect constants as op1
gtSetStmtInfo(stmt);
// Have to re-link the nodes for this statement
fgSetStmtSeq(stmt);
printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
if (fgRemoveRestOfBlock)
// Remove the rest of the stmts in the block
for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
fgRemoveStmt(block, stmt);
// The rest of block has been removed and we will always throw an exception.
// Update successors of block
fgRemoveBlockAsPred(block);
// For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE.
// We should not convert it to a ThrowBB.
if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
// Convert block to a throw bb
fgConvertBBToThrowBB(block);
printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
fgRemoveRestOfBlock = false;
return removedStmt;
/*****************************************************************************
 *
 *  Morph the statements of the given block.
 *  This function should be called just once for a block. Use fgMorphBlockStmt()
 *  for reentrant calls.
 */
void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
fgRemoveRestOfBlock = false;
noway_assert(fgExpandInline == false);
/* Make the current basic block address available globally */
*mult = *lnot = *loadw = false;
fgCurrentlyInUseArgTemps = hashBv::Create(this);
GenTreeStmt* stmt = block->firstStmt();
GenTreePtr prev = nullptr;
for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
noway_assert(stmt->gtOper == GT_STMT);
if (fgRemoveRestOfBlock)
fgRemoveStmt(block, stmt);
#ifdef FEATURE_SIMD
if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
fgMorphCombineSIMDFieldAssignments(block, stmt);
fgMorphStmt = stmt;
compCurStmt = stmt;
GenTreePtr tree = stmt->gtStmtExpr;
if (stmt == block->bbTreeList)
block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
/* Morph this statement tree */
GenTreePtr morph = fgMorphTree(tree);
// mark any outgoing arg temps as free so we can reuse them in the next statement.
fgCurrentlyInUseArgTemps->ZeroAll();
// Has fgMorphStmt been sneakily changed?
if (stmt->gtStmtExpr != tree)
/* This must be a tail call. Ignore 'morph' and carry on with
   the tail-call node */
morph = stmt->gtStmtExpr;
noway_assert(compTailCallUsed);
noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
noway_assert(stmt->gtNextStmt == nullptr);
GenTreeCall* call = morph->AsCall();
// - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
// - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
             (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
              (compCurBB->bbFlags & BBF_HAS_JMP)));
else if (block != compCurBB)
/* This must be a tail call that caused a GCPoll to get
   injected. We haven't actually morphed the call yet
   but the flag still got set, clear it here... */
CLANG_FORMAT_COMMENT_ANCHOR;
tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
noway_assert(compTailCallUsed);
noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
noway_assert(stmt->gtNextStmt == nullptr);
GenTreeCall* call = morph->AsCall();
// - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
// - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
             (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
              (compCurBB->bbFlags & BBF_HAS_JMP)));
if (compStressCompile(STRESS_CLONE_EXPR, 30))
// Clone all the trees to stress gtCloneExpr()
printf("\nfgMorphTree (stressClone from):\n");
morph = gtCloneExpr(morph);
noway_assert(morph);
printf("\nfgMorphTree (stressClone to):\n");
/* If the hash value changes, we modified the tree during morphing */
unsigned newHash = gtHashValue(morph);
if (newHash != oldHash)
printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
/* Check for morph as a GT_COMMA with an unconditional throw */
if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
/* Use the call as the new stmt */
morph = morph->gtOp.gtOp1;
noway_assert(morph->gtOper == GT_CALL);
noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
fgRemoveRestOfBlock = true;
stmt->gtStmtExpr = tree = morph;
noway_assert(fgPtrArgCntCur == 0);
if (fgRemoveRestOfBlock)
/* Has the statement been optimized away */
if (fgCheckRemoveStmt(block, stmt))
/* Check if this block ends with a conditional branch that can be folded */
if (fgFoldConditional(block))
if (ehBlockHasExnFlowDsc(block))
#if OPT_MULT_ADDSUB
/* Note whether we have two or more +=/-= operators in a row */
if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
if (prev && prev->gtOper == tree->gtOper)
/* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
if (fgRemoveRestOfBlock)
if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
GenTreePtr first = block->bbTreeList;
noway_assert(first);
GenTreePtr last = first->gtPrev;
noway_assert(last && last->gtNext == nullptr);
GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
    ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
GenTreePtr op1 = lastStmt->gtOp.gtOp1;
if (op1->OperKind() & GTK_RELOP)
/* Unmark the comparison node with GTF_RELOP_JMP_USED */
op1->gtFlags &= ~GTF_RELOP_JMP_USED;
last->gtStmt.gtStmtExpr = fgMorphTree(op1);
/* Mark block as a BBJ_THROW block */
fgConvertBBToThrowBB(block);
noway_assert(fgExpandInline == false);
#if FEATURE_FASTTAILCALL
GenTreePtr recursiveTailCall = nullptr;
if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
// Reset this back so that it doesn't leak out impacting other blocks
fgRemoveRestOfBlock = false;
/*****************************************************************************
 *
 *  Morph the blocks of the method.
 *  Returns true if the basic block list is modified.
 *  This function should be called just once.
 */
void Compiler::fgMorphBlocks()
printf("\n*************** In fgMorphBlocks()\n");
/* Since fgMorphTree can be called after various optimizations to re-arrange
 * the nodes, we need a global flag to signal whether we are in the one-pass
 * global morphing */
fgGlobalMorph = true;
#if LOCAL_ASSERTION_PROP
// Local assertion prop is enabled if we are optimizing
optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
if (optLocalAssertionProp)
// Initialize for local assertion prop
optAssertionInit(true);
#elif ASSERTION_PROP
// If LOCAL_ASSERTION_PROP is not set
// and we have global assertion prop
// then local assertion prop is always off
optLocalAssertionProp = false;
/*-------------------------------------------------------------------------
 * Process all basic blocks in the function
 */
BasicBlock* block = fgFirstBB;
noway_assert(block);
compCurStmtNum = 0;
#if OPT_MULT_ADDSUB
bool loadw = false;
printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
#if LOCAL_ASSERTION_PROP
if (optLocalAssertionProp)
// Clear out any currently recorded assertion candidates
// before processing each basic block,
// also we must handle QMARK-COLON specially
optAssertionReset(0);
/* Process all statement trees in the basic block */
fgMorphStmts(block, &mult, &lnot, &loadw);
#if OPT_MULT_ADDSUB
if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
for (tree = block->bbTreeList; tree; tree = tree->gtNext)
noway_assert(tree->gtOper == GT_STMT);
GenTreePtr last = tree->gtStmt.gtStmtExpr;
if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
GenTreePtr dst1 = last->gtOp.gtOp1;
GenTreePtr src1 = last->gtOp.gtOp2;
if (!last->IsCnsIntOrI())
if (dst1->gtOper != GT_LCL_VAR)
if (!src1->IsCnsIntOrI())
/* Look at the next statement */
temp = tree->gtNext;
noway_assert(temp->gtOper == GT_STMT);
next = temp->gtStmt.gtStmtExpr;
if (next->gtOper != last->gtOper)
if (next->gtType != last->gtType)
dst2 = next->gtOp.gtOp1;
src2 = next->gtOp.gtOp2;
if (dst2->gtOper != GT_LCL_VAR)
if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
if (!src2->IsCnsIntOrI())
if (last->gtOverflow() != next->gtOverflow())
const ssize_t i1 = src1->gtIntCon.gtIconVal;
const ssize_t i2 = src2->gtIntCon.gtIconVal;
const ssize_t itemp = i1 + i2;
/* if the operators are checking for overflow, check for overflow of the operands */
if (next->gtOverflow())
if (next->TypeGet() == TYP_LONG)
if (next->gtFlags & GTF_UNSIGNED)
ClrSafeInt<UINT64> si1(i1);
if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
ClrSafeInt<INT64> si1(i1);
if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
else if (next->gtFlags & GTF_UNSIGNED)
ClrSafeInt<UINT32> si1(i1);
if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
ClrSafeInt<INT32> si1(i1);
if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
/* Fold the two increments/decrements into one */
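/* e.g., "x += 3" immediately followed by "x += 5" is folded into a single "x += 8" */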
src1->gtIntCon.gtIconVal = itemp;
#ifdef _TARGET_64BIT_
if (src1->gtType == TYP_INT)
src1->AsIntCon()->TruncateOrSignExtend32();
#endif //_TARGET_64BIT_
/* Remove the second statement completely */
noway_assert(tree->gtNext == temp);
noway_assert(temp->gtPrev == tree);
noway_assert(temp->gtNext->gtPrev == temp);
temp->gtNext->gtPrev = tree;
tree->gtNext = temp->gtNext;
tree->gtNext = nullptr;
noway_assert(block->bbTreeList->gtPrev == temp);
block->bbTreeList->gtPrev = tree;
/* Are we using a single return block? */
if (block->bbJumpKind == BBJ_RETURN)
if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
/* We'll jump to the genReturnBB */
CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(_TARGET_X86_)
if (info.compFlags & CORINFO_FLG_SYNCH)
fgConvertSyncReturnToLeave(block);
#endif // !_TARGET_X86_
block->bbJumpKind = BBJ_ALWAYS;
block->bbJumpDest = genReturnBB;
// Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
// For example a method returning void could have an empty block with jump kind BBJ_RETURN.
// Such blocks do materialize as part of in-lining.
//
// Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
// It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
// For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
// is BAD_VAR_NUM.
//
// TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
// replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
if (genReturnLocal != BAD_VAR_NUM)
// Method must be returning a value other than TYP_VOID.
noway_assert(compMethodHasRetVal());
// This block must be ending with a GT_RETURN
noway_assert(last != nullptr);
noway_assert(last->gtOper == GT_STMT);
noway_assert(last->gtNext == nullptr);
noway_assert(ret != nullptr);
// GT_RETURN must have non-null operand as the method is returning the value assigned to
// genReturnLocal
noway_assert(ret->OperGet() == GT_RETURN);
noway_assert(ret->gtGetOp1() != nullptr);
GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
// make sure that copy-prop ignores this assignment.
last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
else if (ret != nullptr && ret->OperGet() == GT_RETURN)
// This block ends with a GT_RETURN
noway_assert(last != nullptr);
noway_assert(last->gtOper == GT_STMT);
noway_assert(last->gtNext == nullptr);
// Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
noway_assert(ret->TypeGet() == TYP_VOID);
noway_assert(ret->gtGetOp1() == nullptr);
fgRemoveStmt(block, last);
printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
fgTableDispBasicBlock(block);
block = block->bbNext;
/* We are done with the global morphing phase */
fgGlobalMorph = false;
fgDispBasicBlocks(true);
16127 //------------------------------------------------------------------------
16128 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
// fgPtrArgCntMax records the maximum number of pushed arguments.
// Depending upon the value of this maximum, we may need to use an EBP frame
// or be only partially interruptible.
16134 // This functionality has been factored out of fgSetOptions() because
16135 // the Rationalizer can create new calls.
16138 // This must be called before isFramePointerRequired() is called, because it is a
16139 // phased variable (can only be written before it has been read).
16141 void Compiler::fgCheckArgCnt()
16143 if (!compCanEncodePtrArgCntMax())
16148 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
16149 "interruptible\n");
16152 genInterruptible = false;
16154 if (fgPtrArgCntMax >= sizeof(unsigned))
16159 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
16162 codeGen->setFramePointerRequired(true);
16166 /*****************************************************************************
16168 * Make some decisions about the kind of code to generate.
16171 void Compiler::fgSetOptions()
16174 /* Should we force fully interruptible code ? */
16175 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16177 noway_assert(!codeGen->isGCTypeFixed());
16178 genInterruptible = true;
16182 if (opts.compDbgCode)
16184 assert(!codeGen->isGCTypeFixed());
16185 genInterruptible = true; // debugging is easier this way ...
16188 /* Assume we won't need an explicit stack frame if this is allowed */
16190 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16191 // the callee-saved registers.
16192 noway_assert(!compTailCallUsed || !compLocallocUsed);
16194 if (compLocallocUsed)
16196 codeGen->setFramePointerRequired(true);
16199 #ifdef _TARGET_X86_
16201 if (compTailCallUsed)
16202 codeGen->setFramePointerRequired(true);
16204 #endif // _TARGET_X86_
16206 if (!opts.genFPopt)
16208 codeGen->setFramePointerRequired(true);
16211 // Assert that the EH table has been initialized by now. Note that
16212 // compHndBBtabAllocCount never decreases; it is a high-water mark
16213 // of table allocation. In contrast, compHndBBtabCount does shrink
16214 // if we delete a dead EH region, and if it shrinks to zero, the
16215 // table pointer compHndBBtab is unreliable.
16216 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16218 #ifdef _TARGET_X86_
16220 // Note: this case, and the !X86 case below, should both use the
16221 // !X86 path. This would require a few more changes for X86 to use
16222 // compHndBBtabCount (the current number of EH clauses) instead of
16223 // info.compXcptnsCount (the number of EH clauses in IL), such as
16224 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16225 // an EH clause that we delete as statically dead code before we
16226 // get here, leaving no EH clauses left, and thus no requirement
16227 // to use a frame pointer because of EH. But until all the code uses
16228 // the same test, leave info.compXcptnsCount here.
16229 if (info.compXcptnsCount > 0)
16231 codeGen->setFramePointerRequiredEH(true);
16234 #else // !_TARGET_X86_
16236 if (compHndBBtabCount > 0)
16238 codeGen->setFramePointerRequiredEH(true);
16241 #endif // _TARGET_X86_
16243 #ifdef UNIX_X86_ABI
16244 if (info.compXcptnsCount > 0)
16246 assert(!codeGen->isGCTypeFixed());
16247 // Enforce fully interruptible codegen for funclet unwinding
16248 genInterruptible = true;
16250 #endif // UNIX_X86_ABI
16254 if (info.compCallUnmanaged)
16256 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16259 if (info.compPublishStubParam)
16261 codeGen->setFramePointerRequiredGCInfo(true);
16264 if (opts.compNeedSecurityCheck)
16266 codeGen->setFramePointerRequiredGCInfo(true);
16268 #ifndef JIT32_GCENCODER
16270 // The decoder only reports objects in frames with exceptions if the frame
16271 // is fully interruptible.
// Even if there is no catch or other way to resume execution in this frame,
// the VM requires the security object to remain alive until later, so
// frames with security objects must be fully interruptible.
16275 genInterruptible = true;
16277 #endif // JIT32_GCENCODER
16280 if (compIsProfilerHookNeeded())
16282 codeGen->setFramePointerRequired(true);
16285 if (info.compIsVarArgs)
16287 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16288 codeGen->setFramePointerRequiredGCInfo(true);
16291 if (lvaReportParamTypeArg())
16293 codeGen->setFramePointerRequiredGCInfo(true);
16296 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
16299 /*****************************************************************************/
16301 GenTreePtr Compiler::fgInitThisClass()
16303 noway_assert(!compIsForInlining());
16305 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16307 if (!kind.needsRuntimeLookup)
16309 return fgGetSharedCCtor(info.compClassHnd);
16313 #ifdef FEATURE_READYTORUN_COMPILER
16314 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16315 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16317 CORINFO_RESOLVED_TOKEN resolvedToken;
16318 memset(&resolvedToken, 0, sizeof(resolvedToken));
16320 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16321 // This covers the case of a generic method on a non-generic type.
16322 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16324 resolvedToken.hClass = info.compClassHnd;
16325 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16328 // We need a runtime lookup.
16329 GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16331 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16332 // base of the class that owns the method being compiled". If we're in this method, it means we're not
16333 // inlining and there's no ambiguity.
16334 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16335 gtNewArgList(ctxTree), &kind);
// Collectible types require that for shared generic code, if we use the generic context parameter,
// we report it. (This is a conservative approach; we could detect some cases, particularly when the
// context parameter is `this`, where we don't need the eager reporting logic.)
16342 lvaGenericsContextUseCount++;
16344 switch (kind.runtimeLookupKind)
16346 case CORINFO_LOOKUP_THISOBJ:
// This code takes a this pointer; but we need to pass the static method desc to get the right point in
// the right generic instantiation.
16350 GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16351 // Vtable pointer of this object
16352 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16353 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16354 GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16356 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16357 gtNewArgList(vtTree, methodHnd));
16360 case CORINFO_LOOKUP_CLASSPARAM:
16362 GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16363 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
16366 case CORINFO_LOOKUP_METHODPARAM:
16368 GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16369 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
16370 gtNewArgList(gtNewIconNode(0), methHndTree));
16375 noway_assert(!"Unknown LOOKUP_KIND");
16380 /*****************************************************************************
16382 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
16383 * except for the allowed ? 1 : 0; pattern.
16385 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
16387 if ((*tree)->OperGet() == GT_QMARK)
16389 fgCheckQmarkAllowedForm(*tree);
16391 return WALK_CONTINUE;
16394 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
16396 assert(tree->OperGet() == GT_QMARK);
16397 #ifndef LEGACY_BACKEND
16398 assert(!"Qmarks beyond morph disallowed.");
16399 #else // LEGACY_BACKEND
16400 GenTreePtr colon = tree->gtOp.gtOp2;
16402 assert(colon->gtOp.gtOp1->IsIntegralConst(0));
16403 assert(colon->gtOp.gtOp2->IsIntegralConst(1));
16404 #endif // LEGACY_BACKEND
16407 /*****************************************************************************
16409 * Verify that the importer has created GT_QMARK nodes in a way we can
16410 * process them. The following is allowed:
16412 * 1. A top level qmark. Top level qmark is of the form:
16413 * a) (bool) ? (void) : (void) OR
16414 * b) V0N = (bool) ? (type) : (type)
16416 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
* of either op1 of colon or op2 of colon, but not a child of any other
* operator.
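// For example (illustrative tree sketches; V05 is a hypothetical local, and the
// GT_COLON operand order shown is else-then, matching ElseNode/ThenNode):
//
//     GT_QMARK(cond, GT_COLON(elseExpr, thenExpr))            -- form 1(a)
//     GT_ASG(GT_LCL_VAR V05, GT_QMARK(cond, GT_COLON(e, t)))  -- form 1(b)
//
// A GT_QMARK directly under either arm of the colon is also accepted, but nowhere else.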
16420 void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
16422 GenTreePtr topQmark = fgGetTopLevelQmark(expr);
16424 // If the top level Qmark is null, then scan the tree to make sure
16425 // there are no qmarks within it.
16426 if (topQmark == nullptr)
16428 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
// We could probably expand the cond node also, but we don't think the extra effort is necessary,
16433 // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
16434 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
16436 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16437 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
16442 /*****************************************************************************
16444 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
16445 * node is not present. If the top level GT_QMARK node is assigned to a
16446 * GT_LCL_VAR, then return the lcl node in ppDst.
16449 GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
16451 if (ppDst != nullptr)
16456 GenTreePtr topQmark = nullptr;
16457 if (expr->gtOper == GT_QMARK)
16461 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16463 topQmark = expr->gtOp.gtOp2;
16464 if (ppDst != nullptr)
16466 *ppDst = expr->gtOp.gtOp1;
16472 /*********************************************************************************
16474 * For a castclass helper call,
16475 * Importer creates the following tree:
16476 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
16478 * This method splits the qmark expression created by the importer into the
16479 * following blocks: (block, asg, cond1, cond2, helper, remainder)
16480 * Notice that op1 is the result for both the conditions. So we coalesce these
* assignments into a single block instead of two blocks, which would result in a nested diamond.
16483 * +---------->-----------+
16487 * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16489 * We expect to achieve the following codegen:
16490 * mov rsi, rdx tmp = op1 // asgBlock
16491 * test rsi, rsi goto skip if tmp == null ? // cond1Block
16493 * mov rcx, 0x76543210 cns = op2 // cond2Block
16494 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
16496 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
16498 * SKIP: // remainderBlock
16499 * tmp has the result.
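// In rough C++ terms, the importer's tree above computes the following (the names, the
// cast through void**, and the helper's argument order are illustrative only):
//
//     tmp = (op1 == nullptr)
//               ? op1
//               : ((*(void**)op1 == clsHnd) ? op1 : CORINFO_HELP_CHKCASTCLASS_SPECIAL(clsHnd, op1));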
16502 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
16507 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
16508 fgDispBasicBlocks(block, block, true);
16512 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16514 GenTreePtr dst = nullptr;
16515 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16516 noway_assert(dst != nullptr);
16518 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
16520 // Get cond, true, false exprs for the qmark.
16521 GenTreePtr condExpr = qmark->gtGetOp1();
16522 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16523 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16525 // Get cond, true, false exprs for the nested qmark.
16526 GenTreePtr nestedQmark = falseExpr;
16527 GenTreePtr cond2Expr;
16528 GenTreePtr true2Expr;
16529 GenTreePtr false2Expr;
16531 if (nestedQmark->gtOper == GT_QMARK)
16533 cond2Expr = nestedQmark->gtGetOp1();
16534 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
16535 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
16537 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
16538 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
// This is a rare case that arises when we are doing minopts and encounter isinst of null.
// gtFoldExpr was still able to optimize away part of the tree (but not all),
// which means it does not match our pattern.
16546 // Rather than write code to handle this case, just fake up some nodes to make it match the common
16547 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
16548 // entire subtree we expected to be the nested question op.
16550 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
16551 true2Expr = nestedQmark;
16552 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
16554 assert(false2Expr->OperGet() == trueExpr->OperGet());
16556 // Clear flags as they are now going to be part of JTRUE.
16557 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16558 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16560 // Create the chain of blocks. See method header comment.
16561 // The order of blocks after this is the following:
16562 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
16564 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16566 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16567 // remainderBlock will still be GC safe.
16568 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16569 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16570 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16572 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
16573 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
16574 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
16575 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
16577 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16579 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16580 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16581 if ((block->bbFlags & BBF_INTERNAL) == 0)
16583 helperBlock->bbFlags &= ~BBF_INTERNAL;
16584 cond2Block->bbFlags &= ~BBF_INTERNAL;
16585 cond1Block->bbFlags &= ~BBF_INTERNAL;
16586 asgBlock->bbFlags &= ~BBF_INTERNAL;
16587 helperBlock->bbFlags |= BBF_IMPORTED;
16588 cond2Block->bbFlags |= BBF_IMPORTED;
16589 cond1Block->bbFlags |= BBF_IMPORTED;
16590 asgBlock->bbFlags |= BBF_IMPORTED;
16593 // Chain the flow correctly.
16594 fgAddRefPred(asgBlock, block);
16595 fgAddRefPred(cond1Block, asgBlock);
16596 fgAddRefPred(cond2Block, cond1Block);
16597 fgAddRefPred(helperBlock, cond2Block);
16598 fgAddRefPred(remainderBlock, helperBlock);
16599 fgAddRefPred(remainderBlock, cond1Block);
16600 fgAddRefPred(remainderBlock, cond2Block);
16602 cond1Block->bbJumpDest = remainderBlock;
16603 cond2Block->bbJumpDest = remainderBlock;
16605 // Set the weights; some are guesses.
16606 asgBlock->inheritWeight(block);
16607 cond1Block->inheritWeight(block);
16608 cond2Block->inheritWeightPercentage(cond1Block, 50);
16609 helperBlock->inheritWeightPercentage(cond2Block, 50);
16611 // Append cond1 as JTRUE to cond1Block
16612 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
16613 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16614 fgInsertStmtAtEnd(cond1Block, jmpStmt);
16616 // Append cond2 as JTRUE to cond2Block
16617 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
16618 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16619 fgInsertStmtAtEnd(cond2Block, jmpStmt);
16621 // AsgBlock should get tmp = op1 assignment.
16622 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
16623 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16624 fgInsertStmtAtEnd(asgBlock, trueStmt);
// Since we are adding the helper call in the JTRUE false path, reverse cond2 and append the helper.
16627 gtReverseCond(cond2Expr);
16628 GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
16629 GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
16630 fgInsertStmtAtEnd(helperBlock, helperStmt);
16632 // Finally remove the nested qmark stmt.
16633 fgRemoveStmt(block, stmt);
16638 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
16639 fgDispBasicBlocks(block, remainderBlock, true);
16644 /*****************************************************************************
16646 * Expand a statement with a top level qmark node. There are three cases, based
* on whether the qmark has both "true" and "false" arms, or just one of them.
*
* Case 1, both arms ("S0; C ? T : F; S1") generates:
*
*                      bbj_always
*                      +---->------+
*                false |           |
*    S0 -->-- ~C -->-- T   F -->-- S1
*             |            |
*             +--->--------+
*             bbj_cond(true)
*
* -----------------------------------------
*
* Case 2, only a "true" arm ("S0; C ? T : NOP; S1") generates:
*
*                false
*    S0 -->-- ~C -->-- T -->-- S1
*             |                |
*             +-->-------------+
*             bbj_cond(true)
*
* -----------------------------------------
*
* Case 3, only a "false" arm ("S0; C ? NOP : F; S1") generates:
*
*               false
*    S0 -->-- C -->-- F -->-- S1
*             |               |
*             +-->------------+
*             bbj_cond(true)
16691 * If the qmark assigns to a variable, then create tmps for "then"
16692 * and "else" results and assign the temp to the variable as a writeback step.
16694 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
16696 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16698 // Retrieve the Qmark node to be expanded.
16699 GenTreePtr dst = nullptr;
16700 GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
16701 if (qmark == nullptr)
16706 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
16708 fgExpandQmarkForCastInstOf(block, stmt);
16715 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
16716 fgDispBasicBlocks(block, block, true);
16720 // Retrieve the operands.
16721 GenTreePtr condExpr = qmark->gtGetOp1();
16722 GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16723 GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16725 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16726 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16728 assert(!varTypeIsFloating(condExpr->TypeGet()));
16730 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
16731 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
16732 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
16734 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
16735 // block ... condBlock ... elseBlock ... remainderBlock
16737 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16739 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16740 // remainderBlock will still be GC safe.
16741 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16742 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16743 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16745 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
16746 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
16748 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16749 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16750 if ((block->bbFlags & BBF_INTERNAL) == 0)
16752 condBlock->bbFlags &= ~BBF_INTERNAL;
16753 elseBlock->bbFlags &= ~BBF_INTERNAL;
16754 condBlock->bbFlags |= BBF_IMPORTED;
16755 elseBlock->bbFlags |= BBF_IMPORTED;
16758 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16760 condBlock->inheritWeight(block);
16762 fgAddRefPred(condBlock, block);
16763 fgAddRefPred(elseBlock, condBlock);
16764 fgAddRefPred(remainderBlock, elseBlock);
16766 BasicBlock* thenBlock = nullptr;
16767 if (hasTrueExpr && hasFalseExpr)
16772 // S0 -->-- ~C -->-- T F -->-- S1
16777 gtReverseCond(condExpr);
16778 condBlock->bbJumpDest = elseBlock;
16780 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
16781 thenBlock->bbJumpDest = remainderBlock;
16782 if ((block->bbFlags & BBF_INTERNAL) == 0)
16784 thenBlock->bbFlags &= ~BBF_INTERNAL;
16785 thenBlock->bbFlags |= BBF_IMPORTED;
16788 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
16790 fgAddRefPred(thenBlock, condBlock);
16791 fgAddRefPred(remainderBlock, thenBlock);
16793 thenBlock->inheritWeightPercentage(condBlock, 50);
16794 elseBlock->inheritWeightPercentage(condBlock, 50);
16796 else if (hasTrueExpr)
16799 // S0 -->-- ~C -->-- T -->-- S1
16801 // +-->-------------+
16804 gtReverseCond(condExpr);
16805 condBlock->bbJumpDest = remainderBlock;
16806 fgAddRefPred(remainderBlock, condBlock);
16807 // Since we have no false expr, use the one we'd already created.
16808 thenBlock = elseBlock;
16809 elseBlock = nullptr;
16811 thenBlock->inheritWeightPercentage(condBlock, 50);
16813 else if (hasFalseExpr)
16816 // S0 -->-- C -->-- F -->-- S1
16818 // +-->------------+
16821 condBlock->bbJumpDest = remainderBlock;
16822 fgAddRefPred(remainderBlock, condBlock);
16824 elseBlock->inheritWeightPercentage(condBlock, 50);
16827 GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
16828 GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16829 fgInsertStmtAtEnd(condBlock, jmpStmt);
16831 // Remove the original qmark statement.
16832 fgRemoveStmt(block, stmt);
// Since we have a top level qmark, we either have a dst for it, in which case
// we need to create tmps for the true and false exprs; otherwise we just don't
// bother assigning.
16837 unsigned lclNum = BAD_VAR_NUM;
16838 if (dst != nullptr)
16840 assert(dst->gtOper == GT_LCL_VAR);
16841 lclNum = dst->gtLclVar.gtLclNum;
16845 assert(qmark->TypeGet() == TYP_VOID);
// Assign the trueExpr into the dst or tmp, insert in thenBlock
if (hasTrueExpr)
if (dst != nullptr)
16852 trueExpr = gtNewTempAssign(lclNum, trueExpr);
16854 GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16855 fgInsertStmtAtEnd(thenBlock, trueStmt);
// Assign the falseExpr into the dst or tmp, insert in elseBlock
if (hasFalseExpr)
if (dst != nullptr)
16863 falseExpr = gtNewTempAssign(lclNum, falseExpr);
16865 GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
16866 fgInsertStmtAtEnd(elseBlock, falseStmt);
16872 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
16873 fgDispBasicBlocks(block, remainderBlock, true);
16878 /*****************************************************************************
16880 * Expand GT_QMARK nodes from the flow graph into basic blocks.
16884 void Compiler::fgExpandQmarkNodes()
16888 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
16890 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
16892 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16894 fgPreExpandQmarkChecks(expr);
16896 fgExpandQmarkStmt(block, stmt);
16900 fgPostExpandQmarkChecks();
16903 compQmarkRationalized = true;
16907 /*****************************************************************************
16909 * Make sure we don't have any more GT_QMARK nodes.
16912 void Compiler::fgPostExpandQmarkChecks()
16914 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
16916 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
16918 GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
16919 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16925 /*****************************************************************************
16927 * Transform all basic blocks for codegen.
16930 void Compiler::fgMorph()
16932 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
16934 fgOutgoingArgTemps = nullptr;
16939 printf("*************** In fgMorph()\n");
16943 fgDispBasicBlocks(true);
16947 // Insert call to class constructor as the first basic block if
16948 // we were asked to do so.
16949 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
16950 impTokenLookupContextHandle /* context */) &
16951 CORINFO_INITCLASS_USE_HELPER)
16953 fgEnsureFirstBBisScratch();
16954 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
16958 if (opts.compGcChecks)
16960 for (unsigned i = 0; i < info.compArgsCount; i++)
16962 if (lvaTable[i].TypeGet() == TYP_REF)
16964 // confirm that the argument is a GC pointer (for debugging (GC stress))
16965 GenTreePtr op = gtNewLclvNode(i, TYP_REF);
16966 GenTreeArgList* args = gtNewArgList(op);
16967 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
16969 fgEnsureFirstBBisScratch();
16970 fgInsertStmtAtEnd(fgFirstBB, op);
16975 if (opts.compStackCheckOnRet)
16977 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
16978 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
16981 if (opts.compStackCheckOnCall)
16983 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
16984 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
16988 /* Filter out unimported BBs */
16990 fgRemoveEmptyBlocks();
16993 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
16994 fgDebugCheckBBlist(false, false);
16997 EndPhase(PHASE_MORPH_INIT);
17002 JITDUMP("trees after inlining\n");
17003 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17006 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
17008 EndPhase(PHASE_MORPH_INLINE);
17010 /* Add any internal blocks/trees we may need */
17015 fgMultipleNots = false;
17019 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
17020 fgDebugCheckBBlist(false, false);
17023 fgRemoveEmptyTry();
17025 EndPhase(PHASE_EMPTY_TRY);
17027 fgRemoveEmptyFinally();
17029 EndPhase(PHASE_EMPTY_FINALLY);
17031 fgMergeFinallyChains();
17033 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
17037 EndPhase(PHASE_CLONE_FINALLY);
17039 fgUpdateFinallyTargetFlags();
17041 /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
17042 fgMarkImplicitByRefArgs();
17044 EndPhase(PHASE_MORPH_IMPBYREF);
17046 /* Promote struct locals if necessary */
17047 fgPromoteStructs();
/* Now it is time to figure out which locals are address-taken. */
17050 fgMarkAddressExposedLocals();
/* Now that address-taken locals are marked, we can safely apply stress. */
17055 fgStress64RsltMul();
17058 EndPhase(PHASE_STR_ADRLCL);
17060 /* Morph the trees in all the blocks of the method */
17064 EndPhase(PHASE_MORPH_GLOBAL);
17067 JITDUMP("trees after fgMorphBlocks\n");
17068 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
17071 /* Decide the kind of code we want to generate */
17075 fgExpandQmarkNodes();
17078 compCurBB = nullptr;
17082 /*****************************************************************************
17084 * Promoting struct locals
17086 void Compiler::fgPromoteStructs()
17091 printf("*************** In fgPromoteStructs()\n");
17095 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
17100 if (fgNoStructPromotion)
17106 // The code in this #if has been useful in debugging struct promotion issues, by
// selectively enabling the struct promotion optimization according to
// method hash.
17110 unsigned methHash = info.compMethodHash();
17111 char* lostr = getenv("structpromohashlo");
17112 unsigned methHashLo = 0;
17115 sscanf_s(lostr, "%x", &methHashLo);
17117 char* histr = getenv("structpromohashhi");
17118 unsigned methHashHi = UINT32_MAX;
17121 sscanf_s(histr, "%x", &methHashHi);
17123 if (methHash < methHashLo || methHash > methHashHi)
17129 printf("Promoting structs for method %s, hash = 0x%x.\n",
17130 info.compFullName, info.compMethodHash());
17131 printf(""); // in our logic this causes a flush
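// Usage sketch for the hash-range variables above (hypothetical hash value): to
// enable promotion only for the method whose hash is 0x2c3, set both ends of the
// range to that value before running:
//
//     set structpromohashlo=2c3
//     set structpromohashhi=2c3
//
// Values are parsed as hex via sscanf_s("%x"); an unset variable leaves the
// corresponding bound at its default (0 or UINT32_MAX).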
17136 if (info.compIsVarArgs)
17141 if (getNeedsGSSecurityCookie())
17149 printf("\nlvaTable before fgPromoteStructs\n");
17154 // The lvaTable might grow as we grab temps. Make a local copy here.
17155 unsigned startLvaCount = lvaCount;
// Loop through the original lvaTable, looking for struct locals to promote.
17160 lvaStructPromotionInfo structPromotionInfo;
17161 bool tooManyLocals = false;
17163 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
17165 // Whether this var got promoted
17166 bool promotedVar = false;
17167 LclVarDsc* varDsc = &lvaTable[lclNum];
17169 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
17170 // its fields. Instead, we will attempt to enregister the entire struct.
17171 if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
17173 varDsc->lvRegStruct = true;
17175 // Don't promote if we have reached the tracking limit.
17176 else if (lvaHaveManyLocals())
// Print the message the first time we detect this condition
17179 if (!tooManyLocals)
17181 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
17183 tooManyLocals = true;
17185 else if (varTypeIsStruct(varDsc))
17187 bool shouldPromote;
17189 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
17190 if (structPromotionInfo.canPromote)
17192 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
17196 shouldPromote = false;
17200 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
17201 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
17202 static int structPromoVarNum = 0;
17203 structPromoVarNum++;
17204 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
// Promote this struct local var.
17210 lvaPromoteStructVar(lclNum, &structPromotionInfo);
17211 promotedVar = true;
17213 #ifdef _TARGET_ARM_
17214 if (structPromotionInfo.requiresScratchVar)
17216 // Ensure that the scratch variable is allocated, in case we
17217 // pass a promoted struct as an argument.
17218 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
17220 lvaPromotedStructAssemblyScratchVar =
17221 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
17222 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
17225 #endif // _TARGET_ARM_
17229 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
17231 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
17232 // we will treat it as a reg struct.
17233 varDsc->lvRegStruct = true;
17240 printf("\nlvaTable after fgPromoteStructs\n");
17246 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
17248 noway_assert(tree->OperGet() == GT_FIELD);
17250 GenTreePtr objRef = tree->gtField.gtFldObj;
17251 GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
17252 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
17254 /* Is this an instance data member? */
17256 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
17258 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
17259 LclVarDsc* varDsc = &lvaTable[lclNum];
17261 if (varTypeIsStruct(obj))
17263 if (varDsc->lvPromoted)
17266 unsigned fldOffset = tree->gtField.gtFldOffset;
17267 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17268 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17270 tree->SetOper(GT_LCL_VAR);
17271 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
17272 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
17273 tree->gtFlags &= GTF_NODE_MASK;
17274 tree->gtFlags &= ~GTF_GLOB_REF;
17276 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17277 if (parent->gtOper == GT_ASG)
17279 if (parent->gtOp.gtOp1 == tree)
17281 tree->gtFlags |= GTF_VAR_DEF;
17282 tree->gtFlags |= GTF_DONT_CSE;
17285 // Promotion of struct containing struct fields where the field
17286 // is a struct with a single pointer sized scalar type field: in
17287 // this case struct promotion uses the type of the underlying
17288 // scalar field as the type of struct field instead of recursively
17289 // promoting. This can lead to a case where we have a block-asgn
17290 // with its RHS replaced with a scalar type. Mark RHS value as
17291 // DONT_CSE so that assertion prop will not do const propagation.
17292 // The reason this is required is that if RHS of a block-asg is a
17293 // constant, then it is interpreted as init-block incorrectly.
// TODO - This can also be avoided if we implement recursive struct
// promotion.
17297 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
17299 tree->gtFlags |= GTF_DONT_CSE;
17305 printf("Replacing the field in promoted struct with a local var:\n");
17306 fgWalkPre->printModified = true;
17309 return WALK_SKIP_SUBTREES;
17315 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
17316 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
17317 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
17318 // there is one extremely rare case where that won't be true. An enum type is a special value type
17319 // that contains exactly one element of a primitive integer type (that, for CLS programs is named
17320 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
17321 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
17322 // ldfld. For example:
17324 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
17326 // .field public specialname rtspecialname int16 value__
17327 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
17329 // .method public hidebysig static void Main() cil managed
17331 // .locals init (valuetype mynamespace.e_t V_0)
17334 // ldflda int16 mynamespace.e_t::value__
17338 // Normally, compilers will not generate the ldflda, since it is superfluous.
17340 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
17341 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
17342 // mismatch like this, don't do this morphing. The local var may end up getting marked as
17343 // address taken, and the appropriate SHORT load will be done from memory in that case.
17345 if (tree->TypeGet() == obj->TypeGet())
17347 tree->ChangeOper(GT_LCL_VAR);
17348 tree->gtLclVarCommon.SetLclNum(lclNum);
17349 tree->gtFlags &= GTF_NODE_MASK;
17351 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17352 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17354 tree->gtFlags |= GTF_VAR_DEF;
17355 tree->gtFlags |= GTF_DONT_CSE;
17360 printf("Replacing the field in normed struct with the local var:\n");
17361 fgWalkPre->printModified = true;
17364 return WALK_SKIP_SUBTREES;
17369 return WALK_CONTINUE;
17372 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
17374 noway_assert(tree->OperGet() == GT_LCL_FLD);
17376 unsigned lclNum = tree->gtLclFld.gtLclNum;
17377 LclVarDsc* varDsc = &lvaTable[lclNum];
17379 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
17382 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
17383 unsigned fieldLclIndex = 0;
17384 LclVarDsc* fldVarDsc = nullptr;
17386 if (fldOffset != BAD_VAR_NUM)
17388 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17389 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17390 fldVarDsc = &lvaTable[fieldLclIndex];
17393 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
17394 #ifdef _TARGET_X86_
17395 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
17399 // There is an existing sub-field we can use
17400 tree->gtLclFld.SetLclNum(fieldLclIndex);
// We need to keep the types 'compatible'. If we can switch back to a GT_LCL_VAR,
// we do so below.
17403 CLANG_FORMAT_COMMENT_ANCHOR;
17405 #ifdef _TARGET_ARM_
17406 assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
17408 assert(varTypeIsIntegralOrI(tree->TypeGet()));
17410 if (varTypeCanReg(fldVarDsc->TypeGet()))
17412 // If the type is integer-ish, then we can use it as-is
17413 tree->ChangeOper(GT_LCL_VAR);
17414 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
17415 tree->gtType = fldVarDsc->TypeGet();
17419 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
17420 fgWalkPre->printModified = true;
17425 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17426 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17428 tree->gtFlags |= GTF_VAR_DEF;
17429 tree->gtFlags |= GTF_DONT_CSE;
// There is no existing field that has all the parts that we need,
// so we must ensure that the struct lives in memory.
17436 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
// We can't convert this guy to a float because he really does have his
// address taken.
17441 varDsc->lvKeepType = 1;
17445 return WALK_SKIP_SUBTREES;
17448 return WALK_CONTINUE;
17451 /*****************************************************************************
17453 * Mark irregular parameters. For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
* For ARM64, this is structs larger than 16 bytes (that are also not HFAs) that are passed by reference.
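// For example (x64, non-SysV ABI; the types are illustrative):
//
//     struct Pod8  { __int64 a; };        // 8 bytes, power of two -> passed by value
//     struct Pod24 { __int64 a, b, c; };  // 24 bytes              -> implicit byref
//
// A parameter of type Pod24 is really passed as a pointer, so below its local is
// retyped to TYP_BYREF and hidden from struct promotion.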
17456 void Compiler::fgMarkImplicitByRefArgs()
17458 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
printf("\n*************** In fgMarkImplicitByRefArgs()\n");
17466 for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
17468 LclVarDsc* varDsc = &lvaTable[lclNum];
17470 assert(!varDsc->lvPromoted); // Called in the wrong order?
17472 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
size_t size;

if (varDsc->lvSize() > REGSIZE_BYTES)
17478 size = varDsc->lvSize();
17482 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17483 size = info.compCompHnd->getClassSize(typeHnd);
17486 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
17487 #if defined(_TARGET_AMD64_)
17488 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
17489 #elif defined(_TARGET_ARM64_)
17490 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
// Previously nobody was ever setting lvIsParam and lvIsTemp on the same local.
// So I am now using it to indicate that this is one of the weird implicit
// byref parameters.
17496 // The address taken cleanup will look for references to locals marked like
17497 // this, and transform them appropriately.
17498 varDsc->lvIsTemp = 1;
17500 // Also marking them as BYREF will hide them from struct promotion.
17501 varDsc->lvType = TYP_BYREF;
17502 varDsc->lvRefCnt = 0;
17504 // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
17505 // make sure that the following flag is not set as these will force SSA to
17506 // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
17508 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
17511 // This should not be converted to a double in stress mode,
17512 // because it is really a pointer
17513 varDsc->lvKeepType = 1;
17517 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
17521 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
17525 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
17528 /*****************************************************************************
17530 * Morph irregular parameters
17531 * for x64 and ARM64 this means turning them into byrefs, adding extra indirs.
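// A sketch of the two rewrites performed here (S is a hypothetical implicit-byref
// parameter local):
//
//     GT_ADDR(GT_LCL_VAR S)  ==>  GT_LCL_VAR S            // the local is already a byref
//     GT_LCL_VAR S           ==>  GT_OBJ(GT_LCL_VAR S)    // add the missing indirection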
17533 bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr* pTree, fgWalkData* fgWalkPre)
17535 #if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
17539 #else // _TARGET_AMD64_ || _TARGET_ARM64_
17541 GenTree* tree = *pTree;
17542 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
17544 bool isAddr = (tree->gtOper == GT_ADDR);
17545 GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
17546 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
17547 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
17549 if (!lvaIsImplicitByRefLocal(lclNum))
// We only need to transform the 'marked' implicit by ref parameters
17555 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
17556 // re-invoke the traversal to mark address-taken locals.
17557 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
17558 // If we do, leave it as-is.
17559 if (!varTypeIsStruct(lclVarTree))
17561 assert(lclVarTree->TypeGet() == TYP_BYREF);
17565 // We are overloading the lvRefCnt field here because real ref counts have not been set.
17566 lclVarDsc->lvRefCnt++;
17568 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
17569 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
17573 // change &X into just plain X
17574 tree->CopyFrom(lclVarTree, this);
17575 tree->gtType = TYP_BYREF;
17580 printf("Replacing address of implicit by ref struct parameter with byref:\n");
17581 fgWalkPre->printModified = true;
17587 // Change X into OBJ(X)
17588 var_types structType = tree->gtType;
17589 tree->gtType = TYP_BYREF;
17590 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
17591 if (structType == TYP_STRUCT)
17593 gtSetObjGcInfo(tree->AsObj());
17596 // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
17597 // we could remove TGTANYWHERE
17598 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
17603 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
17604 gtDispTree(tree, nullptr, nullptr, true);
17605 fgWalkPre->printModified = true;
17613 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
17616 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
17617 enum AddrExposedContext
17619 AXC_None, // None of the below seen yet.
17620 AXC_Ind, // The address being computed is to be dereferenced.
17621 AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
17622 AXC_IndWide, // A block operation dereferenced an address referencing more bytes than the address
17623 // addresses -- if the address addresses a field of a struct local, we need to consider
17624 // the entire local address taken (not just the field).
17625 AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
17626 // on more bytes than the width of the storage location addressed. If this is a
17627 // field of a promoted struct local, declare the entire struct local address-taken.
AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
17629 // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
17632 typedef ArrayStack<AddrExposedContext> AXCStack;
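// For example, given a promoted struct local S (hypothetical):
//   - "&S.f" passed to a call: the GT_ADDR is reached in an AXC_None context, so it
//     pushes AXC_Addr, and S.f (and possibly all of S) gets marked address-exposed.
//   - "*(&S.f)": the GT_ADDR is reached in an AXC_Ind context, so it pushes AXC_None
//     and nothing is marked -- the address never escapes the dereference.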
17634 // We use pre-post to simulate passing an argument in a recursion, via a stack.
17635 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
17637 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
17638 (void)axcStack->Pop();
17639 return WALK_CONTINUE;
17642 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
17644 GenTreePtr tree = *pTree;
17645 Compiler* comp = fgWalkPre->compiler;
17646 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
17647 AddrExposedContext axc = axcStack->Top();
// In some situations, we have to figure out what the effective context is in which to
// evaluate the current tree, depending on which argument position it occupies in its parent.
17657 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17658 assert(parent->OperGet() == GT_ADD);
17659 // Is one of the args a constant representing a field offset,
17660 // and is this the other? If so, Ind context.
17661 if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
17665 else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
17680 // Now recurse properly for the tree.
17681 switch (tree->gtOper)
17684 if (axc != AXC_Addr)
17686 axcStack->Push(AXC_Ind);
17690 axcStack->Push(AXC_None);
17692 return WALK_CONTINUE;
17696 if (axc == AXC_Addr)
17698 axcStack->Push(AXC_None);
17700 else if (tree->TypeGet() == TYP_STRUCT)
// The block operation will dereference its argument(s) -- usually. If the size of the initblk
// or copyblk exceeds the size of a storage location whose address is used as one of the
// arguments, then we have to consider that storage location (indeed, its underlying containing
// location) to be address taken. So get the width of the initblk or copyblk.
17707 GenTreePtr parent = fgWalkPre->parentStack->Index(1);
17708 GenTreeBlk* blk = tree->AsBlk();
17709 unsigned width = blk->gtBlkSize;
17710 noway_assert(width != 0);
17712 GenTree* addr = blk->Addr();
17713 if (addr->OperGet() == GT_ADDR)
17715 if (parent->gtOper == GT_ASG)
17717 if ((tree == parent->gtOp.gtOp1) &&
17718 ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
17725 assert(parent->gtOper == GT_CALL);
17728 axcStack->Push(axc);
17732 // This is like a regular GT_IND.
17733 axcStack->Push(AXC_Ind);
17735 return WALK_CONTINUE;
17738 // Assume maximal width.
17739 axcStack->Push(AXC_IndWide);
17740 return WALK_CONTINUE;
17743 case GT_FIELD_LIST:
17744 axcStack->Push(AXC_None);
17745 return WALK_CONTINUE;
17748 // Taking the address of an array element never takes the address of a local.
17749 axcStack->Push(AXC_None);
17750 return WALK_CONTINUE;
17753 // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will
17754 // convert to just "lcl". This is never an address-context use, since the local is already a
17755 // byref after this transformation.
17756 if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
17758 // Push something to keep the PostCB, which will pop it, happy.
17759 axcStack->Push(AXC_None);
// In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the
// second case.
17762 return WALK_SKIP_SUBTREES;
17764 #ifdef FEATURE_SIMD
17765 if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
17767 axcStack->Push(AXC_None);
17770 #endif // FEATURE_SIMD
17771 if (axc == AXC_Ind)
17773 axcStack->Push(AXC_None);
17775 else if (axc == AXC_IndWide)
17777 axcStack->Push(AXC_AddrWide);
17781 assert(axc == AXC_None);
17782 axcStack->Push(AXC_Addr);
17784 return WALK_CONTINUE;
17787 // First, handle a couple of special cases: field of promoted struct local, field
17788 // of "normed" struct.
17789 if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
17791 // It (may have) replaced the field with a local var or local field. If we're in an addr context,
17792 // label it addr-taken.
17793 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
17795 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17796 comp->lvaSetVarAddrExposed(lclNum);
17797 if (axc == AXC_AddrWide)
17799 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
17800 if (varDsc->lvIsStructField)
17802 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
17806 // Push something to keep the PostCB, which will pop it, happy.
17807 axcStack->Push(AXC_None);
17808 return WALK_SKIP_SUBTREES;
17812 // GT_FIELD is an implicit deref.
17813 if (axc == AXC_Addr)
17815 axcStack->Push(AXC_None);
17817 else if (axc == AXC_AddrWide)
17819 axcStack->Push(AXC_IndWide);
17823 axcStack->Push(AXC_Ind);
17825 return WALK_CONTINUE;
17830 assert(axc != AXC_Addr);
17831 // This recognizes certain forms, and does all the work. In that case, returns WALK_SKIP_SUBTREES,
17832 // else WALK_CONTINUE. We do the same here.
17833 fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
17834 if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
17836 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17837 comp->lvaSetVarAddrExposed(lclNum);
17838 if (axc == AXC_AddrWide)
17840 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
17841 if (varDsc->lvIsStructField)
17843 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
17847 // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
17848 // what, but something to be popped by the post callback. If we're going
17849 // to analyze children, the LCL_FLD creates an Ind context, so use that.
17850 axcStack->Push(AXC_Ind);
17855 // On some architectures, some arguments are passed implicitly by reference.
17856 // Modify the trees to reflect that, if this local is one of those.
17857 if (comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
17859 // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was
17860 // handled earlier. (And we can't have added anything to this address, since it was implicit.)
17861 assert(axc != AXC_Addr);
17865 if (axc == AXC_Addr || axc == AXC_AddrWide)
17867 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17868 comp->lvaSetVarAddrExposed(lclNum);
17869 if (axc == AXC_AddrWide)
17871 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
17872 if (varDsc->lvIsStructField)
17874 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
// We may need to Quirk the storage size for this LCL_VAR:
17879 // some PInvoke signatures incorrectly specify a ByRef to an INT32
17880 // when they actually write a SIZE_T or INT64
17881 if (axc == AXC_Addr)
17883 comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
17887 // Push something to keep the PostCB, which will pop it, happy.
17888 axcStack->Push(AXC_None);
17889 // In the first case, tree may no longer be a leaf, but we're done with it; is a leaf in the second case.
17890 return WALK_SKIP_SUBTREES;
17893 assert(axc != AXC_Addr);
17894 // See below about treating pointer operations as wider indirection.
17895 if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
17897 axcStack->Push(AXC_IndWide);
17899 else if (axc == AXC_Ind)
17901 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
17902 // If it's an add of a constant and an address, and the constant represents a field,
17903 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
17904 axcStack->Push(AXC_IndAdd);
17908 axcStack->Push(axc);
17910 return WALK_CONTINUE;
17912 // !!! Treat Pointer Operations as Wider Indirection
17914 // If we are performing pointer operations, make sure we treat that as equivalent to a wider
17915 // indirection. This is because the pointers could be pointing to the address of struct fields
17916 // and could be used to perform operations on the whole struct or passed to another method.
17918 // When visiting a node in this pre-order walk, we do not know if we would in the future
17919 // encounter a GT_ADDR of a GT_FIELD below.
17921 // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
17922 // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
17923 // wider indirection context down the expr tree.
17925 // Example, in unsafe code,
17927 // IL_000e 12 00 ldloca.s 0x0
17928 // IL_0010 7c 02 00 00 04 ldflda 0x4000002
17929 // IL_0015 12 00 ldloca.s 0x0
17930 // IL_0017 7c 01 00 00 04 ldflda 0x4000001
// When visiting the GT_SUB node, if the type of either of the GT_SUB's operands is BYREF, then
// consider GT_SUB to be equivalent to an AXC_IndWide.
17936 // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
17937 // them as AXC_IndWide.
17961 if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
17962 (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
17964 axcStack->Push(AXC_IndWide);
17965 return WALK_CONTINUE;
17970 // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
// handle the "Ind" propagation explicitly above.
17972 if (axc == AXC_Addr || axc == AXC_AddrWide)
17974 axcStack->Push(axc);
17978 axcStack->Push(AXC_None);
17980 return WALK_CONTINUE;
17984 bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
17986 if (tree->TypeGet() != TYP_STRUCT)
17988 return width <= genTypeSize(tree->TypeGet());
17990 else if (tree->OperGet() == GT_LCL_VAR)
17992 assert(tree->TypeGet() == TYP_STRUCT);
17993 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
17994 return width <= lvaTable[lclNum].lvExactSize;
17996 else if (tree->OperGet() == GT_FIELD)
17998 CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
17999 return width <= info.compCompHnd->getClassSize(fldClass);
18001 else if (tree->OperGet() == GT_INDEX)
18003 return width <= tree->gtIndex.gtIndElemSize;
18011 void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
18013 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
18015 switch (op1->OperGet())
18018 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
18020 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
18021 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
18026 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
18028 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
18029 if (op1Fs != nullptr)
18031 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18032 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
18035 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
18037 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
18038 if (op2Fs != nullptr)
18040 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
18041 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
18048 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
18049 if (op1Fs != nullptr)
18051 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18052 op1->gtIntCon.gtFieldSeq = op1Fs;
18058 // Record in the general zero-offset map.
18059 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
18064 /*****************************************************************************
18066 * Mark address-taken locals.
18069 void Compiler::fgMarkAddressExposedLocals()
18074 printf("\n*************** In fgMarkAddressExposedLocals()\n");
18078 BasicBlock* block = fgFirstBB;
18079 noway_assert(block);
18083 /* Make the current basic block address available globally */
18089 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
// Call Compiler::fgMarkAddrTakenLocalsPreCB/PostCB on each node
18092 AXCStack stk(this);
18093 stk.Push(AXC_None); // We start in neither an addr or ind context.
18094 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
18097 block = block->bbNext;
18102 // fgNodesMayInterfere:
18103 // return true if moving nodes relative to each other can change the result of a computation
// write: a node which writes
// read: a node which reads
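// For example: if 'write' assigns to an address-exposed local V02 (hypothetical) and
// 'read' is a GT_IND through some pointer, the indirection may alias V02, so the two
// nodes interfere and must not be reordered past each other.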
18109 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
18111 LclVarDsc* srcVar = nullptr;
18113 bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
18114 bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
18116 if (read->OperIsLocal())
18118 srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
18123 if (srcVar && srcVar->lvAddrExposed)
18127 else if (readIsIndir)
18133 else if (write->OperIsLocal())
18135 LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
18138 return dstVar->lvAddrExposed;
18140 else if (read->OperIsLocal())
18142 if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
18159 /** This predicate decides whether we will fold a tree with the structure:
* x = x <op> y, where x could be any arbitrary expression, into
* x <op>= y.
18163 * This modification is only performed when the target architecture supports
18164 * complex addressing modes. In the case of ARM for example, this transformation
18165 * yields no benefit.
18167 * In case this functions decides we can proceed to fold into an assignment operator
18168 * we need to inspect whether the operator is commutative to tell fgMorph whether we need to
18169 * reverse the tree due to the fact we saw x = y <op> x and we want to fold that into
18170 * x <op>= y because the operator property.
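// Illustrative examples of the intended folding (source form, not trees):
//   x = x - y   ==>   x -= y   (*bReverse = false)
//   x = y + x   ==>   x += y   (*bReverse = true; legal only because '+' commutes)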
bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
{
#if CPU_LOAD_STORE_ARCH
    /* In the case of a load/store architecture, there's no gain by doing any of this, so we bail. */
    return false;
#elif !defined(LEGACY_BACKEND)
    return false;
#else // defined(LEGACY_BACKEND)

    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtGetOp2();
    genTreeOps cmop = op2->OperGet();

    /* Is the destination identical to the first RHS sub-operand? */
    if (GenTree::Compare(op1, op2->gtOp.gtOp1))
    {
        /*
          Do not transform the following tree

          [0024CFA4] -----------            const     int    1
          [0024CFDC] ----G------            |         int
          [0024CF5C] -----------               lclVar    ubyte  V01 tmp0
          [0024D05C] -A--G------            =         ubyte
          [0024D014] D------N---               lclVar    ubyte  V01 tmp0

          to

          [0024CFA4] -----------            const     int    1
          [0024D05C] -A--G------            |=        ubyte
          [0024D014] U------N---            lclVar    ubyte  V01 tmp0

          when V01 is a struct field local.
        */

        if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) &&
            op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
        {
            unsigned   lclNum = op1->gtLclVarCommon.gtLclNum;
            LclVarDsc* varDsc = lvaTable + lclNum;

            if (varDsc->lvIsStructField)
            {
                return false;
            }
        }

        *bReverse = false;
        return true;
    }
    else if (GenTree::OperIsCommutative(cmop))
    {
        /* For commutative ops only, check for "a = x <op> a" */

        /* Should we be doing this at all? */
        if ((opts.compFlags & CLFLG_TREETRANS) == 0)
        {
            return false;
        }

        /* Can we swap the operands to cmop ... */
        if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
        {
            /* ... no - if both operands have side effects, swapping them would be observable */
            return false;
        }

        /* Is the destination identical to the second RHS sub-operand? */
        if (GenTree::Compare(op1, op2->gtOp.gtOp2))
        {
            *bReverse = true;
            return true;
        }
    }
    return false;
#endif // defined(LEGACY_BACKEND)
}
#ifdef FEATURE_SIMD

//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
//   If the RHS of the input stmt is a read of the X field of a SIMD vector, then this
//   function keeps reading the next few stmts, based on the vector size (2, 3, or 4
//   elements).  If the LHS of those stmts are located contiguously in memory, and the
//   RHS are too, then the statements are replaced with a single copyblk.
//
// Arguments:
//   block - BasicBlock*. The block the stmt belongs to.
//   stmt  - GenTreeStmt*. The stmt node we want to check.
//
// Return Value:
//   true if this function successfully optimized the stmts; otherwise false.
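//
// For example (illustrative only; 'dst' and 'v' are hypothetical names), with a
// 4-float vector the sequence
//     dst[0] = v.X; dst[1] = v.Y; dst[2] = v.Z; dst[3] = v.W;
// collapses into a single 16-byte block copy from 'v' to 'dst'.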
bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
{
    noway_assert(stmt->gtOper == GT_STMT);
    GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
    assert(tree->OperGet() == GT_ASG);

    GenTreePtr originalLHS    = tree->gtOp.gtOp1;
    GenTreePtr prevLHS        = tree->gtOp.gtOp1;
    GenTreePtr prevRHS        = tree->gtOp.gtOp2;
    unsigned   index          = 0;
    var_types  baseType       = TYP_UNKNOWN;
    unsigned   simdSize       = 0;
    GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);

    if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
    {
        // If the RHS is not a read of field X of a SIMD vector, there is no need to check further.
        return false;
    }
    var_types  simdType             = getSIMDTypeForSize(simdSize);
    int        assignmentsCount     = simdSize / genTypeSize(baseType) - 1;
    int        remainingAssignments = assignmentsCount;
    GenTreePtr curStmt              = stmt->gtNext;
    GenTreePtr lastStmt             = stmt;

    while (curStmt != nullptr && remainingAssignments > 0)
    {
        GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
        if (exp->OperGet() != GT_ASG)
        {
            break;
        }
        GenTreePtr curLHS = exp->gtGetOp1();
        GenTreePtr curRHS = exp->gtGetOp2();

        if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
        {
            break;
        }

        remainingAssignments--;
        prevLHS = curLHS;
        prevRHS = curRHS;

        lastStmt = curStmt;
        curStmt  = curStmt->gtNext;
    }

    if (remainingAssignments > 0)
    {
        // If any assignments remain unmatched, the statements do not store to
        // contiguous memory locations from the same vector, so we cannot
        // combine them.
        return false;
    }
#ifdef DEBUG
    if (verbose)
    {
        printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
        printf("From BB%02u, stmt", block->bbNum);
        printTreeID(stmt);
        printf(" to stmt");
        printTreeID(lastStmt);
        printf("\n");
    }
#endif

    for (int i = 0; i < assignmentsCount; i++)
    {
        fgRemoveStmt(block, stmt->gtNext);
    }
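
    // Note that 'stmt' itself is deliberately kept: it is rewritten in place
    // below to hold the combined block copy.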
    GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
    if (simdStructNode->OperIsLocal())
    {
        setLclRelatedToSIMDIntrinsic(simdStructNode);
    }
    GenTree* copyBlkAddr = copyBlkDst;
    if (copyBlkAddr->gtOper == GT_LEA)
    {
        copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
    }
    GenTreeLclVarCommon* localDst = nullptr;
    if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
    {
        setLclRelatedToSIMDIntrinsic(localDst);
    }

    if (simdStructNode->TypeGet() == TYP_BYREF)
    {
        assert(simdStructNode->OperIsLocal());
        assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
        simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
    }
    else
    {
        assert(varTypeIsSIMD(simdStructNode));
    }
#ifdef DEBUG
    if (verbose)
    {
        printf("\nBB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(before)\n");
        gtDispTree(stmt);
    }
#endif

    // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
    GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
    blkNode->gtType  = simdType;
    tree             = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
                                      false, // not volatile
                                      true); // copyBlock

    stmt->gtStmt.gtStmtExpr = tree;

    // Since we generated a new address node which didn't exist before,
    // we should expose this address manually here.
    AXCStack stk(this);
    stk.Push(AXC_None);
    fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nReplaced BB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(after)\n");
        gtDispTree(stmt);
    }
#endif

    return true;
}
#endif // FEATURE_SIMD