// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                  Morph                                    XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "allocacheck.h" // for alloca
// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants; also adds an edge for the overflow exception.
// Returns the morphed tree.
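//
// For illustration (a sketch, not part of the original source): an
// overflow-checked double-to-long cast
//
//     GT_CAST(long <- double) [GTF_OVERFLOW]
//
// is rewritten in place into a helper call along the lines of
//
//     GT_CALL CORINFO_HELP_DBL2LNG_OVF(oper)
//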
GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper)
{
    GenTree* result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTree* oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);

    return result;
}
/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */
GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args, bool morphArgs)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

    // Helper calls are never inline candidates.
    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr       = nullptr;
    tree->gtCall.gtEntryPoint.accessType = IAT_VALUE;
#endif
#ifndef _TARGET_64BIT_
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // !_TARGET_64BIT_
    if (tree->OperMayThrow(this))
    {
        tree->gtFlags |= GTF_EXCEPT;
    }
    else
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }
    tree->gtFlags |= GTF_CALL;

    if (args)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }

    /* Perform the morphing */

    if (morphArgs)
    {
        tree = fgMorphArgs(tree->AsCall());
    }

    return tree;
}
/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTree* Compiler::fgMorphCast(GenTree* tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE);

    /* The first sub-operand is the thing being cast */

    GenTree* oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);
    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        if (srcType == TYP_FLOAT
#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#else
            // x86: src = float, dst = uint32/int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT))
#endif
            )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
        }
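
        // Illustrative sketch (not in the original source): on x86 a
        // CAST(ulong <- float x) first becomes
        //     CAST(ulong <- CAST(double <- float x))
        // and the remaining double -> ulong conversion is morphed into a
        // call to CORINFO_HELP_DBL2ULNG further below.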

        // Do we need to do it in two steps: R -> I, then I -> smallType?
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < TARGET_POINTER_SIZE)
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
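
        // Illustrative sketch (not in the original source): on ARM64/AMD64 a
        // double -> byte cast thus becomes
        //     CAST(byte <- CAST(int <- double x))
        // with the cast's overflow/exception flags also copied onto the
        // newly inserted inner cast.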
        /* Note that if we need to use a helper call then we cannot morph oper */
        if (!tree->gtOverflow())
        {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
            goto OPTIMIZECAST;
#else
            switch (dstType)
            {
                case TYP_INT:
                    goto OPTIMIZECAST;

                case TYP_UINT:
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                    goto OPTIMIZECAST;
#else  // _TARGET_X86_
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_X86_

                case TYP_LONG:
#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                    goto OPTIMIZECAST;
#else  // !_TARGET_AMD64_
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif // !_TARGET_AMD64_

                case TYP_ULONG:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                default:
                    break;
            }
#endif // _TARGET_ARM64_
        }
        else
        {
            switch (dstType)
            {
                case TYP_INT:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                case TYP_UINT:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                case TYP_LONG:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                case TYP_ULONG:
                    return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                default:
                    break;
            }
        }

        noway_assert(!"Unexpected dstType");
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_

    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long, because there is no long-to-float
        // helper, so it must be done in two steps.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;

        return fgMorphTree(oper);
    }

#ifdef _TARGET_ARM_
    // Convert long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // There is only a double helper, so we
            //  - change the dstType to double,
            //  - insert a cast from double to float, and
            //  - recurse into the resulting tree.
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_
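
// Illustrative sketch (not in the original source) of the ARM path above:
// CAST(float <- long x) is retyped to CAST(double <- long x), wrapped as
//     CAST(float <- CAST(double <- long x))
// and the inner cast is then morphed into a CORINFO_HELP_LNG2DBL call.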

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversions as one-step operations:
    //  a) Long -> R4/R8
    //  b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using the ones above:
    //  U4 -> R4/8 = U4 -> Long -> R4/8
    //  U8 -> R4   = U8 -> R8 -> R4
    else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                //  - change the dstType to double,
                //  - insert a cast from double to float, and
                //  - recurse into the resulting tree.
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);

                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
        }
    }
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information. We would like to just change the type to int,
        // however this gives the emitter fits because it believes the variable is a GC
        // variable at the beginning of the instruction group, but it is not turned non-gc
        // by the code generator. We fix this by copying the GC pointer to a non-gc pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTree* asg    = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

        return fgMorphTree(oper);
    }
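
    // Resulting tree shape (an illustrative sketch, not from the original source):
    //
    //     COMMA(dstType)
    //       op1: ASG(LCL_VAR tmp TYP_I_IMPL, objRef)
    //       op2: CAST(dstType <- LCL_VAR tmp TYP_I_IMPL)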

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTree* andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2           = gtFoldExprConst(andOp2);
                oper->gtOp.gtOp2 = andOp2;
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }
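
        // Illustrative sketch (not in the original source): given
        //     CAST.ovf(uint <- AND(long x, 0xFF))
        // the AND result can never exceed 0xFF, so the overflow check (and
        // the cast's GTF_EXCEPT flag) can safely be removed here.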

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result depend
            // only upon the lower 32 bits of the operands.
            //
            bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);

            // For long LSH cast to int, there is a discontinuity in behavior
            // when the shift amount is 32 or larger.
            //
            // CAST(INT, LSH(1LL, 31)) == LSH(1, 31)
            // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31)
            //
            // CAST(INT, LSH(1LL, 32)) == 0
            // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1
            //
            // So some extra validation is needed.
            //
            if (oper->OperIs(GT_LSH))
            {
                GenTree* shiftAmount = oper->gtOp.gtOp2;

                // Expose constant value for shift, if possible, to maximize the number
                // of cases we can handle.
                shiftAmount      = gtFoldExpr(shiftAmount);
                oper->gtOp.gtOp2 = shiftAmount;

                // We may remorph the shift amount tree again later, so clear any morphed flag.
                shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;

                if (shiftAmount->IsIntegralConst())
                {
                    const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue();

                    if ((shiftAmountValue >= 64) || (shiftAmountValue < 0))
                    {
                        // Shift amount is large enough or negative so result is undefined.
                        // Don't try to optimize.
                        assert(!canPushCast);
                    }
                    else if ((shiftAmountValue >= 32) && ((tree->gtFlags & GTF_ALL_EFFECT) == 0))
                    {
                        // Result of the shift is zero.
                        DEBUG_DESTROY_NODE(tree);
                        GenTree* zero = gtNewZeroConNode(TYP_INT);
                        return fgMorphTree(zero);
                    }
                    else
                    {
                        // Shift amount is positive and small enough that we can push the cast through.
                        canPushCast = true;
                    }
                }
                else
                {
                    // Shift amount is unknown. We can't optimize this case.
                    assert(!canPushCast);
                }
            }

            if (canPushCast)
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2.
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType);
                }

                // Clear the GT_MUL_64RSLT if it is set.
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }
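
    // Illustrative sketch (not in the original source) of the push-down:
    //     CAST(int <- ADD(long a, long b))
    // becomes
    //     ADD(int, CAST(int <- a), CAST(int <- b))
    // so the 64-bit add is performed as a 32-bit one.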

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Reset the assignment flag */
    tree->gtFlags &= ~GTF_ASG;

    /* Unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper))
    {
        srcType = oper->TypeGet();

        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType))
            {
                if (varTypeIsSmall(srcType))
                {
                    // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the
                    // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType
                    // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is
                    // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion
                    // from u4 to i2.
                    srcType = genActualType(srcType);
                }

                srcType = genUnsignedType(srcType);
            }

            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }

            bool     unsignedSrc = varTypeIsUnsigned(srcType);
            bool     unsignedDst = varTypeIsUnsigned(dstType);
            bool     signsDiffer = (unsignedSrc != unsignedDst);
            unsigned srcSize     = genTypeSize(srcType);

            // For same-sized casts with the same signs, and for non-overflow casts,
            // we discard the cast as well.
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                // We're changing the type here so we need to update the VN;
                                // in other cases we discard the cast without modifying oper
                                // so the VN doesn't change.
                                oper->SetVNsFromNode(tree);

                                goto REMOVE_CAST;

                            default:
                                break;
                        }
                    }
                }
            }
            else if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening casts with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Widening casts from unsigned or to signed can never overflow

                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                    if (!(oper->gtFlags & GTF_EXCEPT))
                    {
                        tree->gtFlags &= ~GTF_EXCEPT;
                    }
                }
            }
            else // if (srcSize > dstSize)
            {
                // Try to narrow the operand of the cast and discard the cast.
                // Note: Do not narrow a cast that is marked as a CSE,
                // and do not narrow if the oper is marked as a CSE either.
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
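
                    // Note: the call in the 'if' condition above was a query pass
                    // (final parameter false) checking that the whole tree can be
                    // narrowed; this second call actually performs the narrowing.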

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }

                    goto REMOVE_CAST;
                }
            }
        }

        switch (oper->gtOper)
        {
            /* If the operand is a constant, we'll fold it */
            case GT_CNS_INT:
            case GT_CNS_LNG:
            case GT_CNS_DBL:
            case GT_CNS_STR:
            {
                GenTree* oldTree = tree;

                tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

                // Did we get a comma throw as a result of gtFoldExprConst?
                if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
                {
                    noway_assert(fgIsCommaThrow(tree));
                    tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                    fgMorphTreeDone(tree);
                    return tree;
                }
                else if (tree->gtOper != GT_CAST)
                {
                    return tree;
                }

                noway_assert(tree->gtCast.CastOp() == oper); // unchanged
            }
            break;

            case GT_CAST:
                /* Check for two consecutive casts into the same dstType */
                if (!tree->gtOverflow())
                {
                    var_types dstType2 = oper->CastToType();
                    if (dstType == dstType2)
                    {
                        goto REMOVE_CAST;
                    }
                }
                break;

            case GT_COMMA:
                // Check for cast of a GT_COMMA with a throw overflow
                // Bug 110829: Since this optimization will bash the types,
                // neither oper nor commaOp2 can be CSE candidates.
                if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper cannot be a CSE candidate
                {
                    GenTree* commaOp2 = oper->gtOp.gtOp2;

                    if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 cannot be a CSE candidate
                    {
                        // need type of oper to be same as tree
                        if (tree->gtType == TYP_LONG)
                        {
                            commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                            commaOp2->gtIntConCommon.SetLngValue(0);
                            /* Change the types of oper and commaOp2 to TYP_LONG */
                            oper->gtType = commaOp2->gtType = TYP_LONG;
                        }
                        else if (varTypeIsFloating(tree->gtType))
                        {
                            commaOp2->ChangeOperConst(GT_CNS_DBL);
                            commaOp2->gtDblCon.gtDconVal = 0.0;
                            // Change the types of oper and commaOp2
                            oper->gtType = commaOp2->gtType = tree->gtType;
                        }
                        else
                        {
                            commaOp2->ChangeOperConst(GT_CNS_INT);
                            commaOp2->gtIntCon.gtIconVal = 0;
                            /* Change the types of oper and commaOp2 to TYP_INT */
                            oper->gtType = commaOp2->gtType = TYP_INT;
                        }

                        if (vnStore != nullptr)
                        {
                            fgValueNumberTreeConst(commaOp2);
                        }
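
                        // Illustrative sketch (not in the original source): for
                        //     CAST(long <- COMMA(throw, x))
                        // the comma is retyped to TYP_LONG and its second operand
                        // becomes the constant 0, so the cast itself can be dropped.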

                        /* Return the GT_COMMA node as the new tree */
                        return oper;
                    }
                }
                break;

            default:
                break;
        } /* end switch (oper->gtOper) */
    }

    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW);
    }

    return tree;

REMOVE_CAST:
    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);

    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object
 */

GenTree* Compiler::fgUnwrapProxy(GenTree* objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* pInfo = eeGetEEInfo();
    GenTree*         addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //   Note that each dereference is a GC pointer

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)

    return objRef;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph-altering modifications such as copy / constant propagation.
 */

unsigned UpdateGT_LISTFlags(GenTree* tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}

void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
    printf(" %s", varTypeName(argType));
    if (regNum != REG_STK)
    {
        printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
        for (unsigned i = 0; i < numRegs; i++)
        {
            printf(" %s", getRegName(regNums[i]));
        }
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (isLateArg())
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isHfaArg())
    {
        printf(", isHfa(%s)", varTypeName(GetHfaType()));
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    if (isStruct)
    {
        printf(", isStruct");
    }
    printf("]\n");
}

fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
    }
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  it is an exact copy of the oldCall's.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = nullptr;

    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first
    // argument, so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace it.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallObjp;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }

    GenTree*        newCurr;
    GenTree*        oldCurr;
    GenTreeArgList* newParent   = nullptr;
    GenTreeArgList* oldParent   = nullptr;
    fgArgTabEntry** oldArgTable = oldArgInfo->argTable;
    bool            scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntry* oldArgTabEntry = nullptr;
        fgArgTabEntry* newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTree* fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                // to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }

        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }

    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntry* oldArgTabEntry = nullptr;
            fgArgTabEntry* newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTree* fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
}

void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}

fgArgTabEntry* fgArgInfo::AddRegArg(unsigned  argNum,
                                    GenTree*  node,
                                    GenTree*  parent,
                                    regNumber regNum,
                                    unsigned  numRegs,
                                    unsigned  alignment,
                                    bool      isStruct /*=false*/,
                                    bool      isVararg /*=false*/)
{
    fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    // Any additional register numbers are set by the caller.
    // This is primarily because on ARM we don't yet know if it
    // will be split or if it is a double HFA, so the number of registers
    // may actually be less.
    curArgTabEntry->setRegNum(0, regNum);

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->argType       = node->TypeGet();
    curArgTabEntry->parent        = parent;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = UINT_MAX;
    curArgTabEntry->tmpNum        = BAD_VAR_NUM;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->_hfaElemKind  = HFA_ELEM_NONE;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;
    curArgTabEntry->isStruct      = isStruct;
    curArgTabEntry->isVararg      = isVararg;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}

#if defined(UNIX_AMD64_ABI)
fgArgTabEntry* fgArgInfo::AddRegArg(unsigned        argNum,
                                    GenTree*        node,
                                    GenTree*        parent,
                                    regNumber       regNum,
                                    unsigned        numRegs,
                                    unsigned        alignment,
                                    const bool      isStruct,
                                    const bool      isVararg,
                                    const regNumber otherRegNum,
                                    const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg);
    assert(curArgTabEntry != nullptr);

    curArgTabEntry->isStruct = isStruct; // is this a struct arg
    curArgTabEntry->checkIsStruct();
    assert(numRegs <= 2);
    if (numRegs == 2)
    {
        curArgTabEntry->setRegNum(1, otherRegNum);
    }

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(UNIX_AMD64_ABI)

fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
                                    GenTree* node,
                                    GenTree* parent,
                                    unsigned numSlots,
                                    unsigned alignment,
                                    bool     isStruct /*=false*/,
                                    bool     isVararg /*=false*/)
{
    fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = roundUp(nextSlotNum, alignment);

    curArgTabEntry->setRegNum(0, REG_STK);
    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->argType       = node->TypeGet();
    curArgTabEntry->parent        = parent;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = UINT_MAX;
    curArgTabEntry->tmpNum        = BAD_VAR_NUM;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->_hfaElemKind  = HFA_ELEM_NONE;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;
    curArgTabEntry->isStruct      = isStruct;
    curArgTabEntry->isVararg      = isVararg;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}

void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

//------------------------------------------------------------------------
// UpdateRegArg: Update the given fgArgTabEntry while morphing.
//
// Arguments:
//    curArgTabEntry - the fgArgTabEntry to update.
//    node           - the tree node that defines the argument
//    reMorphing     - a boolean value indicating whether we are remorphing the call
//
// Assumptions:
//    This must have already been determined to be at least partially passed in registers.
//
void fgArgInfo::UpdateRegArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing)
{
    bool isLateArg = curArgTabEntry->isLateArg();
    // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa.
    assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) ||
           (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0)));

    assert(curArgTabEntry->numRegs != 0);

    if (curArgTabEntry->parent != nullptr)
    {
        assert(curArgTabEntry->parent->OperIsList());
        assert(curArgTabEntry->parent->Current() == node);
    }

    if (curArgTabEntry->node != node)
    {
        if (isLateArg)
        {
            // Find the arg in the late args list.
            GenTree* argx = Compiler::gtArgNodeByLateArgInx(callTree, curArgTabEntry->lateArgInx);
            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            curArgTabEntry->node = node;
        }
    }
}

//------------------------------------------------------------------------
// UpdateStkArg: Update the given fgArgTabEntry while morphing.
//
// Arguments:
//    curArgTabEntry - the fgArgTabEntry to update.
//    node           - the tree node that defines the argument
//    reMorphing     - a boolean value indicating whether we are remorphing the call
//
// Assumptions:
//    This must have already been determined to be passed on the stack.
//
void fgArgInfo::UpdateStkArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing)
{
    bool isLateArg = curArgTabEntry->isLateArg();
    // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa.
    assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) ||
           (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0)));

    noway_assert(curArgTabEntry->parent != nullptr);
    assert((curArgTabEntry->regNum == REG_STK) || curArgTabEntry->isSplit);
    assert(curArgTabEntry->parent->OperIsList());
    assert(curArgTabEntry->parent->Current() == node);
    nextSlotNum = (unsigned)roundUp(nextSlotNum, curArgTabEntry->alignment);
    assert(curArgTabEntry->slotNum == nextSlotNum);

    if (curArgTabEntry->node != node)
    {
#if FEATURE_FIXED_OUT_ARGS
        if (isLateArg)
        {
            GenTree* argx       = nullptr;
            unsigned lateArgInx = curArgTabEntry->lateArgInx;

            // Traverse the late argument list to find this argument so that we can update it.
            unsigned listInx = 0;
            for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), listInx++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
                if (listInx == lateArgInx)
                {
                    break;
                }
            }
            assert(listInx == lateArgInx);
            assert(lateArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
#endif // FEATURE_FIXED_OUT_ARGS
        {
            curArgTabEntry->node = node;
        }
    }

    nextSlotNum += curArgTabEntry->numSlots;
}

void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntry* curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    if (argsComplete)
    {
        assert(curArgTabEntry->isSplit == true);
        assert(curArgTabEntry->numRegs == numRegs);
        assert(curArgTabEntry->numSlots == numSlots);
        assert(hasStackArgs == true);
    }
    else
    {
        curArgTabEntry->isSplit  = true;
        curArgTabEntry->numRegs  = numRegs;
        curArgTabEntry->numSlots = numSlots;
        hasStackArgs             = true;
    }
    nextSlotNum += numSlots;
}

//------------------------------------------------------------------------
// EvalToTmp: Replace the node in the given fgArgTabEntry with a temp
//
// Arguments:
//    curArgTabEntry - the fgArgTabEntry for the argument
//    tmpNum         - the varNum for the temp
//    newNode        - the assignment of the argument value to the temp
//
// Notes:
//    Although the name of this method is EvalToTmp, it doesn't actually create
//    the temp or the copy.
//
void fgArgInfo::EvalToTmp(fgArgTabEntry* curArgTabEntry, unsigned tmpNum, GenTree* newNode)
{
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}

void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntry* curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != nullptr);
        GenTree* argx = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
#if FEATURE_ARG_SPLIT
        else if (curArgTabEntry->isSplit)
        {
            hasStructRegArg = true;
            hasStackArgs    = true;
        }
#endif // FEATURE_ARG_SPLIT
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there is an assignment somewhere
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

        bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0);
#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw
        // (a call to a jit helper), then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        //
        if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    treatLikeCall = true;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must be evaluated into temps (this is because everything with
           SIDE_EFFECT has to be kept in the right order since we will move the call to the
           first position).

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address taken LclVars.
         */

        if (treatLikeCall)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#if FEATURE_ARG_SPLIT
                else if (prevArgTabEntry->isSplit)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif // FEATURE_ARG_SPLIT
#endif // FEATURE_FIXED_OUT_ARGS
            }
        }

#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_
        bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1);
#else
        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
#endif

        if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
#if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_)
                else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
                {
                    // SIMD types do not need the optimization below due to their sizes
                    if (argx->OperIsSimdOrHWintrinsic() ||
                        (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
                         argx->AsObj()->gtOp1->gtOp.gtOp1->OperIsSimdOrHWintrinsic()))
                    {
                        curArgTabEntry->needTmp = true;
                    }
                }
#endif
#ifndef _TARGET_ARM_
                // TODO-Arm: This optimization is not implemented for ARM32
                // so we skip this for ARM32 until it is ported to use RyuJIT backend
                //
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;

                        case 11:
                        case 13:
                        case 14:
                        case 15:
                            // Spill any GT_OBJ multireg structs that are difficult to extract
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
#endif // !_TARGET_ARM_
            }
        }
#endif // FEATURE_MULTIREG_ARGS
    }

    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmarks globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTree* argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
                    // Thus we cannot reorder the argument after any stack based argument.
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    //  check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns true if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                    }
                }
            }
        }
    }

    argsComplete = true;
}

void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */
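
    // Worked example (an illustrative sketch, not from the original source):
    // for a call f(helperCall(), x + y, 5, lclVar), the constant 5 and lclVar
    // move to the end of the table, helperCall() moves to the beginning, and
    // x + y lands in the middle, ordered by its evaluation cost.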

    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    unsigned regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntry* curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTree* argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTree* argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTree* argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }

    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntry* expensiveArgTabEntry = nullptr;
        unsigned       expensiveArg         = UINT_MAX;
        unsigned       expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTree* argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table
        //
        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntry* curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS
}

#ifdef DEBUG
void fgArgInfo::Dump(Compiler* compiler)
{
    for (unsigned curInx = 0; curInx < ArgCount(); curInx++)
    {
        fgArgTabEntry* curArgEntry = ArgTable()[curInx];
        curArgEntry->Dump();
    }
}
#endif

//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    curArgTabEntry
//
// Return Value:
//    the newly created temp var tree.

GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
{
    unsigned   tmpVarNum = curArgTabEntry->tmpNum;
    LclVarDsc* varDsc    = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTree* arg      = gtNewLclvNode(tmpVarNum, type);
    GenTree* addrNode = nullptr;

    if (varTypeIsStruct(type))
    {
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)

        // Can this type be passed as a primitive type?
        // If so, the following call will return the corresponding primitive type.
        // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type.
        //
        bool passedAsPrimitive = false;
        if (curArgTabEntry->isSingleRegOrSlot())
        {
            CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
            var_types            structBaseType =
                getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->isVararg);

            if (structBaseType != TYP_UNKNOWN)
            {
                passedAsPrimitive = true;
#if defined(UNIX_AMD64_ABI)
                // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry,
                // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take
                // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again.
                //
                if (genIsValidFloatReg(curArgTabEntry->regNum))
                {
                    if (structBaseType == TYP_INT)
                    {
                        structBaseType = TYP_FLOAT;
                    }
                    else
                    {
                        assert(structBaseType == TYP_LONG);
                        structBaseType = TYP_DOUBLE;
                    }
                }
#endif // UNIX_AMD64_ABI
                type = structBaseType;
            }
        }

        // If it is passed in registers, don't get the address of the var. Make it a
        // field instead. It will be loaded in registers with putarg_reg tree in lower.
        if (passedAsPrimitive)
        {
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
        else
        {
            var_types addrType = TYP_BYREF;
            arg                = gtNewOperNode(GT_ADDR, addrType, arg);
            addrNode           = arg;
2146 #if FEATURE_MULTIREG_ARGS
2147 #ifdef _TARGET_ARM64_
2148 assert(varTypeIsStruct(type));
2149 if (lvaIsMultiregStruct(varDsc, curArgTabEntry->isVararg))
2151 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
2152 // as that is how UNIX_AMD64_ABI works.
2153 // We will create a GT_OBJ for the argument below.
2154 // This will be passed by value in two registers.
2155 assert(addrNode != nullptr);
2157 // Create an Obj of the temp to use it as a call argument.
2158 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2161 // Always create an Obj of the temp to use it as a call argument.
2162 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2163 #endif // !_TARGET_ARM64_
2164 #endif // FEATURE_MULTIREG_ARGS
2167 #else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_)
// On other targets, we pass the struct by value.
2170 assert(varTypeIsStruct(type));
2172 addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2174 // Get a new Obj node temp to use it as a call argument.
2175 // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
2176 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
2178 #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_)
2180 } // (varTypeIsStruct(type))
2182 if (addrNode != nullptr)
2184 assert(addrNode->gtOper == GT_ADDR);
2186 // This will prevent this LclVar from being optimized away
2187 lvaSetVarAddrExposed(tmpVarNum);
2189 // the child of a GT_ADDR is required to have this flag set
2190 addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
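        // For example (illustrative): for a struct temp V03 that is not passed as a
        // primitive, the argument tree built above is ADDR(LCL_VAR V03), possibly
        // wrapped in an OBJ for multireg passing; V03 is marked address-exposed and
        // its LCL_VAR child is marked GTF_DONT_CSE here.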
2196 //------------------------------------------------------------------------------
2197 // EvalArgsToTemps : Create temp assignments and populate the LateArgs list.
2199 void fgArgInfo::EvalArgsToTemps()
2203 unsigned regArgInx = 0;
2204 // Now go through the argument table and perform the necessary evaluation into temps
2205 GenTreeArgList* tmpRegArgNext = nullptr;
2206 for (unsigned curInx = 0; curInx < argCount; curInx++)
2208 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2210 GenTree* argx = curArgTabEntry->node;
2211 GenTree* setupArg = nullptr;
2214 #if !FEATURE_FIXED_OUT_ARGS
2215 // Only ever set for FEATURE_FIXED_OUT_ARGS
2216 assert(curArgTabEntry->needPlace == false);
2218 // On x86 and other archs that use push instructions to pass arguments:
2219 // Only the register arguments need to be replaced with placeholder nodes.
2220 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2222 if (curArgTabEntry->regNum == REG_STK)
2226 if (curArgTabEntry->needTmp)
2228 if (curArgTabEntry->isTmp == true)
2230 // Create a copy of the temp to go into the late argument list
2231 defArg = compiler->fgMakeTmpArgNode(curArgTabEntry);
2233 // mark the original node as a late argument
2234 argx->gtFlags |= GTF_LATE_ARG;
2238 // Create a temp assignment for the argument
2239 // Put the temp in the gtCallLateArgs list
2240 CLANG_FORMAT_COMMENT_ANCHOR;
2243 if (compiler->verbose)
2245 printf("Argument with 'side effect'...\n");
2246 compiler->gtDispTree(argx);
2250 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
2251 noway_assert(argx->gtType != TYP_STRUCT);
2254 unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2255 if (argx->gtOper == GT_MKREFANY)
// For GT_MKREFANY, typically the actual struct copying does
// not have any side-effects and can be delayed. So instead
// of using a temp for the whole struct, we can just use a temp
// for the operand that has a side-effect.
2262 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2264 operand = argx->gtOp.gtOp1;
2266 // In the early argument evaluation, place an assignment to the temp
2267 // from the source operand of the mkrefany
2268 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2270 // Replace the operand for the mkrefany with the new temp.
2271 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2273 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2275 operand = argx->gtOp.gtOp2;
2277 // In the early argument evaluation, place an assignment to the temp
2278 // from the source operand of the mkrefany
2279 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2281 // Replace the operand for the mkrefany with the new temp.
2282 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2286 if (setupArg != nullptr)
2288 // Now keep the mkrefany for the late argument list
2291 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2292 defArg->gtFlags &= ~GTF_ALL_EFFECT;
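                        // For example (illustrative): for GT_MKREFANY(op1, op2) where only
                        // op1 has side effects, the early list gets ASG(tmpN, op1) and the
                        // deferred argument becomes GT_MKREFANY(LCL_VAR tmpN, op2), which
                        // is now free of side effects.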
2296 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2298 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2299 var_types lclVarType = genActualType(argx->gtType);
2300 var_types scalarType = TYP_UNKNOWN;
2302 if (setupArg->OperIsCopyBlkOp())
2304 setupArg = compiler->fgMorphCopyBlock(setupArg);
2305 #if defined(_TARGET_ARMARCH_) || defined(UNIX_AMD64_ABI)
2306 if (lclVarType == TYP_STRUCT)
// This scalar LclVar widening step is only performed for ARM and Unix AMD64 architectures.
2310 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2311 unsigned structSize = varDsc->lvExactSize;
2314 compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
2316 #endif // _TARGET_ARMARCH_ || defined (UNIX_AMD64_ABI)
// scalarType can be set to a wider type for ARM or Unix AMD64 architectures: (3 => 4) or (5,6,7 => 8)
2321 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2323 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2324 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2328 // Create a copy of the temp to go to the late argument list
2329 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2332 curArgTabEntry->isTmp = true;
2333 curArgTabEntry->tmpNum = tmpVarNum;
2336 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2337 // might have left holes in the used registers (see
2338 // fgAddSkippedRegsInPromotedStructArg).
2339 // Too bad we're not that smart for these intermediate temps...
2340 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2342 regNumber argReg = curArgTabEntry->regNum;
2343 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2344 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2346 argReg = genRegArgNext(argReg);
2347 allUsedRegs |= genRegMask(argReg);
2350 #endif // _TARGET_ARM_
2353 /* mark the assignment as a late argument */
2354 setupArg->gtFlags |= GTF_LATE_ARG;
2357 if (compiler->verbose)
2359 printf("\n Evaluate to a temp:\n");
2360 compiler->gtDispTree(setupArg);
2365 else // curArgTabEntry->needTmp == false
2368 // Only register args are replaced with placeholder nodes
2369 // and the stack based arguments are evaluated and pushed in order.
2371 // On Arm/x64 - When needTmp is false and needPlace is false,
2372 // the non-register arguments are evaluated and stored in order.
2373 // When needPlace is true we have a nested call that comes after
2374 // this argument so we have to replace it in the gtCallArgs list
2375 // (the initial argument evaluation list) with a placeholder.
2377 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2382 /* No temp needed - move the whole node to the gtCallLateArgs list */
2384 /* The argument is deferred and put in the late argument list */
2388 // Create a placeholder node to put in its place in gtCallLateArgs.
2390 // For a struct type we also need to record the class handle of the arg.
2391 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2393 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
2395 // All structs are either passed (and retyped) as integral types, OR they
2396 // are passed by reference.
2397 noway_assert(argx->gtType != TYP_STRUCT);
2399 #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
2401 if (defArg->TypeGet() == TYP_STRUCT)
2403 clsHnd = compiler->gtGetStructHandleIfPresent(defArg);
2404 noway_assert(clsHnd != NO_CLASS_HANDLE);
2407 #endif // !(defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI))
2409 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2411 /* mark the placeholder node as a late argument */
2412 setupArg->gtFlags |= GTF_LATE_ARG;
2415 if (compiler->verbose)
2417 if (curArgTabEntry->regNum == REG_STK)
2419 printf("Deferred stack argument :\n");
2423 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2426 compiler->gtDispTree(argx);
2427 printf("Replaced with placeholder node:\n");
2428 compiler->gtDispTree(setupArg);
2433 if (setupArg != nullptr)
2435 if (curArgTabEntry->parent)
2437 GenTree* parent = curArgTabEntry->parent;
2438 /* a normal argument from the list */
2439 noway_assert(parent->OperIsList());
2440 noway_assert(parent->gtOp.gtOp1 == argx);
2442 parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT);
2444 parent->gtOp.gtOp1 = setupArg;
2448 /* must be the gtCallObjp */
2449 noway_assert(callTree->gtCall.gtCallObjp == argx);
2451 callTree->gtCall.gtCallObjp = setupArg;
2455 /* deferred arg goes into the late argument list */
2457 if (tmpRegArgNext == nullptr)
2459 tmpRegArgNext = compiler->gtNewArgList(defArg);
2460 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2464 noway_assert(tmpRegArgNext->OperIsList());
2465 noway_assert(tmpRegArgNext->Current());
2466 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2468 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2469 tmpRegArgNext = tmpRegArgNext->Rest();
2472 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
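            // For example (illustrative): after three deferred args the late list has the
            // shape GT_LIST(def1, GT_LIST(def2, GT_LIST(def3, nullptr))), with
            // tmpRegArgNext always pointing at the most recently appended list node.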
2474 curArgTabEntry->node = defArg;
2475 curArgTabEntry->lateArgInx = regArgInx++;
2479 if (compiler->verbose)
2481 printf("\nShuffled argument table: ");
2482 for (unsigned curInx = 0; curInx < argCount; curInx++)
2484 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2486 if (curArgTabEntry->regNum != REG_STK)
2488 printf("%s ", getRegName(curArgTabEntry->regNum));
2496 // Return a conservative estimate of the stack size in bytes.
2497 // It will be used only on the intercepted-for-host code path to copy the arguments.
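// For example (illustrative): with MAX_REG_ARG == 4 and REGSIZE_BYTES == 8, a call with
// 6 arguments would be estimated at (6 - 4) * 8 = 16 bytes of stack.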
2498 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2502 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2508 if (numArgs > MAX_REG_ARG)
2510 numStkArgs = numArgs - MAX_REG_ARG;
2517 return numStkArgs * REGSIZE_BYTES;
2520 //------------------------------------------------------------------------------
2521 // fgMakeMultiUse : If the node is a local, clone it, otherwise insert a comma form temp
//    pOp - a pointer to the child node we will be replacing with the comma expression that
//          evaluates *pOp to a temp and returns the result
2528 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2530 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2532 GenTree* tree = *pOp;
2533 if (tree->IsLocal())
2535 return gtClone(tree);
2539 return fgInsertCommaFormTemp(pOp);
2543 //------------------------------------------------------------------------------
2544 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2545 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2548 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2549 // evaluates ppTree to a temp and returns the result
2551 // structType - value type handle if the temp created is of TYP_STRUCT.
2554 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2557 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2559 GenTree* subTree = *ppTree;
2561 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2563 if (varTypeIsStruct(subTree))
2565 assert(structType != nullptr);
2566 lvaSetStruct(lclNum, structType, false);
// If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
// The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() when
// setting the type of the LclVar nodes we create here.
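    // For example (illustrative): if *ppTree is a GT_CALL node, after this function runs
    //     *ppTree == COMMA(ASG(LCL_VAR tmpN, CALL), LCL_VAR tmpN)
    // and the caller receives a second, fresh LCL_VAR tmpN referencing the same value.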
2572 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2574 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum);
2576 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2580 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum);
2583 //------------------------------------------------------------------------
2584 // fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg
2587 // callNode - the call for which we are generating the fgArgInfo
// This method is idempotent: it checks whether the fgArgInfo has already been
// constructed, and if so, simply returns.
2595 // This method only computes the arg table and arg entries for the call (the fgArgInfo),
2596 // and makes no modification of the args themselves.
2598 void Compiler::fgInitArgInfo(GenTreeCall* call)
2603 unsigned argIndex = 0;
2604 unsigned intArgRegNum = 0;
2605 unsigned fltArgRegNum = 0;
2606 unsigned argSlots = 0;
2608 bool callHasRetBuffArg = call->HasRetBufArg();
2609 bool callIsVararg = call->IsVarargs();
2612 regMaskTP argSkippedRegMask = RBM_NONE;
2613 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2614 #endif // _TARGET_ARM_
2616 #if defined(_TARGET_X86_)
2617 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2619 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2622 if (call->fgArgInfo != nullptr)
2624 // We've already initialized and set the fgArgInfo.
2627 JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
2629 // At this point, we should never have gtCallLateArgs, as this needs to be done before those are determined.
2630 assert(call->gtCallLateArgs == nullptr);
2632 #ifdef _TARGET_UNIX_
// Currently, native varargs are not implemented on non-Windows targets.
//
// Note that some targets, like Arm64 Unix, should not need much work as
// the ABI is the same, while other targets may only need small changes,
// such as amd64 Unix, which just expects RAX to pass numFPArguments.
2640 NYI("Morphing Vararg call not yet implemented on non Windows targets.");
2642 #endif // _TARGET_UNIX_
2644 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2645 // following the normal calling convention or in the normal argument registers. We either mark existing
2646 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2647 // non-standard arguments into the argument list, below.
2648 class NonStandardArgs
2650 struct NonStandardArg
2652 regNumber reg; // The register to be assigned to this non-standard argument.
2653 GenTree* node; // The tree node representing this non-standard argument.
2654 // Note that this must be updated if the tree node changes due to morphing!
2657 ArrayStack<NonStandardArg> args;
2660 NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments
2664 //-----------------------------------------------------------------------------
2665 // Add: add a non-standard argument to the table of non-standard arguments
2668 // node - a GenTree node that has a non-standard argument.
2669 // reg - the register to assign to this node.
2674 void Add(GenTree* node, regNumber reg)
2676 NonStandardArg nsa = {reg, node};
2680 //-----------------------------------------------------------------------------
2681 // Find: Look for a GenTree* in the set of non-standard args.
2684 // node - a GenTree node to look for
2687 // The index of the non-standard argument (a non-negative, unique, stable number).
2688 // If the node is not a non-standard argument, return -1.
2690 int Find(GenTree* node)
2692 for (int i = 0; i < args.Height(); i++)
2694 if (node == args.Index(i).node)
2702 //-----------------------------------------------------------------------------
2703 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2704 // set the register to use for the node.
2707 // node - a GenTree node to look for
2708 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2709 // 'node' is found in the non-standard argument set.
//    'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
//    register to use for the argument.
2714 // 'false' otherwise (in this case, *pReg is unmodified).
2716 bool FindReg(GenTree* node, regNumber* pReg)
2718 for (int i = 0; i < args.Height(); i++)
2720 NonStandardArg& nsa = args.IndexRef(i);
2721 if (node == nsa.node)
2730 //-----------------------------------------------------------------------------
2731 // Replace: Replace the non-standard argument node at a given index. This is done when
2732 // the original node was replaced via morphing, but we need to continue to assign a
2733 // particular non-standard arg to it.
2736 // index - the index of the non-standard arg. It must exist.
2737 // node - the new GenTree node.
2742 void Replace(int index, GenTree* node)
2744 args.IndexRef(index).node = node;
2747 } nonStandardArgs(getAllocator(CMK_ArrayStack));
2749 // Count of args. On first morph, this is counted before we've filled in the arg table.
2750 // On remorph, we grab it from the arg table.
2751 unsigned numArgs = 0;
2753 // First we need to count the args
2754 if (call->gtCallObjp)
2758 for (GenTree* args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2763 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2764 // arguments registers that don't follow the normal progression of argument registers in the calling
2765 // convention (such as for the ARM64 fixed return buffer argument x8).
2767 // *********** NOTE *************
2768 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2769 // in the implementation of fast tail call.
2770 // *********** END NOTE *********
2771 CLANG_FORMAT_COMMENT_ANCHOR;
2773 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
// The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
2775 // Set the argument registers correctly here.
2776 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2778 GenTreeArgList* args = call->gtCallArgs;
2779 GenTree* arg1 = args->Current();
2780 assert(arg1 != nullptr);
2781 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2783 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2784 #if defined(_TARGET_ARM_)
// On ARM only, a non-standard calling convention using the secure delegate invoke mechanism is
// used for VSD delegate calls (not for secure delegates themselves), where the VSD custom calling
// convention ABI requires passing
2787 // R4, a callee-saved register, with a special value. Since R4 is a callee-saved register, its value needs
2788 // to be preserved. Thus, the VM uses a secure delegate IL stub, which preserves R4 and also sets up R4
2789 // correctly for the VSD call. The VM is simply reusing an existing mechanism (secure delegate IL stub)
2790 // to achieve its goal for delegate VSD call. See COMDelegate::NeedsWrapperDelegate() in the VM for details.
2791 else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)
2793 GenTree* arg = call->gtCallObjp;
2794 if (arg->OperIsLocal())
2796 arg = gtClone(arg, true);
2800 GenTree* tmp = fgInsertCommaFormTemp(&arg);
2801 call->gtCallObjp = arg;
2802 call->gtFlags |= GTF_ASG;
2805 noway_assert(arg != nullptr);
2807 GenTree* newArg = new (this, GT_ADDR)
2808 GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell);
2810 // Append newArg as the last arg
2811 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2812 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2815 *insertionPoint = gtNewListNode(newArg, nullptr);
2818 nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg());
2820 #endif // defined(_TARGET_ARM_)
2821 #if defined(_TARGET_X86_)
2822 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2823 // hi part to be in EDX. This sets the argument registers up correctly.
2824 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2825 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2827 GenTreeArgList* args = call->gtCallArgs;
2828 GenTree* arg1 = args->Current();
2829 assert(arg1 != nullptr);
2830 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2832 args = args->Rest();
2833 GenTree* arg2 = args->Current();
2834 assert(arg2 != nullptr);
2835 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
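        // For example (illustrative): for a 64-bit shift, the first argument (the lo half
        // of the long) is pinned to EAX (REG_LNGARG_LO) and the second (the hi half) to
        // EDX (REG_LNGARG_HI); any remaining arguments follow the normal convention.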
2837 #else // !_TARGET_X86_
2838 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2839 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2840 // convention for x86/SSE.
2842 // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
2844 if (hasFixedRetBuffReg() && call->HasRetBufArg())
2846 args = call->gtCallArgs;
2847 assert(args != nullptr);
2848 assert(args->OperIsList());
2850 argx = call->gtCallArgs->Current();
2852 // We don't increment numArgs here, since we already counted this argument above.
2854 nonStandardArgs.Add(argx, theFixedRetBuffReg());
2857 // We are allowed to have a Fixed Return Buffer argument combined
2858 // with any of the remaining non-standard arguments
2860 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2862 assert(!call->gtCallCookie);
2863 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2864 // It will be used only on the intercepted-for-host code path to copy the arguments.
2866 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2867 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2870 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2872 else if (call->IsVirtualStub())
2874 if (!call->IsTailCallViaHelper())
2876 GenTree* stubAddrArg = fgGetStubAddrArg(call);
2877 // And push the stub address onto the list of arguments
2878 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
2881 nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum);
// If it is a VSD call getting dispatched via the tail call helper,
// fgMorphTailCall() will materialize the stub addr as an additional
// parameter added to the original arg list, and hence there is no need
// to add it as a non-standard arg.
2892 #endif // !_TARGET_X86_
2893 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2895 assert(!call->IsUnmanaged());
2897 GenTree* arg = call->gtCallCookie;
2898 noway_assert(arg != nullptr);
2899 call->gtCallCookie = nullptr;
2901 #if defined(_TARGET_X86_)
2902 // x86 passes the cookie on the stack as the final argument to the call.
2903 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2904 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2907 *insertionPoint = gtNewListNode(arg, nullptr);
2908 #else // !defined(_TARGET_X86_)
2909 // All other architectures pass the cookie in a register.
2910 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2911 #endif // defined(_TARGET_X86_)
2913 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2916 // put destination into R10/EAX
2917 arg = gtClone(call->gtCallAddr, true);
2918 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2921 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2923 // finally change this call to a helper call
2924 call->gtCallType = CT_HELPER;
2925 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2927 #if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_)
2928 // For arm, we dispatch code same as VSD using virtualStubParamInfo->GetReg()
2929 // for indirection cell address, which ZapIndirectHelperThunk expects.
2930 if (call->IsR2RRelativeIndir())
2932 assert(call->gtEntryPoint.addr != nullptr);
2934 size_t addrValue = (size_t)call->gtEntryPoint.addr;
2935 GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR);
2936 indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM;
2938 // Push the stub address onto the list of arguments.
2939 call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs);
2942 nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum);
2945 #endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_
2947 // Allocate the fgArgInfo for the call node;
2949 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2951 // Add the 'this' argument value, if present.
2952 argx = call->gtCallObjp;
2953 if (argx != nullptr)
2955 assert(argIndex == 0);
2956 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
2957 assert(varTypeIsGC(argx) || (argx->gtType == TYP_I_IMPL));
2959 // This is a register argument - put it in the table.
2960 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false,
2961 callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr));
2964 #ifdef WINDOWS_AMD64_ABI
2965 // Whenever we pass an integer register argument
2966 // we skip the corresponding floating point register argument
2968 #endif // WINDOWS_AMD64_ABI
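    // For example (illustrative): on Windows x64, argument position N always consumes
    // both the Nth integer and the Nth floating-point register, so passing 'this' in RCX
    // means XMM0 will not be used for a later floating-point argument.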
2974 // Compute the maximum number of arguments that can be passed in registers.
2975 // For X86 we handle the varargs and unmanaged calling conventions
2977 if (call->gtFlags & GTF_CALL_POP_ARGS)
2979 noway_assert(intArgRegNum < MAX_REG_ARG);
2980 // No more register arguments for varargs (CALL_POP_ARGS)
2981 maxRegArgs = intArgRegNum;
2983 // Add in the ret buff arg
2984 if (callHasRetBuffArg)
2988 if (call->IsUnmanaged())
2990 noway_assert(intArgRegNum == 0);
2992 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
2994 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
2995 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
2996 call->gtCallArgs->gtOp.gtOp1->gtOper ==
2997 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3005 // Add in the ret buff arg
3006 if (callHasRetBuffArg)
3009 #endif // _TARGET_X86_
3011 /* Morph the user arguments */
3012 CLANG_FORMAT_COMMENT_ANCHOR;
3014 #if defined(_TARGET_ARM_)
3016 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3017 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3018 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3019 // appear in a lower-numbered register than floating point argument N. That is, argument
3020 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3021 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3022 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3023 // only back-fill single registers, since there is no way with these types to create
3024 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3025 // available (with 16 FP argument registers). Consider this code:
3027 // struct HFA { float x, y, z; }; // a three element HFA
3028 // void bar(float a1, // passed in f0
3029 // double a2, // passed in f2/f3; skip f1 for alignment
3030 // HFA a3, // passed in f4/f5/f6
3031 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3032 // HFA a5, // passed in f10/f11/f12
// double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
3035 // float a7, // passed in f1 (back-filled)
3036 // float a8, // passed in f7 (back-filled)
3037 // float a9, // passed in f13 (back-filled)
3038 // float a10) // passed on the stack in [OutArg+0]
3040 // Note that if we ever support FP types with larger alignment requirements, then there could
3041 // be more than single register back-fills.
// Once we assign a floating-point argument to the stack, all subsequent floating-point arguments must be on the stack.
3044 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3045 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3046 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3047 // and prevent any additional floating-point arguments from going in registers.
3049 bool anyFloatStackArgs = false;
3051 #endif // _TARGET_ARM_
3053 #ifdef UNIX_AMD64_ABI
3054 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3055 #endif // UNIX_AMD64_ABI
3057 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3059 assert(args->OperIsList());
3060 argx = args->Current();
3061 fgArgTabEntry* argEntry = nullptr;
3063 // Change the node to TYP_I_IMPL so we don't report GC info
3064 // NOTE: We deferred this from the importer because of the inliner.
3066 if (argx->IsLocalAddrExpr() != nullptr)
3068 argx->gtType = TYP_I_IMPL;
3071 // We should never have any ArgPlaceHolder nodes at this point.
3072 assert(!argx->IsArgPlaceHolderNode());
3074 // Setup any HFA information about 'argx'
3075 bool isHfaArg = false;
3076 var_types hfaType = TYP_UNDEF;
3077 unsigned hfaSlots = 0;
3079 bool passUsingFloatRegs;
3080 unsigned argAlign = 1;
3082 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3083 bool isRegArg = false;
3084 bool isNonStandard = false;
3085 regNumber nonStdRegNum = REG_NA;
3088 hfaType = GetHfaType(argx);
3089 isHfaArg = varTypeIsValidHfaType(hfaType);
3091 #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3092 // Make sure for vararg methods isHfaArg is not true.
3093 isHfaArg = callIsVararg ? false : isHfaArg;
3094 #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3099 hfaSlots = GetHfaCount(argx);
// If we have an HFA struct, it's possible we transition from a method that originally
// only had integer types to one that now starts having FP types. We have to communicate
// this through this flag, since LSRA will later use it to determine whether
// or not to track the FP register set.
3106 compFloatingPointUsed = true;
3108 #endif // FEATURE_HFA
3111 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3112 bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3114 // We don't use the "size" return value from InferOpSizeAlign().
3115 codeGen->InferOpSizeAlign(argx, &argAlign);
3117 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3118 argAlign /= TARGET_POINTER_SIZE;
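            // For example (illustrative): an 8-byte-aligned double yields argAlign == 2
            // (two 4-byte slots), which triggers the register/slot skipping below so the
            // argument starts on an even register number or slot.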
3122 if (passUsingFloatRegs)
3124 if (fltArgRegNum % 2 == 1)
3126 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3130 else if (passUsingIntRegs)
3132 if (intArgRegNum % 2 == 1)
3134 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3139 if (argSlots % 2 == 1)
3145 #elif defined(_TARGET_ARM64_)
3147 assert(!callIsVararg || !isHfaArg);
3148 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3150 #elif defined(_TARGET_AMD64_)
3152 passUsingFloatRegs = varTypeIsFloating(argx);
3154 #elif defined(_TARGET_X86_)
3156 passUsingFloatRegs = false;
3159 #error Unsupported or unset target architecture
3162 bool isBackFilled = false;
3163 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3164 var_types structBaseType = TYP_STRUCT;
3165 unsigned structSize = 0;
3166 bool passStructByRef = false;
3169 GenTree* actualArg = argx->gtEffectiveVal(true /* Commas only */);
3172 // Figure out the size of the argument. This is either in number of registers, or number of
// TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
// the stack.
3176 isStructArg = varTypeIsStruct(argx);
3177 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
3180 objClass = gtGetStructHandle(argx);
3181 if (argx->TypeGet() == TYP_STRUCT)
3183 // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY
3184 switch (actualArg->OperGet())
3187 // Get the size off the OBJ node.
3188 structSize = actualArg->AsObj()->gtBlkSize;
3189 assert(structSize == info.compCompHnd->getClassSize(objClass));
3192 structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize;
3195 structSize = info.compCompHnd->getClassSize(objClass);
3198 BADCODE("illegal argument tree in fgInitArgInfo");
3204 structSize = genTypeSize(argx);
3205 assert(structSize == info.compCompHnd->getClassSize(objClass));
3208 #if defined(_TARGET_AMD64_)
3209 #ifdef UNIX_AMD64_ABI
3212 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3216 size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3217 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3219 #else // !UNIX_AMD64_ABI
3220 size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot'
3221 #endif // UNIX_AMD64_ABI
3222 #elif defined(_TARGET_ARM64_)
3227 // HFA structs are passed by value in multiple registers.
3228 // The "size" in registers may differ the size in pointer-sized units.
3229 size = GetHfaCount(argx);
3233 // Structs are either passed in 1 or 2 (64-bit) slots.
3234 // Structs that are the size of 2 pointers are passed by value in multiple registers,
3235 // if sufficient registers are available.
3236 // Structs that are larger than 2 pointers (except for HFAs) are passed by
3237 // reference (to a copy)
3238 size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3245 // Note that there are some additional rules for multireg structs.
// (i.e., they cannot be split between registers and the stack)
3250 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3252 #elif defined(_TARGET_ARM_) || defined(_TARGET_X86_)
3255 size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3259 // The typical case.
3260 // Long/double type argument(s) will be modified as needed in Lowering.
3261 size = genTypeStSz(argx->gtType);
3264 #error Unsupported or unset target architecture
3265 #endif // _TARGET_XXX_
// We have an argument with a struct type, but it may be a child of a GT_COMMA
3269 GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
3271 assert(args->OperIsList());
3272 assert(argx == args->Current());
3274 unsigned originalSize = structSize;
3275 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3276 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3278 structSize = originalSize;
3280 structPassingKind howToPassStruct;
3282 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize);
3284 bool passedInRegisters = false;
3285 passStructByRef = (howToPassStruct == SPK_ByReference);
3287 if (howToPassStruct == SPK_PrimitiveType)
3289 // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register.
3290 // For ARM or AMD64/Windows only power-of-2 structs are passed in registers.
3291 #if !defined(_TARGET_ARM64_) && !defined(UNIX_AMD64_ABI)
3292 if (!isPow2(originalSize))
3293 #endif // !_TARGET_ARM64_ && !UNIX_AMD64_ABI
3295 passedInRegisters = true;
3298 // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct,
3299 // or for a struct of two floats. This causes the struct to be address-taken.
3300 if (structBaseType == TYP_DOUBLE)
3305 #endif // _TARGET_ARM_
3310 else if (passStructByRef)
// The 'size' value must now have been set (the original value of zero is an invalid value).
3320 // Figure out if the argument will be passed in a register.
3323 if (isRegParamType(genActualType(argx->TypeGet()))
3324 #ifdef UNIX_AMD64_ABI
3325 && (!isStructArg || structDesc.passedInRegisters)
3330 if (passUsingFloatRegs)
3332 // First, see if it can be back-filled
3333 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3334 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3335 (size == 1)) // The size to back-fill is one float register
3337 // Back-fill the register.
3338 isBackFilled = true;
3339 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3340 fltArgSkippedRegMask &=
3341 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3342 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3343 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3346 // Does the entire float, double, or HFA fit in the FP arg registers?
3347 // Check if the last register needed is still in the argument register range.
3348 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3352 anyFloatStackArgs = true;
3357 isRegArg = intArgRegNum < MAX_REG_ARG;
3359 #elif defined(_TARGET_ARM64_)
3360 if (passUsingFloatRegs)
3362 // Check if the last register needed is still in the fp argument register range.
3363 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3365 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3366 if (isHfaArg && !isRegArg)
// Recompute the 'size' so that it represents the number of stack slots rather than the number of
// registers.
3371 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3372 size = roundupSize / TARGET_POINTER_SIZE;
3374 // We also must update fltArgRegNum so that we no longer try to
3375 // allocate any new floating point registers for args
3376 // This prevents us from backfilling a subsequent arg into d7
3378 fltArgRegNum = MAX_FLOAT_REG_ARG;
3383 // Check if the last register needed is still in the int argument register range.
3384 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
// Did we run out of registers when we had a 16-byte struct (size == 2)?
3387 // (i.e we only have one register remaining but we needed two registers to pass this arg)
3388 // This prevents us from backfilling a subsequent arg into x7
3390 if (!isRegArg && (size > 1))
3392 #if defined(_TARGET_WINDOWS_)
// Arm64 Windows native varargs allows splitting a 16-byte struct between the stack
// and the last general purpose register.
3397 // Override the decision and force a split.
isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs;
3401 #endif // defined(_TARGET_WINDOWS_)
3403 // We also must update intArgRegNum so that we no longer try to
3404 // allocate any new general purpose registers for args
3406 intArgRegNum = maxRegArgs;
3410 #else // not _TARGET_ARM_ or _TARGET_ARM64_
3412 #if defined(UNIX_AMD64_ABI)
// Here a struct can be passed in registers, following the classification of its members and size.
3415 // Now make sure there are actually enough registers to do so.
3418 unsigned int structFloatRegs = 0;
3419 unsigned int structIntRegs = 0;
3420 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3422 if (structDesc.IsIntegralSlot(i))
3426 else if (structDesc.IsSseSlot(i))
3432 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3433 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3437 if (passUsingFloatRegs)
3439 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3443 isRegArg = intArgRegNum < MAX_REG_ARG;
3446 #else // !defined(UNIX_AMD64_ABI)
3447 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3448 #endif // !defined(UNIX_AMD64_ABI)
3449 #endif // _TARGET_ARM_
// If there are nonstandard args (outside the calling convention), they were inserted above
// and noted in a table so we can recognize them here and build their argInfo.
3459 // They should not affect the placement of any other args or stack space required.
3460 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3461 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3464 isRegArg = (nonStdRegNum != REG_STK);
3466 #if defined(_TARGET_X86_)
3467 else if (call->IsTailCallViaHelper())
3469 // We have already (before calling fgMorphArgs()) appended the 4 special args
3470 // required by the x86 tailcall helper. These args are required to go on the
3471 // stack. Force them to the stack here.
3472 assert(numArgs >= 4);
3473 if (argIndex >= numArgs - 4)
3478 #endif // defined(_TARGET_X86_)
3480 // Now we know if the argument goes in registers or not and how big it is.
3481 CLANG_FORMAT_COMMENT_ANCHOR;
3484 // If we ever allocate a floating point argument to the stack, then all
3485 // subsequent HFA/float/double arguments go on the stack.
3486 if (!isRegArg && passUsingFloatRegs)
3488 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3490 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3494 // If we think we're going to split a struct between integer registers and the stack, check to
3495 // see if we've already assigned a floating-point arg to the stack.
3496 if (isRegArg && // We decided above to use a register for the argument
3497 !passUsingFloatRegs && // We're using integer registers
3498 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3499 anyFloatStackArgs) // We've already used the stack for a floating-point argument
3501 isRegArg = false; // Change our mind; don't pass this struct partially in registers
3503 // Skip the rest of the integer argument registers
3504 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
3506 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3509 #endif // _TARGET_ARM_
3511 // Now create the fgArgTabEntry.
3512 fgArgTabEntry* newArgEntry;
3515 regNumber nextRegNum = REG_STK;
3517 #if defined(UNIX_AMD64_ABI)
3518 regNumber nextOtherRegNum = REG_STK;
3519 unsigned int structFloatRegs = 0;
3520 unsigned int structIntRegs = 0;
3521 #endif // defined(UNIX_AMD64_ABI)
3525 nextRegNum = nonStdRegNum;
3527 #if defined(UNIX_AMD64_ABI)
3528 else if (isStructArg && structDesc.passedInRegisters)
3530 // It is a struct passed in registers. Assign the next available register.
3531 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
3532 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
3533 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3535 if (structDesc.IsIntegralSlot(i))
3537 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
3540 else if (structDesc.IsSseSlot(i))
3542 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
3547 #endif // defined(UNIX_AMD64_ABI)
3550 // fill in or update the argInfo table
3551 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
3552 : genMapIntRegArgNumToRegNum(intArgRegNum);
3555 #ifdef _TARGET_AMD64_
3556 #ifndef UNIX_AMD64_ABI
3561 // This is a register argument - put it in the table
3562 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign, isStructArg,
3563 callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum)
3564 UNIX_AMD64_ABI_ONLY_ARG(&structDesc));
3566 newArgEntry->SetIsBackFilled(isBackFilled);
3567 newArgEntry->isNonStandard = isNonStandard;
3569 // Set up the next intArgRegNum and fltArgRegNum values.
3572 #if defined(UNIX_AMD64_ABI)
3575 // For this case, we've already set the regNums in the argTabEntry
3576 intArgRegNum += structIntRegs;
3577 fltArgRegNum += structFloatRegs;
3580 #endif // defined(UNIX_AMD64_ABI)
3584 #if FEATURE_ARG_SPLIT
3585 // Check for a split (partially enregistered) struct
3586 if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG))
3588 // This indicates a partial enregistration of a struct type
3589 assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
3590 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
3591 unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum;
3592 assert((unsigned char)numRegsPartial == numRegsPartial);
3593 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
3595 #endif // FEATURE_ARG_SPLIT
3597 if (passUsingFloatRegs)
3599 fltArgRegNum += size;
3601 #ifdef WINDOWS_AMD64_ABI
3602 // Whenever we pass an integer register argument
3603 // we skip the corresponding floating point register argument
3604 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
3605 #endif // WINDOWS_AMD64_ABI
3606 // No supported architecture supports partial structs using float registers.
3607 assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
3611 // Increment intArgRegNum by 'size' registers
3612 intArgRegNum += size;
3614 #ifdef WINDOWS_AMD64_ABI
3615 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
3616 #endif // WINDOWS_AMD64_ABI
3622 else // We have an argument that is not passed in a register
3624 // This is a stack argument - put it in the table
3625 newArgEntry = call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg, callIsVararg);
3626 #ifdef UNIX_AMD64_ABI
3627 // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs).
3628 if (structDesc.passedInRegisters)
3630 newArgEntry->structDesc.CopyFrom(structDesc);
3638 newArgEntry->SetHfaType(hfaType, hfaSlots);
3640 #endif // FEATURE_HFA
3641 newArgEntry->SetMultiRegNums();
3643 noway_assert(newArgEntry != nullptr);
3644 if (newArgEntry->isStruct)
3646 newArgEntry->passedByRef = passStructByRef;
3647 newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType;
3651 newArgEntry->argType = argx->TypeGet();
3655 } // end foreach argument loop
3660 JITDUMP("ArgTable for %d.%s after fgInitArgInfo:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
3661 call->fgArgInfo->Dump(this);
3667 //------------------------------------------------------------------------
3668 // fgMorphArgs: Walk and transform (morph) the arguments of a call
3671 // callNode - the call for which we are doing the argument morphing
3674 // Like most morph methods, this method returns the morphed node,
3675 // though in this case there are currently no scenarios where the
3676 // node itself is re-created.
3679 // This calls fgInitArgInfo to create the 'fgArgInfo' for the call.
3680 // If it has already been created, that method will simply return.
3682 // This method changes the state of the call node. It uses the existence
3683 // of gtCallLateArgs (the late arguments list) to determine if it has
3684 // already done the first round of morphing.
3686 // The first time it is called (i.e. during global morphing), this method
3687 // computes the "late arguments". This is when it determines which arguments
3688 // need to be evaluated to temps prior to the main argument setup, and which
3689 // can be directly evaluated into the argument location. It also creates a
3690 // second argument list (gtCallLateArgs) that does the final placement of the
3691 // arguments, e.g. into registers or onto the stack.
3693 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
3694 // evaluation of the arguments that might have side-effects, such as embedded
3695 // assignments, calls or possible throws. In these cases, it and earlier
3696 // arguments must be evaluated to temps.
3698 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
3699 // if we have any nested calls, we need to defer the copying of the argument
3700 // into the fixed argument area until after the call. If the argument did not
3701 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
3702 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
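// For example (illustrative): for a call foo(Bar(), x) where 'x' is a register argument,
// the nested call Bar() is evaluated into a temp by an assignment placed in the early
// list (gtCallArgs), and the temp's use, along with 'x', goes to gtCallLateArgs, which
// performs the final placement into argument registers.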
3705 #pragma warning(push)
3706 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3708 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
3713 unsigned flagsSummary = 0;
3715 unsigned argIndex = 0;
3716 unsigned argSlots = 0;
3718 bool reMorphing = call->AreArgsComplete();
3720 // Set up the fgArgInfo.
3721 fgInitArgInfo(call);
3722 unsigned numArgs = call->fgArgInfo->ArgCount();
3723 JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper));
3725 // If we are remorphing, process the late arguments (which were determined by a previous caller).
3728 // We need to reMorph the gtCallLateArgs early since that is what triggers
3729 // the expression folding and we need to have the final folded gtCallLateArgs
3730 // available when we call UpdateRegArg so that we correctly update the fgArgInfo
3731 // with the folded tree that represents the final optimized argument nodes.
3733 if (call->gtCallLateArgs != nullptr)
3736 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
3737 flagsSummary |= call->gtCallLateArgs->gtFlags;
3739 assert(call->fgArgInfo != nullptr);
3741 call->fgArgInfo->RemorphReset();
3743 // First we morph the argument subtrees ('this' pointer, arguments, etc.).
3744 // During the first call to fgMorphArgs we also record the
3745 // information about late arguments we have in 'fgArgInfo'.
// This information is used later to construct the gtCallLateArgs.
3748 // Process the 'this' argument value, if present.
3749 argx = call->gtCallObjp;
3752 fgArgTabEntry* thisArgEntry = call->fgArgInfo->GetArgEntry(0, reMorphing);
3753 argx = fgMorphTree(argx);
3754 call->gtCallObjp = argx;
3755 // This is a register argument - possibly update it in the table.
3756 call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing);
3757 flagsSummary |= argx->gtFlags;
3758 assert(argIndex == 0);
3763 // Note that this name is a bit of a misnomer - it indicates that there are struct args
3764 // that occupy more than a single slot that are passed by value (not necessarily in regs).
3765 bool hasMultiregStructArgs = false;
3766 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3768 GenTree** parentArgx = &args->gtOp.gtOp1;
3769 fgArgTabEntry* argEntry = call->fgArgInfo->GetArgEntry(argIndex, reMorphing);
3771 // Morph the arg node, and update the parent and argEntry pointers.
3773 argx = fgMorphTree(argx);
3775 assert(args->OperIsList());
3776 assert(argx == args->Current());
3778 unsigned argAlign = argEntry->alignment;
3779 unsigned size = argEntry->getSize();
3780 CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE;
3784 if (argSlots % 2 == 1)
3789 if (argEntry->isNonStandard)
3791 // We need to update the node field for this nonStandard arg here
3792 // as it may have been changed by the call to fgMorphTree.
3793 call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing);
3794 flagsSummary |= argx->gtFlags;
3799 argSlots += argEntry->getSlotCount();
3801 if (argx->IsLocalAddrExpr() != nullptr)
3803 argx->gtType = TYP_I_IMPL;
3806 // Get information about this argument.
3807 var_types hfaType = argEntry->hfaType;
3808 bool isHfaArg = (hfaType != TYP_UNDEF);
3809 bool isHfaRegArg = argEntry->isHfaRegArg;
3810 unsigned hfaSlots = argEntry->numRegs;
3811 bool passUsingFloatRegs = argEntry->isPassedInFloatRegisters();
3812 bool isBackFilled = argEntry->IsBackFilled();
3813 unsigned structSize = 0;
3815 // Struct arguments may be morphed into a node that is not a struct type.
3816 // In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
3817 // was a struct and the struct classification.
3818 bool isStructArg = argEntry->isStruct;
3820 GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
3821 if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE))
3823 CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj);
3824 unsigned originalSize;
3825 if (argObj->TypeGet() == TYP_STRUCT)
3827 if (argObj->OperIs(GT_OBJ))
3829 // Get the size off the OBJ node.
3830 originalSize = argObj->AsObj()->gtBlkSize;
3831 assert(originalSize == info.compCompHnd->getClassSize(objClass));
3835 // We have a BADCODE assert for this in fgInitArgInfo.
3836 assert(argObj->OperIs(GT_LCL_VAR));
3837 originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize;
3842 originalSize = genTypeSize(argx);
3843 assert(originalSize == info.compCompHnd->getClassSize(objClass));
3845 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3846 var_types structBaseType = argEntry->argType;
3848 // First, handle the case where the argument is passed by reference.
3849 if (argEntry->passedByRef)
3852 copyBlkClass = objClass;
3853 #ifdef UNIX_AMD64_ABI
3854 assert(!"Structs are not passed by reference on x64/ux");
3855 #endif // UNIX_AMD64_ABI
3857 else // This is passed by value.
3860 #ifndef _TARGET_X86_
3861 // Check to see if we can transform this into load of a primitive type.
3862 // 'size' must be the number of pointer sized items
3863 assert(size == roundupSize / TARGET_POINTER_SIZE);
3865 structSize = originalSize;
3866 unsigned passingSize = originalSize;
3868 // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size.
// Whether we can do this is platform-dependent:
3870 // - In general, it can be done for power of 2 structs that fit in a single register.
3871 // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
3872 // - This is irrelevant for X86, since structs are always passed by value on the stack.
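                // For example (illustrative): an 8-byte struct that is passed in a single
                // register and whose tree is OBJ<struct>(ADDR(LCL_VAR V02)) can be rewritten
                // to a GT_IND of the appropriate primitive type, which the code below then
                // folds to a direct use of V02.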
3874 GenTree** parentOfArgObj = parentArgx;
3875 GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
3876 bool canTransform = false;
3878 if (structBaseType != TYP_STRUCT)
3880 if (isPow2(passingSize))
3882 canTransform = (!argEntry->isHfaArg || (passingSize == genTypeSize(argEntry->GetHfaType())));
3885 #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
3886 // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can
3887 // only transform in that case if the arg is a local.
3888 // TODO-CQ: This transformation should be applicable in general, not just for the ARM64
3889 // or UNIX_AMD64_ABI cases where they will be passed in registers.
3892 canTransform = (lclVar != nullptr);
3893 passingSize = genTypeSize(structBaseType);
3895 #endif // _TARGET_ARM64_ || UNIX_AMD64_ABI
3900 #if defined(_TARGET_AMD64_)
3901 #ifndef UNIX_AMD64_ABI
3902 // On Windows structs are always copied and passed by reference (handled above) unless they are
3903 // passed by value in a single register.
3905 copyBlkClass = objClass;
3906 #else // UNIX_AMD64_ABI
3907 // On Unix, structs are always passed by value.
3908 // We only need a copy if we have one of the following:
3909 // - We have a lclVar that has been promoted and is passed in registers.
3910 // - The sizes don't match for a non-lclVar argument.
3911 // - We have a known struct type (e.g. SIMD) that requires multiple registers.
3912 // TODO-Amd64-Unix-CQ: The first case could and should be handled without copies.
3913 // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not
3914 // actually passed in registers.
3915 if (argEntry->isPassedInRegisters())
3917 assert(argEntry->structDesc.passedInRegisters);
3918 if (lclVar != nullptr)
3920 if (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)
3922 copyBlkClass = objClass;
3925 else if (argObj->OperIs(GT_OBJ))
3927 if (passingSize != structSize)
3929 copyBlkClass = objClass;
// This should only be the case for a value that directly produces a known struct type.
3935 assert(argObj->TypeGet() != TYP_STRUCT);
3936 if (argEntry->numRegs > 1)
3938 copyBlkClass = objClass;
3942 #endif // UNIX_AMD64_ABI
3943 #elif defined(_TARGET_ARM64_)
3944 if ((passingSize != structSize) && (lclVar == nullptr))
3946 copyBlkClass = objClass;
3951 // TODO-1stClassStructs: Unify these conditions across targets.
3952 if (((lclVar != nullptr) &&
3953 (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)) ||
3954 ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize)))
3956 copyBlkClass = objClass;
3959 if (structSize < TARGET_POINTER_SIZE)
3961 copyBlkClass = objClass;
3963 #endif // _TARGET_ARM_
// We have a struct argument that fits into a register, and it is either a power of 2
// in size or (on ARM64 / Unix AMD64) a local var that we can transform.
// Change our argument, as needed, into a value of the appropriate type.
3970 CLANG_FORMAT_COMMENT_ANCHOR;
3973 assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2)));
3975 assert((size == 1) ||
3976 (varTypeIsSIMD(structBaseType) && size == (genTypeSize(structBaseType) / REGSIZE_BYTES)));
3979 assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
3981 if (argObj->OperIs(GT_OBJ))
3983 argObj->ChangeOper(GT_IND);
3985 // Now see if we can fold *(&X) into X
3986 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3988 GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3990 // Keep the DONT_CSE flag in sync
3991 // (as the addr always marks it for its op1)
3992 temp->gtFlags &= ~GTF_DONT_CSE;
3993 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3994 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3995 DEBUG_DESTROY_NODE(argObj); // GT_IND
3998 *parentOfArgObj = temp;
4000 // If the OBJ had been the top level node, we've now changed argx.
4001 if (parentOfArgObj == parentArgx)
4007 if (argObj->gtOper == GT_LCL_VAR)
4009 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
4010 LclVarDsc* varDsc = &lvaTable[lclNum];
4012 if (varDsc->lvPromoted)
4014 if (varDsc->lvFieldCnt == 1)
4016 // get the first and only promoted field
4017 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
4018 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
4020 // we will use the first and only promoted field
4021 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
4023 if (varTypeIsEnregisterable(fieldVarDsc->TypeGet()) &&
4024 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
4026 // Just use the existing field's type
4027 argObj->gtType = fieldVarDsc->TypeGet();
4031 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
4033 argObj->ChangeOper(GT_LCL_FLD);
4034 argObj->gtType = structBaseType;
4036 assert(varTypeIsEnregisterable(argObj->TypeGet()));
4037 assert(copyBlkClass == NO_CLASS_HANDLE);
4041 // use GT_LCL_FLD to swizzle the single field struct to a new type
4042 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
4043 argObj->ChangeOper(GT_LCL_FLD);
4044 argObj->gtType = structBaseType;
4049 // The struct fits into a single register, but it has been promoted into its
4050 // constituent fields, and so we have to re-assemble it
4051 copyBlkClass = objClass;
4054 else if (genActualType(varDsc->TypeGet()) != structBaseType)
4056 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
4057 argObj->ChangeOper(GT_LCL_FLD);
4058 argObj->gtType = structBaseType;
4063 // Not a GT_LCL_VAR, so we can just change the type on the node
4064 argObj->gtType = structBaseType;
4066 assert(varTypeIsEnregisterable(argObj->TypeGet()) ||
4067 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType)));
4069 #endif // !_TARGET_X86_
4071 #ifndef UNIX_AMD64_ABI
4072 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
4073 if (isHfaArg && passUsingFloatRegs)
4075 size = argEntry->numRegs;
4077 else if (structBaseType == TYP_STRUCT)
4079 // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
4080 // we must copyblk to a temp before doing the obj to avoid
4081 // the obj reading memory past the end of the valuetype
4082 CLANG_FORMAT_COMMENT_ANCHOR;
4084 if (roundupSize > originalSize)
4086 copyBlkClass = objClass;
4088 // There are a few special cases where we can omit using a CopyBlk
4089 // where we normally would need to use one.
4091 if (argObj->OperIs(GT_OBJ) &&
4092 argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar?
4094 copyBlkClass = NO_CLASS_HANDLE;
4098 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
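// For example, on a 64-bit target a 12-byte struct has roundupSize 16, so
// size becomes 16 / 8 = 2 pointer-sized slots.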
4101 #endif // !UNIX_AMD64_ABI
4105 if (argEntry->isPassedInRegisters())
4107 call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing);
4111 call->fgArgInfo->UpdateStkArg(argEntry, argx, reMorphing);
4114 if (copyBlkClass != NO_CLASS_HANDLE)
4116 fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
4119 if (argx->gtOper == GT_MKREFANY)
4121 // 'Lower' the MKREFANY tree and insert it.
4122 noway_assert(!reMorphing);
4126 // Build the mkrefany as a GT_FIELD_LIST
4127 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4128 GenTreeFieldList(argx->gtOp.gtOp1, OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF, nullptr);
4129 (void)new (this, GT_FIELD_LIST)
4130 GenTreeFieldList(argx->gtOp.gtOp2, OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL, fieldList);
4131 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4132 fp->node = fieldList;
4133 args->gtOp.gtOp1 = fieldList;
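// The resulting shape is, roughly (a sketch):
//     FIELD_LIST(dataPtr [+0] TYP_BYREF, FIELD_LIST(type [+ptr-size] TYP_I_IMPL, nullptr))
// i.e. the TypedReference is passed as its two fields, with no temp needed.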
4135 #else // !_TARGET_X86_
4138         // Here we don't need an unsafe value cls check, since the addr of the temp is used only in mkrefany
4139 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4140 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4142 // Build the mkrefany as a comma node:
4143 // (tmp.ptr=argx),(tmp.type=handle)
4144 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__dataPtr);
4145 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type);
4146 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4147 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4148 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4149 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4151 GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4152 GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4153 GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4155 // Change the expression to "(tmp=val)"
4156 args->gtOp.gtOp1 = asg;
4158 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4159 call->fgArgInfo->EvalToTmp(argEntry, tmp, asg);
4160 lvaSetVarAddrExposed(tmp);
4161 #endif // !_TARGET_X86_
4164 #if FEATURE_MULTIREG_ARGS
4167 if (((argEntry->numRegs + argEntry->numSlots) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT))
4169 hasMultiregStructArgs = true;
4173 else if ((argEntry->argType == TYP_LONG) || (argEntry->argType == TYP_DOUBLE))
4175 assert((argEntry->numRegs == 2) || (argEntry->numSlots == 2));
4180 // We must have exactly one register or slot.
4181 assert(((argEntry->numRegs == 1) && (argEntry->numSlots == 0)) ||
4182 ((argEntry->numRegs == 0) && (argEntry->numSlots == 1)));
4186 #if defined(_TARGET_X86_)
4189 GenTree* lclNode = argx->OperIs(GT_LCL_VAR) ? argx : fgIsIndirOfAddrOfLocal(argx);
4190 if ((lclNode != nullptr) &&
4191 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4193 // Make a GT_FIELD_LIST of the field lclVars.
4194 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4195 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4196 GenTreeFieldList* fieldList = nullptr;
4197 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4198 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4200 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4201 if (fieldList == nullptr)
4203 lcl->SetLclNum(fieldLclNum);
4204 lcl->ChangeOper(GT_LCL_VAR);
4205 lcl->gtType = fieldVarDsc->lvType;
4206 fieldList = new (this, GT_FIELD_LIST)
4207 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4208 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4209 fp->node = fieldList;
4210 args->gtOp.gtOp1 = fieldList;
4214 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4215 fieldList = new (this, GT_FIELD_LIST)
4216 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4221 #endif // _TARGET_X86_
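// To illustrate the x86 transformation above (a sketch): a promoted two-field struct
// local V03 with int fields V04 (offset 0) and V05 (offset 4) is rewritten as
//     FIELD_LIST(LCL_VAR V04 [+0] TYP_INT, FIELD_LIST(LCL_VAR V05 [+4] TYP_INT, nullptr))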
4223 flagsSummary |= args->Current()->gtFlags;
4225 } // end foreach argument loop
4229 call->fgArgInfo->ArgsComplete();
4232 if (call->gtCallArgs)
4234 UpdateGT_LISTFlags(call->gtCallArgs);
4237 /* Process the function address, if indirect call */
4239 if (call->gtCallType == CT_INDIRECT)
4241 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4244 #if FEATURE_FIXED_OUT_ARGS
4246 // Record the outgoing argument size. If the call is a fast tail
4247 // call, it will setup its arguments in incoming arg area instead
4248 // of the out-going arg area, so we don't need to track the
4249 // outgoing arg size.
4250 if (!call->IsFastTailCall())
4252 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4254 #if defined(UNIX_AMD64_ABI)
4255 // This is currently required for the UNIX ABI to work correctly.
4256 opts.compNeedToAlignFrame = true;
4257 #endif // UNIX_AMD64_ABI
4259 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4260 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
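// Worked example (the constants are target-dependent): with 6 outgoing pointer-sized
// slots on a 64-bit target, outgoingArgSpaceSize = 6 * 8 = 48 bytes; that value is kept
// as-is since it exceeds MIN_ARG_AREA_FOR_CALL (e.g. the 32-byte register home area on
// Windows x64).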
4265 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4266 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4270 #endif // FEATURE_FIXED_OUT_ARGS
4272 // Clear the ASG and EXCEPT (if possible) flags on the call node
4273 call->gtFlags &= ~GTF_ASG;
4274 if (!call->OperMayThrow(this))
4276 call->gtFlags &= ~GTF_EXCEPT;
4279 // Union in the side effect flags from the call's operands
4280 call->gtFlags |= flagsSummary & GTF_ALL_EFFECT;
4282 // If we are remorphing or don't have any register arguments or other arguments that need
4283 // temps, then we don't need to call SortArgs() and EvalArgsToTemps().
4285 if (!reMorphing && (call->fgArgInfo->HasRegArgs() || call->fgArgInfo->NeedsTemps()))
4287 // Do the 'defer or eval to temp' analysis.
4289 call->fgArgInfo->SortArgs();
4291 call->fgArgInfo->EvalArgsToTemps();
4293 // We may have updated the arguments
4294 if (call->gtCallArgs)
4296 UpdateGT_LISTFlags(call->gtCallArgs);
4300 if (hasMultiregStructArgs)
4302 fgMorphMultiregStructArgs(call);
4308 JITDUMP("ArgTable for %d.%s after fgMorphArgs:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
4309 call->fgArgInfo->Dump(this);
4316 #pragma warning(pop)
4319 //-----------------------------------------------------------------------------
4320 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4321 // call fgMorphMultiregStructArg on each of them.
4324 //    call : a GenTreeCall node that has one or more TYP_STRUCT arguments.
4327 // We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types.
4328 // It will ensure that the struct arguments are in the correct form.
4329 // If this method fails to find any TYP_STRUCT arguments it will assert.
4331 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4333 bool foundStructArg = false;
4334 unsigned initialFlags = call->gtFlags;
4335 unsigned flagsSummary = 0;
4336 fgArgInfo* allArgInfo = call->fgArgInfo;
4339 assert(!"Logic error: no MultiregStructArgs for X86");
4341 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4342 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4345 for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4347         // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4348         // For such late args the gtCallArgList contains the setup arg node (evaluating the arg).
4349         // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4350         // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
4351         // otherwise it points to the node in the late args list.
4352 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4353 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4354 assert(fgEntryPtr != nullptr);
4355 GenTree* argx = fgEntryPtr->node;
4356 GenTree* lateList = nullptr;
4357 GenTree* lateNode = nullptr;
4361 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4363 assert(list->OperIsList());
4365 GenTree* argNode = list->Current();
4366 if (argx == argNode)
4373 assert(lateList != nullptr && lateNode != nullptr);
4376 GenTree* arg = argx;
4378 if (!fgEntryPtr->isStruct)
4383 unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots);
4384 if ((size > 1) || (fgEntryPtr->isHfaArg && argx->TypeGet() == TYP_STRUCT))
4386 foundStructArg = true;
4387 if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST))
4389 if (fgEntryPtr->isHfaArg)
4391 var_types hfaType = fgEntryPtr->hfaType;
4392 unsigned structSize;
4393 if (argx->OperIs(GT_OBJ))
4395 structSize = argx->AsObj()->gtBlkSize;
4399 assert(argx->OperIs(GT_LCL_VAR));
4400 structSize = lvaGetDesc(argx->AsLclVar()->gtLclNum)->lvExactSize;
4402 assert(structSize > 0);
4403 if (structSize == genTypeSize(hfaType))
4405 if (argx->OperIs(GT_OBJ))
4407 fgMorphBlkToInd(argx->AsObj(), hfaType);
4411 argx->gtType = hfaType;
4415 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4417 // Did we replace 'argx' with a new tree?
4420 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4422 // link the new arg node into either the late arg list or the gtCallArgs list
4425 lateList->gtOp.gtOp1 = arg;
4429 args->gtOp.gtOp1 = arg;
4436 // We should only call this method when we actually have one or more multireg struct args
4437 assert(foundStructArg);
4440 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4443 //-----------------------------------------------------------------------------
4444 // fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list,
4445 // morph the argument as needed to be passed correctly.
4448 // arg - A GenTree node containing a TYP_STRUCT arg
4449 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4452 // The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT.
4453 // If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the
4454 // stack are marked as doNotEnregister, and then we return.
4456 // If it is passed by register, we mutate the argument into the GT_FIELD_LIST form
4457 // which is only used for struct arguments.
4459 //    If arg is a LclVar we check whether it is struct promoted and has the right number of fields,
4460 //    and if the fields are at the appropriate offsets, we will use the struct's promoted fields
4461 //    in the GT_FIELD_LIST nodes that we create.
4462 //    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4463 //    we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4464 //    this also forces the struct to be stack allocated into the local frame.
4465 //    For the GT_OBJ case we will clone the address expression and generate two (or more)
4466 //    indirections to access the individual pieces of the struct.
4467 //    Currently the implementation handles ARM64/ARM and will NYI for other architectures.
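// For example (an illustrative sketch): on ARM64 a 16-byte non-promoted struct argument
//     LDOBJ(ADDR(LCLVAR V02))
// is rewritten into
//     FIELD_LIST(LCL_FLD V02 [+0] TYP_LONG, FIELD_LIST(LCL_FLD V02 [+8] TYP_LONG, nullptr))
// so that each register-sized piece becomes an explicit node.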
4469 GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr)
4471 assert(varTypeIsStruct(arg->TypeGet()));
4473 #if !defined(_TARGET_ARMARCH_) && !defined(UNIX_AMD64_ABI)
4474 NYI("fgMorphMultiregStructArg requires implementation for this target");
4478 if ((fgEntryPtr->isSplit && fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) ||
4479 (!fgEntryPtr->isSplit && fgEntryPtr->regNum == REG_STK))
4481 if (fgEntryPtr->regNum == REG_STK)
4484 GenTreeLclVarCommon* lcl = nullptr;
4485 GenTree* actualArg = arg->gtEffectiveVal();
4487 if (actualArg->OperGet() == GT_OBJ)
4489 if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
4491 lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon();
4494 else if (actualArg->OperGet() == GT_LCL_VAR)
4496 lcl = actualArg->AsLclVarCommon();
4500 if (lvaGetPromotionType(lcl->gtLclNum) == PROMOTION_TYPE_INDEPENDENT)
4502 arg = fgMorphLclArgToFieldlist(lcl);
4504 else if (arg->TypeGet() == TYP_STRUCT)
4506 // If this is a non-register struct, it must be referenced from memory.
4507 if (!actualArg->OperIs(GT_OBJ))
4509 // Create an Obj of the temp to use it as a call argument.
4510 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4511 arg = gtNewObjNode(lvaGetStruct(lcl->gtLclNum), arg);
4513 // Its fields will need to be accessed by address.
4514 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUG_ARG(DNER_IsStructArg));
4521 #if FEATURE_MULTIREG_ARGS
4522     // Examine 'arg' and set up argValue, objClass, and structSize
4524 CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg);
4525 noway_assert(objClass != NO_CLASS_HANDLE);
4526 GenTree* argValue = arg; // normally argValue will be arg, but see right below
4527 unsigned structSize = 0;
4529 if (arg->TypeGet() != TYP_STRUCT)
4531 structSize = genTypeSize(arg->TypeGet());
4532 assert(structSize == info.compCompHnd->getClassSize(objClass));
4534 else if (arg->OperGet() == GT_OBJ)
4536 GenTreeObj* argObj = arg->AsObj();
4537 structSize = argObj->Size();
4538 assert(structSize == info.compCompHnd->getClassSize(objClass));
4540 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR.
4541 GenTree* op1 = argObj->gtOp1;
4542 if (op1->OperGet() == GT_ADDR)
4544 GenTree* underlyingTree = op1->gtOp.gtOp1;
4546 // Only update to the same type.
4547 if (underlyingTree->OperIs(GT_LCL_VAR) && (underlyingTree->TypeGet() == argValue->TypeGet()) &&
4548 (objClass == gtGetStructHandleIfPresent(underlyingTree)))
4550 argValue = underlyingTree;
4554 else if (arg->OperGet() == GT_LCL_VAR)
4556 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4557 unsigned varNum = varNode->gtLclNum;
4558 assert(varNum < lvaCount);
4559 LclVarDsc* varDsc = &lvaTable[varNum];
4561 structSize = varDsc->lvExactSize;
4562 assert(structSize == info.compCompHnd->getClassSize(objClass));
4566 structSize = info.compCompHnd->getClassSize(objClass);
4568 noway_assert(objClass != NO_CLASS_HANDLE);
4570 var_types hfaType = TYP_UNDEF;
4571 var_types elemType = TYP_UNDEF;
4572 unsigned elemCount = 0;
4573 unsigned elemSize = 0;
4574 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4576 hfaType = fgEntryPtr->hfaType;
4577 if (varTypeIsValidHfaType(hfaType)
4578 #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4579 && !fgEntryPtr->isVararg
4580 #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4584 elemSize = genTypeSize(elemType);
4585 elemCount = structSize / elemSize;
4586 assert(elemSize * elemCount == structSize);
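// E.g. (a sketch): an HFA of four floats has elemType TYP_FLOAT, elemSize 4,
// elemCount 16 / 4 = 4, and the loop below sets type[0..3] = TYP_FLOAT.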
4587 for (unsigned inx = 0; inx < elemCount; inx++)
4589 type[inx] = elemType;
4594 assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE);
4595 BYTE gcPtrs[MAX_ARG_REG_COUNT];
4596 elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
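// E.g. with TARGET_POINTER_SIZE == 8, a structSize of 12 gives
// elemCount = roundUp(12, 8) / 8 = 2.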
4597 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4599 for (unsigned inx = 0; inx < elemCount; inx++)
4601 #ifdef UNIX_AMD64_ABI
4602 if (gcPtrs[inx] == TYPE_GC_NONE)
4604 type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx],
4605 fgEntryPtr->structDesc.eightByteSizes[inx]);
4608 #endif // UNIX_AMD64_ABI
4610 type[inx] = getJitGCType(gcPtrs[inx]);
4614 #ifndef UNIX_AMD64_ABI
4615 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4617 elemSize = TARGET_POINTER_SIZE;
4618 // We can safely widen this to aligned bytes since we are loading from
4619 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4620 // lives in the stack frame or will be a promoted field.
4622 structSize = elemCount * TARGET_POINTER_SIZE;
4624 else // we must have a GT_OBJ
4626 assert(argValue->OperGet() == GT_OBJ);
4628 // We need to load the struct from an arbitrary address
4629 // and we can't read past the end of the structSize
4630 // We adjust the last load type here
4632 unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
4633 unsigned lastElem = elemCount - 1;
4634 if (remainingBytes != 0)
4636 switch (remainingBytes)
4639 type[lastElem] = TYP_BYTE;
4642 type[lastElem] = TYP_SHORT;
4644 #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
4646 type[lastElem] = TYP_INT;
4648 #endif // (_TARGET_ARM64_) || (UNIX_AMD64_ABI)
4650 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
4655 #endif // !UNIX_AMD64_ABI
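// Example of the adjustment above (a sketch): a 12-byte struct on ARM64 has
// elemCount 2 and remainingBytes 12 % 8 = 4, so type[1] is narrowed to TYP_INT
// to avoid reading 4 bytes past the end of the struct.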
4658 // We should still have a TYP_STRUCT
4659 assert(varTypeIsStruct(argValue->TypeGet()));
4661 GenTreeFieldList* newArg = nullptr;
4663 // Are we passing a struct LclVar?
4665 if (argValue->OperGet() == GT_LCL_VAR)
4667 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4668 unsigned varNum = varNode->gtLclNum;
4669 assert(varNum < lvaCount);
4670 LclVarDsc* varDsc = &lvaTable[varNum];
4672 // At this point any TYP_STRUCT LclVar must be an aligned struct
4673         // or an HFA struct, both of which are passed by value.
4675 assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4677 varDsc->lvIsMultiRegArg = true;
4682 JITDUMP("Multireg struct argument V%02u : ", varNum);
4687 #ifndef UNIX_AMD64_ABI
4688 // This local variable must match the layout of the 'objClass' type exactly
4689 if (varDsc->lvIsHfa()
4690 #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4691 && !fgEntryPtr->isVararg
4692 #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4695 // We have a HFA struct.
4696 // Note that GetHfaType may not be the same as elemType, since TYP_SIMD8 is handled the same as TYP_DOUBLE.
4697 var_types useElemType = elemType;
4698 #if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4699 useElemType = (elemType == TYP_SIMD8) ? TYP_DOUBLE : useElemType;
4700 #endif // _TARGET_ARM64_ && FEATURE_SIMD
4701 noway_assert(useElemType == varDsc->GetHfaType());
4702 noway_assert(elemSize == genTypeSize(elemType));
4703 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4704 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4706 for (unsigned inx = 0; (inx < elemCount); inx++)
4708 noway_assert(type[inx] == elemType);
4713 #if defined(_TARGET_ARM64_)
4714 // We must have a 16-byte struct (non-HFA)
4715 noway_assert(elemCount == 2);
4716 #elif defined(_TARGET_ARM_)
4717 noway_assert(elemCount <= 4);
4720 for (unsigned inx = 0; inx < elemCount; inx++)
4722 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4724                 // We set up the type[inx] value above using the GC info from 'objClass'.
4725 // This GT_LCL_VAR must have the same GC layout info
4727 if (currentGcLayoutType != TYPE_GC_NONE)
4729 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4733                     // We may have used a small type when we set up the type[inx] values above.
4734                     // We can safely widen this to TYP_I_IMPL.
4735 type[inx] = TYP_I_IMPL;
4739 #endif // !UNIX_AMD64_ABI
4741 #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
4742 // Is this LclVar a promoted struct with exactly 2 fields?
4743 // TODO-ARM64-CQ: Support struct promoted HFA types here
4744 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && (!varDsc->lvIsHfa()
4745 #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4746 && !fgEntryPtr->isVararg
4747 #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4750             // See if we have two promoted fields that start at offsets 0 and 8.
4751 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4752 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4754 // Did we find the promoted fields at the necessary offsets?
4755 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4757 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4758 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4760 var_types loType = loVarDsc->lvType;
4761 var_types hiType = hiVarDsc->lvType;
4763 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4765 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4766 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4768 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4771 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4776                     // We can use the struct's promoted fields as the two arguments
4778 GenTree* loLclVar = gtNewLclvNode(loVarNum, loType);
4779 GenTree* hiLclVar = gtNewLclvNode(hiVarNum, hiType);
4781 // Create a new tree for 'arg'
4782 // replace the existing LDOBJ(ADDR(LCLVAR))
4783 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
4785 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
4786 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
4793             // We will create a list of GT_LCL_FLD nodes to pass this struct
4795 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4797 #elif defined(_TARGET_ARM_)
4798         // Is this LclVar a promoted struct with the same number of fields as register-sized elements?
4799 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
4801             // See if we can find a promoted field at each register-sized offset.
4802 unsigned varNums[4];
4803 bool hasBadVarNum = false;
4804 for (unsigned inx = 0; inx < elemCount; inx++)
4806 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
4807 if (varNums[inx] == BAD_VAR_NUM)
4809 hasBadVarNum = true;
4814 // Did we find the promoted fields at the necessary offsets?
4817 LclVarDsc* varDscs[4];
4818 var_types varType[4];
4819 bool varIsFloat = false;
4821 for (unsigned inx = 0; inx < elemCount; inx++)
4823 varDscs[inx] = &lvaTable[varNums[inx]];
4824 varType[inx] = varDscs[inx]->lvType;
4825 if (varTypeIsFloating(varType[inx]))
4827 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
4829 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4831 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4834 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4843 newArg = fgMorphLclArgToFieldlist(varNode);
4850             // We will create a list of GT_LCL_FLD nodes to pass this struct
4852 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4854 #endif // _TARGET_ARM_
4857     // If we didn't set newArg to a new FIELD_LIST tree
4859 if (newArg == nullptr)
4861 if (fgEntryPtr->regNum == REG_STK)
4863             // We leave this stack-passed argument alone
4867         // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
4868         // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
4870 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4872 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4873 unsigned varNum = varNode->gtLclNum;
4874 assert(varNum < lvaCount);
4875 LclVarDsc* varDsc = &lvaTable[varNum];
4877 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
4878 unsigned lastOffset = baseOffset + structSize;
4880 // The allocated size of our LocalVar must be at least as big as lastOffset
4881 assert(varDsc->lvSize() >= lastOffset);
4883 if (varDsc->lvStructGcCount > 0)
4885 // alignment of the baseOffset is required
4886 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
4887 #ifndef UNIX_AMD64_ABI
4888 noway_assert(elemSize == TARGET_POINTER_SIZE);
4890 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
4891 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
4892 for (unsigned inx = 0; (inx < elemCount); inx++)
4894                     // The GC information must match what we set up using 'objClass'
4895 if ((gcPtrs[baseIndex + inx] != TYPE_GC_NONE) || varTypeGCtype(type[inx]))
4897 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
4901 else // this varDsc contains no GC pointers
4903 for (unsigned inx = 0; inx < elemCount; inx++)
4905                     // The GC information must match what we set up using 'objClass'
4906 noway_assert(!varTypeIsGC(type[inx]));
4911             // We create a list of GT_LCL_FLD nodes to pass this struct
4913 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4915 // Create a new tree for 'arg'
4916 // replace the existing LDOBJ(ADDR(LCLVAR))
4917 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
4919 unsigned offset = baseOffset;
4920 GenTreeFieldList* listEntry = nullptr;
4921 for (unsigned inx = 0; inx < elemCount; inx++)
4923 elemSize = genTypeSize(type[inx]);
4924 GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
4925 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
4926 if (newArg == nullptr)
4933 // Are we passing a GT_OBJ struct?
4935 else if (argValue->OperGet() == GT_OBJ)
4937 GenTreeObj* argObj = argValue->AsObj();
4938 GenTree* baseAddr = argObj->gtOp1;
4939 var_types addrType = baseAddr->TypeGet();
4941 if (baseAddr->OperGet() == GT_ADDR)
4943 GenTree* addrTaken = baseAddr->gtOp.gtOp1;
4944 if (addrTaken->IsLocal())
4946 GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon();
4947 unsigned varNum = varNode->gtLclNum;
4948                     // We access a non-struct type (for example, long) as a struct type.
4949 // Make sure lclVar lives on stack to make sure its fields are accessible by address.
4950 lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
4954 // Create a new tree for 'arg'
4955 // replace the existing LDOBJ(EXPR)
4956 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
4959 unsigned offset = 0;
4960 GenTreeFieldList* listEntry = nullptr;
4961 for (unsigned inx = 0; inx < elemCount; inx++)
4963 elemSize = genTypeSize(type[inx]);
4964 GenTree* curAddr = baseAddr;
4967 GenTree* baseAddrDup = gtCloneExpr(baseAddr);
4968 noway_assert(baseAddrDup != nullptr);
4969 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
4975 GenTree* curItem = gtNewIndir(type[inx], curAddr);
4977 // For safety all GT_IND should have at least GT_GLOB_REF set.
4978 curItem->gtFlags |= GTF_GLOB_REF;
4980 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
4981 if (newArg == nullptr)
4991 // If we reach here we should have set newArg to something
4992 if (newArg == nullptr)
4994 gtDispTree(argValue);
4995 assert(!"Missing case in fgMorphMultiregStructArg");
4999 noway_assert(newArg != nullptr);
5000 noway_assert(newArg->OperIsFieldList());
5002 // We need to propagate any GTF_ALL_EFFECT flags from the end of the list back to the beginning.
5003 // This is verified in fgDebugCheckFlags().
5005 ArrayStack<GenTree*> stack(getAllocator(CMK_ArrayStack));
5007 for (tree = newArg; (tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsFieldList(); tree = tree->gtGetOp2())
5012 unsigned propFlags = (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
5013 tree->gtFlags |= propFlags;
5015 while (!stack.Empty())
5018 propFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
5019 propFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
5020 tree->gtFlags |= propFlags;
5026 printf("fgMorphMultiregStructArg created tree:\n");
5031 arg = newArg; // consider calling fgMorphTree(newArg);
5033 #endif // FEATURE_MULTIREG_ARGS
5038 //------------------------------------------------------------------------
5039 // fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields
5042 // lcl - The GT_LCL_VAR node we will transform
5045 // The new GT_FIELD_LIST that we have created.
5047 GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl)
5049 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
5050 assert(varDsc->lvPromoted == true);
5052 unsigned fieldCount = varDsc->lvFieldCnt;
5053 GenTreeFieldList* listEntry = nullptr;
5054 GenTreeFieldList* newArg = nullptr;
5055 unsigned fieldLclNum = varDsc->lvFieldLclStart;
5057     // We can use the struct's promoted fields as arguments
5058 for (unsigned i = 0; i < fieldCount; i++)
5060 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
5061 GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
5062 listEntry = new (this, GT_FIELD_LIST)
5063 GenTreeFieldList(lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, listEntry);
5064 if (newArg == nullptr)
5073 //------------------------------------------------------------------------
5074 // fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary,
5075 // to pass to a callee.
5078 // call - call being processed
5079 // args - args for the call
5080 //    argIndex - arg being processed
5081 // copyBlkClass - class handle for the struct
5084 // tree that computes address of the outgoing arg
5086 void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
5089 CORINFO_CLASS_HANDLE copyBlkClass)
5091 GenTree* argx = args->Current();
5092 noway_assert(argx->gtOper != GT_MKREFANY);
5093 fgArgTabEntry* argEntry = Compiler::gtArgEntryByNode(call, argx);
5095 // If we're optimizing, see if we can avoid making a copy.
5097 // We don't need a copy if this is the last use of an implicit by-ref local.
5099 // We can't determine that all of the time, but if there is only
5100 // one use and the method has no loops, then this use must be the last.
5101 if (opts.OptimizationEnabled())
5103 GenTreeLclVarCommon* lcl = nullptr;
5105 if (argx->OperIsLocal())
5107 lcl = argx->AsLclVarCommon();
5109 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5111 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5116 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5117 if (lvaIsImplicitByRefLocal(varNum))
5119 LclVarDsc* varDsc = &lvaTable[varNum];
5120 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
5121                 // on the caller's frame. If an argument lives on the caller's frame, it may get
5122 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5123 // struct parameters if they are passed as arguments to a tail call.
5124 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop())
5126 varDsc->setLvRefCnt(0, RCS_EARLY);
5127 args->gtOp.gtOp1 = lcl;
5128 argEntry->node = lcl;
5130 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5137 if (fgOutgoingArgTemps == nullptr)
5139 fgOutgoingArgTemps = hashBv::Create(this);
5145 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5146 // We do not reuse within a statement.
5147 if (!opts.MinOpts())
5150 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5152 LclVarDsc* varDsc = &lvaTable[lclNum];
5153 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5154 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5156 tmp = (unsigned)lclNum;
5158 JITDUMP("reusing outgoing struct arg");
5165 // Create the CopyBlk tree and insert it.
5169         // Here we don't need an unsafe value cls check, since the addr of this temp is used only in copyblk.
5170 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5171 lvaSetStruct(tmp, copyBlkClass, false);
5172 if (call->IsVarargs())
5174 lvaSetStructUsedAsVarArg(tmp);
5177 fgOutgoingArgTemps->setBit(tmp);
5180 fgCurrentlyInUseArgTemps->setBit(tmp);
5182     // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
5183     // allocated on the stack with their address passed.
5184 if (lclVarIsSIMDType(tmp))
5186 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5189 // Create a reference to the temp
5190 GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5191 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5193 if (argx->gtOper == GT_OBJ)
5195 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5196 argx->SetIndirExceptionFlags(this);
5200 argx->gtFlags |= GTF_DONT_CSE;
5203 // Copy the valuetype to the temp
5204 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5205 GenTree* copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5206 copyBlk = fgMorphCopyBlock(copyBlk);
5208 #if FEATURE_FIXED_OUT_ARGS
5210     // Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
5211     // On Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode.
5212 GenTree* arg = copyBlk;
5214 #else // FEATURE_FIXED_OUT_ARGS
5216 // Structs are always on the stack, and thus never need temps
5217 // so we have to put the copy and temp all into one expression.
5218 argEntry->tmpNum = tmp;
5219 GenTree* arg = fgMakeTmpArgNode(argEntry);
5221 // Change the expression to "(tmp=val),tmp"
5222 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5224 #endif // FEATURE_FIXED_OUT_ARGS
5226 args->gtOp.gtOp1 = arg;
5227 call->fgArgInfo->EvalToTmp(argEntry, tmp, arg);
5233 // See declaration for specification comment.
5234 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5235 unsigned firstArgRegNum,
5236 regMaskTP* pArgSkippedRegMask)
5238 assert(varDsc->lvPromoted);
5239 // There's no way to do these calculations without breaking abstraction and assuming that
5240 // integer register arguments are consecutive ints. They are on ARM.
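// Worked example (a sketch; TARGET_POINTER_SIZE is 4 on ARM): for a promoted struct
// with 4-byte fields at offsets 0 and 12, the first field's last byte lands in
// register offset (0 + 4 - 1) / 4 = 0 and the second field starts at offset 12 / 4 = 3,
// so the registers at offsets 1 and 2 (relative to firstArgRegNum) are marked as skipped.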
5242 // To start, figure out what register contains the last byte of the first argument.
5243 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5244 unsigned lastFldRegOfLastByte =
5245 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5248 // Now we're keeping track of the register that the last field ended in; see what registers
5249 // subsequent fields start in, and whether any are skipped.
5250 // (We assume here the invariant that the fields are sorted in offset order.)
5251 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5253 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5254 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5255 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5256 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5257         // This loop enumerates the offsets of any registers skipped:
5258         // start at the first register after the one containing the last byte of the
5259         // previous field, and stop just before the first register of the current field.
5260 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5261 skippedRegOffsets++)
5263 // If the register number would not be an arg reg, we're done.
5264 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5266 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5268 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5272 #endif // _TARGET_ARM_
5274 //****************************************************************************
5275 // fgFixupStructReturn:
5276 //     The companion to impFixupCallStructReturn. Now that the importer is done,
5277 //     change the gtType to the precomputed native return type.
5278 //     Requires that callNode currently has a struct type.
5280 void Compiler::fgFixupStructReturn(GenTree* callNode)
5282 assert(varTypeIsStruct(callNode));
5284 GenTreeCall* call = callNode->AsCall();
5285 bool callHasRetBuffArg = call->HasRetBufArg();
5286 bool isHelperCall = call->IsHelperCall();
5288 // Decide on the proper return type for this call that currently returns a struct
5290 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5291 Compiler::structPassingKind howToReturnStruct;
5292 var_types returnType;
5294 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5295 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5297 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5298 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5299 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5303 assert(!callHasRetBuffArg);
5304 assert(retClsHnd == NO_CLASS_HANDLE);
5306 // Now that we are past the importer, re-type this node
5307 howToReturnStruct = SPK_PrimitiveType;
5308 returnType = (var_types)call->gtReturnType;
5312 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5315 if (howToReturnStruct == SPK_ByReference)
5317 assert(returnType == TYP_UNKNOWN);
5318 assert(callHasRetBuffArg);
5322 assert(returnType != TYP_UNKNOWN);
5324 if (!varTypeIsStruct(returnType))
5326 // Widen the primitive type if necessary
5327 returnType = genActualType(returnType);
5329 call->gtType = returnType;
5332 #if FEATURE_MULTIREG_RET
5333     // Either we don't have a struct now, or if we do, it is returned in registers or via a return buffer.
5334 assert((call->gtType != TYP_STRUCT) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5335 #else // !FEATURE_MULTIREG_RET
5336 // No more struct returns
5337 assert(call->TypeGet() != TYP_STRUCT);
5340 #if !defined(UNIX_AMD64_ABI)
5341 // If it was a struct return, it has been transformed into a call
5342 // with a return buffer (that returns TYP_VOID) or into a return
5343 // of a primitive/enregisterable type
5344 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5348 /*****************************************************************************
5350 * A little helper used to rearrange nested commutative operations. The
5351 * effect is that nested associative, commutative operations are transformed
5352 * into a 'left-deep' tree, i.e. into something like this:
5354 * (((a op b) op c) op d) op...
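 *  For example, "a + (b + (c + d))" is reshaped into "((a + b) + c) + d",
 *  subject to the legality checks performed below.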
5359 void Compiler::fgMoveOpsLeft(GenTree* tree)
5367 op1 = tree->gtOp.gtOp1;
5368 op2 = tree->gtOp.gtOp2;
5369 oper = tree->OperGet();
5371 noway_assert(GenTree::OperIsCommutative(oper));
5372 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5373 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5374 noway_assert(oper == op2->gtOper);
5376 // Commutativity doesn't hold if overflow checks are needed
5378 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5383 if (gtIsActiveCSE_Candidate(op2))
5385 // If we have marked op2 as a CSE candidate,
5386 // we can't perform a commutative reordering
5387 // because any value numbers that we computed for op2
5388 // will be incorrect after performing a commutative reordering
5393 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5398 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5399 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5404 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5406 // We could deal with this, but we were always broken and just hit the assert
5407             // below regarding flags, which means it's not frequent, so we will just bail out.
5412 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5414 GenTree* ad1 = op2->gtOp.gtOp1;
5415 GenTree* ad2 = op2->gtOp.gtOp2;
5417         // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT.
5418         // We cannot reorder such GT_OR trees.
5420 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5425 // Don't split up a byref calculation and create a new byref. E.g.,
5426 // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int).
5427 // Doing this transformation could create a situation where the first
5428 // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that
5429 // no longer points within the ref object. If a GC happens, the byref won't
5430 // get updated. This can happen, for instance, if one of the int components
5431         // is negative. It also requires the address generation be in a fully-interruptible
5432         // code region.
5434 if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL)
5436 assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD));
5440 /* Change "(x op (y op z))" to "(x op y) op z" */
5441 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5443 GenTree* new_op1 = op2;
5445 new_op1->gtOp.gtOp1 = op1;
5446 new_op1->gtOp.gtOp2 = ad1;
5448 /* Change the flags. */
5450         // Make sure we aren't throwing away any flags
5451 noway_assert((new_op1->gtFlags &
5452 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5453 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5454 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5457 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5458 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5460         /* Retype new_op1 if it has become (or is no longer) a GC ptr. */
5462 if (varTypeIsGC(op1->TypeGet()))
5464 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5465 oper == GT_ADD) || // byref(ref + (int+int))
5466 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5467 oper == GT_OR)); // int(gcref | int(gcref|intval))
5469 new_op1->gtType = tree->gtType;
5471 else if (varTypeIsGC(ad2->TypeGet()))
5473             // Neither ad1 nor op1 is a GC pointer, so new_op1 isn't either.
5474 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5475 new_op1->gtType = TYP_I_IMPL;
5478         // If new_op1 is a new expression, assign it a new unique value number.
5479         // vnStore is null before the ValueNumber phase has run.
5480 if (vnStore != nullptr)
5482 // We can only keep the old value number on new_op1 if both op1 and ad2
5483 // have the same non-NoVN value numbers. Since op is commutative, comparing
5484 // only ad2 and op1 is enough.
5485 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5486 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5487 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5489 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5493 tree->gtOp.gtOp1 = new_op1;
5494 tree->gtOp.gtOp2 = ad2;
5496 /* If 'new_op1' is now the same nested op, process it recursively */
5498 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5500 fgMoveOpsLeft(new_op1);
5503 /* If 'ad2' is now the same nested op, process it
5504 * Instead of recursion, we set up op1 and op2 for the next loop.
5509 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5516 /*****************************************************************************/
5518 void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay)
5520 if (tree->OperIsBoundsCheck())
5522 GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk();
5523 BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay);
5524 if (failBlock != nullptr)
5526 boundsChk->gtIndRngFailBB = failBlock;
5529 else if (tree->OperIs(GT_INDEX_ADDR))
5531 GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
5532 BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay);
5533 if (failBlock != nullptr)
5535 indexAddr->gtIndRngFailBB = failBlock;
5540 noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX));
5541 fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay);
5545 BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay)
5552 if (!opts.compDbgCode)
5554 if (!delay && !compIsForInlining())
5556 // Create/find the appropriate "range-fail" label
5557 return fgRngChkTarget(compCurBB, kind);
5564 /*****************************************************************************
5566 * Expand a GT_INDEX node and fully morph the child operands
5568  *  The original GT_INDEX node is bashed into the GT_IND node that accesses
5569 * the array element. We expand the GT_INDEX node into a larger tree that
5570 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5571  *  with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
5572 * For complex array or index expressions one or more GT_COMMA assignments
5573 * are inserted so that we only evaluate the array or index expressions once.
5575 * The fully expanded tree is then morphed. This causes gtFoldExpr to
5576  *  perform local constant prop and reorder the constants in the tree and
5577  *  fold them.
5579 * We then parse the resulting array element expression in order to locate
5580 * and label the constants and variables that occur in the tree.
5583 const int MAX_ARR_COMPLEXITY = 4;
5584 const int MAX_INDEX_COMPLEXITY = 4;
5586 GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
5588 noway_assert(tree->gtOper == GT_INDEX);
5589 GenTreeIndex* asIndex = tree->AsIndex();
5591 var_types elemTyp = tree->TypeGet();
5592 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5593 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5595 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5598 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= maxSIMDStructBytes())
5600 // If this is a SIMD type, this is the point at which we lose the type information,
5601 // so we need to set the correct type on the GT_IND.
5602 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5603 unsigned simdElemSize = 0;
5604 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5606 assert(simdElemSize == elemSize);
5607 elemTyp = getSIMDTypeForSize(elemSize);
5608 // This is the new type of the node.
5609 tree->gtType = elemTyp;
5610 // Now set elemStructType to null so that we don't confuse value numbering.
5611 elemStructType = nullptr;
5614 #endif // FEATURE_SIMD
5616     // Set up the array length's offset into lenOffs
5617     // and the first element's offset into elemOffs
5620 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5622 lenOffs = OFFSETOF__CORINFO_String__stringLen;
5623 elemOffs = OFFSETOF__CORINFO_String__chars;
5624 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5626 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5628 lenOffs = OFFSETOF__CORINFO_Array__length;
5629 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5631 else // We have a standard array
5633 lenOffs = OFFSETOF__CORINFO_Array__length;
5634 elemOffs = OFFSETOF__CORINFO_Array__data;
5637 // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts
5638 // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
5639 // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
5640     // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
5641     // minopts).
5643 // When we *are* optimizing, we fully expand GT_INDEX to:
5644     //     1. Evaluate the array address expression and store the result in a temp if the expression is complex or
5645     //        side-effecting.
5646     //     2. Evaluate the array index expression and store the result in a temp if the expression is complex or
5647     //        side-effecting.
5648 // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array))
5649 // 4. Compute the address of the element that will be accessed:
5650 // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize))
5651 // 5. Dereference the address with a GT_IND.
5653 // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows
5654 // for more straightforward bounds-check removal, CSE, etc.
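// For instance (an illustrative sketch: 64-bit target, int[] with 4-byte elements
// and a 16-byte first-element offset), "a[i]" expands roughly to:
//     COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
//           IND(ADD(ADD(a, 16), MUL(i, 4))))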
5657 GenTree* const array = fgMorphTree(asIndex->Arr());
5658 GenTree* const index = fgMorphTree(asIndex->Index());
5660 GenTreeIndexAddr* const indexAddr =
5661 new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize,
5662 static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs));
5663 indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT;
5665 // Mark the indirection node as needing a range check if necessary.
5666 // Note this will always be true unless JitSkipArrayBoundCheck() is used
5667 if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0)
5669 fgSetRngChkTarget(indexAddr);
5672 // Change `tree` into an indirection and return.
5673 tree->ChangeOper(GT_IND);
5674 GenTreeIndir* const indir = tree->AsIndir();
5675 indir->Addr() = indexAddr;
5676 indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT);
5679 indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5685 GenTree* arrRef = asIndex->Arr();
5686 GenTree* index = asIndex->Index();
5688 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5689 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5691 GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5692 GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5693 GenTree* bndsChk = nullptr;
5695 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5698 GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
5699 GenTree* index2 = nullptr;
5701 // If the arrRef expression involves an assignment, a call or reads from global memory,
5702 // then we *must* allocate a temporary in which to "localize" those values,
5703         // to ensure that the same values are used in the bounds check and the actual
5704         // dereference.
5705 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5706 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5707 // complexity is not exposed. (Without that condition there are cases of local struct
5708 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5709 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5711 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5712 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5714 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5715 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5716 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5717 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5721 arrRef2 = gtCloneExpr(arrRef);
5722 noway_assert(arrRef2 != nullptr);
5725 // If the index expression involves an assignment, a call or reads from global memory,
5726 // we *must* allocate a temporary in which to "localize" those values,
5727         // to ensure that the same values are used in the bounds check and the actual
5728         // dereference.
5729 // Also we allocate the temporary when the index is sufficiently complex/expensive.
5731 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
5732 (arrRef->OperGet() == GT_FIELD))
5734 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5735 indexDefn = gtNewTempAssign(indexTmpNum, index);
5736 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5737 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5741 index2 = gtCloneExpr(index);
5742 noway_assert(index2 != nullptr);
5745 // Next introduce a GT_ARR_BOUNDS_CHECK node
5746 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5748 #ifdef _TARGET_64BIT_
5749 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
5750 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
5751         // the comparison will have to be widened to 64 bits.
5752 if (index->TypeGet() == TYP_I_IMPL)
5754 bndsChkType = TYP_I_IMPL;
5756 #endif // _TARGET_64BIT_
5758 GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs);
5760 if (bndsChkType != TYP_INT)
5762 arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType);
5765 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5766 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5768 bndsChk = arrBndsChk;
5770 // Now we'll switch to using the second copies for arrRef and index
5771 // to compute the address expression
5777 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5781 #ifdef _TARGET_64BIT_
5782 // Widen 'index' on 64-bit targets
5783 if (index->TypeGet() != TYP_I_IMPL)
5785 if (index->OperGet() == GT_CNS_INT)
5787 index->gtType = TYP_I_IMPL;
5791 index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL);
5794 #endif // _TARGET_64BIT_
5796 /* Scale the index value if necessary */
5799 GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL);
5801 // Fix 392756 WP7 Crossgen
5803 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5804 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5805 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5807 size->gtFlags |= GTF_DONT_CSE;
5809 /* Multiply by the array element size */
5810 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5817 // Be careful to only create the byref pointer when the full index expression is added to the array reference.
5818 // We don't want to create a partial byref address expression that doesn't include the full index offset:
5819 // a byref must point within the containing object. It is dangerous (especially when optimizations come into
5820 // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that
5821 // the partial byref will not point within the object, and thus not get updated correctly during a GC.
5822 // This is mostly a risk in fully-interruptible code regions.
5824 // NOTE: the tree form created here is pattern matched by optExtractArrIndex(), so changes here must
5825 // be reflected there.
5827 /* Add the first element's offset */
5829 GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5831 addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
5833 /* Add the object ref to the element's offset */
5835 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
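// Concrete example: for an int[] on a 64-bit target, elemSize is 4 and elemOffs is
// typically 16 (method table pointer plus length, padded), so "a[i]" computes the
// byref "arrRef + ((i * 4) + 16)".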
5837 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) ||
5838 (GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL));
5840 // Change the original GT_INDEX node into a GT_IND node
5841 tree->SetOper(GT_IND);
5843 // If the element type is a floating-point type, notify the compiler
5844 // we'll potentially use floating point registers at the time of codegen.
5845 if (varTypeIsFloating(tree->gtType))
5847 this->compFloatingPointUsed = true;
5850 // We've now consumed the GTF_INX_RNGCHK, and the node
5851 // is no longer a GT_INDEX node.
5852 tree->gtFlags &= ~GTF_INX_RNGCHK;
5854 tree->gtOp.gtOp1 = addr;
5856 // This is an array index expression.
5857 tree->gtFlags |= GTF_IND_ARR_INDEX;
5859 /* An indirection will cause a GPF if the address is null */
5860 tree->gtFlags |= GTF_EXCEPT;
5864 tree->gtFlags |= GTF_DONT_CSE;
5867 // Store information about it.
5868 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5870 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5872 GenTree* indTree = tree;
5874 // Did we create a bndsChk tree?
5875 if (bndsChk)
5877 // Use a GT_COMMA node to prepend the array bound check
5879 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5881 /* Mark the indirection node as needing a range check */
5882 fgSetRngChkTarget(bndsChk);
5885 if (indexDefn != nullptr)
5887 // Use a GT_COMMA node to prepend the index assignment
5889 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5891 if (arrRefDefn != nullptr)
5893 // Use a GT_COMMA node to prepend the arrRef assignment
5895 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5898 // Currently we morph the tree to perform some folding operations prior
5899 // to attaching fieldSeq info and labeling constant array index contributions
5901 tree = fgMorphTree(tree);
5903 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5904 // constant array index contributions, but the morphing operation may have changed
5905 // the 'tree' into something that now unconditionally throws an exception.
5907 // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
5908 // or it could be left unchanged. If it is unchanged then we should not return;
5909 // instead we should proceed to attaching fieldSeq info, etc...
5911 GenTree* arrElem = tree->gtEffectiveVal();
5913 if (fgIsCommaThrow(tree))
5915 if ((arrElem != indTree) || // A new tree node may have been created
5916 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
5918 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
5922 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
5924 addr = arrElem->gtOp.gtOp1;
5926 assert(addr->TypeGet() == TYP_BYREF);
5928 GenTree* cnsOff = nullptr;
5929 if (addr->OperGet() == GT_ADD)
5931 assert(addr->TypeGet() == TYP_BYREF);
5932 assert(addr->gtOp.gtOp1->TypeGet() == TYP_REF);
5934 addr = addr->gtOp.gtOp2;
5936 // Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
5938 if (addr->gtOper == GT_CNS_INT)
5940 cnsOff = addr;
5941 addr = nullptr;
5943 else
5945 if ((addr->OperGet() == GT_ADD) && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT))
5947 cnsOff = addr->gtOp.gtOp2;
5948 addr = addr->gtOp.gtOp1;
5951 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
5952 addr->LabelIndex(this);
5955 else if (addr->OperGet() == GT_CNS_INT)
5957 cnsOff = addr;
5960 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
5962 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
5964 // Assign it the [#FirstElem] field sequence
5966 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
5968 else // We have folded the first element's offset with the index expression
5970 // Build the [#ConstantIndex, #FirstElem] field sequence
5972 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
5973 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
5975 if (cnsOff == nullptr) // It must have folded into a zero offset
5977 // Record in the general zero-offset map.
5978 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
5982 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
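// For example, with elemOffs == 16, elemSize == 4 and constant index 2, the folded
// constant 24 carries both contributions and gets the [#ConstantIndex, #FirstElem]
// sequence, while an unfolded "+ 16" alone would match elemOffs and get [#FirstElem].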
5990 /*****************************************************************************
5992 * Wrap fixed stack arguments for varargs functions to go through varargs
5993 * cookie to access them, except for the cookie itself.
5995 * Non-x86 platforms are allowed to access all arguments directly
5996 * so we don't need this code.
5999 GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6001 /* For the fixed stack arguments of a varargs function, we need to go
6002 through the varargs cookies to access them, except for the
6003 cookie itself */
6005 LclVarDsc* varDsc = &lvaTable[lclNum];
6007 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6009 // Create a node representing the local pointing to the base of the args
6010 GenTree* ptrArg =
6011 gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
6012 gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES +
6013 lclOffs));
6015 // Access the argument through the local
6017 if (varTypeIsStruct(varType))
6019 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6023 tree = gtNewOperNode(GT_IND, varType, ptrArg);
6025 tree->gtFlags |= GTF_IND_TGTANYWHERE;
6027 if (varDsc->lvAddrExposed)
6029 tree->gtFlags |= GTF_GLOB_REF;
6032 return fgMorphTree(tree);
6036 return nullptr;
6039 /*****************************************************************************
6041 * Transform the given GT_LCL_VAR tree for code generation.
6044 GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph)
6046 assert(tree->gtOper == GT_LCL_VAR);
6048 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
6049 var_types varType = lvaGetRealType(lclNum);
6050 LclVarDsc* varDsc = &lvaTable[lclNum];
6052 if (varDsc->lvAddrExposed)
6054 tree->gtFlags |= GTF_GLOB_REF;
6057 #ifdef _TARGET_X86_
6058 if (info.compIsVarArgs)
6060 GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6061 if (newTree != nullptr)
6063 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6065 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6067 return newTree;
6070 #endif // _TARGET_X86_
6072 /* If not during the global morphing phase bail */
6074 if (!fgGlobalMorph && !forceRemorph)
6076 return tree;
6079 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6081 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6083 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6085 #if LOCAL_ASSERTION_PROP
6086 /* Assertion prop can tell us to omit adding a cast here */
6087 if (optLocalAssertionProp && optAssertionIsSubrange(tree, TYP_INT, varType, apFull) != NO_ASSERTION_INDEX)
6089 return tree;
6091 #endif // LOCAL_ASSERTION_PROP
6092 /* Small-typed arguments and aliased locals are normalized on load.
6093 Other small-typed locals are normalized on store. Also, small-typed locals are
6094 normalized on load when running under the debugger, as the debugger could write to the variable.
6095 If this is one of the former, insert a narrowing cast on the load.
6096 ie. Convert: var-short --> cast-short(var-int) */
6098 tree->gtType = TYP_INT;
6099 fgMorphTreeDone(tree);
6100 tree = gtNewCastNode(TYP_INT, tree, false, varType);
6101 fgMorphTreeDone(tree);
6103 return tree;
6108 /*****************************************************************************
6109 Grab a temp for big offset morphing.
6110 This method will grab a new temp if no temp of this "type" has been created yet,
6111 or return the cached one if it has.
6113 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6115 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6117 if (lclNum == BAD_VAR_NUM)
6119 // We haven't created a temp for this kind of type. Create one now.
6120 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6121 fgBigOffsetMorphingTemps[type] = lclNum;
6125 // We better get the right type.
6126 noway_assert(lvaTable[lclNum].TypeGet() == type);
6129 noway_assert(lclNum != BAD_VAR_NUM);
6130 return lclNum;
6133 /*****************************************************************************
6135 * Transform the given GT_FIELD tree for code generation.
6138 GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
6140 assert(tree->gtOper == GT_FIELD);
6142 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6143 unsigned fldOffset = tree->gtField.gtFldOffset;
6144 GenTree* objRef = tree->gtField.gtFldObj;
6145 bool fieldMayOverlap = false;
6146 bool objIsLocal = false;
6148 if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6150 // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
6151 // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6152 // simd field rewrites are sensitive to.
6153 fgMorphImplicitByRefArgs(objRef);
6156 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6157 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6159 if (tree->gtField.gtFldMayOverlap)
6161 fieldMayOverlap = true;
6162 // Reset the flag because we may reuse the node.
6163 tree->gtField.gtFldMayOverlap = false;
6167 // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6168 if (mac == nullptr)
6170 GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6171 if (newTree != tree)
6173 newTree = fgMorphSmpOp(newTree);
6174 return newTree;
6177 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6179 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6182 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6187 /* Is this an instance data member? */
6189 if (objRef)
6191 GenTree* addr;
6192 objIsLocal = objRef->IsLocal();
6194 if (tree->gtFlags & GTF_IND_TLS_REF)
6196 NO_WAY("instance field can not be a TLS ref.");
6199 /* We'll create the expression "*(objRef + mem_offs)" */
6201 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6203 // An optimization for Contextful classes:
6204 // we unwrap the proxy when we have a 'this reference'
6205 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6207 objRef = fgUnwrapProxy(objRef);
6211 Now we have a tree like this:
6213                     +--------------------+
6214                     |      GT_FIELD      |   tree
6215                     +----------+---------+
6216                                |
6217                 +--------------+-------------+
6218                 |   tree->gtField.gtFldObj   |
6219                 +--------------+-------------+

6222 We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6224                     +--------------------+
6225                     |   GT_IND/GT_OBJ    |   tree
6226                     +---------+----------+
6228                               |
6229                     +---------+----------+
6230                     |       GT_ADD       |   addr
6231                     +---------+----------+
6233                              /   \
6236      +-------------------+       +----------------------+
6237      |       objRef      |       |      fldOffset       |
6238      |                   |       | (when fldOffset !=0) |
6239      +-------------------+       +----------------------+

6242 or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6245                     +--------------------+
6246                     |   GT_IND/GT_OBJ    |   tree
6247                     +----------+---------+
6248                                |
6249                     +----------+---------+
6250                     |      GT_COMMA      |   comma2
6251                     +----------+---------+
6253                              /       \
6257      +---------+----------+           +---------+----------+
6258 comma|      GT_COMMA      |           | "+" (i.e. GT_ADD)  |   addr
6259      +---------+----------+           +---------+----------+
6261           /   \                            /   \
6264 +-----+-----+   +-----+-----+     +---------+   +-----------+
6265 |  GT_ASG   |asg|  GT_IND   |ind  |  tmpLcl |   | fldOffset |
6266 +-----+-----+   +-----+-----+     +---------+   +-----------+
6268      /   \            |
6271 +---------+  +---------+  +---------+
6272 | tmpLcl  |  | objRef  |  | tmpLcl  |
6273 +---------+  +---------+  +---------+
6278 var_types objRefType = objRef->TypeGet();
6280 GenTree* comma = nullptr;
6282 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6283 // and thus is equivalent to a MACK_Ind with zero offset.
6284 MorphAddrContext defMAC(MACK_Ind);
6286 if (mac == nullptr)
6288 mac = &defMAC;
6290 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6291 // This means that we insert an explicit null check whenever we create a byref by adding a
6292 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6293 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6294 // small offsets); in this plan, we would transfer some null-checking responsibility to
6295 // callees of methods taking byref parameters. They would have to add explicit null checks
6296 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6297 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6298 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6299 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6300 // This is left here to point out how to implement it.
6301 CLANG_FORMAT_COMMENT_ANCHOR;
6303 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
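// For example, under the conservative scheme, taking "&o.f" (a MACK_Addr context,
// where the byref is not immediately dereferenced) with a nonzero field offset inserts
// an explicit null check on "o" before the byref is formed, instead of relying on
// whoever eventually dereferences the byref to fault.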
6305 bool addExplicitNullCheck = false;
6307 // Implicit byref locals are never null.
6308 if (!((objRef->gtOper == GT_LCL_VAR) && lvaIsImplicitByRefLocal(objRef->gtLclVarCommon.gtLclNum)))
6310 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6311 // whose address is being taken is either a local or static variable, whose address is necessarily
6312 // non-null, or else it is a field dereference, which will do its own null checking if necessary.
6313 if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind))
6315 if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset))
6317 addExplicitNullCheck = true;
6321 // In R2R mode the field offset for some fields may change when the code
6322 // is loaded. So we can't rely on a zero offset here to suppress the null check.
6324 // See GitHub issue #16454.
6325 bool fieldHasChangeableOffset = false;
6327 #ifdef FEATURE_READYTORUN_COMPILER
6328 fieldHasChangeableOffset = (tree->gtField.gtFieldLookup.addr != nullptr);
6329 #endif
6331 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6332 addExplicitNullCheck = (mac->m_kind == MACK_Addr) &&
6333 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset);
6334 #else
6335 addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
6336 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset));
6337 #endif
6342 if (addExplicitNullCheck)
6347 printf("Before explicit null check morphing:\n");
6353 // Create the "comma" subtree
6355 GenTree* asg = nullptr;
6360 if (objRef->gtOper != GT_LCL_VAR)
6362 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6364 // Create the "asg" node
6365 asg = gtNewTempAssign(lclNum, objRef);
6369 lclNum = objRef->gtLclVarCommon.gtLclNum;
6372 // Create the "nullchk" node.
6373 // Make it TYP_BYTE so we only dereference it for 1 byte.
6374 GenTree* lclVar = gtNewLclvNode(lclNum, objRefType);
6375 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6377 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6379 // An indirection will cause a GPF if the address is null.
6380 nullchk->gtFlags |= GTF_EXCEPT;
6382 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6383 optMethodFlags |= OMF_HAS_NULLCHECK;
6387 // Create the "comma" node; if no assignment was created, the null check alone is used.
6388 comma = (asg == nullptr) ? nullchk
6389 : gtNewOperNode(GT_COMMA,
6390 TYP_VOID, // We don't want to return anything from this "comma" node.
6391 // Set the type to TYP_VOID, so we can select "cmp" instruction
6392 // instead of "mov" instruction later on.
6393 asg, nullchk);
6399 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6401 else
6403 addr = objRef;
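// Illustrative shape for the large-offset case (cf. the diagram above): "comma" is
// COMMA(tmpLcl = objRef, NULLCHK(tmpLcl)) (or just the null check when objRef was
// already a local), and "addr" is then built from tmpLcl, so the null check runs
// exactly once before the address arithmetic uses the object.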
6406 #ifdef FEATURE_READYTORUN_COMPILER
6407 if (tree->gtField.gtFieldLookup.addr != nullptr)
6409 GenTree* offsetNode = nullptr;
6410 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6412 offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->gtField.gtFieldLookup.addr,
6413 GTF_ICON_FIELD_HDL, false);
6417 noway_assert(!"unexpected accessType for R2R field access");
6420 var_types addType = (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF;
6421 addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode);
6423 #endif // FEATURE_READYTORUN_COMPILER
6424 if (fldOffset != 0)
6426 // Generate the "addr" node.
6427 /* Add the member offset to the object's address */
6428 FieldSeqNode* fieldSeq =
6429 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6430 addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6431 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6434 // Now let's set the "tree" as a GT_IND tree.
6436 tree->SetOper(GT_IND);
6437 tree->gtOp.gtOp1 = addr;
6439 tree->gtFlags &= (~GTF_EXCEPT | addr->gtFlags);
6440 tree->SetIndirExceptionFlags(this);
6442 if (addExplicitNullCheck)
6445 // Create "comma2" node and link it to "tree".
6446 GenTree* comma2;
6448 comma2 = gtNewOperNode(GT_COMMA,
6449 addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6450 comma, addr);
6451 tree->gtOp.gtOp1 = comma2;
6457 if (addExplicitNullCheck)
6459 printf("After adding explicit null check:\n");
6465 else /* This is a static data member */
6467 if (tree->gtFlags & GTF_IND_TLS_REF)
6469 // Thread Local Storage static field reference
6471 // Field ref is a TLS 'Thread-Local-Storage' reference
6473 // Build this tree:  IND(*) #
6474 //                     |
6475 //                    ADD(I_IMPL)
6476 //                    / \
6477 //                   /  CNS(fldOffset)
6478 //                  /
6481 //                IND(I_IMPL) == [Base of this DLL's TLS]
6482 //                 |
6483 //                ADD(I_IMPL)
6484 //                / \
6485 //               /  CNS(IdValue*4) or MUL
6486 //              /                     / \
6487 //             IND(I_IMPL)          /  CNS(4)
6488 //              |                  /
6489 //             CNS(TLS_HDL,0x2C) IND
6490 //                                |
6491 //                               CNS(pIdAddr)
6493 // # Denotes the original node
6495 void** pIdAddr = nullptr;
6496 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
6499 // If we can access the TLS DLL index ID value directly,
6500 // then pIdAddr will be NULL and
6501 // IdValue will be the actual TLS DLL index ID.
6503 GenTree* dllRef = nullptr;
6504 if (pIdAddr == nullptr)
6508 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6513 dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_STATIC_HDL, true);
6515 // Next we multiply by 4
6516 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6519 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6521 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6523 GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6525 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6526 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6528 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6529 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
6532 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6534 if (dllRef != nullptr)
6536 /* Add the dllRef */
6537 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6540 /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
6541 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6545 FieldSeqNode* fieldSeq =
6546 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6547 GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6549 /* Add the TLS static field offset to the address */
6551 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6554 // Final indirect to get to actual value of TLS static field
6556 tree->SetOper(GT_IND);
6557 tree->gtOp.gtOp1 = tlsRef;
6559 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6563 // Normal static field reference
6566 // If we can access the static's address directly,
6567 // then pFldAddr will be NULL and
6568 // fldAddr will be the actual address of the static field.
6570 void** pFldAddr = nullptr;
6571 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6573 if (pFldAddr == nullptr)
6575 #ifdef _TARGET_64BIT_
6576 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
6578 // The address is not directly addressable, so force it into a
6579 // constant, so we handle it properly.
6581 GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6582 addr->gtType = TYP_I_IMPL;
6583 FieldSeqNode* fieldSeq =
6584 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6585 addr->gtIntCon.gtFieldSeq = fieldSeq;
6586 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6587 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6589 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6590 addr->gtFlags |= GTF_ICON_INITCLASS;
6593 tree->SetOper(GT_IND);
6594 tree->gtOp.gtOp1 = addr;
6596 return fgMorphSmpOp(tree);
6599 #endif // _TARGET_64BIT_
6601 // Only volatile or classinit could be set, and they map over
6602 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
6603 static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
6604 static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
6605 tree->SetOper(GT_CLS_VAR);
6606 tree->gtClsVar.gtClsVarHnd = symHnd;
6607 FieldSeqNode* fieldSeq =
6608 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6609 tree->gtClsVar.gtFieldSeq = fieldSeq;
6612 return tree;
6614 else // pFldAddr != nullptr
6616 GenTree* addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6618 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6619 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6621 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6622 addr->gtFlags |= GTF_ICON_INITCLASS;
6625 // There are two cases here: either the static is RVA-based,
6626 // in which case the type of the FIELD node is not a GC type
6627 // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
6628 // a GC type and the handle to it is a TYP_BYREF in the GC heap
6629 // because handles to statics now go into the large object heap.
6631 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6632 GenTree* op1 = gtNewOperNode(GT_IND, handleTyp, addr);
6633 op1->gtFlags |= GTF_IND_INVARIANT;
6635 tree->SetOper(GT_IND);
6636 tree->gtOp.gtOp1 = op1;
6640 noway_assert(tree->gtOper == GT_IND);
6642 if (fldOffset == 0)
6644 GenTree* addr = tree->gtOp.gtOp1;
6646 // 'addr' may be a GT_COMMA. Skip over any comma nodes
6647 addr = addr->gtEffectiveVal();
6652 printf("\nBefore calling fgAddFieldSeqForZeroOffset:\n");
6657 // We expect 'addr' to be an address at this point.
6658 assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF);
6660 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6661 FieldSeqNode* fieldSeq =
6662 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6663 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6666 // Pass down the current mac; if non null we are computing an address
6667 GenTree* result = fgMorphSmpOp(tree, mac);
6672 printf("\nFinal value of Compiler::fgMorphField after calling fgMorphSmpOp:\n");
6680 //------------------------------------------------------------------------------
6681 // fgMorphCallInline: attempt to inline a call
6684 // call - call expression to inline, inline candidate
6685 // inlineResult - result tracking and reporting
6688 // Attempts to inline the call.
6690 // If successful, callee's IR is inserted in place of the call, and
6691 // is marked with an InlineContext.
6693 // If unsuccessful, the transformations done in anticipation of a
6694 // possible inline are undone, and the candidate flag on the call
6695 // is cleared.
6697 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
6699 bool inliningFailed = false;
6701 // Is this call an inline candidate?
6702 if (call->IsInlineCandidate())
6704 // Attempt the inline
6705 fgMorphCallInlineHelper(call, inlineResult);
6707 // We should have made up our minds one way or another....
6708 assert(inlineResult->IsDecided());
6710 // If we failed to inline, we have a bit of work to do to clean up
6711 if (inlineResult->IsFailure())
6716 // Before we do any cleanup, create a failing InlineContext to
6717 // capture details of the inlining attempt.
6718 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6722 inliningFailed = true;
6724 // Clear the Inline Candidate flag so we can ensure later we tried
6725 // inlining all candidates.
6727 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6732 // This wasn't an inline candidate. So it must be a GDV candidate.
6733 assert(call->IsGuardedDevirtualizationCandidate());
6735 // We already know we can't inline this call, so don't even bother to try.
6736 inliningFailed = true;
6739 // If we failed to inline (or didn't even try), do some cleanup.
6740 if (inliningFailed)
6742 if (call->gtReturnType != TYP_VOID)
6744 JITDUMP("Inlining [%06u] failed, so bashing [%06u] to NOP\n", dspTreeID(call), dspTreeID(fgMorphStmt));
6746 // Detach the GT_CALL tree from the original statement by
6747 // hanging a "nothing" node onto it. Later the "nothing" node will be removed
6748 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6750 noway_assert(fgMorphStmt->gtStmtExpr == call);
6751 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
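// Note: any GT_RET_EXPR in the surrounding trees that referred to this candidate
// resolves back to the original GT_CALL, so detaching the call here leaves no
// dangling uses.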
6756 /*****************************************************************************
6757 * Helper to attempt to inline a call
6758 * Sets success/failure in inline result
6759 * If success, modifies current method's IR with inlinee's IR
6760 * If failed, undoes any speculative modifications to current method
6763 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6765 // Don't expect any surprises here.
6766 assert(result->IsCandidate());
6768 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6770 // For now, attributing this to call site, though it's really
6771 // more of a budget issue (lvaCount currently includes all
6772 // caller and prospective callee locals). We still might be
6773 // able to inline other callees into this caller, or inline
6774 // this callee in other callers.
6775 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6776 return;
6779 if (call->IsVirtual())
6781 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6782 return;
6785 // Re-check this because guarded devirtualization may allow these through.
6786 if (gtIsRecursiveCall(call) && call->IsImplicitTailCall())
6788 result->NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL);
6789 return;
6792 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6793 // and recursive tail calls as inline candidates.
6794 noway_assert(!call->IsTailPrefixedCall());
6795 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6797 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6798 Although we have checked this in impCanInline, it is possible that later IL instructions
6799 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again. */
6802 if (opts.compNeedSecurityCheck)
6804 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6805 return;
6809 // Calling inlinee's compiler to inline the method.
6812 unsigned startVars = lvaCount;
6817 printf("Expanding INLINE_CANDIDATE in statement ");
6818 printTreeID(fgMorphStmt);
6819 printf(" in " FMT_BB ":\n", compCurBB->bbNum);
6820 gtDispTree(fgMorphStmt);
6821 if (call->IsImplicitTailCall())
6823 printf("Note: candidate is implicit tail call\n");
6828 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6831 // Invoke the compiler to inline the call.
6834 fgInvokeInlineeCompiler(call, result);
6836 if (result->IsFailure())
6838 // Undo some changes made in anticipation of inlining...
6840 // Zero out the used locals
6841 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6842 for (unsigned i = startVars; i < lvaCount; i++)
6844 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor.
6847 lvaCount = startVars;
6852 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6862 // printf("After inlining lvaCount=%d.\n", lvaCount);
6867 //------------------------------------------------------------------------
6868 // fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp.
6871 // callee - The callee to check
6874 // Returns true or false based on whether the callee can be fastTailCalled
6877 // This function is target specific and each target will make the fastTailCall
6878 // decision differently. See the notes below.
6882 // A fast tail call can be made whenever the number of callee arguments
6883 // is less than or equal to the number of caller arguments, or we have four
6884 // or fewer callee arguments. This is because, on Windows AMD64, each
6885 // argument uses exactly one register or one 8-byte stack slot. Thus, we only
6886 // need to count arguments, and not be concerned with the size of each
6887 // incoming or outgoing argument.
6889 // Can fast tail call examples (amd64 Windows):
6891 // -- Callee will have all register arguments --
6892 // caller(int, int, int, int)
6893 // callee(int, int, float, int)
6895 // -- Callee requires stack space that is equal to the caller --
6896 // caller(struct, struct, struct, struct, struct, struct)
6897 // callee(int, int, int, int, int, int)
6899 // -- Callee requires stack space that is less than the caller --
6900 // caller(struct, double, struct, float, struct, struct)
6901 // callee(int, int, int, int, int)
6903 // -- Callee will have all register arguments --
6904 // caller(int)
6905 // callee(int, int, int, int)
6907 // Cannot fast tail call examples (amd64 Windows):
6909 // -- Callee requires stack space that is larger than the caller --
6910 // caller(struct, double, struct, float, struct, struct)
6911 // callee(int, int, int, int, int, double, double, double)
6913 // Unix Amd64 && Arm64:
6914 // A fastTailCall decision can be made whenever the callee's stack space is
6915 // less than or equal to the caller's stack space. There are many permutations
6916 // of when the caller and callee have different stack sizes if there are
6917 // structs being passed to either the caller or callee.
6920 // 1) If the callee has structs which cannot be enregistered it will be
6921 // reported as cannot fast tail call. This is an implementation limitation
6922 // where only the callee is checked for non-enregisterable structs. This is
6923 // tracked with https://github.com/dotnet/coreclr/issues/12644.
6925 // 2) If the caller or callee has stack arguments and the callee has more
6926 // arguments than the caller, it will be reported as cannot fast tail call.
6927 // This is due to a bug in LowerFastTailCall which assumes that
6928 // nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This
6929 // is tracked with https://github.com/dotnet/coreclr/issues/12468.
6931 // 3) If the callee has a 9 to 16 byte struct argument and the callee has
6932 // stack arguments, the decision will be to not fast tail call. This is
6933 // because, before fgMorphArgs is done, it is unknown whether the struct
6934 // will be placed on the stack or enregistered. Therefore, the conservative
6935 // decision of do not fast tail call is taken. This limitation should be
6936 // removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
6938 // 4) Arm64 Only, if there are HFA arguments and the callee has stack
6939 // arguments, the decision will be reported as cannot fast tail call.
6940 // This is because, before fgMorphArgs is done, it is unknown whether the struct
6941 // will be placed on the stack or enregistered. Therefore, the conservative
6942 // decision of do not fast tail call is taken.
6944 // Can fast tail call examples (amd64 Unix):
6946 // -- Callee will have all register arguments --
6947 // caller(int, int, int, int)
6948 // callee(int, int, float, int)
6950 // -- Callee requires stack space that is equal to the caller --
6951 // caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack
6952 // space
6953 // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
6955 // -- Callee requires stack space that is less than the caller --
6956 // caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack
6957 // space
6958 // callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space
6960 // -- Callee will have all register arguments --
6961 // caller(int)
6962 // callee(int, int, int, int)
6964 // Cannot fast tail call examples (amd64 Unix):
6966 // -- Callee requires stack space that is larger than the caller --
6967 // caller(float, float, float, float, float, float, float, float) -- 8 float register arguments
6968 // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
6970 // -- Callee has structs which cannot be enregistered (Implementation Limitation) --
6971 // caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register
6972 // arguments, 24 byte stack space
6973 // callee({ double, double, double }) -- 24 bytes stack space
6975 // -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) --
6976 // caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space
6977 // callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space
6979 // -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) --
6980 // caller({ double, double, double, double, double, double }) // 48 byte stack
6981 // callee(int, int) -- 2 int registers
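// In short, for Windows AMD64: each argument is exactly one register or one 8-byte
// stack slot, and there are four argument registers, so a callee with N args needs
// max(N - 4, 0) stack slots; the examples above reduce to comparing that count for
// the callee against the caller's.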
6983 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6985 #if FEATURE_FASTTAILCALL
6986 // To reach here means that the return types of the caller and callee are tail call compatible.
6987 // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6989 // In an implicit tail call case callSig may not be available but it is guaranteed to be available
6990 // for explicit tail call cases. The reason implicit tail case callSig may not be available is that
6991 // a call node might be marked as an inline candidate and could fail to be inlined. In that case,
6992 // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which is
6993 // currently not copying/setting callSig.
6994 CLANG_FORMAT_COMMENT_ANCHOR;
6997 if (callee->IsTailPrefixedCall())
6999 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
7000 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
7004 auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) {
7006 if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
7008 if (callee->gtCallType != CT_INDIRECT)
7010 const char* methodName;
7012 methodName = eeGetMethodFullName(callee->gtCallMethHnd);
7014 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
7015 info.compFullName, methodName);
7019 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
7024 if (callerStackSize != -1)
7026 printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n", msg, callerStackSize, calleeStackSize);
7030 printf("%s\n\n", msg);
7035 JITDUMP("[Fast tailcall decision]: %s\n", msg);
7043 // Note on vararg methods:
7044 // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
7045 // But we can be sure that the in-coming arg area of the vararg caller would be sufficient to hold its
7046 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
7047 // out-going area required for the callee is bounded by the caller's fixed argument space.
7049 // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
7050 unsigned nCallerArgs = info.compArgsCount;
7052 size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount;
7053 size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount;
7055 // Count the callee args including implicit and hidden.
7056 // Note that GenericContext and VarargCookie are added by importer while
7057 // importing the call to gtCallArgs list along with explicit user args.
7058 size_t calleeArgRegCount = 0;
7059 size_t calleeFloatArgRegCount = 0;
7061 if (callee->gtCallObjp) // thisPtr
7063 ++calleeArgRegCount;
7066 if (callee->HasRetBufArg()) // RetBuf
7068 // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs.
7070 // If callee has RetBuf param, caller too must have it.
7071 // Otherwise go the slow route.
7072 if (info.compRetBuffArg == BAD_VAR_NUM)
7074 reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0);
7079 // Count user args while tracking whether any of them is a multi-byte param
7080 // that cannot be passed in a register. Note that we don't need to count
7081 // non-standard and secret params passed in registers (e.g. R10, R11) since
7082 // these won't contribute to out-going arg size.
7083 // For each struct arg, hasMultiByteStackArgs will track if it can be passed in registers.
7084 // If it cannot, we will break the loop and not fastTailCall. This is an implementation limitation
7085 // where only the callee is checked for non-enregisterable structs.
7086 // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
7087 bool hasMultiByteStackArgs = false;
7088 bool hasTwoSlotSizedStruct = false;
7089 bool hasHfaArg = false;
7090 size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have.
7091 size_t calleeStackSize = 0;
7092 for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
7095 assert(args->OperIsList());
7096 GenTree* argx = args->gtOp.gtOp1;
7098 if (varTypeIsStruct(argx))
7100 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
7101 argx = argx->gtEffectiveVal(true /*commaOnly*/);
7103 // Get the size of the struct and see if it is register passable.
7104 CORINFO_CLASS_HANDLE objClass = nullptr;
7106 if (argx->OperGet() == GT_OBJ)
7108 objClass = argx->AsObj()->gtClass;
7110 else if (argx->IsLocal())
7112 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
7114 if (objClass != nullptr)
7116 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
7118 unsigned typeSize = 0;
7119 // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true.
7120 assert(!hasMultiByteStackArgs);
7121 hasMultiByteStackArgs =
7122 !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false, false);
7124 #if defined(UNIX_AMD64_ABI)
7125 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
7127 assert(objClass != nullptr);
7128 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
7130 if (structDesc.passedInRegisters)
7132 if (structDesc.eightByteCount == 2)
7134 hasTwoSlotSizedStruct = true;
7137 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
7139 if (structDesc.IsIntegralSlot(i))
7141 ++calleeArgRegCount;
7143 else if (structDesc.IsSseSlot(i))
7145 ++calleeFloatArgRegCount;
7149 assert(false && "Invalid eightbyte classification type.");
7156 calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE);
7157 hasMultiByteStackArgs = true;
7160 #elif defined(_TARGET_ARM64_) // ARM64
7161 var_types hfaType = GetHfaType(argx);
7162 bool isHfaArg = varTypeIsValidHfaType(hfaType);
7169 calleeFloatArgRegCount += GetHfaCount(argx);
7173 // Structs are either passed in 1 or 2 (64-bit) slots
7174 size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE);
7175 size = roundupSize / TARGET_POINTER_SIZE;
7184 hasTwoSlotSizedStruct = true;
7187 calleeArgRegCount += size;
7190 #elif defined(WINDOWS_AMD64_ABI)
7192 ++calleeArgRegCount;
7194 #endif // UNIX_AMD64_ABI
7197 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7199 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7203 hasMultiByteStackArgs = true;
7208 varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount;
7211 // We can break early on multiByte cases.
7212 if (hasMultiByteStackArgs)
7214 break;
7218 const unsigned maxRegArgs = MAX_REG_ARG;
7219 hasTwoSlotSizedStruct = hasTwoSlotSizedStruct || info.compHasMultiSlotArgs;
7221 // If we reached here, it means that the callee has only those argument types which can be passed in
7222 // a register and if passed on stack will occupy exactly one stack slot in out-going arg area.
7223 // If we are passing args on stack for the callee and it has more args passed on stack than
7224 // the caller, then fast tail call cannot be performed.
7226 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7227 // as non-interruptible for fast tail calls.
7229 #ifdef WINDOWS_AMD64_ABI
7230 assert(calleeStackSize == 0);
7231 size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs)
7232 ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs
7233 : 0;
7234 calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE;
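// Worked example: a callee taking 6 register-sized args has 6 > maxRegArgs (4), so
// calleeStackSlots == 2 and calleeStackSize == 2 * TARGET_POINTER_SIZE == 16 bytes.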
7235 size_t callerStackSize = info.compArgStackSize;
7237 bool hasStackArgs = false;
7239 if (callerStackSize > 0 || calleeStackSize > 0)
7241 hasStackArgs = true;
7244 // Go the slow route if it has multi-byte params. This is an implementation
7245 // limitation; see https://github.com/dotnet/coreclr/issues/12644.
7246 if (hasMultiByteStackArgs)
7248 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7252 // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then
7253 // make sure the callee's incoming argument count is no greater than the caller's
7254 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
7256 reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
7261 #elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
7263 // For *nix Amd64 and Arm64 check to see if all arguments for the callee
7264 // and caller are passing in registers. If not, ensure that the outgoing argument stack size
7265 // requirement for the callee is less than or equal to the caller's entire stack frame usage.
7267 // Also, in the case that we have to pass arguments on the stack make sure
7268 // that we are not dealing with structs that are >8 bytes.
7270 bool hasStackArgs = false;
7271 size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG;
7273 size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0;
7274 size_t calleeFloatStackArgCount =
7275 calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0;
7277 size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount;
7278 size_t callerStackSize = info.compArgStackSize;
7279 calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE;
7281 if (callerStackSize > 0 || calleeStackSize > 0)
7283 hasStackArgs = true;
7286 // Go the slow route if it has multi-byte params. This is an implementation
7287 // limitation; see https://github.com/dotnet/coreclr/issues/12644.
7288 if (hasMultiByteStackArgs)
7290 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7294 // Either the caller or callee has a >8 and <=16 byte struct and arguments that have to go on the stack. Do not
7295 // fastTailCall.
7297 // When either the caller or callee have multi-slot stack arguments we cannot safely
7298 // shuffle arguments in LowerFastTailCall. See https://github.com/dotnet/coreclr/issues/12468.
7299 if (hasStackArgs && hasTwoSlotSizedStruct)
7301 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct",
7302 callerStackSize, calleeStackSize);
7303 return false;
7306 // Callee has an HFA struct and arguments that have to go on the stack. Do not fastTailCall.
7307 if (calleeStackSize > 0 && hasHfaArg)
7309 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize,
7317 // LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is
7318 // not true in many cases on x64 linux, remove this pessimization when
7319 // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468
7320 // for more information.
7321 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
7323 reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
7328 if (calleeStackSize > callerStackSize)
7330 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize,
7337 NYI("fastTailCall not supported on this Architecture.");
7339 #endif // WINDOWS_AMD64_ABI
7341 reportFastTailCallDecision("Will fastTailCall", callerStackSize, calleeStackSize);
7343 #else // FEATURE_FASTTAILCALL
7348 /*****************************************************************************
7350 * Transform the given GT_CALL tree for tail call code generation.
7352 void Compiler::fgMorphTailCall(GenTreeCall* call, void* pfnCopyArgs)
7354 #if defined(_TARGET_UNIX_)
7355 noway_assert(!"Slow tail calls not supported on non-Windows platforms.");
7358 JITDUMP("fgMorphTailCall (before):\n");
7361 // The runtime requires that we perform a null check on the `this` argument before
7362 // tail calling to a virtual dispatch stub. This requirement is a consequence of limitations
7363 // in the runtime's ability to map an AV to a NullReferenceException if
7364 // the AV occurs in a dispatch stub that has an unmanaged caller.
7365 if (call->IsVirtualStub())
7367 call->gtFlags |= GTF_CALL_NULLCHECK;
7370 #if defined(_TARGET_ARM_)
7371 // For the helper-assisted tail calls, we need to push all the arguments
7372 // into a single list, and then add a few extra at the beginning
7374 // Check for PInvoke call types that we don't handle in codegen yet.
7375 assert(!call->IsUnmanaged());
7376 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7378 // First move the this pointer (if any) onto the regular arg list
7379 GenTree* thisPtr = NULL;
7380 if (call->gtCallObjp)
7382 GenTree* objp = call->gtCallObjp;
7383 call->gtCallObjp = NULL;
7385 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7387 thisPtr = gtClone(objp, true);
7388 var_types vt = objp->TypeGet();
7389 if (thisPtr == NULL)
7391 // Too complex, so use a temp
7392 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7393 GenTree* asg = gtNewTempAssign(lclNum, objp);
7394 if (!call->IsVirtualVtable())
7396 // Add an indirection to get the nullcheck
7397 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7398 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7399 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7401 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7402 thisPtr = gtNewLclvNode(lclNum, vt);
7404 else if (!call->IsVirtualVtable())
7406 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7407 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
7408 thisPtr = gtClone(thisPtr, true);
7411 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7414 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7417 // Add the extra VSD parameter if needed
7418 if (call->IsVirtualStub())
7420 GenTree* stubAddrArg = fgGetStubAddrArg(call);
7422 // We don't need this arg to be in the normal stub register, so
7423 // clear out the register assignment.
7424 assert(stubAddrArg->gtRegNum == virtualStubParamInfo->GetReg());
7425 stubAddrArg->gtRegNum = REG_NA;
7427 // And push the stub address onto the list of arguments
7428 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7430 else if (call->IsVirtualVtable())
7432 noway_assert(thisPtr != NULL);
7434 GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7435 GenTree* vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7436 vtbl->gtFlags |= GTF_EXCEPT;
7438 unsigned vtabOffsOfIndirection;
7439 unsigned vtabOffsAfterIndirection;
7440 bool isRelative;
7441 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
7442 &isRelative);
7444 /* Get the appropriate vtable chunk */
7446 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
7448 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7450 GenTree* indOffTree = nullptr;
7452 if (isRelative)
7454 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7455 nullptr DEBUGARG("virtual table call"));
7458 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7460 if (isRelative)
7462 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7466 /* Now the appropriate vtable slot */
7468 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7470 GenTree* indOffTree = nullptr;
7472 if (isRelative)
7474 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7475 nullptr DEBUGARG("virtual table call 2"));
7478 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7480 if (isRelative)
7482 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7485 // Switch this to a plain indirect call
7486 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7487 assert(!call->IsVirtual());
7488 call->gtCallType = CT_INDIRECT;
7490 call->gtCallAddr = vtbl;
7491 call->gtCallCookie = NULL;
7492 call->gtFlags |= GTF_EXCEPT;
7495 // Now inject a placeholder for the real call target that codegen will generate
7496 GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
7497 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7499 // Lastly inject the pointer for the copy routine
7500 noway_assert(pfnCopyArgs != nullptr);
7501 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7502 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7504 // It is now a varargs tail call
7505 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7506 call->gtFlags &= ~GTF_CALL_POP_ARGS;
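// The resulting ARM argument list, front to back, is now:
//   [copy routine, call target placeholder, stub address (VSD only), "this" (if any), original args]
// reflecting the prepend order used above.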
7508 #elif defined(_TARGET_XARCH_)
7510 // For the helper-assisted tail calls, we need to push all the arguments
7511 // into a single list, and then add a few extra at the beginning or end.
7513 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7515 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7517 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7518 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7519 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7521 // For x86, the tailcall helper is defined as:
7523 // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
7524 // callTarget)
7526 // Note that the special arguments are on the stack, whereas the function arguments follow
7527 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7528 // look like (highest address at the top):
7529 // first normal stack argument
7530 // ...
7531 // last normal stack argument
7532 // numberOfOldStackArgs
7533 // numberOfNewStackArgs
7534 // flags
7535 // callTarget
7537 // Each special arg is 4 bytes.
7539 // 'flags' is a bitmask where:
7540 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7541 // callee-saved registers for tailcall functions. Note that the helper assumes
7542 // that the callee-saved registers live immediately below EBP, and must have been
7543 // pushed in this order: EDI, ESI, EBX.
7544 // 2 == call target is a virtual stub dispatch.
7546 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7547 // on the custom calling convention.
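// So, conceptually, a tail call to "f(a, b)" on x86 is dispatched as
//   JIT_TailCall(a, b, numberOfOldStackArgsWords, numberOfNewStackArgsWords, flags, callTarget)
// with the four trailing special args injected below and patched up during Lowering.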
7549 // Check for PInvoke call types that we don't handle in codegen yet.
7550 assert(!call->IsUnmanaged());
7551 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7553 // Don't support tail calling helper methods
7554 assert(call->gtCallType != CT_HELPER);
7556 // We come this route only for tail prefixed calls that cannot be dispatched as
7557 // fast tail calls.
7558 assert(!call->IsImplicitTailCall());
7559 assert(!fgCanFastTailCall(call));
7561 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7562 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7563 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7564 // addition, for all platforms, we are going to change the call into a helper call. Our code
7565 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7566 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7567 // since special 'this' pointer handling will no longer kick in.
7569 // Some call types, such as virtual vtable calls, require creating a call address expression
7570 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7571 // to create a temporary that is assigned to the "this" pointer expression, and then use
7572 // that temp to create the call address expression. This temp creation embedded statement
7573 // will occur immediately before the "this" pointer argument, and then will be used for both
7574 // the "this" pointer argument as well as the call address expression. In the normal ordering,
7575 // the embedded statement establishing the "this" pointer temp will execute before both uses
7576 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7577 // normal call argument list, and insert a placeholder which will hold the call address
7578 // expression. For non-x86, things are ok, because the order of execution of these is not
7579 // altered. However, for x86, the call address expression is inserted as the *last* argument
7580 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7581 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7582 // for those cases where call lowering creates an embedded form temp of "this", we will
7583 // create a temp here, early, that will later get morphed correctly.
7585 if (call->gtCallObjp)
7587 GenTree* thisPtr = nullptr;
7588 GenTree* objp = call->gtCallObjp;
7589 call->gtCallObjp = nullptr;
7591 #ifdef _TARGET_X86_
7592 if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7595 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7596 GenTree* asg = gtNewTempAssign(lclNum, objp);
7598 // COMMA(tmp = "this", tmp)
7599 var_types vt = objp->TypeGet();
7600 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7601 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7605 #endif // _TARGET_X86_
7607 if (call->NeedsNullCheck())
7609 // clone "this" if "this" has no side effects.
7610 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7612 thisPtr = gtClone(objp, true);
7615 var_types vt = objp->TypeGet();
7616 if (thisPtr == nullptr)
7618 // create a temp if either "this" has side effects or "this" is too complex to clone.
7621 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7622 GenTree* asg = gtNewTempAssign(lclNum, objp);
7624 // COMMA(tmp = "this", deref(tmp))
7625 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7626 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7627 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7629 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7630 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7634 // thisPtr = COMMA(deref("this"), "this")
7635 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7636 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7639 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7646 // During rationalization tmp="this" and null check will
7647 // materialize as embedded stmts in right execution order.
7648 assert(thisPtr != nullptr);
7649 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7652 #if defined(_TARGET_AMD64_)
7654 // Add the extra VSD parameter to arg list in case of VSD calls.
7655 // Tail call arg copying thunk will move this extra VSD parameter
7656 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7657 // in Stublinkerx86.cpp for more details.
7658 if (call->IsVirtualStub())
7660 GenTree* stubAddrArg = fgGetStubAddrArg(call);
7662 // We don't need this arg to be in the normal stub register, so
7663 // clear out the register assignment.
7664 assert(stubAddrArg->gtRegNum == virtualStubParamInfo->GetReg());
7665 stubAddrArg->gtRegNum = REG_NA;
7667 // And push the stub address onto the list of arguments
7668 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7671 // Now inject a placeholder for the real call target that the Lowering phase will generate.
7672 GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
7673 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7675 // Inject the pointer for the copy routine to be used for struct copying
7676 noway_assert(pfnCopyArgs != nullptr);
7677 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7678 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7680 #else // !_TARGET_AMD64_
7682 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7683 // append to the list.
7684 GenTreeArgList** ppArg = &call->gtCallArgs;
7685 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7687 ppArg = (GenTreeArgList**)&args->gtOp2;
7689 assert(ppArg != nullptr);
7690 assert(*ppArg == nullptr);
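// A sketch of the argument list being built below (assuming the x86 tail call
// helper's convention; CORINFO_HELP_TAILCALL is the actual consumer):
//
//   [original args...], numberOfOldStackArgs, numberOfNewStackArgs, flags, callTarget
//
// Only numberOfOldStackArgs is computed here with its real value; the other three
// are placeholder constants that the Lowering phase replaces.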
7692 unsigned nOldStkArgsWords =
7693 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7694 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7695 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7696 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7698 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7699 // The constant will be replaced.
7700 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7701 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7702 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7704 // Inject a placeholder for the flags.
7705 // The constant will be replaced.
7706 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7707 *ppArg = gtNewListNode(arg1, nullptr);
7708 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7710 // Inject a placeholder for the real call target that the Lowering phase will generate.
7711 // The constant will be replaced.
7712 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7713 *ppArg = gtNewListNode(arg0, nullptr);
7715 #endif // !_TARGET_AMD64_
7717 // It is now a varargs tail call dispatched via helper.
7718 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7719 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7721 #elif defined(_TARGET_ARM64_)
7722 NYI_ARM64("Tail calls via stub are unsupported on this platform.");
7723 #endif // _TARGET_ARM64_
7725 // The function is responsible for doing an explicit null check when it is necessary.
7726 assert(!call->NeedsNullCheck());
7728 JITDUMP("fgMorphTailCall (after):\n");
7732 //------------------------------------------------------------------------
7733 // fgGetStubAddrArg: Return the virtual stub address for the given call.
7736 // the JIT must place the address of the stub used to load the call target,
7737 // the "stub indirection cell", in special call argument with special register.
7740 // call - a call that needs virtual stub dispatching.
7743 // addr tree with register requirements set.
7745 GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call)
7747 assert(call->IsVirtualStub());
7748 GenTree* stubAddrArg;
7749 if (call->gtCallType == CT_INDIRECT)
7751 stubAddrArg = gtClone(call->gtCallAddr, true);
7755 assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7756 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7757 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7759 assert(stubAddrArg != nullptr);
7760 stubAddrArg->gtRegNum = virtualStubParamInfo->GetReg();
7764 //------------------------------------------------------------------------------
7765 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7769 // block - basic block ending with a recursive fast tail call
7770 // recursiveTailCall - recursive tail call to transform
7773 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
7775 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7777 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7778 GenTreeStmt* lastStmt = block->lastStmt();
7779 assert(recursiveTailCall == lastStmt->gtStmtExpr);
7781 // Transform recursive tail call into a loop.
7783 GenTreeStmt* earlyArgInsertionPoint = lastStmt;
7784 IL_OFFSETX callILOffset = lastStmt->gtStmtILoffsx;
7786 // Hoist arg setup statement for the 'this' argument.
7787 GenTree* thisArg = recursiveTailCall->gtCallObjp;
7788 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7790 GenTreeStmt* thisArgStmt = gtNewStmt(thisArg, callILOffset);
7791 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7794 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7795 // then the temps need to be assigned to the method parameters. This is done so that the caller
7796 // parameters are not re-assigned before call arguments depending on them are evaluated.
7797 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7798 // where the next temp or parameter assignment should be inserted.
7800 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7801 // while the second call argument (const 1) doesn't.
7802 // Basic block before tail recursion elimination:
7803 // ***** BB04, stmt 1 (top level)
7804 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
7805 // [000033] --C - G------ - \--* call void RecursiveMethod
7806 // [000030] ------------ | / --* const int - 1
7807 // [000031] ------------arg0 in rcx + --* +int
7808 // [000029] ------------ | \--* lclVar int V00 arg1
7809 // [000032] ------------arg1 in rdx \--* const int 1
7812 // Basic block after tail recursion elimination :
7813 // ***** BB04, stmt 1 (top level)
7814 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7815 // [000030] ------------ | / --* const int - 1
7816 // [000031] ------------ | / --* +int
7817 // [000029] ------------ | | \--* lclVar int V00 arg1
7818 // [000050] - A---------- \--* = int
7819 // [000049] D------N---- \--* lclVar int V02 tmp0
7821 // ***** BB04, stmt 2 (top level)
7822 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7823 // [000052] ------------ | / --* lclVar int V02 tmp0
7824 // [000054] - A---------- \--* = int
7825 // [000053] D------N---- \--* lclVar int V00 arg0
7827 // ***** BB04, stmt 3 (top level)
7828 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7829 // [000032] ------------ | / --* const int 1
7830 // [000057] - A---------- \--* = int
7831 // [000056] D------N---- \--* lclVar int V01 arg1
7833 GenTreeStmt* tmpAssignmentInsertionPoint = lastStmt;
7834 GenTreeStmt* paramAssignmentInsertionPoint = lastStmt;
7836 // Process early args. They may contain both setup statements for late args and actual args.
7837 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7838 // below has the correct second argument.
7839 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7840 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7841 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7843 GenTree* earlyArg = earlyArgs->Current();
7844 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7846 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7848 // This is a setup node so we need to hoist it.
7849 GenTreeStmt* earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7850 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7854 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7855 fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7856 GenTreeStmt* paramAssignStmt =
7857 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7858 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7859 if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr))
7861 // All temp assignments will happen before the first param assignment.
7862 tmpAssignmentInsertionPoint = paramAssignStmt;
7868 // Process late args.
7869 int lateArgIndex = 0;
7870 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7871 (lateArgIndex++, lateArgs = lateArgs->Rest()))
7873 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7874 GenTree* lateArg = lateArgs->Current();
7875 fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7876 GenTreeStmt* paramAssignStmt =
7877 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7878 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7880 if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr))
7882 // All temp assignments will happen before the first param assignment.
7883 tmpAssignmentInsertionPoint = paramAssignStmt;
7887 // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
7888 // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7889 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7890 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7892 var_types thisType = lvaTable[info.compThisArg].TypeGet();
7893 GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType);
7894 GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7895 GenTreeStmt* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7896 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7899 // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog
7900 // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization
7901 // for all non-parameter IL locals as well as temp structs with GC fields.
7902 // Liveness phase will remove unnecessary initializations.
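// Sketch of the initialization inserted below: for a struct local this is a block
// init, "ASG(BLK(lcl, size), 0)" (then morphed by fgMorphInitBlock), and for a
// scalar local it is "ASG(lcl, 0)"; each statement is inserted before the last
// statement of the block (the recursive call, which is subsequently removed).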
7903 if (info.compInitMem)
7907 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7909 #if FEATURE_FIXED_OUT_ARGS
7910 if (varNum == lvaOutgoingArgSpaceVar)
7914 #endif // FEATURE_FIXED_OUT_ARGS
7915 if (!varDsc->lvIsParam)
7917 var_types lclType = varDsc->TypeGet();
7918 bool isUserLocal = (varNum < info.compLocalsCount);
7919 bool structWithGCFields = ((lclType == TYP_STRUCT) && (varDsc->lvStructGcCount > 0));
7920 if (isUserLocal || structWithGCFields)
7922 GenTree* lcl = gtNewLclvNode(varNum, lclType);
7923 GenTree* init = nullptr;
7924 if (varTypeIsStruct(lclType))
7926 const bool isVolatile = false;
7927 const bool isCopyBlock = false;
7928 init = gtNewBlkOpNode(lcl, gtNewIconNode(0), varDsc->lvSize(), isVolatile, isCopyBlock);
7929 init = fgMorphInitBlock(init);
7933 GenTree* zero = gtNewZeroConNode(genActualType(lclType));
7934 init = gtNewAssignNode(lcl, zero);
7936 GenTreeStmt* initStmt = gtNewStmt(init, callILOffset);
7937 fgInsertStmtBefore(block, lastStmt, initStmt);
7944 fgRemoveStmt(block, lastStmt);
7946 // Set the loop edge. Ensure we have a scratch block and then target the
7947 // next block. Loop detection needs to see a pred out of the loop, so
7948 // mark the scratch block BBF_DONT_REMOVE to prevent empty block removal
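// Sketch of the resulting flow (fgFirstBB being the scratch block):
//   fgFirstBB (scratch, BBF_DONT_REMOVE) -> loop head (fgFirstBB->bbNext) -> ...
//   -> this block, which now ends in BBJ_ALWAYS back to the loop head instead of returning.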
7950 fgEnsureFirstBBisScratch();
7951 fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
7952 block->bbJumpKind = BBJ_ALWAYS;
7953 block->bbJumpDest = fgFirstBB->bbNext;
7954 fgAddRefPred(block->bbJumpDest, block);
7955 block->bbFlags &= ~BBF_HAS_JMP;
7958 //------------------------------------------------------------------------------
7959 // fgAssignRecursiveCallArgToCallerParam : Assign an argument of a recursive call to the corresponding caller parameter.
7963 // arg - argument to assign
7964 // argTabEntry - argument table entry corresponding to arg
7965 // block - basic block the call is in
7966 // callILOffset - IL offset of the call
7967 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
7968 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
7971 // parameter assignment statement if one was inserted; nullptr otherwise.
7973 GenTreeStmt* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg,
7974 fgArgTabEntry* argTabEntry,
7976 IL_OFFSETX callILOffset,
7977 GenTreeStmt* tmpAssignmentInsertionPoint,
7978 GenTreeStmt* paramAssignmentInsertionPoint)
7980 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7981 // some argument trees may reference parameters directly.
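// For example (hypothetical code, for illustration only): in a recursive tail call
// "return Foo(y, x)" inside "int Foo(int x, int y)", assigning "x = y" before the
// second argument reads "x" would use the overwritten value. Copying both arguments
// into temps first makes the parameter assignments order-independent.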
7983 GenTree* argInTemp = nullptr;
7984 unsigned originalArgNum = argTabEntry->argNum;
7985 bool needToAssignParameter = true;
7987 // TODO-CQ: enable calls with struct arguments passed in registers.
7988 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7990 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7992 // The argument is already assigned to a temp or is a const.
7993 argInTemp = arg;
7995 else if (arg->OperGet() == GT_LCL_VAR)
7997 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7998 LclVarDsc* varDsc = &lvaTable[lclNum];
7999 if (!varDsc->lvIsParam)
8001 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
8002 argInTemp = arg;
8004 else if (lclNum == originalArgNum)
8006 // The argument is the same parameter local that we were about to assign so
8007 // we can skip the assignment.
8008 needToAssignParameter = false;
8012 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
8013 // any caller parameters. Some common cases are handled above but we may be able to eliminate
8014 // more temp assignments.
8016 GenTreeStmt* paramAssignStmt = nullptr;
8017 if (needToAssignParameter)
8019 if (argInTemp == nullptr)
8021 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
8022 // TODO: we can avoid a temp assignment if we can prove that the argument tree
8023 // doesn't involve any caller parameters.
8024 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
8025 lvaTable[tmpNum].lvType = arg->gtType;
8026 GenTree* tempSrc = arg;
8027 GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
8028 GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
8029 GenTreeStmt* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
8030 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
8031 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
8034 // Now assign the temp to the parameter.
8035 LclVarDsc* paramDsc = lvaTable + originalArgNum;
8036 assert(paramDsc->lvIsParam);
8037 GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
8038 GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
8039 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
8041 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
8043 return paramAssignStmt;
8046 /*****************************************************************************
8048 * Transform the given GT_CALL tree for code generation.
8051 GenTree* Compiler::fgMorphCall(GenTreeCall* call)
8053 if (varTypeIsStruct(call))
8055 fgFixupStructReturn(call);
8057 if (call->CanTailCall())
8059 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
8060 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
8062 // It cannot be an inline candidate
8063 assert(!call->IsInlineCandidate());
8065 const char* szFailReason = nullptr;
8066 bool hasStructParam = false;
8067 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
8069 szFailReason = "Might turn into an intrinsic";
8072 if (opts.compNeedSecurityCheck)
8074 szFailReason = "Needs security check";
8076 else if (compLocallocUsed || compLocallocOptimized)
8078 szFailReason = "Localloc used";
8080 #ifdef _TARGET_AMD64_
8081 // Needed for Jit64 compat.
8082 // In future, enabling tail calls from methods that need GS cookie check
8083 // would require codegen side work to emit GS cookie check before a tail
8085 else if (getNeedsGSSecurityCookie())
8087 szFailReason = "GS Security cookie check";
8091 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
8092 else if (opts.compGcChecks)
8094 szFailReason = "GcChecks";
8097 #if FEATURE_TAILCALL_OPT
8100 // We are still not sure whether it can be a tail call. This is because, when converting
8101 // a call to an implicit tail call, we must check that there are no locals with
8102 // their address taken. If this is the case, we have to assume that the address
8103 // has been leaked and the current stack frame must live until after the final call.
8106 // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. Note
8107 // that lvHasLdAddrOp is much more conservative. We cannot just base it on
8108 // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
8109 // during morph stage. The reason for also checking lvAddrExposed is that in case
8110 // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
8111 // The combination of lvHasLdAddrOp and lvAddrExposed though conservative allows us
8112 // never to be incorrect.
8114 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
8115 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
8116 // is set. This avoids the need for iterating through all lcl vars of the current
8117 // method. Right now throughout the code base we are not consistently using 'set'
8118 // method to set lvHasLdAddrOp and lvAddrExposed flags.
8121 bool hasAddrExposedVars = false;
8122 bool hasStructPromotedParam = false;
8123 bool hasPinnedVars = false;
8125 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
8127 // If the method is marked as an explicit tail call we will skip the
8128 // following three hazard checks.
8129 // We still must check for any struct parameters and set 'hasStructParam'
8130 // so that we won't transform the recursive tail call into a loop.
8132 if (call->IsImplicitTailCall())
8134 if (varDsc->lvHasLdAddrOp)
8136 hasAddrExposedVars = true;
8139 if (varDsc->lvAddrExposed)
8141 if (lvaIsImplicitByRefLocal(varNum))
8143 // The address of the implicit-byref is a non-address use of the pointer parameter.
8145 else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
8147 // The address of the implicit-byref's field is likewise a non-address use of the pointer parameter.
8150 else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
8152 // This temp was used for struct promotion bookkeeping. It will not be used, and will have
8153 // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
8154 assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
8155 assert(fgGlobalMorph);
8159 hasAddrExposedVars = true;
8163 if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
8165 hasStructPromotedParam = true;
8168 if (varDsc->lvPinned)
8170 // A tail call removes the method from the stack, which means the pinning
8171 // goes away for the callee. We can't allow that.
8172 hasPinnedVars = true;
8176 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
8178 hasStructParam = true;
8179 // This prevents transforming a recursive tail call into a loop
8180 // but doesn't prevent tail call optimization so we need to
8181 // look at the rest of parameters.
8186 if (hasAddrExposedVars)
8188 szFailReason = "Local address taken";
8190 if (hasStructPromotedParam)
8192 szFailReason = "Has Struct Promoted Param";
8196 szFailReason = "Has Pinned Vars";
8199 #endif // FEATURE_TAILCALL_OPT
8201 var_types callType = call->TypeGet();
8203 // We have to ensure to pass the incoming retValBuf as the
8204 // outgoing one. Using a temp will not do as this function will
8205 // not regain control to do the copy.
8207 if (info.compRetBuffArg != BAD_VAR_NUM)
8209 noway_assert(callType == TYP_VOID);
8210 GenTree* retValBuf = call->gtCallArgs->gtOp.gtOp1;
8211 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
8213 szFailReason = "Need to copy return buffer";
8217 // If this is an opportunistic tail call and cannot be dispatched as
8218 // fast tail call, go the non-tail call route. This is done for perf reasons.
8221 // Avoid the cost of determining whether it can be dispatched as a fast tail
8222 // call if we already know that the tail call cannot be honored for other reasons.
8224 bool canFastTailCall = false;
8225 if (szFailReason == nullptr)
8227 canFastTailCall = fgCanFastTailCall(call);
8228 if (!canFastTailCall)
8230 // Implicit or opportunistic tail calls are always dispatched via the fast tail call
8231 // mechanism and never via the tail call helper, for perf reasons.
8232 if (call->IsImplicitTailCall())
8234 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
8236 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
8238 // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be
8239 // dispatched as a fast tail call.
8241 // Methods with non-standard args will have indirection cell or cookie param passed
8242 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
8243 // tail calling the target method and hence the ".tail" prefix on such calls needs to be ignored.
8246 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
8247 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
8248 // This is done by adding stubAddr as an additional arg before the original list of
8249 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
8250 // in Stublinkerx86.cpp.
8251 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
8252 "called via helper";
8254 #ifdef _TARGET_ARM64_
8257 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
8258 // So, bail out if we can't make fast tail call.
8259 szFailReason = "Non-qualified fast tail call";
8265 // Clear these flags before calling fgMorphCall() to avoid recursion.
8266 bool isTailPrefixed = call->IsTailPrefixedCall();
8267 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
8269 #if FEATURE_TAILCALL_OPT
8270 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
8273 if (szFailReason == nullptr)
8275 if (!fgCheckStmtAfterTailCall())
8277 szFailReason = "Unexpected statements after the tail call";
8281 void* pfnCopyArgs = nullptr;
8282 #if !defined(_TARGET_X86_) || defined(_TARGET_UNIX_)
8283 if (!canFastTailCall && szFailReason == nullptr)
8285 pfnCopyArgs =
8286 info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, call->IsVirtualStub()
8287 ? CORINFO_TAILCALL_STUB_DISPATCH_ARG
8288 : CORINFO_TAILCALL_NORMAL);
8289 if (pfnCopyArgs == nullptr)
8291 if (!info.compMatchedVM)
8293 // If we don't have a matched VM, we won't get valid results when asking for a thunk.
8294 pfnCopyArgs = UlongToPtr(0xCA11CA11); // "callcall"
8298 szFailReason = "TailCallCopyArgsThunk not available.";
8302 #endif // !defined(_TARGET_X86_) || defined(_TARGET_UNIX_)
8304 if (szFailReason != nullptr)
8309 printf("\nRejecting tail call late for call ");
8311 printf(": %s\n", szFailReason);
8315 // for non user funcs, we have no handles to report
8316 info.compCompHnd->reportTailCallDecision(nullptr,
8317 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8318 isTailPrefixed, TAILCALL_FAIL, szFailReason);
8320 #if FEATURE_MULTIREG_RET
8321 if (fgGlobalMorph && call->HasMultiRegRetVal())
8323 // The tail call has been rejected so we must finish the work deferred
8324 // by impFixupCallStructReturn for multi-reg-returning calls and transform the
8325 // call into an assignment of its result to a new temp, returning the temp instead.
8330 // Create a new temp.
8331 unsigned tmpNum =
8332 lvaGrabTemp(false DEBUGARG("Return value temp for multi-reg return (rejected tail call)."));
8333 lvaTable[tmpNum].lvIsMultiRegRet = true;
8335 GenTree* assg = nullptr;
8336 if (varTypeIsStruct(callType))
8338 CORINFO_CLASS_HANDLE structHandle = call->gtRetClsHnd;
8339 assert(structHandle != NO_CLASS_HANDLE);
8340 const bool unsafeValueClsCheck = false;
8341 lvaSetStruct(tmpNum, structHandle, unsafeValueClsCheck);
8342 var_types structType = lvaTable[tmpNum].lvType;
8343 GenTree* dst = gtNewLclvNode(tmpNum, structType);
8344 assg = gtNewAssignNode(dst, call);
8348 assg = gtNewTempAssign(tmpNum, call);
8351 assg = fgMorphTree(assg);
8353 // Create the assignment statement and insert it before the current statement.
8354 GenTreeStmt* assgStmt = gtNewStmt(assg, compCurStmt->gtStmtILoffsx);
8355 fgInsertStmtBefore(compCurBB, compCurStmt, assgStmt);
8358 GenTree* result = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
8359 result->gtFlags |= GTF_DONT_CSE;
8364 printf("\nInserting assignment of a multi-reg call result to a temp:\n");
8365 gtDispTree(assgStmt);
8367 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8375 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
8376 // We enable shared-ret tail call optimization for recursive calls even if
8377 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
8378 if (gtIsRecursiveCall(call))
8381 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
8382 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
8383 if (compCurBB->bbJumpKind != BBJ_RETURN)
8385 compCurBB->bbJumpKind = BBJ_RETURN;
8389 // Set this flag before calling fgMorphCall() to prevent inlining this call.
8390 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
8392 bool fastTailCallToLoop = false;
8393 #if FEATURE_TAILCALL_OPT
8394 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
8395 // or return type is a struct that can be passed in a register.
8397 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
8398 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
8399 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
8400 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
8401 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
8402 // generic type parameters of both caller and callee generic method are the same.
8403 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
8404 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam &&
8405 !varTypeIsStruct(call->TypeGet()) && ((info.compClassAttr & CORINFO_FLG_MARSHAL_BYREF) == 0))
8407 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
8408 fastTailCallToLoop = true;
8412 // Do some target-specific transformations (before we process the args, etc.)
8413 // This is needed only for tail prefixed calls that cannot be dispatched as
8415 if (!canFastTailCall)
8417 fgMorphTailCall(call, pfnCopyArgs);
8420 // Implementation note: If we optimize a tailcall to do a direct jump
8421 // to the target function (after stomping on the return address, etc),
8422 // without using CORINFO_HELP_TAILCALL, we have to make certain that
8423 // we don't starve the hijacking logic (by stomping on the hijacked
8424 // return address etc).
8426 // At this point, we are committed to do the tailcall.
8427 compTailCallUsed = true;
8429 CorInfoTailCall tailCallResult;
8431 if (fastTailCallToLoop)
8433 tailCallResult = TAILCALL_RECURSIVE;
8435 else if (canFastTailCall)
8437 tailCallResult = TAILCALL_OPTIMIZED;
8441 tailCallResult = TAILCALL_HELPER;
8444 // for non user funcs, we have no handles to report
8445 info.compCompHnd->reportTailCallDecision(nullptr,
8446 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8447 isTailPrefixed, tailCallResult, nullptr);
8449 // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID.
8450 // to avoid doing any extra work for the return value.
8451 call->gtType = TYP_VOID;
8456 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
8459 if (fastTailCallToLoop)
8461 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8468 GenTree* stmtExpr = fgMorphStmt->gtStmtExpr;
8471 // Tail call needs to be in one of the following IR forms
8472 // Either a call stmt or
8473 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8474 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8475 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8477 // GT_CASTS may be nested.
8478 genTreeOps stmtOper = stmtExpr->gtOper;
8479 if (stmtOper == GT_CALL)
8481 assert(stmtExpr == call);
8485 assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8486 GenTree* treeWithCall;
8487 if (stmtOper == GT_RETURN)
8489 treeWithCall = stmtExpr->gtGetOp1();
8491 else if (stmtOper == GT_COMMA)
8493 // Second operation must be nop.
8494 assert(stmtExpr->gtGetOp2()->IsNothingNode());
8495 treeWithCall = stmtExpr->gtGetOp1();
8499 treeWithCall = stmtExpr->gtGetOp2();
8503 while (treeWithCall->gtOper == GT_CAST)
8505 assert(!treeWithCall->gtOverflow());
8506 treeWithCall = treeWithCall->gtGetOp1();
8509 assert(treeWithCall == call);
8512 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8513 // Remove all stmts after the call.
8514 while (nextMorphStmt != nullptr)
8516 GenTreeStmt* stmtToRemove = nextMorphStmt;
8517 nextMorphStmt = stmtToRemove->gtNextStmt;
8518 fgRemoveStmt(compCurBB, stmtToRemove);
8521 fgMorphStmt->gtStmtExpr = call;
8523 // Tail call via helper: The VM can't use return address hijacking if we're
8524 // not going to return and the helper doesn't have enough info to safely poll,
8525 // so we poll before the tail call, if the block isn't already safe. Since
8526 // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
8527 // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
8528 // size increase if almost all methods are expected to be tail calls (e.g. F#).
8530 // Note that we can avoid emitting GC-poll if we know that the current BB is
8531 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
8532 // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
8533 // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
8534 // now it is not clear whether optimizing slow tail calls is worth the effort. As a
8535 // low cost check, we check whether the first and current basic blocks are GC safe points.
8538 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
8539 // is going to mark the method as fully interruptible if the block containing this tail
8540 // call is reachable without executing any call.
8541 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8542 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8544 // We didn't insert a poll block, so we need to morph the call now
8545 // (Normally it will get morphed when we get to the split poll block)
8546 GenTree* temp = fgMorphCall(call);
8547 noway_assert(temp == call);
8550 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8551 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8553 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8554 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
8555 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8557 if (canFastTailCall)
8559 compCurBB->bbFlags |= BBF_HAS_JMP;
8563 compCurBB->bbJumpKind = BBJ_THROW;
8566 // For non-void calls, we return a placeholder which will be
8567 // used by the parent GT_RETURN node of this call.
8569 GenTree* result = call;
8570 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8573 // Return a dummy node, as the return is already removed.
8574 if (callType == TYP_STRUCT)
8576 // This is an HFA; use float 0.
8577 callType = TYP_FLOAT;
8579 #elif defined(UNIX_AMD64_ABI)
8580 // Return a dummy node, as the return is already removed.
8581 if (varTypeIsStruct(callType))
8583 // This is a register-returned struct. Return a 0.
8584 // The actual return registers are hacked in lower and the register allocator.
8589 // Return a dummy node, as the return is already removed.
8590 if (varTypeIsSIMD(callType))
8592 callType = TYP_DOUBLE;
8595 result = gtNewZeroConNode(genActualType(callType));
8596 result = fgMorphTree(result);
8604 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8605 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8606 #ifdef FEATURE_READYTORUN_COMPILER
8607 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8610 (call == fgMorphStmt->gtStmtExpr))
8612 // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8613 // Transform it into a null check.
8615 GenTree* thisPtr = call->gtCallArgs->gtOp.gtOp1;
8617 GenTree* nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8618 nullCheck->gtFlags |= GTF_EXCEPT;
8620 return fgMorphTree(nullCheck);
8623 noway_assert(call->gtOper == GT_CALL);
8626 // Only count calls once (only in the global morph phase)
8630 if (call->gtCallType == CT_INDIRECT)
8633 optIndirectCallCount++;
8635 else if (call->gtCallType == CT_USER_FUNC)
8638 if (call->IsVirtual())
8640 optIndirectCallCount++;
8645 // Couldn't inline - remember that this BB contains method calls
8647 // If this is a 'regular' call, mark the basic block as
8648 // having a call (for computing full interruptibility).
8649 CLANG_FORMAT_COMMENT_ANCHOR;
8651 if (IsGcSafePoint(call))
8653 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8656 // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
8658 // We need to do these before the arguments are morphed
8659 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8661 // See if this is foldable
8662 GenTree* optTree = gtFoldExprCall(call);
8664 // If we optimized, morph the result
8665 if (optTree != call)
8667 return fgMorphTree(optTree);
8671 // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
8672 GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require copy-back).
8674 unsigned retValTmpNum = BAD_VAR_NUM;
8675 CORINFO_CLASS_HANDLE structHnd = nullptr;
8676 if (call->HasRetBufArg() &&
8677 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8679 // We're enforcing the invariant that return buffers pointers (at least for
8680 // struct return types containing GC pointers) are never pointers into the heap.
8681 // The large majority of cases are address of local variables, which are OK.
8682 // Otherwise, allocate a local of the given struct type, pass its address,
8683 // then assign from that into the proper destination. (We don't need to do this
8684 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8685 // will maintain the same invariant.)
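// A sketch of the transformation performed below (tree shapes are illustrative):
//
//   CALL(retBuf = &heapLocation, ...)
// becomes
//   COMMA(CALL(retBuf = &retValTmp, ...), copyBlk from retValTmp to heapLocation)
//
// where retValTmp is a newly allocated local of the return struct's type, so the
// callee only ever writes the return value through a pointer into the stack.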
8687 GenTree* dest = call->gtCallArgs->gtOp.gtOp1;
8688 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8689 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8691 // We'll exempt helper calls from this, assuming that the helper implementation
8692 // follows the old convention, and does whatever barrier is required.
8693 if (call->gtCallType != CT_HELPER)
8695 structHnd = call->gtRetClsHnd;
8696 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8697 !(dest->OperGet() == GT_LCL_VAR && dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8701 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8702 lvaSetStruct(retValTmpNum, structHnd, true);
8703 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8708 call->gtCallArgs->gtOp.gtOp1 = dest;
8711 /* Process the "normal" argument list */
8712 call = fgMorphArgs(call);
8713 noway_assert(call->gtOper == GT_CALL);
8715 // Morph a stelem.ref helper call storing a null value into a direct array store without the helper.
8716 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
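// In effect (a sketch of the rewrite): CORINFO_HELP_ARRADDR_ST(array, index, null)
// becomes ASG(INDEX(array, index), null). A null reference can never fail the
// array covariance check, so the helper's type check is unnecessary.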
8717 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8719 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8720 if (value->IsIntegralConst(0))
8722 assert(value->OperGet() == GT_CNS_INT);
8724 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
8725 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8727 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8728 // the spill trees as well if necessary.
8729 GenTreeOp* argSetup = nullptr;
8730 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8732 GenTree* const arg = earlyArgs->Current();
8733 if (arg->OperGet() != GT_ASG)
8739 assert(arg != index);
8741 arg->gtFlags &= ~GTF_LATE_ARG;
8743 GenTree* op1 = argSetup;
8746 op1 = gtNewNothingNode();
8748 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8752 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8755 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8760 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8761 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8762 return WALK_CONTINUE;
8765 fgWalkTreePost(&arr, resetMorphedFlag);
8766 fgWalkTreePost(&index, resetMorphedFlag);
8767 fgWalkTreePost(&value, resetMorphedFlag);
8770 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8771 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8772 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
8773 arrStore->gtFlags |= GTF_ASG;
8775 GenTree* result = fgMorphTree(arrStore);
8776 if (argSetup != nullptr)
8778 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8780 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8788 // Optimize get_ManagedThreadId(get_CurrentThread)
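// That is, the get_ManagedThreadId(get_CurrentThread) call sequence collapses into
// a single CORINFO_HELP_GETCURRENTMANAGEDTHREADID helper call, so the intermediate
// Thread object is never materialized.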
8789 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8790 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8792 noway_assert(origDest == nullptr);
8793 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8795 GenTree* innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8797 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8798 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8799 CORINFO_INTRINSIC_GetCurrentManagedThread)
8801 // substitute expression with call to helper
8802 GenTree* newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT);
8803 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8804 return fgMorphTree(newCall);
8808 if (origDest != nullptr)
8810 GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8811 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8812 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
8813 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to work.
8815 if (origDest->OperGet() == GT_ASG)
8817 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8819 GenTree* var = origDest->gtOp.gtOp1;
8820 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8821 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8824 GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8825 copyBlk = fgMorphTree(copyBlk);
8826 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8828 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8833 if (call->IsNoReturn())
8836 // If we know that the call does not return then we can set fgRemoveRestOfBlock
8837 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8838 // As a result the compiler won't need to preserve live registers across the call.
8840 // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
8841 // Besides, the tail call code is part of the epilog and converting the block to
8842 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8843 // only for BBJ_RETURN blocks.
8845 // Currently this doesn't work for non-void callees. Some of the code that handles
8846 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8847 // do not have this flag by default. We could add the flag here but the proper solution
8848 // would be to replace the return expression with a local var node during inlining
8849 // so the rest of the call tree stays in a separate statement. That statement can then
8850 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8853 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8855 fgRemoveRestOfBlock = true;
8862 /*****************************************************************************
8864 * Transform the given GTK_CONST tree for code generation.
8867 GenTree* Compiler::fgMorphConst(GenTree* tree)
8869 assert(tree->OperKind() & GTK_CONST);
8871 /* Clear any exception flags or other unnecessary flags
8872 * that may have been set before folding this node to a constant */
8874 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8876 if (tree->OperGet() != GT_CNS_STR)
8881 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8882 // guarantee slow performance for that block. Instead cache the return value
8883 // of CORINFO_HELP_STRCNS and check the cache first, giving reasonable perf.
8885 if (compCurBB->bbJumpKind == BBJ_THROW)
8887 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8888 if (helper != CORINFO_HELP_UNDEF)
8890 // For unimportant blocks, we want to construct the string lazily
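// Sketch of the rewrite: the GT_CNS_STR node becomes a helper call, roughly
//   CALL helper(RidFromToken(token) [, module handle])
// so the string object is only constructed if this (throw) path actually runs.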
8892 GenTreeArgList* args;
8893 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8895 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8899 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8900 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8903 tree = gtNewHelperCallNode(helper, TYP_REF, args);
8904 return fgMorphTree(tree);
8908 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8910 void* pValue;
8911 InfoAccessType iat =
8912 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8914 tree = gtNewStringLiteralNode(iat, pValue);
8916 return fgMorphTree(tree);
8919 /*****************************************************************************
8921 * Transform the given GTK_LEAF tree for code generation.
8924 GenTree* Compiler::fgMorphLeaf(GenTree* tree)
8926 assert(tree->OperKind() & GTK_LEAF);
8928 if (tree->gtOper == GT_LCL_VAR)
8930 const bool forceRemorph = false;
8931 return fgMorphLocalVar(tree, forceRemorph);
8933 #ifdef _TARGET_X86_
8934 else if (tree->gtOper == GT_LCL_FLD)
8936 if (info.compIsVarArgs)
8938 GenTree* newTree =
8939 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8940 if (newTree != nullptr)
8942 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8944 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8950 #endif // _TARGET_X86_
8951 else if (tree->gtOper == GT_FTN_ADDR)
8953 CORINFO_CONST_LOOKUP addrInfo;
8955 #ifdef FEATURE_READYTORUN_COMPILER
8956 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8958 addrInfo = tree->gtFptrVal.gtEntryPoint;
8963 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8966 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8968 tree->SetOper(GT_CNS_INT);
8969 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8970 tree->gtFlags |= GTF_ICON_FTN_ADDR;
8972 switch (addrInfo.accessType)
8975 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8976 tree->gtFlags |= GTF_IND_INVARIANT;
8981 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8985 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8989 noway_assert(!"Unknown addrInfo.accessType");
8992 return fgMorphTree(tree);
8998 void Compiler::fgAssignSetVarDef(GenTree* tree)
9000 GenTreeLclVarCommon* lclVarCmnTree;
9001 bool isEntire = false;
9002 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
9006 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
9010 // We consider partial definitions to be modeled as uses followed by definitions.
9011 // This captures the idea that preceding defs are not necessarily made redundant
9012 // by this definition.
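// For example, a store to a single field of a struct local leaves the earlier
// definitions of the other fields visible, so the node is marked as both a use
// and a def (GTF_VAR_USEASG in addition to GTF_VAR_DEF).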
9013 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
9018 //------------------------------------------------------------------------
9019 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
9022 // tree - The block assignment to be possibly morphed
9025 // The modified tree if successful, nullptr otherwise.
9028 // 'tree' must be a block assignment.
9031 // If successful, this method always returns the incoming tree, modifying only
9034 GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
9036 // This must be a block assignment.
9037 noway_assert(tree->OperIsBlkOp());
9038 var_types asgType = tree->TypeGet();
9040 GenTree* asg = tree;
9041 GenTree* dest = asg->gtGetOp1();
9042 GenTree* src = asg->gtGetOp2();
9043 unsigned destVarNum = BAD_VAR_NUM;
9044 LclVarDsc* destVarDsc = nullptr;
9045 GenTree* destLclVarTree = nullptr;
9046 bool isCopyBlock = asg->OperIsCopyBlkOp();
9047 bool isInitBlock = !isCopyBlock;
9050 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
9052 if (dest->gtEffectiveVal()->OperIsBlk())
9054 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
9055 size = lhsBlk->Size();
9056 if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree))
9058 destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
9059 destVarDsc = &(lvaTable[destVarNum]);
9061 if (lhsBlk->OperGet() == GT_OBJ)
9063 clsHnd = lhsBlk->AsObj()->gtClass;
9068 // Is this an enregisterable struct that is already a simple assignment?
9069 // This can happen if we are re-morphing.
9070 // Note that we won't do this straightaway if this is a SIMD type, since it
9071 // may be a promoted lclVar (sometimes we promote the individual float fields of
9072 // fixed-size SIMD).
9073 if (dest->OperGet() == GT_IND)
9075 noway_assert(asgType != TYP_STRUCT);
9076 if (varTypeIsStruct(asgType))
9078 destLclVarTree = fgIsIndirOfAddrOfLocal(dest);
9080 if (isCopyBlock && destLclVarTree == nullptr && !src->OperIs(GT_LCL_VAR))
9082 fgMorphBlockOperand(src, asgType, genTypeSize(asgType), false /*isBlkReqd*/);
9088 noway_assert(dest->OperIsLocal());
9089 destLclVarTree = dest;
9091 if (destLclVarTree != nullptr)
9093 destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
9094 destVarDsc = &(lvaTable[destVarNum]);
9095 if (asgType == TYP_STRUCT)
9097 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
9098 size = destVarDsc->lvExactSize;
9101 if (asgType != TYP_STRUCT)
9103 size = genTypeSize(asgType);
9112 // See if we can do a simple transformation:
9113 //
9114 //          GT_ASG <TYP_size>
9115 //          /   \
9116 //      GT_IND GT_IND or CNS_INT
9117 //      /        /
9118 //   [dest]   [src]
9121 if (asgType == TYP_STRUCT)
9123 if (size == REGSIZE_BYTES)
9125 if (clsHnd == NO_CLASS_HANDLE)
9127 // A register-sized cpblk can be treated as an integer assignment.
9128 asgType = TYP_I_IMPL;
9133 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
9134 asgType = getJitGCType(gcPtr);
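// For example (illustrative): on a 64-bit target, an 8-byte struct whose only
// field is an object reference would get asgType TYP_REF from the GC layout,
// so the block copy becomes a scalar GC-ref assignment and downstream phases
// can emit the proper write barrier.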
9145 asgType = TYP_SHORT;
9148 #ifdef _TARGET_64BIT_
9152 #endif // _TARGET_64BIT_
9157 if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted)
9159 // Let fgMorphCopyBlock handle it.
9163 GenTree* srcLclVarTree = nullptr;
9164 LclVarDsc* srcVarDsc = nullptr;
9167 if (src->OperGet() == GT_LCL_VAR)
9169 srcLclVarTree = src;
9170 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
9172 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &srcLclVarTree))
9174 srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->gtLclNum]);
9176 if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted)
9178 // Let fgMorphCopyBlock handle it.
9183 if (asgType != TYP_STRUCT)
9185 noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType));
9187 // For initBlk, a non-constant source is not going to allow us to fiddle
9188 // with the bits to create a single assignment.
9189 // Nor do we (for now) support transforming an InitBlock of SIMD type, unless
9190 // it is a direct assignment to a lclVar and the value is zero.
9193 if (!src->IsConstInitVal())
9197 if (varTypeIsSIMD(asgType) && (!src->IsIntegralConst(0) || (destVarDsc == nullptr)))
9203 if (destVarDsc != nullptr)
9205 #if LOCAL_ASSERTION_PROP
9206 // Kill everything about dest
9207 if (optLocalAssertionProp)
9209 if (optAssertionCount > 0)
9211 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
9214 #endif // LOCAL_ASSERTION_PROP
9216 // A previous incarnation of this code also required the local not to be
9217 // address-exposed (i.e. address-taken). That seems orthogonal to the decision of whether
9218 // to do field-wise assignments: being address-exposed will cause it to be
9219 // "dependently" promoted, so it will be in the right memory location. One possible
9220 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
9221 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
9222 // concern, then we could compromise, and say that address-exposed structs whose fields do not
9223 // completely cover their memory prevent field-wise assignments. The same situation exists for the "src" decision.
9224 if (varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted)
9226 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
9229 else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
9231 // Use the dest local var directly, as well as its type.
9232 dest = destLclVarTree;
9233 asgType = destVarDsc->lvType;
9235 // If the block operation had been a write to a local var of a small int type,
9236 // of the exact size of the small int type, and the var is NormalizeOnStore,
9237 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
9238 // have done that normalization. If we're now making it into an assignment,
9239 // the NormalizeOnStore will work, and it can be a full def.
9240 if (destVarDsc->lvNormalizeOnStore())
9242 dest->gtFlags &= (~GTF_VAR_USEASG);
9247 // Could be a non-promoted struct, or a floating point type local, or
9248 // an int subject to a partial write. Don't enregister.
9249 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
9251 // Mark the local var tree as a definition point of the local.
9252 destLclVarTree->gtFlags |= GTF_VAR_DEF;
9253 if (size < destVarDsc->lvExactSize)
9254 { // If it's not a full-width assignment....
9255 destLclVarTree->gtFlags |= GTF_VAR_USEASG;
9258 if (dest == destLclVarTree)
9260 dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
9265 // Check to ensure we don't have a reducible *(& ... )
9266 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
9268 // If dest is an Indir or Block, and it has a child that is an Addr node
9270 GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR
9272 // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'?
9274 GenTree* destOp = addrNode->gtGetOp1();
9275 var_types destOpType = destOp->TypeGet();
9277 // We can if we have a primitive integer type and the sizes are exactly the same.
9279 if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType))))
9281 dest = destOp;
9282 asgType = destOpType;
9286 if (dest->gtEffectiveVal()->OperIsIndir())
9288 // If we have no information about the destination, we have to assume it could
9289 // live anywhere (not just in the GC heap).
9290 // Mark the GT_IND node so that we use the correct write barrier helper in case
9291 // the field is a GC ref.
9293 if (!fgIsIndirOfAddrOfLocal(dest))
9295 dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9296 tree->gtFlags |= GTF_GLOB_REF;
9299 dest->gtFlags &= (~GTF_EXCEPT | dest->AsIndir()->Addr()->gtFlags);
9300 dest->SetIndirExceptionFlags(this);
9301 tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT);
9306 if (srcVarDsc != nullptr)
9309 assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted);
9310 if (!varTypeIsFloating(srcLclVarTree->TypeGet()) &&
9311 size == genTypeSize(genActualType(srcLclVarTree->TypeGet())))
9313 // Use the src local var directly.
9314 src = srcLclVarTree;
9318 // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar))
9319 // or indir(lclVarAddr) so it must be on the stack.
9320 unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum;
9321 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_BlockOp));
9323 if (src == srcLclVarTree)
9325 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
9326 src = gtNewOperNode(GT_IND, asgType, srcAddr);
9330 assert(src->OperIsIndir());
9335 if (src->OperIsIndir())
9337 if (!fgIsIndirOfAddrOfLocal(src))
9339 // If we have no information about the src, we have to assume it could
9340 // live anywhere (not just in the GC heap).
9341 // Mark the GT_IND node so that we use the correct write barrier helper in case
9342 // the field is a GC ref.
9343 src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9346 src->gtFlags &= (~GTF_EXCEPT | src->AsIndir()->Addr()->gtFlags);
9347 src->SetIndirExceptionFlags(this);
9353 if (varTypeIsSIMD(asgType))
9355 assert(!isCopyBlock); // Else we would have returned the tree above.
9356 noway_assert(src->IsIntegralConst(0));
9357 noway_assert(destVarDsc != nullptr);
9359 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
9364 if (src->OperIsInitVal())
9366 src = src->gtGetOp1();
9368 assert(src->IsCnsIntOrI());
9369 // This will mutate the integer constant, in place, to be the correct
9370 // value for the type we are using in the assignment.
9371 src->AsIntCon()->FixupInitBlkValue(asgType);
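// For example, an init value of 42 (0x2A) being stored as a TYP_INT is
// replicated byte-wise to 0x2A2A2A2A, matching what a byte-wise initblk
// would have produced (see the fgMorphPromoteLocalInitBlock example below).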
9375 // Ensure that the dest is setup appropriately.
9376 if (dest->gtEffectiveVal()->OperIsIndir())
9378 dest = fgMorphBlockOperand(dest, asgType, size, false /*isBlkReqd*/);
9381 // Ensure that the rhs is setup appropriately.
9384 src = fgMorphBlockOperand(src, asgType, size, false /*isBlkReqd*/);
9387 // Set the lhs and rhs on the assignment.
9388 if (dest != tree->gtOp.gtOp1)
9390 asg->gtOp.gtOp1 = dest;
9392 if (src != asg->gtOp.gtOp2)
9394 asg->gtOp.gtOp2 = src;
9397 asg->ChangeType(asgType);
9398 dest->gtFlags |= GTF_DONT_CSE;
9399 asg->gtFlags &= ~GTF_EXCEPT;
9400 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9401 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9402 asg->gtFlags &= ~GTF_REVERSE_OPS;
9407 printf("fgMorphOneAsgBlock (after):\n");
9417 //------------------------------------------------------------------------
9418 // fgMorphInitBlock: Morph a block initialization assignment tree.
9421 // tree - A GT_ASG tree that performs block initialization
9424 // A single assignment, when fgMorphOneAsgBlockOp transforms it.
9426 // If the destination is a promoted struct local variable then we will try to
9427 // perform a field by field assignment for each of the promoted struct fields.
9428 // This is not always possible (e.g. if the struct has holes and custom layout).
9430 // Otherwise the original GT_ASG tree is returned unmodified (always correct but
9431 // least desirable because it prevents enregistration and/or blocks independent
9432 // struct promotion).
9435 // GT_ASG's children have already been morphed.
9437 GenTree* Compiler::fgMorphInitBlock(GenTree* tree)
9439 // We must have the GT_ASG form of InitBlkOp.
9440 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9442 bool morphed = false;
9445 GenTree* src = tree->gtGetOp2();
9446 GenTree* origDest = tree->gtGetOp1();
9448 GenTree* dest = fgMorphBlkNode(origDest, true);
9449 if (dest != origDest)
9451 tree->gtOp.gtOp1 = dest;
9453 tree->gtType = dest->TypeGet();
9454 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
9455 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
9456 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9458 src->gtType = TYP_INT;
9460 JITDUMP("\nfgMorphInitBlock:");
9462 GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
9465 JITDUMP(" using oneAsgTree.\n");
9470 GenTreeLclVarCommon* destLclNode = nullptr;
9471 unsigned destLclNum = BAD_VAR_NUM;
9472 LclVarDsc* destLclVar = nullptr;
9473 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
9474 unsigned blockSize = 0;
9476 if (dest->IsLocal())
9478 destLclNode = dest->AsLclVarCommon();
9479 destLclNum = destLclNode->GetLclNum();
9480 destLclVar = lvaGetDesc(destLclNum);
9481 blockSize = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar->TypeGet());
9485 blockSize = dest->AsBlk()->Size();
9487 FieldSeqNode* destFldSeq = nullptr;
9488 if (dest->AsIndir()->Addr()->IsLocalAddrExpr(this, &destLclNode, &destFldSeq))
9490 destLclNum = destLclNode->GetLclNum();
9491 destLclVar = lvaGetDesc(destLclNum);
9495 bool destDoFldAsg = false;
9497 if (destLclNum != BAD_VAR_NUM)
9499 #if LOCAL_ASSERTION_PROP
9500 // Kill everything about destLclNum (and its field locals)
9501 if (optLocalAssertionProp && (optAssertionCount > 0))
9503 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9505 #endif // LOCAL_ASSERTION_PROP
9507 if (destLclVar->lvPromoted)
9509 GenTree* newTree = fgMorphPromoteLocalInitBlock(destLclNode->AsLclVar(), initVal, blockSize);
9511 if (newTree != nullptr)
9514 destDoFldAsg = true;
9515 INDEBUG(morphed = true);
9519 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9520 if (!destDoFldAsg && !destLclVar->lvRegStruct)
9522 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9528 // For an InitBlock we always require a block operand.
9529 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockSize, true /*isBlkReqd*/);
9530 tree->gtOp.gtOp1 = dest;
9531 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9538 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9542 printf("fgMorphInitBlock (after):\n");
9551 //------------------------------------------------------------------------
9552 // fgMorphPromoteLocalInitBlock: Attempts to promote a local block init tree
9553 // to a tree of promoted field initialization assignments.
9556 // destLclNode - The destination LclVar node
9557 // initVal - The initialization value
9558 // blockSize - The amount of bytes to initialize
9561 // A tree that performs field by field initialization of the destination
9562 // struct variable if various conditions are met, nullptr otherwise.
9565 // This transforms a single block initialization assignment like:
9567 // * ASG struct (init)
9568 // +--* BLK(12) struct
9570 // | \--* LCL_VAR struct(P) V02 loc0
9571 // | \--* int V02.a (offs=0x00) -> V06 tmp3
9572 // | \--* ubyte V02.c (offs=0x04) -> V07 tmp4
9573 // | \--* float V02.d (offs=0x08) -> V08 tmp5
9574 // \--* INIT_VAL int
9575 // \--* CNS_INT int 42
9577 // into a COMMA tree of assignments that initialize each promoted struct
9583 // | | +--* LCL_VAR int V06 tmp3
9584 // | | \--* CNS_INT int 0x2A2A2A2A
9586 // | +--* LCL_VAR ubyte V07 tmp4
9587 // | \--* CNS_INT int 42
9589 // +--* LCL_VAR float V08 tmp5
9590 // \--* CNS_DBL float 1.5113661732714390e-13
9592 GenTree* Compiler::fgMorphPromoteLocalInitBlock(GenTreeLclVar* destLclNode, GenTree* initVal, unsigned blockSize)
9594 assert(destLclNode->OperIs(GT_LCL_VAR));
9596 LclVarDsc* destLclVar = lvaGetDesc(destLclNode);
9597 assert(varTypeIsStruct(destLclVar->TypeGet()));
9598 assert(destLclVar->lvPromoted);
9602 JITDUMP(" size is zero or unknown.\n");
9606 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
9608 JITDUMP(" dest is address exposed and contains holes.\n");
9612 if (destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9614 JITDUMP(" dest has custom layout and contains holes.\n");
9618 if (destLclVar->lvExactSize != blockSize)
9620 JITDUMP(" dest size mismatch.\n");
9624 if (!initVal->OperIs(GT_CNS_INT))
9626 JITDUMP(" source is not constant.\n");
9630 const int64_t initPattern = (initVal->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL;
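// A worked example of the line above: an init value of 42 (0x2A) yields
// (0x2A * 0x0101010101010101) == 0x2A2A2A2A2A2A2A2A, i.e. the low byte of
// the constant replicated into all eight bytes of the pattern (this matches
// the CNS_INT 0x2A2A2A2A in the example tree in the function header).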
9632 if (initPattern != 0)
9634 for (unsigned i = 0; i < destLclVar->lvFieldCnt; ++i)
9636 LclVarDsc* fieldDesc = lvaGetDesc(destLclVar->lvFieldLclStart + i);
9638 if (varTypeIsSIMD(fieldDesc->TypeGet()) || varTypeIsGC(fieldDesc->TypeGet()))
9640 // Cannot initialize GC or SIMD types with a non-zero constant.
9641 // The former is completely bogus. The latter restriction could be
9642 // lifted by supporting non-zero SIMD constants or by generating
9643 // field initialization code that converts an integer constant to
9644 // the appropriate SIMD value. Unlikely to be very useful, though.
9645 JITDUMP(" dest contains GC and/or SIMD fields and source constant is not 0.\n");
9651 JITDUMP(" using field by field initialization.\n");
9653 GenTree* tree = nullptr;
9655 for (unsigned i = 0; i < destLclVar->lvFieldCnt; ++i)
9657 unsigned fieldLclNum = destLclVar->lvFieldLclStart + i;
9658 LclVarDsc* fieldDesc = lvaGetDesc(fieldLclNum);
9659 GenTree* dest = gtNewLclvNode(fieldLclNum, fieldDesc->TypeGet());
9660 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9661 dest->gtFlags |= (destLclNode->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9665 switch (dest->TypeGet())
9672 // Promoted fields are expected to be "normalize on load". If that changes then
9673 // we may need to adjust this code to widen the constant correctly.
9674 assert(fieldDesc->lvNormalizeOnLoad());
9678 int64_t mask = (int64_t(1) << (genTypeSize(dest->TypeGet()) * 8)) - 1;
9679 src = gtNewIconNode(static_cast<int32_t>(initPattern & mask));
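// E.g. for a 2-byte field, mask == 0xFFFF and the 0x2A2A2A2A2A2A2A2A
// pattern is truncated to 0x2A2A (illustrative values).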
9683 src = gtNewLconNode(initPattern);
9687 memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
9688 src = gtNewDconNode(floatPattern, dest->TypeGet());
9691 double doublePattern;
9692 memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
9693 src = gtNewDconNode(doublePattern, dest->TypeGet());
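// The memcpy above reinterprets the raw bits rather than converting the
// value numerically; e.g. the 0x2A2A2A2A2A2A2A2A pattern viewed as a double
// is 1.5113661732714390e-13, the CNS_DBL shown in the function header.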
9702 #endif // FEATURE_SIMD
9703 assert(initPattern == 0);
9704 src = gtNewIconNode(0, dest->TypeGet());
9710 GenTree* asg = gtNewAssignNode(dest, src);
9712 #if LOCAL_ASSERTION_PROP
9713 if (optLocalAssertionProp)
9715 optAssertionGen(asg);
9717 #endif // LOCAL_ASSERTION_PROP
9719 if (tree != nullptr)
9721 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9732 //------------------------------------------------------------------------
9733 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9736 // tree - the node to be modified.
9737 // type - the type of indirection to change it to.
9740 // Returns the node, modified in place.
9743 // This doesn't really warrant a separate method, but is here to abstract
9744 // the fact that these nodes can be modified in-place.
9746 GenTree* Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9748 tree->SetOper(GT_IND);
9749 tree->gtType = type;
9753 //------------------------------------------------------------------------
9754 // fgMorphGetStructAddr: Gets the address of a struct object
9757 // pTree - the parent's pointer to the struct object node
9758 // clsHnd - the class handle for the struct type
9759 // isRValue - true if this is a source (not dest)
9762 // Returns the address of the struct value, possibly modifying the existing tree to
9763 // sink the address below any comma nodes (this is to canonicalize for value numbering).
9764 // If this is a source, it will morph it to a GT_IND before taking its address,
9765 // since it may not be remorphed (and we don't want blk nodes as rvalues).
9767 GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9770 GenTree* tree = *pTree;
9771 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9772 // need to hang onto that for the purposes of value numbering.
9773 if (tree->OperIsIndir())
9775 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9777 addr = tree->gtOp.gtOp1;
9781 if (isRValue && tree->OperIsBlk())
9783 tree->ChangeOper(GT_IND);
9785 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9788 else if (tree->gtOper == GT_COMMA)
9790 // If this is a comma, we're going to "sink" the GT_ADDR below it.
9791 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9792 tree->gtType = TYP_BYREF;
9797 switch (tree->gtOper)
9804 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9811 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9812 // not going to use "temp"
9813 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9814 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9823 //------------------------------------------------------------------------
9824 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9827 // tree - The struct type node
9828 // isDest - True if this is the destination of the assignment
9831 // Returns the possibly-morphed node. The caller is responsible for updating
9832 // the parent of this node.
9834 GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest)
9836 GenTree* handleTree = nullptr;
9837 GenTree* addr = nullptr;
9838 if (tree->OperIs(GT_COMMA))
9840 // In order to CSE and value number array index expressions and bounds checks,
9841 // the commas in which they are contained need to match.
9842 // The pattern is that the COMMA should be the address expression.
9843 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9844 // TODO-1stClassStructs: Consider whether this can be improved.
9845 // Also consider whether some of this can be included in gtNewBlockVal (though note
9846 // that doing so may cause us to query the type system before we otherwise would).
9848 // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct
9849 // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct
9852 GenTree* effectiveVal = tree->gtEffectiveVal();
9854 GenTreePtrStack commas(getAllocator(CMK_ArrayStack));
9855 for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2())
9860 GenTree* lastComma = commas.Top();
9861 noway_assert(lastComma->gtGetOp2() == effectiveVal);
9862 GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9864 effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9866 lastComma->gtOp.gtOp2 = effectiveValAddr;
9868 while (!commas.Empty())
9870 GenTree* comma = commas.Pop();
9871 comma->gtType = TYP_BYREF;
9872 gtUpdateNodeSideEffects(comma);
9875 handleTree = effectiveVal;
9877 else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR))
9880 addr = tree->AsIndir()->Addr();
9883 if (addr != nullptr)
9885 var_types structType = handleTree->TypeGet();
9886 if (structType == TYP_STRUCT)
9888 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree);
9889 if (structHnd == NO_CLASS_HANDLE)
9891 tree = gtNewOperNode(GT_IND, structType, addr);
9895 tree = gtNewObjNode(structHnd, addr);
9896 if (tree->OperGet() == GT_OBJ)
9898 gtSetObjGcInfo(tree->AsObj());
9904 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9907 gtUpdateNodeSideEffects(tree);
9909 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9913 if (!tree->OperIsBlk())
9917 GenTreeBlk* blkNode = tree->AsBlk();
9918 if (blkNode->OperGet() == GT_DYN_BLK)
9920 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9922 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9923 // A GT_BLK with size of zero is not supported,
9924 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9927 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9928 blkNode->ChangeOper(GT_BLK);
9929 blkNode->gtBlkSize = size;
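// Sketch of the net effect: a GT_DYN_BLK whose size operand is a constant,
// say CNS_INT 16, becomes a GT_BLK with gtBlkSize == 16 (hypothetical size),
// letting later phases treat it as an ordinary fixed-size block.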
9941 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9942 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9944 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9945 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9947 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
9954 //------------------------------------------------------------------------
9955 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
9958 // tree - The block operand
9959 // asgType - The type of the assignment
9960 // blockWidth - The size of the block
9961 // isBlkReqd - true iff this operand must remain a block node
9964 // Returns the morphed block operand
9967 // This does the following:
9968 // - Ensures that a struct operand is a block node or lclVar.
9969 // - Ensures that any COMMAs are above ADDR nodes.
9970 // Although 'tree' WAS an operand of a block assignment, the assignment
9971 // may have been retyped to be a scalar assignment.
9973 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isBlkReqd)
9975 GenTree* effectiveVal = tree->gtEffectiveVal();
9977 if (asgType != TYP_STRUCT)
9979 if (effectiveVal->OperIsIndir())
9983 GenTree* addr = effectiveVal->AsIndir()->Addr();
9984 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9986 effectiveVal = addr->gtGetOp1();
9988 else if (effectiveVal->OperIsBlk())
9990 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9993 effectiveVal->gtType = asgType;
9995 else if (effectiveVal->TypeGet() != asgType)
9997 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9998 effectiveVal = gtNewIndir(asgType, addr);
10003 GenTreeIndir* indirTree = nullptr;
10004 GenTreeLclVarCommon* lclNode = nullptr;
10005 bool needsIndirection = true;
10007 if (effectiveVal->OperIsIndir())
10009 indirTree = effectiveVal->AsIndir();
10010 GenTree* addr = effectiveVal->AsIndir()->Addr();
10011 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
10013 lclNode = addr->gtGetOp1()->AsLclVarCommon();
10016 else if (effectiveVal->OperGet() == GT_LCL_VAR)
10018 lclNode = effectiveVal->AsLclVarCommon();
10020 if (lclNode != nullptr)
10022 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
10023 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType))
10025 if (effectiveVal != lclNode)
10027 JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->gtLclNum);
10028 effectiveVal = lclNode;
10030 needsIndirection = false;
10034 // This may be a lclVar that was determined to be address-exposed.
10035 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
10038 if (needsIndirection)
10040 if (indirTree != nullptr)
10042 if (indirTree->OperIsBlk() && !isBlkReqd)
10044 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
10048 // If we have an indirection and a block is required, it should already be a block.
10049 assert(indirTree->OperIsBlk() || !isBlkReqd);
10055 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
10058 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
10059 if (clsHnd == NO_CLASS_HANDLE)
10061 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
10065 newTree = gtNewObjNode(clsHnd, addr);
10066 if (newTree->OperGet() == GT_OBJ)
10068 gtSetObjGcInfo(newTree->AsObj());
10074 newTree = gtNewIndir(asgType, addr);
10076 effectiveVal = newTree;
10080 tree = effectiveVal;
10084 //------------------------------------------------------------------------
10085 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
10088 // dest - the GT_OBJ or GT_STORE_OBJ
10091 // The destination must be known (by the caller) to be on the stack.
10094 // If we have a CopyObj with a dest on the stack, and its size is small enough
10095 // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
10096 // GC Unsafe CopyBlk that is non-interruptible.
10097 // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
10099 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
10101 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
10102 assert(dest->gtGcPtrCount != 0);
10103 unsigned blockWidth = dest->AsBlk()->gtBlkSize;
10105 bool destOnStack = false;
10106 GenTree* destAddr = dest->Addr();
10107 assert(destAddr->IsLocalAddrExpr() != nullptr);
10109 if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
10111 genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
10112 dest->SetOper(newOper);
10113 dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
10115 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
10118 //------------------------------------------------------------------------
10119 // fgMorphCopyBlock: Perform the Morphing of block copy
10122 // tree - a block copy (i.e. an assignment with a block op on the lhs).
10125 // We can return the original block copy unmodified (least desirable, but always correct)
10126 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
10127 // If we have performed struct promotion of the Source() or the Dest() then we will try to
10128 // perform a field by field assignment for each of the promoted struct fields.
10131 // The child nodes for tree have already been Morphed.
10134 // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
10135 // When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
10136 // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
10137 // If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
10138 // cannot use a field by field assignment and must leave the original block copy unmodified.
10140 GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
10142 noway_assert(tree->OperIsCopyBlkOp());
10144 JITDUMP("\nfgMorphCopyBlock:");
10146 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
10148 GenTree* asg = tree;
10149 GenTree* rhs = asg->gtGetOp2();
10150 GenTree* dest = asg->gtGetOp1();
10152 #if FEATURE_MULTIREG_RET
10153 // If this is a multi-reg return, we will not do any morphing of this node.
10154 if (rhs->IsMultiRegCall())
10156 assert(dest->OperGet() == GT_LCL_VAR);
10157 JITDUMP(" not morphing a multireg call return\n");
10160 #endif // FEATURE_MULTIREG_RET
10162 // If we have an array index on the lhs, we need to create an obj node.
10164 dest = fgMorphBlkNode(dest, true);
10165 if (dest != asg->gtGetOp1())
10167 asg->gtOp.gtOp1 = dest;
10168 if (dest->IsLocal())
10170 dest->gtFlags |= GTF_VAR_DEF;
10173 asg->gtType = dest->TypeGet();
10174 rhs = fgMorphBlkNode(rhs, false);
10176 asg->gtOp.gtOp2 = rhs;
10178 GenTree* oldTree = tree;
10179 GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
10183 JITDUMP(" using oneAsgTree.\n");
10188 unsigned blockWidth;
10189 bool blockWidthIsConst = false;
10190 GenTreeLclVarCommon* lclVarTree = nullptr;
10191 GenTreeLclVarCommon* srcLclVarTree = nullptr;
10192 unsigned destLclNum = BAD_VAR_NUM;
10193 LclVarDsc* destLclVar = nullptr;
10194 FieldSeqNode* destFldSeq = nullptr;
10195 bool destDoFldAsg = false;
10196 GenTree* destAddr = nullptr;
10197 GenTree* srcAddr = nullptr;
10198 bool destOnStack = false;
10199 bool hasGCPtrs = false;
10201 JITDUMP("block assignment to morph:\n");
10204 if (dest->IsLocal())
10206 blockWidthIsConst = true;
10207 destOnStack = true;
10208 if (dest->gtOper == GT_LCL_VAR)
10210 lclVarTree = dest->AsLclVarCommon();
10211 destLclNum = lclVarTree->gtLclNum;
10212 destLclVar = &lvaTable[destLclNum];
10213 if (destLclVar->lvType == TYP_STRUCT)
10215 // It would be nice if lvExactSize always corresponded to the size of the struct,
10216 // but it doesn't always for the temps that the importer creates when it spills side
10218 // TODO-Cleanup: Determine when this happens, and whether it can be changed.
10219 blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
10223 blockWidth = genTypeSize(destLclVar->lvType);
10225 hasGCPtrs = destLclVar->lvStructGcCount != 0;
10229 assert(dest->TypeGet() != TYP_STRUCT);
10230 assert(dest->gtOper == GT_LCL_FLD);
10231 blockWidth = genTypeSize(dest->TypeGet());
10232 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10233 destFldSeq = dest->AsLclFld()->gtFieldSeq;
10238 GenTree* effectiveDest = dest->gtEffectiveVal();
10239 if (effectiveDest->OperGet() == GT_IND)
10241 assert(dest->TypeGet() != TYP_STRUCT);
10242 blockWidth = genTypeSize(effectiveDest->TypeGet());
10243 blockWidthIsConst = true;
10244 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
10246 destAddr = dest->gtGetOp1();
10251 assert(effectiveDest->OperIsBlk());
10252 GenTreeBlk* blk = effectiveDest->AsBlk();
10254 blockWidth = blk->gtBlkSize;
10255 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
10256 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
10258 destAddr = blk->Addr();
10261 if (destAddr != nullptr)
10263 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
10264 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
10266 destOnStack = true;
10267 destLclNum = lclVarTree->gtLclNum;
10268 destLclVar = &lvaTable[destLclNum];
10273 if (destLclVar != nullptr)
10275 #if LOCAL_ASSERTION_PROP
10276 // Kill everything about destLclNum (and its field locals)
10277 if (optLocalAssertionProp)
10279 if (optAssertionCount > 0)
10281 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
10284 #endif // LOCAL_ASSERTION_PROP
10286 if (destLclVar->lvPromoted && blockWidthIsConst)
10288 noway_assert(varTypeIsStruct(destLclVar));
10289 noway_assert(!opts.MinOpts());
10291 if (blockWidth == destLclVar->lvExactSize)
10293 JITDUMP(" (destDoFldAsg=true)");
10294 // We may decide later that a copyblk is required when this struct has holes
10295 destDoFldAsg = true;
10299 JITDUMP(" with mismatched dest size");
10304 FieldSeqNode* srcFldSeq = nullptr;
10305 unsigned srcLclNum = BAD_VAR_NUM;
10306 LclVarDsc* srcLclVar = nullptr;
10307 bool srcDoFldAsg = false;
10309 if (rhs->IsLocal())
10311 srcLclVarTree = rhs->AsLclVarCommon();
10312 srcLclNum = srcLclVarTree->gtLclNum;
10313 if (rhs->OperGet() == GT_LCL_FLD)
10315 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
10318 else if (rhs->OperIsIndir())
10320 if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
10322 srcLclNum = srcLclVarTree->gtLclNum;
10326 srcAddr = rhs->gtOp.gtOp1;
10330 if (srcLclNum != BAD_VAR_NUM)
10332 srcLclVar = &lvaTable[srcLclNum];
10334 if (srcLclVar->lvPromoted && blockWidthIsConst)
10336 noway_assert(varTypeIsStruct(srcLclVar));
10337 noway_assert(!opts.MinOpts());
10339 if (blockWidth == srcLclVar->lvExactSize)
10341 JITDUMP(" (srcDoFldAsg=true)");
10342 // We may decide later that a copyblk is required when this struct has holes
10343 srcDoFldAsg = true;
10347 JITDUMP(" with mismatched src size");
10352 // Check to see if we are doing a copy to/from the same local block.
10353 // If so, morph it to a nop.
10354 if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) &&
10355 destFldSeq != FieldSeqStore::NotAField())
10357 JITDUMP("Self-copy; replaced with a NOP.\n");
10358 GenTree* nop = gtNewNothingNode();
10359 INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
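// E.g. a source-level struct self-assignment such as "s = s;" (same local,
// same field sequence) has no effect, so the whole block copy is replaced
// by the nothing node above (hedged illustration).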
10363 // Check to see if we are required to do a copy block because the struct contains holes
10364 // and either the src or dest is externally visible
10366 bool requiresCopyBlock = false;
10367 bool srcSingleLclVarAsg = false;
10368 bool destSingleLclVarAsg = false;
10370 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
10371 if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
10373 requiresCopyBlock = true;
10376 // Can we use field by field assignment for the dest?
10377 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
10379 JITDUMP(" dest contains custom layout and contains holes");
10380 // C++ style CopyBlock with holes
10381 requiresCopyBlock = true;
10384 // Can we use field by field assignment for the src?
10385 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
10387 JITDUMP(" src contains custom layout and contains holes");
10388 // C++ style CopyBlock with holes
10389 requiresCopyBlock = true;
10392 #if defined(_TARGET_ARM_)
10393 if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
10395 JITDUMP(" rhs is unaligned");
10396 requiresCopyBlock = true;
10399 if (asg->gtFlags & GTF_BLK_UNALIGNED)
10401 JITDUMP(" asg is unaligned");
10402 requiresCopyBlock = true;
10404 #endif // _TARGET_ARM_
10406 if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
10408 requiresCopyBlock = true;
10411 // Can't use field by field assignment if the src is a call.
10412 if (rhs->OperGet() == GT_CALL)
10414 JITDUMP(" src is a call");
10416 requiresCopyBlock = true;
10419 // If we passed the above checks, then we will check these two
10420 if (!requiresCopyBlock)
10422 // Are both dest and src promoted structs?
10423 if (destDoFldAsg && srcDoFldAsg)
10425 // Both structs should be of the same type, or each have a single field of the same type.
10426 // If not we will use a copy block.
10427 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10428 lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10430 unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10431 unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
10432 if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10433 (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10435 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10436 JITDUMP(" with mismatched types");
10440 // Are neither dest nor src promoted structs?
10441 else if (!destDoFldAsg && !srcDoFldAsg)
10443 requiresCopyBlock = true; // Leave as a CopyBlock
10444 JITDUMP(" with no promoted structs");
10446 else if (destDoFldAsg)
10448 // Match the following kinds of trees:
10449 // fgMorphTree BB01, stmt 9 (before)
10450 // [000052] ------------ const int 8
10451 // [000053] -A--G------- copyBlk void
10452 // [000051] ------------ addr byref
10453 // [000050] ------------ lclVar long V07 loc5
10454 // [000054] --------R--- <list> void
10455 // [000049] ------------ addr byref
10456 // [000048] ------------ lclVar struct(P) V06 loc4
10457 // long V06.h (offs=0x00) -> V17 tmp9
10458 // Yields this transformation
10459 // fgMorphCopyBlock (after):
10460 // [000050] ------------ lclVar long V07 loc5
10461 // [000085] -A---------- = long
10462 // [000083] D------N---- lclVar long V17 tmp9
10464 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10465 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10467 // Reject the following tree:
10468 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
10470 // fgMorphTree BB01, stmt 6 (before)
10471 // [000038] ------------- const int 4
10472 // [000039] -A--G-------- copyBlk void
10473 // [000037] ------------- addr byref
10474 // [000036] ------------- lclVar int V05 loc3
10475 // [000040] --------R---- <list> void
10476 // [000035] ------------- addr byref
10477 // [000034] ------------- lclVar struct(P) V04 loc2
10478 // float V04.f1 (offs=0x00) -> V13 tmp6
10479 // As this would transform into
10480 // float V13 = int V05
10482 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10483 var_types destType = lvaTable[fieldLclNum].TypeGet();
10484 if (srcLclVar->TypeGet() == destType)
10486 srcSingleLclVarAsg = true;
10492 assert(srcDoFldAsg);
10493 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10495 // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
10496 // /--* byref V18._value (offs=0x00) -> V30 tmp21
10497 // [000245] -A------R--- * = struct (copy)
10498 // [000244] -----+------ \--* obj(8) struct
10499 // [000243] -----+------ \--* addr byref
10500 // [000242] D----+-N---- \--* lclVar byref V28 tmp19
10502 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10503 (blockWidth == genTypeSize(destLclVar->TypeGet())))
10505 // Check for type agreement
10506 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10507 var_types srcType = lvaTable[fieldLclNum].TypeGet();
10508 if (destLclVar->TypeGet() == srcType)
10510 destSingleLclVarAsg = true;
10516 // If we require a copy block then set both of the field assign bools to false
10517 if (requiresCopyBlock)
10519 // If a copy block is required then we won't do field by field assignments
10520 destDoFldAsg = false;
10521 srcDoFldAsg = false;
10524 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10526 // Mark the dest/src structs as DoNotEnreg when they are not being fully referenced as the same type.
10528 if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10530 if (!destLclVar->lvRegStruct || (destLclVar->lvType != dest->TypeGet()))
10532 // Mark it as DoNotEnregister.
10533 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10537 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10539 if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet()))
10541 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10545 var_types asgType = dest->TypeGet();
10546 if (requiresCopyBlock)
10548 bool isBlkReqd = (asgType == TYP_STRUCT);
10549 dest = fgMorphBlockOperand(dest, asgType, blockWidth, isBlkReqd);
10550 asg->gtOp.gtOp1 = dest;
10551 asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10553 // Note that the unrolling of CopyBlk is only implemented on some platforms.
10554 // Currently that includes x64 and ARM but not x86: the code generation for this
10555 // construct requires the ability to mark certain regions of the generated code
10556 // as non-interruptible, and the GC encoding for the latter platform does not
10557 // have this capability.
10559 // If we have a CopyObj with a dest on the stack
10560 // we will convert it into an GC Unsafe CopyBlk that is non-interruptible
10561 // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10562 // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10564 if (destOnStack && (dest->OperGet() == GT_OBJ))
10566 fgMorphUnsafeBlk(dest->AsObj());
10569 // Eliminate the "OBJ or BLK" node on the rhs.
10570 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isBlkReqd*/);
10571 asg->gtOp.gtOp2 = rhs;
10577 // Otherwise we convert this CopyBlock into individual field by field assignments
10582 GenTree* addrSpill = nullptr;
10583 unsigned addrSpillTemp = BAD_VAR_NUM;
10584 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10586 unsigned fieldCnt = DUMMY_INIT(0);
10588 if (destDoFldAsg && srcDoFldAsg)
10590 // To do fieldwise assignments for both sides, they'd better be the same struct type!
10591 // All of these conditions were checked above...
10592 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10593 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10595 fieldCnt = destLclVar->lvFieldCnt;
10596 goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10599 else if (destDoFldAsg)
10601 fieldCnt = destLclVar->lvFieldCnt;
10602 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*isBlkReqd*/);
10603 if (srcAddr == nullptr)
10605 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10610 assert(srcDoFldAsg);
10611 fieldCnt = srcLclVar->lvFieldCnt;
10612 dest = fgMorphBlockOperand(dest, asgType, blockWidth, false /*isBlkReqd*/);
10613 if (dest->OperIsBlk())
10615 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10617 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10622 noway_assert(!srcDoFldAsg);
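// A sketch of the spill decision below: gtClone only succeeds for trivially
// duplicable trees (e.g. ADDR(LCL_VAR)), which each per-field assignment can
// re-clone cheaply. A more complex srcAddr is instead cloned once with
// gtCloneExpr and spilled to a temp so that it is evaluated only once.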
10623 if (gtClone(srcAddr))
10625 // srcAddr is a simple expression. No need to spill.
10626 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10630 // srcAddr is a complex expression. Clone and spill it (unless the destination is
10631 // a struct local that only has one field, in which case we'd only use the
10632 // address value once...)
10633 if (destLclVar->lvFieldCnt > 1)
10635 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10636 noway_assert(addrSpill != nullptr);
10643 noway_assert(!destDoFldAsg);
10645 // If we're doing field-wise stores, to an address within a local, and we copy
10646 // the address into "addrSpill", do *not* declare the original local var node in the
10647 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10648 // field-wise assignments as an "indirect" assignment to the local.
10649 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10651 if (lclVarTree != nullptr)
10653 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10656 if (gtClone(destAddr))
10658 // destAddr is a simple expression. No need to spill
10659 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10663 // destAddr is a complex expression. Clone and spill it (unless
10664 // the source is a struct local that only has one field, in which case we'd only
10665 // use the address value once...)
10666 if (srcLclVar->lvFieldCnt > 1)
10668 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10669 noway_assert(addrSpill != nullptr);
10672 // TODO-CQ: this should be based on a more general
10673 // "BaseAddress" method, that handles fields of structs, before or after
10675 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10677 if (addrSpill->gtOp.gtOp1->IsLocal())
10679 // We will *not* consider this to define the local, but rather have each individual field assign
10680 // be a definition.
10681 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10682 assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10683 PROMOTION_TYPE_INDEPENDENT);
10684 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10685 // local stack frame
10691 if (addrSpill != nullptr)
10693 // Simplify the address if possible, and mark as DONT_CSE as needed.
10694 addrSpill = fgMorphTree(addrSpill);
10696 // Spill the (complex) address to a BYREF temp.
10697 // Note, at most one address may need to be spilled.
10698 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10700 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10702 if (addrSpillIsStackDest)
10704 lvaTable[addrSpillTemp].lvStackByref = true;
10707 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10709 // If we are assigning the address of a LclVar here,
10710 // liveness does not account for this kind of address-taken use.
10712 // We have to mark this local as address exposed so
10713 // that we don't delete the definition for this LclVar
10714 // as a dead store later on.
10716 if (addrSpill->OperGet() == GT_ADDR)
10718 GenTree* addrOp = addrSpill->gtOp.gtOp1;
10719 if (addrOp->IsLocal())
10721 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
10722 lvaTable[lclVarNum].lvAddrExposed = true;
10723 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10730 // We may have allocated a temp above, and that may have caused the lvaTable to be expanded.
10731 // So, beyond this point we cannot rely on the old values of 'srcLclVar' and 'destLclVar'.
10732 for (unsigned i = 0; i < fieldCnt; ++i)
10734 FieldSeqNode* curFieldSeq = nullptr;
10737 noway_assert(destLclNum != BAD_VAR_NUM);
10738 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10739 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10740 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10741 if (destAddr != nullptr)
10743 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10744 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10748 noway_assert(lclVarTree != nullptr);
10749 dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10751 // Don't CSE the lhs of an assignment.
10752 dest->gtFlags |= GTF_DONT_CSE;
10756 noway_assert(srcDoFldAsg);
10757 noway_assert(srcLclNum != BAD_VAR_NUM);
10758 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10760 if (destSingleLclVarAsg)
10762 noway_assert(fieldCnt == 1);
10763 noway_assert(destLclVar != nullptr);
10764 noway_assert(addrSpill == nullptr);
10766 dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10772 assert(addrSpillTemp != BAD_VAR_NUM);
10773 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10777 dest = gtCloneExpr(destAddr);
10778 noway_assert(dest != nullptr);
10780 // Is the address of a local?
10781 GenTreeLclVarCommon* lclVarTree = nullptr;
10782 bool isEntire = false;
10783 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
10784 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10786 lclVarTree->gtFlags |= GTF_VAR_DEF;
10789 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10794 GenTree* fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10795 // Have to set the field sequence -- which means we need the field handle.
10796 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10797 CORINFO_FIELD_HANDLE fieldHnd =
10798 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10799 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10800 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10802 dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10804 dest = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), dest);
10806 // !!! The destination could be on stack. !!!
10807 // This flag will let us choose the correct write barrier.
10808 dest->gtFlags |= GTF_IND_TGTANYWHERE;
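// At this point the per-field destination has (roughly) the shape
// IND(fieldType, ADD(baseAddr, CNS_INT fieldOffset)), where baseAddr is
// either the spilled address temp or a clone of destAddr (see above).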
10814 noway_assert(srcLclNum != BAD_VAR_NUM);
10815 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10816 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10818 noway_assert(srcLclVarTree != nullptr);
10819 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10823 noway_assert(destDoFldAsg);
10824 noway_assert(destLclNum != BAD_VAR_NUM);
10825 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10827 if (srcSingleLclVarAsg)
10829 noway_assert(fieldCnt == 1);
10830 noway_assert(srcLclNum != BAD_VAR_NUM);
10831 noway_assert(addrSpill == nullptr);
10833 src = gtNewLclvNode(srcLclNum, lvaGetDesc(srcLclNum)->TypeGet());
10839 assert(addrSpillTemp != BAD_VAR_NUM);
10840 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10844 src = gtCloneExpr(srcAddr);
10845 noway_assert(src != nullptr);
10848 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10849 CORINFO_FIELD_HANDLE fieldHnd =
10850 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10851 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10852 var_types destType = lvaGetDesc(fieldLclNum)->lvType;
10855 if (lvaGetDesc(fieldLclNum)->lvFldOffset == 0)
10857 // If this is a full-width use of the src via a different type, we need to create a GT_LCL_FLD.
10858 // (Note that if it was the same type, 'srcSingleLclVarAsg' would be true.)
10859 if (srcLclNum != BAD_VAR_NUM)
10861 noway_assert(srcLclVarTree != nullptr);
10862 assert(destType != TYP_STRUCT);
10863 unsigned destSize = genTypeSize(destType);
10864 srcLclVar = lvaGetDesc(srcLclNum);
10866 (srcLclVar->lvType == TYP_STRUCT) ? srcLclVar->lvExactSize : genTypeSize(srcLclVar);
10867 if (destSize == srcSize)
10869 srcLclVarTree->gtFlags |= GTF_VAR_CAST;
10870 srcLclVarTree->ChangeOper(GT_LCL_FLD);
10871 srcLclVarTree->gtType = destType;
10872 srcLclVarTree->AsLclFld()->gtFieldSeq = curFieldSeq;
10873 src = srcLclVarTree;
10878 else // if (lvaGetDesc(fieldLclNum)->lvFldOffset != 0)
10880 src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10881 new (this, GT_CNS_INT)
10882 GenTreeIntCon(TYP_I_IMPL, lvaGetDesc(fieldLclNum)->lvFldOffset,
10887 src = gtNewIndir(destType, src);
10892 noway_assert(dest->TypeGet() == src->TypeGet());
10894 asg = gtNewAssignNode(dest, src);
10896 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10897 // and it was of a local, ensure that the destination local variable has been marked as address
10898 // exposed. Neither liveness nor SSA are able to track this kind of indirect assignment.
10899 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10901 noway_assert(lvaGetDesc(destLclNum)->lvAddrExposed);
10904 #if LOCAL_ASSERTION_PROP
10905 if (optLocalAssertionProp)
10907 optAssertionGen(asg);
10909 #endif // LOCAL_ASSERTION_PROP
10913 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10924 tree->gtFlags |= GTF_LATE_ARG;
10928 if (tree != oldTree)
10930 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10935 printf("\nfgMorphCopyBlock (after):\n");
10944 // Insert conversions and normalize the tree to make it amenable to register
10945 // FP architectures.
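// For example, a TYP_DOUBLE arithmetic node with a TYP_FLOAT operand gets a
// cast to TYP_DOUBLE inserted on that operand, and a float-vs-double compare
// is normalized by widening the float side to double (see below).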
10946 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
10948 if (tree->OperIsArithmetic())
10950 if (varTypeIsFloating(tree))
10952 GenTree* op1 = tree->gtOp.gtOp1;
10953 GenTree* op2 = tree->gtGetOp2();
10955 assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet()));
10957 if (op1->TypeGet() != tree->TypeGet())
10959 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet());
10961 if (op2->TypeGet() != tree->TypeGet())
10963 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet());
10967 else if (tree->OperIsCompare())
10969 GenTree* op1 = tree->gtOp.gtOp1;
10971 if (varTypeIsFloating(op1))
10973 GenTree* op2 = tree->gtGetOp2();
10974 assert(varTypeIsFloating(op2));
10976 if (op1->TypeGet() != op2->TypeGet())
10978 // both had better be floating, just one bigger than the other
10979 if (op1->TypeGet() == TYP_FLOAT)
10981 assert(op2->TypeGet() == TYP_DOUBLE);
10982 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
10984 else if (op2->TypeGet() == TYP_FLOAT)
10986 assert(op1->TypeGet() == TYP_DOUBLE);
10987 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
10996 //--------------------------------------------------------------------------------------------------------------
10997 // fgMorphRecognizeBoxNullable:
10998 // Recognize this pattern:
11000 // stmtExpr void (IL 0x000... ???)
11002 // CNS_INT ref null
11004 // CALL help ref HELPER.CORINFO_HELP_BOX_NULLABLE
11005 // CNS_INT(h) long 0x7fed96836c8 class
11007 // FIELD struct value
11008 // LCL_VAR ref V00 this
11010 // which comes from this code:
11012 // return this.value==null;
11014 // and transform it into
11016 // stmtExpr void (IL 0x000... ???)
11018 // CNS_INT ref null
11022 // FIELD struct value
11023 // LCL_VAR ref V00 this
11026 // compare - Compare tree to optimize.
11029 // A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found;
11030 // the original tree otherwise.
11033 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
11035 GenTree* op1 = compare->gtOp.gtOp1;
11036 GenTree* op2 = compare->gtOp.gtOp2;
11038 GenTreeCall* opCall;
11040 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
11043 opCall = op2->AsCall();
11045 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
11048 opCall = op1->AsCall();
11055 if (!opCns->IsIntegralConst(0))
11060 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
11065 // Get the nullable struct argument
11066 GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
11068 // Check for cases that are unsafe to optimize and return the unchanged tree
11069 if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0))
11074 // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset
11075 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg);
11079 compare->gtOp.gtOp1 = newOp;
11083 compare->gtOp.gtOp2 = newOp;
11086 opCns->gtType = TYP_INT;
11091 #ifdef FEATURE_SIMD
11093 //--------------------------------------------------------------------------------------------------------------
11094 // getSIMDStructFromField:
11095 // Check whether the field belongs to a simd struct. If it does, return the GenTree* for
11096 // the struct node, along with the base type, field index and simd size. If it does not, return nullptr.
11097 // Usually, if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, we
11098 // should return nullptr, since in this case the SIMD struct should be treated as a regular struct.
11099 // However, if you want the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
11100 // to true. Then the IsUsedInSIMDIntrinsic check is skipped, and the SIMD struct node is returned
11101 // if the struct is a SIMD struct.
11104 // tree - GenTree*. This node will be checked to see whether it is a field that belongs to a simd
11105 // struct used for a simd intrinsic.
11106 // pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
11107 // to simd lclvar's base type.
11108 // indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
11109 // equals to the index number of this field.
11110 // simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
11111 // equals to the simd struct size which this tree belongs to.
11112 // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
11113 // the UsedInSIMDIntrinsic check.
11116 // A GenTree* pointing to the simd lclvar that the field belongs to. If the tree is not a simd
11117 // intrinsic related field, return nullptr.
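// For instance (a hedged illustration, assuming a promoted Vector3-style
// local used in SIMD intrinsics): reading field y (offset 4, float) returns
// the simd local's tree with *pBaseTypeOut == TYP_FLOAT, *indexOut == 1
// (field offset divided by base type size) and *simdSizeOut == 12.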
11120 GenTree* Compiler::getSIMDStructFromField(GenTree* tree,
11121 var_types* pBaseTypeOut,
11122 unsigned* indexOut,
11123 unsigned* simdSizeOut,
11124 bool ignoreUsedInSIMDIntrinsic /*false*/)
11126 GenTree* ret = nullptr;
11127 if (tree->OperGet() == GT_FIELD)
11129 GenTree* objRef = tree->gtField.gtFldObj;
11130 if (objRef != nullptr)
11132 GenTree* obj = nullptr;
11133 if (objRef->gtOper == GT_ADDR)
11135 obj = objRef->gtOp.gtOp1;
11137 else if (ignoreUsedInSIMDIntrinsic)
11146 if (isSIMDTypeLocal(obj))
11148 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
11149 LclVarDsc* varDsc = &lvaTable[lclNum];
11150 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
11152 *simdSizeOut = varDsc->lvExactSize;
11153 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
11157 else if (obj->OperGet() == GT_SIMD)
11160 GenTreeSIMD* simdNode = obj->AsSIMD();
11161 *simdSizeOut = simdNode->gtSIMDSize;
11162 *pBaseTypeOut = simdNode->gtSIMDBaseType;
11164 #ifdef FEATURE_HW_INTRINSICS
11165 else if (obj->OperIsHWIntrinsic())
11168 GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic();
11169 *simdSizeOut = simdNode->gtSIMDSize;
11170 *pBaseTypeOut = simdNode->gtSIMDBaseType;
11172 #endif // FEATURE_HW_INTRINSICS
11175 if (ret != nullptr)
11177 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
11178 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
11183 /*****************************************************************************
11184 * If a read operation tries to access a simd struct field, then transform the
11185 * operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
11186 * Otherwise, return the old tree.
11188 * tree - GenTree*. If this pointer points to a simd struct which is used for a simd
11189 * intrinsic, we will morph it into the simd intrinsic SIMDIntrinsicGetItem.
11191 * A GenTree* which points to the new tree. If the tree is not for simd intrinsic,
11195 GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree)
11197 unsigned index = 0;
11198 var_types baseType = TYP_UNKNOWN;
11199 unsigned simdSize = 0;
11200 GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
11201 if (simdStructNode != nullptr)
11203 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
11204 GenTree* op2 = gtNewIconNode(index);
11205 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
11207 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11213 /*****************************************************************************
11214 * Transform an assignment of a SIMD struct field to SIMD intrinsic
11215 * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
11216 * then return the old tree.
11218 * tree - GenTree*. If this pointer points to a simd struct which is used for a simd
11219 * intrinsic, we will morph it into a simd intrinsic set.
11221 * A GenTree* which points to the new tree. If the tree is not for simd intrinsic,
11225 GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree)
11227 assert(tree->OperGet() == GT_ASG);
11228 GenTree* op1 = tree->gtGetOp1();
11229 GenTree* op2 = tree->gtGetOp2();
11231 unsigned index = 0;
11232 var_types baseType = TYP_UNKNOWN;
11233 unsigned simdSize = 0;
11234 GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
11235 if (simdOp1Struct != nullptr)
11237 // Generate the simd set intrinsic
11238 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
11240 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
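// The switch below maps the field index to the matching set intrinsic:
// 0 -> SIMDIntrinsicSetX, 1 -> SetY, 2 -> SetZ, 3 -> SetW. E.g. a store to
// field y (index 1) becomes v = SIMDIntrinsicSetY(v, value), with v cloned
// as the assignment target below.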
11244 simdIntrinsicID = SIMDIntrinsicSetX;
11247 simdIntrinsicID = SIMDIntrinsicSetY;
11250 simdIntrinsicID = SIMDIntrinsicSetZ;
11253 simdIntrinsicID = SIMDIntrinsicSetW;
11256 noway_assert(!"There is no set intrinsic for index bigger than 3");
11259 GenTree* target = gtClone(simdOp1Struct);
11260 assert(target != nullptr);
11261 var_types simdType = target->gtType;
11262 GenTree* simdTree = gtNewSIMDNode(simdType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
11264 tree->gtOp.gtOp1 = target;
11265 tree->gtOp.gtOp2 = simdTree;
11267 // fgMorphTree has already called fgMorphImplicitByRefArgs() on this assignment, but the source
11268 // and target have not yet been morphed.
11269 // Therefore, in case the source and/or target are now implicit byrefs, we need to call it again.
11270 if (fgMorphImplicitByRefArgs(tree))
11272 if (tree->gtGetOp1()->OperIsBlk())
11274 assert(tree->gtGetOp1()->TypeGet() == simdType);
11275 fgMorphBlkToInd(tree->gtGetOp1()->AsBlk(), simdType);
11279 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11286 #endif // FEATURE_SIMD
11288 /*****************************************************************************
11290 * Transform the given GTK_SMPOP tree for code generation.
11294 #pragma warning(push)
11295 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11297 GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
11300 assert(tree->OperKind() & GTK_SMPOP);
11302 /* The steps in this function are :
11303 o Perform required preorder processing
11304 o Process the first, then second operand, if any
11305 o Perform required postorder morphing
11306 o Perform optional postorder morphing if optimizing
11309 bool isQmarkColon = false;
11311 #if LOCAL_ASSERTION_PROP
11312 AssertionIndex origAssertionCount = DUMMY_INIT(0);
11313 AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
11315 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
11316 AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
11321 tree = fgMorphForRegisterFP(tree);
11324 genTreeOps oper = tree->OperGet();
11325 var_types typ = tree->TypeGet();
11326 GenTree* op1 = tree->gtOp.gtOp1;
11327 GenTree* op2 = tree->gtGetOp2IfPresent();
11329 /*-------------------------------------------------------------------------
11330 * First do any PRE-ORDER processing
11335 // Some arithmetic operators need to use a helper call to the EE
11339 tree = fgDoNormalizeOnStore(tree);
11340 /* fgDoNormalizeOnStore can change op2 */
11341 noway_assert(op1 == tree->gtOp.gtOp1);
11342 op2 = tree->gtOp.gtOp2;
11344 #ifdef FEATURE_SIMD
11346 // We should check whether op2 should be assigned to a SIMD field or not.
11347 // If it is, we should translate the tree to a simd intrinsic.
11348 assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
11349 GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
11350 typ = tree->TypeGet();
11351 op1 = tree->gtGetOp1();
11352 op2 = tree->gtGetOp2();
11354 assert((tree == newTree) && (tree->OperGet() == oper));
11355 if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
11357 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
11363 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
11364 // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
11365 // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
11366 // TODO-1stClassStructs: improve this.
11367 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
11369 op1->gtFlags |= GTF_DONT_CSE;
11375 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
11376 op1->gtFlags |= GTF_DONT_CSE;
11384 if (op1->OperKind() & GTK_RELOP)
11386 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
11387 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
11388 not need to materialize the result as a 0 or 1. */
11390 /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
11391 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
11393 // Request that the codegen for op1 sets the condition flags
11394 // when it generates the code for op1.
11396 // Codegen for op1 must set the condition flags if
11397 // this method returns true.
11399 op1->gtRequestSetFlags();
11403 GenTree* effOp1 = op1->gtEffectiveVal();
11404 noway_assert((effOp1->gtOper == GT_CNS_INT) &&
11405 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
11410 #if LOCAL_ASSERTION_PROP
11411 if (optLocalAssertionProp)
11414 isQmarkColon = true;
11419 return fgMorphArrayIndex(tree);
11422 return fgMorphCast(tree);
11426 #ifndef _TARGET_64BIT_
11427 if (typ == TYP_LONG)
11429 /* For (long)int1 * (long)int2, we don't actually do the
11430 casts, and just multiply the 32 bit values, which will
11431 give us the 64 bit result in edx:eax */
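/* A sketch (hypothetical source): with "int x, y;", the expression
   (long)x * (long)y is kept as a single 32x32->64 multiply flagged
   GTF_MUL_64RSLT instead of expanding into a CORINFO_HELP_LMUL call. */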
11434 if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
11435 genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) &&
11436 !op1->gtOverflow() && !op2->gtOverflow())
11438 // The casts have to be of the same signedness.
11439 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11441 // We see if we can force an int constant to change its signedness
11443 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11445 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11448 goto NO_MUL_64RSLT;
11450 if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11451 constOp->gtFlags ^= GTF_UNSIGNED;
11453 goto NO_MUL_64RSLT;
11456 // The only combination that can overflow
11457 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11458 goto NO_MUL_64RSLT;
11460 /* Remaining combinations can never overflow during long mul. */
11462 tree->gtFlags &= ~GTF_OVERFLOW;
11464 /* Do unsigned mul only if the casts were unsigned */
11466 tree->gtFlags &= ~GTF_UNSIGNED;
11467 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11469 /* Since we are committing to GTF_MUL_64RSLT, we don't want
11470 the casts to be folded away. So morph the castees directly */
11472 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11473 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11475 // Propagate side effect flags up the tree
11476 op1->gtFlags &= ~GTF_ALL_EFFECT;
11477 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11478 op2->gtFlags &= ~GTF_ALL_EFFECT;
11479 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11481 // If the GT_MUL can be altogether folded away, we should do that.
11483 if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11484 opts.OptEnabled(CLFLG_CONSTANTFOLD))
11486 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11487 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11488 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11489 tree = gtFoldExprConst(tree);
11490 noway_assert(tree->OperIsConst());
11494 tree->gtFlags |= GTF_MUL_64RSLT;
11496 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11497 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
// Insert GT_NOP nodes for the cast operands so that they do not get folded,
// and propagate the new flags. We don't want to CSE the casts because
// codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11503 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11505 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11506 op1->gtFlags &= ~GTF_ALL_EFFECT;
11507 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11510 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11512 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11513 op2->gtFlags &= ~GTF_ALL_EFFECT;
11514 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11517 op1->gtFlags |= GTF_DONT_CSE;
11518 op2->gtFlags |= GTF_DONT_CSE;
11520 tree->gtFlags &= ~GTF_ALL_EFFECT;
11521 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11523 goto DONE_MORPHING_CHILDREN;
11525 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11528 if (tree->gtOverflow())
11529 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11531 helper = CORINFO_HELP_LMUL;
11533 goto USE_HELPER_FOR_ARITH;
11537 /* We are seeing this node again. We have decided to use
11538 GTF_MUL_64RSLT, so leave it alone. */
11540 assert(tree->gtIsValid64RsltMul());
11543 #endif // !_TARGET_64BIT_
11548 #ifndef _TARGET_64BIT_
11549 if (typ == TYP_LONG)
11551 helper = CORINFO_HELP_LDIV;
11552 goto USE_HELPER_FOR_ARITH;
11555 #if USE_HELPERS_FOR_INT_DIV
11556 if (typ == TYP_INT)
11558 helper = CORINFO_HELP_DIV;
11559 goto USE_HELPER_FOR_ARITH;
11562 #endif // !_TARGET_64BIT_
11564 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11566 op2 = gtFoldExprConst(op2);
11572 #ifndef _TARGET_64BIT_
11573 if (typ == TYP_LONG)
11575 helper = CORINFO_HELP_ULDIV;
11576 goto USE_HELPER_FOR_ARITH;
11578 #if USE_HELPERS_FOR_INT_DIV
11579 if (typ == TYP_INT)
11581 helper = CORINFO_HELP_UDIV;
11582 goto USE_HELPER_FOR_ARITH;
#endif // !_TARGET_64BIT_
11590 if (varTypeIsFloating(typ))
11592 helper = CORINFO_HELP_DBLREM;
11594 if (op1->TypeGet() == TYP_FLOAT)
11596 if (op2->TypeGet() == TYP_FLOAT)
11598 helper = CORINFO_HELP_FLTREM;
11602 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
11605 else if (op2->TypeGet() == TYP_FLOAT)
11607 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
11609 goto USE_HELPER_FOR_ARITH;
// Unlike GT_UMOD (whose idiv optimization happens during codegen), do not optimize signed mod here.
// A similar optimization for signed mod will not work for a negative perfectly divisible
// HI-word. To make it correct, we would need to divide without the sign and then flip the
// result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
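// For instance, in C# '-7 % 2' is -1 while '7 % 2' is 1: signed mod takes the sign of
// the dividend, which the unsigned inline sequence does not preserve.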
11616 goto ASSIGN_HELPER_FOR_MOD;
11620 #ifdef _TARGET_ARMARCH_
11622 // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11624 #else // _TARGET_XARCH
/* If this is an unsigned long mod with an op2 that is a cast to long from a
   constant int, then don't morph it into a call to the helper. This can be done
   faster inline using idiv. */
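// e.g. 'ulongVal % 10', where 10 is a constant int cast to long in the range [2, 0x3fffffff],
// is kept as an inline GT_UMOD rather than morphed into a CORINFO_HELP_ULMOD call.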
11631 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11632 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11633 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11635 if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
11636 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
11637 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
11638 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
11640 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
11641 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
11644 if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11645 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11647 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11648 noway_assert(op1->TypeGet() == TYP_LONG);
11650 // Update flags for op1 morph
11651 tree->gtFlags &= ~GTF_ALL_EFFECT;
11653 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11655 // If op1 is a constant, then do constant folding of the division operator
11656 if (op1->gtOper == GT_CNS_NATIVELONG)
11658 tree = gtFoldExpr(tree);
11663 #endif // _TARGET_XARCH
11665 ASSIGN_HELPER_FOR_MOD:
// For "val % 1", return 0 if op1 doesn't have any side effects.
// (If we are in the CSE phase, we cannot discard 'tree' because it
// may contain CSE expressions that we haven't yet examined.)
11671 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11673 if (op2->IsIntegralConst(1))
11675 GenTree* zeroNode = gtNewZeroConNode(typ);
11677 zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11679 DEBUG_DESTROY_NODE(tree);
11684 #ifndef _TARGET_64BIT_
11685 if (typ == TYP_LONG)
11687 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11688 goto USE_HELPER_FOR_ARITH;
11691 #if USE_HELPERS_FOR_INT_DIV
11692 if (typ == TYP_INT)
11694 if (oper == GT_UMOD)
11696 helper = CORINFO_HELP_UMOD;
11697 goto USE_HELPER_FOR_ARITH;
11699 else if (oper == GT_MOD)
11701 helper = CORINFO_HELP_MOD;
11702 goto USE_HELPER_FOR_ARITH;
11706 #endif // !_TARGET_64BIT_
11708 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11710 op2 = gtFoldExprConst(op2);
11713 #ifdef _TARGET_ARM64_
// For ARM64 we don't have a remainder instruction, so the architecture
// manual suggests the following transformation to generate code for
// such an operator:
11718 // a % b = a - (a / b) * b;
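// e.g. 'a % 3' becomes 'a - (a / 3) * 3', which codegen can typically emit using
// ARM64's sdiv/udiv and msub instructions.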
11720 // TODO: there are special cases where it can be done better, for example
// when the modulo operation is unsigned and the divisor is an
11722 // integer constant power of two. In this case, we can make the transform:
11724 // a % b = a & (b - 1);
11726 // Lower supports it for all cases except when `a` is constant, but
11727 // in Morph we can't guarantee that `a` won't be transformed into a constant,
// so we can't guarantee that lower will be able to do this optimization.
// Always do the "a % b = a - (a / b) * b" morph; see the TODO before this block.
11731 bool doMorphModToSubMulDiv = true;
11733 if (doMorphModToSubMulDiv)
11735 assert(!optValnumCSE_phase);
11737 tree = fgMorphModToSubMulDiv(tree->AsOp());
11738 op1 = tree->gtOp.gtOp1;
11739 op2 = tree->gtOp.gtOp2;
11742 #else // !_TARGET_ARM64_
11743 // If b is not a power of 2 constant then lowering replaces a % b
11744 // with a - (a / b) * b and applies magic division optimization to
11745 // a / b. The code may already contain an a / b expression (e.g.
11746 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11747 // If we convert % to / here we give CSE the opportunity to eliminate
11748 // the redundant division. If there's no redundant division then
11749 // nothing is lost, lowering would have done this transform anyway.
11751 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11753 ssize_t divisorValue = op2->AsIntCon()->IconValue();
11754 size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11755 : static_cast<size_t>(abs(divisorValue));
11757 if (!isPow2(absDivisorValue))
11759 tree = fgMorphModToSubMulDiv(tree->AsOp());
11760 op1 = tree->gtOp.gtOp1;
11761 op2 = tree->gtOp.gtOp2;
11764 #endif // !_TARGET_ARM64_
11767 USE_HELPER_FOR_ARITH:
11769 // TODO: this comment is wrong now, do an appropriate fix.
11770 /* We have to morph these arithmetic operations into helper calls
11771 before morphing the arguments (preorder), else the arguments
11772 won't get correct values of fgPtrArgCntCur.
11773 However, try to fold the tree first in case we end up with a
11774 simple node which won't need a helper call at all */
11776 noway_assert(tree->OperIsBinary());
11778 GenTree* oldTree = tree;
11780 tree = gtFoldExpr(tree);
11782 // Were we able to fold it ?
11783 // Note that gtFoldExpr may return a non-leaf even if successful
11784 // e.g. for something like "expr / 1" - see also bug #290853
11785 if (tree->OperIsLeaf() || (oldTree != tree))
11787 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11790 // Did we fold it into a comma node with throw?
11791 if (tree->gtOper == GT_COMMA)
11793 noway_assert(fgIsCommaThrow(tree));
11794 return fgMorphTree(tree);
11797 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11800 // normalize small integer return values
11801 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) &&
11802 fgCastNeeded(op1, info.compRetType))
11804 // Small-typed return values are normalized by the callee
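// e.g. for a method declared to return 'byte', an explicit cast back to the declared
// small return type is inserted around the return value here, so callers see a
// properly normalized value.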
11805 op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType);
11807 // Propagate GTF_COLON_COND
11808 op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11810 tree->gtOp.gtOp1 = fgMorphCast(op1);
11812 // Propagate side effect flags
11813 tree->gtFlags &= ~GTF_ALL_EFFECT;
11814 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11823 GenTree* optimizedTree = gtFoldTypeCompare(tree);
11825 if (optimizedTree != tree)
11827 return fgMorphTree(optimizedTree);
11835 // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT.
11836 if ((oper != GT_GT) || tree->IsUnsigned())
11838 fgMorphRecognizeBoxNullable(tree);
11841 op1 = tree->gtOp.gtOp1;
11842 op2 = tree->gtGetOp2IfPresent();
11846 case GT_RUNTIMELOOKUP:
11847 return fgMorphTree(op1);
11849 #ifdef _TARGET_ARM_
11851 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11853 switch (tree->TypeGet())
11856 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11858 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11866 // Special handling for the arg list.
11867 return fgMorphArgList(tree->AsArgList(), mac);
11873 #if !CPU_HAS_FP_SUPPORT
11874 tree = fgMorphToEmulatedFP(tree);
/*-------------------------------------------------------------------------
 * Process the first operand, if any
 */
11884 #if LOCAL_ASSERTION_PROP
11885 // If we are entering the "then" part of a Qmark-Colon we must
11886 // save the state of the current copy assignment table
11887 // so that we can restore this state when entering the "else" part
11890 noway_assert(optLocalAssertionProp);
11891 if (optAssertionCount)
11893 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11894 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11895 origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11896 origAssertionCount = optAssertionCount;
11897 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11901 origAssertionCount = 0;
11902 origAssertionTab = nullptr;
11905 #endif // LOCAL_ASSERTION_PROP
11907 // We might need a new MorphAddressContext context. (These are used to convey
11908 // parent context about how addresses being calculated will be used; see the
11909 // specification comment for MorphAddrContext for full details.)
11910 // Assume it's an Ind context to start.
11911 MorphAddrContext subIndMac1(MACK_Ind);
11912 MorphAddrContext* subMac1 = mac;
11913 if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11915 switch (tree->gtOper)
11918 // A non-null mac here implies this node is part of an address computation.
11919 // If so, we need to pass the existing mac down to the child node.
11921 // Otherwise, use a new mac.
11922 if (subMac1 == nullptr)
11924 subMac1 = &subIndMac1;
11925 subMac1->m_kind = MACK_Addr;
11929 // In a comma, the incoming context only applies to the rightmost arg of the
11930 // comma list. The left arg (op1) gets a fresh context.
// A non-null mac here implies this node is part of an address computation (the tree parent is GT_ADDR).
11939 // If so, we need to pass the existing mac down to the child node.
11941 // Otherwise, use a new mac.
11942 if (subMac1 == nullptr)
11944 subMac1 = &subIndMac1;
11952 // For additions, if we're in an IND context keep track of whether
11953 // all offsets added to the address are constant, and their sum.
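// e.g. for IND(ADD(ADD(p, 8), 16)) the context accumulates a total constant offset of 24,
// later used to decide whether the indirection still requires an explicit null check.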
11954 if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11956 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11957 GenTree* otherOp = tree->gtOp.gtOp2;
// Is the other operand a constant?
11959 if (otherOp->IsCnsIntOrI())
11961 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11962 totalOffset += otherOp->gtIntConCommon.IconValue();
11963 if (totalOffset.IsOverflow())
11965 // We will consider an offset so large as to overflow as "not a constant" --
11966 // we will do a null check.
11967 subMac1->m_allConstantOffsets = false;
11971 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11976 subMac1->m_allConstantOffsets = false;
11980 // If op1 is a GT_FIELD or indir, we need to pass down the mac if
11981 // its parent is GT_ADDR, since the address of op1
11982 // is part of an ongoing address computation. Otherwise
11983 // op1 represents the value of the field and so any address
11984 // calculations it does are in a new context.
11985 if (((op1->gtOper == GT_FIELD) || op1->OperIsIndir()) && (tree->gtOper != GT_ADDR))
// The impact of op1's value on any ongoing
// address computation is handled below when looking at op2.
11994 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11996 #if LOCAL_ASSERTION_PROP
11997 // If we are exiting the "then" part of a Qmark-Colon we must
11998 // save the state of the current copy assignment table
11999 // so that we can merge this state with the "else" part exit
12002 noway_assert(optLocalAssertionProp);
12003 if (optAssertionCount)
12005 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
12006 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
12007 thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
12008 thenAssertionCount = optAssertionCount;
12009 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
12013 thenAssertionCount = 0;
12014 thenAssertionTab = nullptr;
12017 #endif // LOCAL_ASSERTION_PROP
12019 /* Morphing along with folding and inlining may have changed the
12020 * side effect flags, so we have to reset them
12022 * NOTE: Don't reset the exception flags on nodes that may throw */
12024 assert(tree->gtOper != GT_CALL);
12026 if (!tree->OperRequiresCallFlag(this))
12028 tree->gtFlags &= ~GTF_CALL;
12031 /* Propagate the new flags */
12032 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
// &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar itself does.
// Similarly for clsVar.
12036 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
12038 tree->gtFlags &= ~GTF_GLOB_REF;
/*-------------------------------------------------------------------------
 * Process the second operand, if any
 */
12049 #if LOCAL_ASSERTION_PROP
12050 // If we are entering the "else" part of a Qmark-Colon we must
12051 // reset the state of the current copy assignment table
12054 noway_assert(optLocalAssertionProp);
12055 optAssertionReset(0);
12056 if (origAssertionCount)
12058 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
12059 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
12060 optAssertionReset(origAssertionCount);
12063 #endif // LOCAL_ASSERTION_PROP
12065 // We might need a new MorphAddressContext context to use in evaluating op2.
12066 // (These are used to convey parent context about how addresses being calculated
12067 // will be used; see the specification comment for MorphAddrContext for full details.)
12068 // Assume it's an Ind context to start.
12069 switch (tree->gtOper)
12072 if (mac != nullptr && mac->m_kind == MACK_Ind)
12074 GenTree* otherOp = tree->gtOp.gtOp1;
// Is the other operand a constant?
12076 if (otherOp->IsCnsIntOrI())
12078 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
12082 mac->m_allConstantOffsets = false;
12090 // If op2 is a GT_FIELD or indir, we must be taking its value,
12091 // so it should evaluate its address in a new context.
12092 if ((op2->gtOper == GT_FIELD) || op2->OperIsIndir())
// The impact of op2's value on any ongoing
// address computation is handled above when looking at op1.
12100 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
12102 /* Propagate the side effect flags from op2 */
12104 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
12106 #if LOCAL_ASSERTION_PROP
12107 // If we are exiting the "else" part of a Qmark-Colon we must
12108 // merge the state of the current copy assignment table with
12109 // that of the exit of the "then" part.
12112 noway_assert(optLocalAssertionProp);
12113 // If either exit table has zero entries then
12114 // the merged table also has zero entries
12115 if (optAssertionCount == 0 || thenAssertionCount == 0)
12117 optAssertionReset(0);
12121 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
12122 if ((optAssertionCount != thenAssertionCount) ||
12123 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
12125 // Yes they are different so we have to find the merged set
12126 // Iterate over the copy asgn table removing any entries
12127 // that do not have an exact match in the thenAssertionTab
12128 AssertionIndex index = 1;
12129 while (index <= optAssertionCount)
12131 AssertionDsc* curAssertion = optGetAssertion(index);
12133 for (unsigned j = 0; j < thenAssertionCount; j++)
12135 AssertionDsc* thenAssertion = &thenAssertionTab[j];
12137 // Do the left sides match?
12138 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
12139 (curAssertion->assertionKind == thenAssertion->assertionKind))
12141 // Do the right sides match?
12142 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
12143 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
// If we fall out of the loop above, then we didn't find
// any matching entry in the thenAssertionTab, so it must
// have been killed on that path; remove it here.
12159 // The data at optAssertionTabPrivate[i] is to be removed
12160 CLANG_FORMAT_COMMENT_ANCHOR;
12164 printf("The QMARK-COLON ");
12166 printf(" removes assertion candidate #%d\n", index);
12169 optAssertionRemove(index);
12172 // The data at optAssertionTabPrivate[i] is to be kept
12178 #endif // LOCAL_ASSERTION_PROP
12181 DONE_MORPHING_CHILDREN:
12183 if (tree->OperMayThrow(this))
12185 // Mark the tree node as potentially throwing an exception
12186 tree->gtFlags |= GTF_EXCEPT;
12190 if (tree->OperIsIndirOrArrLength())
12192 tree->gtFlags |= GTF_IND_NONFAULTING;
12194 if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) &&
12195 ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0)))
12197 tree->gtFlags &= ~GTF_EXCEPT;
12201 if (tree->OperRequiresAsgFlag())
12203 tree->gtFlags |= GTF_ASG;
12207 if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) &&
12208 ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
12210 tree->gtFlags &= ~GTF_ASG;
12214 if (tree->OperRequiresCallFlag(this))
12216 tree->gtFlags |= GTF_CALL;
12220 if (((op1 == nullptr) || ((op1->gtFlags & GTF_CALL) == 0)) &&
12221 ((op2 == nullptr) || ((op2->gtFlags & GTF_CALL) == 0)))
12223 tree->gtFlags &= ~GTF_CALL;
/*-------------------------------------------------------------------------
 * Now do POST-ORDER processing
 */
12230 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet())))
12232 // The tree is really not GC but was marked as such. Now that the
12233 // children have been unmarked, unmark the tree too.
// Remember that GT_COMMA inherits its type only from op2
12236 if (tree->gtOper == GT_COMMA)
12238 tree->gtType = genActualType(op2->TypeGet());
12242 tree->gtType = genActualType(op1->TypeGet());
12246 GenTree* oldTree = tree;
12248 GenTree* qmarkOp1 = nullptr;
12249 GenTree* qmarkOp2 = nullptr;
12251 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
12253 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
12254 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
12257 // Try to fold it, maybe we get lucky,
12258 tree = gtFoldExpr(tree);
12260 if (oldTree != tree)
12262 /* if gtFoldExpr returned op1 or op2 then we are done */
12263 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
12268 /* If we created a comma-throw tree then we need to morph op1 */
12269 if (fgIsCommaThrow(tree))
12271 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
12272 fgMorphTreeDone(tree);
12278 else if (tree->OperKind() & GTK_CONST)
12283 /* gtFoldExpr could have used setOper to change the oper */
12284 oper = tree->OperGet();
12285 typ = tree->TypeGet();
12287 /* gtFoldExpr could have changed op1 and op2 */
12288 op1 = tree->gtOp.gtOp1;
12289 op2 = tree->gtGetOp2IfPresent();
12291 // Do we have an integer compare operation?
12293 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
12295 // Are we comparing against zero?
12297 if (op2->IsIntegralConst(0))
12299 // Request that the codegen for op1 sets the condition flags
12300 // when it generates the code for op1.
12302 // Codegen for op1 must set the condition flags if
12303 // this method returns true.
12305 op1->gtRequestSetFlags();
/*-------------------------------------------------------------------------
 * Perform the required oper-specific postorder morphing
 */
12315 size_t ival1, ival2;
12316 GenTree* lclVarTree;
12317 GenTree* effectiveOp1;
12318 FieldSeqNode* fieldSeq = nullptr;
12324 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
12325 if (lclVarTree != nullptr)
12327 lclVarTree->gtFlags |= GTF_VAR_DEF;
12330 effectiveOp1 = op1->gtEffectiveVal();
12332 if (effectiveOp1->OperIsConst())
12334 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
12335 tree->gtOp.gtOp1 = op1;
12338 /* If we are storing a small type, we might be able to omit a cast */
12339 if ((effectiveOp1->gtOper == GT_IND) && varTypeIsSmall(effectiveOp1->TypeGet()))
12341 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
12343 var_types castType = op2->CastToType();
12345 // If we are performing a narrowing cast and
12346 // castType is larger or the same as op1's type
12347 // then we can discard the cast.
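// e.g. when storing GT_CAST<short>(x) into a byte-sized location, the cast can be
// dropped: only the low 8 bits are stored, and the cast leaves those bits unchanged.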
12349 if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(effectiveOp1->TypeGet())))
12351 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
12354 else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet()))
12356 /* We don't need to zero extend the setcc instruction */
12357 op2->gtType = TYP_BYTE;
// If we introduced a CSE we may need to undo the optimization above
// (i.e. " op2->gtType = TYP_BYTE;", which depends upon op1 being a GT_IND of a byte type).
// When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
12363 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
12365 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
12366 LclVarDsc* varDsc = &lvaTable[varNum];
12368 /* We again need to zero extend the setcc instruction */
12369 op2->gtType = varDsc->TypeGet();
12371 fgAssignSetVarDef(tree);
/* We can't CSE the LHS of an assignment */
/* We must also set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
12375 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12377 op1->gtFlags |= GTF_DONT_CSE;
12384 /* Make sure we're allowed to do this */
12386 if (optValnumCSE_phase)
12388 // It is not safe to reorder/delete CSE's
12394 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12396 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12398 op1 = tree->gtOp.gtOp1;
12400 /* Since this can occur repeatedly we use a while loop */
12402 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
12403 (op1->gtType == TYP_INT) && (op1->gtOverflow() == false))
12405 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
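// e.g. '(x + 5) == 8' becomes 'x == 3', and '(x - 5) == 8' becomes 'x == 13'.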
12407 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12408 ival2 = cns2->gtIntCon.gtIconVal;
if (op1->gtOper == GT_ADD)
{
    ival2 -= ival1;
}
else
{
    ival2 += ival1;
}
12418 cns2->gtIntCon.gtIconVal = ival2;
12420 #ifdef _TARGET_64BIT_
12421 // we need to properly re-sign-extend or truncate as needed.
12422 cns2->AsIntCon()->TruncateOrSignExtend32();
12423 #endif // _TARGET_64BIT_
12425 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
// Here we look for a tree of the form "op1 EQ/NE CNS(0 or 1)"
12436 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12438 // cast to unsigned allows test for both 0 and 1
12439 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12441 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12443 else // cast to UINT64 allows test for both 0 and 1
12444 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12446 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12449 if (ival2 != INT_MAX)
12451 // If we don't have a comma and relop, we can't do this optimization
12453 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
// Here we look for the following transformation:
//
//        EQ/NE                    Possible REVERSE(RELOP)
//        /  \                          /      \
//    COMMA  CNS 0/1      ->        COMMA    relop_op2
//    /   \                         /    \
//   x   RELOP                     x    relop_op1
//       /    \
// relop_op1  relop_op2
12467 GenTree* comma = op1;
12468 GenTree* relop = comma->gtOp.gtOp2;
12470 GenTree* relop_op1 = relop->gtOp.gtOp1;
12472 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12476 gtReverseCond(relop);
12479 relop->gtOp.gtOp1 = comma;
12480 comma->gtOp.gtOp2 = relop_op1;
12482 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12483 comma->gtFlags &= ~GTF_ALL_EFFECT;
12484 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12485 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12487 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12488 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
relop->gtFlags |=
    tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12495 if (op1->gtOper == GT_COMMA)
// Here we look for the following tree,
// and when the LCL_VAR is a temp we can fold the tree:
//
//     EQ/NE                    EQ/NE
//     /  \                     /  \
//  COMMA  CNS 0/1    ->     RELOP  CNS 0/1
//  /   \                    /  \
// ASG  LCL_VAR
// /  \
// LCL_VAR  RELOP
//          /  \
12510 GenTree* asg = op1->gtOp.gtOp1;
12511 GenTree* lcl = op1->gtOp.gtOp2;
12513 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
12514 if (asg->gtOper != GT_ASG)
12519 /* The right side of the comma must be a LCL_VAR temp */
12520 if (lcl->gtOper != GT_LCL_VAR)
12525 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12526 noway_assert(lclNum < lvaCount);
12528 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12529 if (!lvaTable[lclNum].lvIsTemp)
12535 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12536 // Fix 383856 X86/ARM ILGEN
12537 if (lclNumIsCSE(lclNum))
12543 /* We also must be assigning the result of a RELOP */
12544 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12549 /* Both of the LCL_VAR must match */
12550 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12555 /* If right side of asg is not a RELOP then skip */
12556 if (!asg->gtOp.gtOp2->OperIsCompare())
12561 LclVarDsc* varDsc = lvaTable + lclNum;
12563 /* Set op1 to the right side of asg, (i.e. the RELOP) */
12564 op1 = asg->gtOp.gtOp2;
12566 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12567 DEBUG_DESTROY_NODE(lcl);
12570 if (op1->OperIsCompare())
// Here we look for the following tree:
//
//     EQ/NE          ->      RELOP/!RELOP
//     /  \
//  RELOP  CNS 0/1
//
// Note that we will remove/destroy the EQ/NE node and move
// the RELOP up into its location.
12582 /* Here we reverse the RELOP if necessary */
12584 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12588 gtReverseCond(op1);
12591 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12592 op1->gtType = tree->gtType;
12594 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12595 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12597 DEBUG_DESTROY_NODE(tree);
12602 // Now we check for a compare with the result of an '&' operator
// Here we look for the following transformation:
//
//        EQ/NE                    EQ/NE
//        /  \                     /  \
//     AND   CNS 0/1    ->      AND   CNS 0
//    /   \                    /   \
// RSZ/RSH  CNS 1             x    CNS (1 << y)
//  /  \
// x   CNS_INT +y
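// e.g. '((x >> 3) & 1) != 0' becomes '(x & 8) != 0'.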
12614 if (op1->gtOper == GT_AND)
12616 GenTree* andOp = op1;
12617 GenTree* rshiftOp = andOp->gtOp.gtOp1;
12619 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12624 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12629 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12631 if (shiftAmount < 0)
12636 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12641 if (andOp->gtType == TYP_INT)
12643 if (shiftAmount > 31)
12648 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12650 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12652 // Reverse the cond if necessary
12655 gtReverseCond(tree);
12656 cns2->gtIntCon.gtIconVal = 0;
12657 oper = tree->gtOper;
12660 else if (andOp->gtType == TYP_LONG)
12662 if (shiftAmount > 63)
12667 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12669 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12671 // Reverse the cond if necessary
12674 gtReverseCond(tree);
12675 cns2->gtIntConCommon.SetLngValue(0);
12676 oper = tree->gtOper;
12680 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12682 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12683 DEBUG_DESTROY_NODE(rshiftOp);
12685 } // END if (ival2 != INT_MAX)
12688 /* Now check for compares with small constant longs that can be cast to int */
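// e.g. '(lng & 0xFFL) == 10L' can be evaluated as '((int)lng & 0xFF) == 10'.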
12690 if (!cns2->OperIsConst())
12695 if (cns2->TypeGet() != TYP_LONG)
12700 /* Is the constant 31 bits or smaller? */
12702 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12707 /* Is the first comparand mask operation of type long ? */
12709 if (op1->gtOper != GT_AND)
12711 /* Another interesting case: cast from int */
12713 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12714 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12715 !op1->gtOverflow()) // cannot be an overflow checking cast
12717 /* Simply make this into an integer comparison */
12719 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12720 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
12726 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12728 /* Is the result of the mask effectively an INT ? */
12731 andMask = op1->gtOp.gtOp2;
12732 if (andMask->gtOper != GT_CNS_NATIVELONG)
12736 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12741 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12743 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, false, TYP_INT);
12745 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12747 noway_assert(andMask == op1->gtOp.gtOp2);
12749 ival1 = (int)andMask->gtIntConCommon.LngValue();
12750 andMask->SetOper(GT_CNS_INT);
12751 andMask->gtType = TYP_INT;
12752 andMask->gtIntCon.gtIconVal = ival1;
12754 /* now change the type of the AND node */
12756 op1->gtType = TYP_INT;
12758 /* finally we replace the comparand */
12760 ival2 = (int)cns2->gtIntConCommon.LngValue();
12761 cns2->SetOper(GT_CNS_INT);
12762 cns2->gtType = TYP_INT;
12764 noway_assert(cns2 == op2);
12765 cns2->gtIntCon.gtIconVal = ival2;
12774 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12776 if (op2->gtOper == GT_CNS_INT)
12779 /* Check for "expr relop 1" */
12780 if (cns2->IsIntegralConst(1))
12782 /* Check for "expr >= 1" */
12785 /* Change to "expr > 0" */
12789 /* Check for "expr < 1" */
12790 else if (oper == GT_LT)
12792 /* Change to "expr <= 0" */
12797 /* Check for "expr relop -1" */
12798 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12800 /* Check for "expr <= -1" */
12803 /* Change to "expr < 0" */
12807 /* Check for "expr > -1" */
12808 else if (oper == GT_GT)
12810 /* Change to "expr >= 0" */
// If we get here we should be changing 'oper'
12815 assert(tree->OperGet() != oper);
12817 // Keep the old ValueNumber for 'tree' as the new expr
12818 // will still compute the same value as before
12819 tree->SetOper(oper, GenTree::PRESERVE_VN);
12820 cns2->gtIntCon.gtIconVal = 0;
12822 // vnStore is null before the ValueNumber phase has run
12823 if (vnStore != nullptr)
12825 // Update the ValueNumber for 'cns2', as we just changed it to 0
12826 fgValueNumberTreeConst(cns2);
12829 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12834 else // we have an unsigned comparison
12836 if (op2->IsIntegralConst(0))
12838 if ((oper == GT_GT) || (oper == GT_LE))
12840 // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
// recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
// if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
// and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12844 // occurs as a result of branch inversion.
12845 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12846 tree->SetOper(oper, GenTree::PRESERVE_VN);
12847 tree->gtFlags &= ~GTF_UNSIGNED;
12854 noway_assert(tree->OperKind() & GTK_RELOP);
12859 #ifndef _TARGET_64BIT_
12860 if (typ == TYP_LONG)
12862 // This must be GTF_MUL_64RSLT
12863 assert(tree->gtIsValid64RsltMul());
#endif // !_TARGET_64BIT_
12871 if (tree->gtOverflow())
12876 // TODO #4104: there are a lot of other places where
12877 // this condition is not checked before transformations.
/* Check for "op1 - cns2"; we change it to "op1 + (-cns2)" */
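// e.g. 'x - 7' becomes 'x + (-7)', which can then participate in the GT_ADD folding below.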
12883 if (op2->IsCnsIntOrI())
12885 /* Negate the constant and change the node to be "+" */
12887 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12888 op2->gtIntCon.gtFieldSeq = FieldSeqStore::NotAField();
12890 tree->ChangeOper(oper);
/* Check for "cns1 - op2"; we change it to "(cns1 + (-op2))" */
12897 if (op1->IsCnsIntOrI())
12899 noway_assert(varTypeIsIntOrI(tree));
12901 // The type of the new GT_NEG node cannot just be op2->TypeGet().
12902 // Otherwise we may sign-extend incorrectly in cases where the GT_NEG
12903 // node ends up feeding directly into a cast, for example in
12904 // GT_CAST<ubyte>(GT_SUB(0, s_1.ubyte))
12905 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2);
12906 fgMorphTreeDone(op2);
12909 tree->ChangeOper(oper);
12913 /* No match - exit */
12917 #ifdef _TARGET_ARM64_
12919 if (!varTypeIsFloating(tree->gtType))
12921 // Codegen for this instruction needs to be able to throw two exceptions:
12922 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW);
12923 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO);
12927 // Codegen for this instruction needs to be able to throw one exception:
12928 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO);
12935 if (tree->gtOverflow())
12937 tree->gtRequestSetFlags();
// Add the exception-throwing basic block to jump to on overflow
12941 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW);
12943 // We can't do any commutative morphing for overflow instructions
12954 /* Commute any non-REF constants to the right */
12957 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12959 // TODO-Review: We used to assert here that
12960 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12961 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12962 // and would sometimes hit this assertion. This may indicate a missed "remorph".
12963 // Task is to re-enable this assertion and investigate.
12965 /* Swap the operands */
12966 tree->gtOp.gtOp1 = op2;
12967 tree->gtOp.gtOp2 = op1;
12970 op2 = tree->gtOp.gtOp2;
12973 /* See if we can fold GT_ADD nodes. */
12975 if (oper == GT_ADD)
12977 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
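// e.g. '(x + 4) + (y + 12)' becomes '(x + y) + 16'.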
12979 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12980 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12981 !op1->gtOverflow() && !op2->gtOverflow())
12983 // Don't create a byref pointer that may point outside of the ref object.
12984 // If a GC happens, the byref won't get updated. This can happen if one
12985 // of the int components is negative. It also requires the address generation
12986 // be in a fully-interruptible code region.
12987 if (!varTypeIsGC(op1->gtOp.gtOp1->TypeGet()) && !varTypeIsGC(op2->gtOp.gtOp1->TypeGet()))
12989 cns1 = op1->gtOp.gtOp2;
12990 cns2 = op2->gtOp.gtOp2;
12991 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12992 #ifdef _TARGET_64BIT_
12993 if (cns1->TypeGet() == TYP_INT)
12995 // we need to properly re-sign-extend or truncate after adding two int constants above
12996 cns1->AsIntCon()->TruncateOrSignExtend32();
12998 #endif //_TARGET_64BIT_
13000 tree->gtOp.gtOp2 = cns1;
13001 DEBUG_DESTROY_NODE(cns2);
13003 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
13004 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
13005 DEBUG_DESTROY_NODE(op2);
13006 op2 = tree->gtOp.gtOp2;
13010 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
13012 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
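// e.g. '(x + 3) + 5' becomes 'x + 8'.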
13013 CLANG_FORMAT_COMMENT_ANCHOR;
13015 if (op1->gtOper == GT_ADD && //
13016 !gtIsActiveCSE_Candidate(op1) && //
13017 !op1->gtOverflow() && //
13018 op1->gtOp.gtOp2->IsCnsIntOrI() && //
13019 (op1->gtOp.gtOp2->OperGet() == op2->OperGet()) && //
13020 (op1->gtOp.gtOp2->TypeGet() != TYP_REF) && // Don't fold REFs
13021 (op2->TypeGet() != TYP_REF)) // Don't fold REFs
13023 cns1 = op1->gtOp.gtOp2;
13024 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
13025 op2->gtIntConCommon.IconValue());
13026 #ifdef _TARGET_64BIT_
13027 if (op2->TypeGet() == TYP_INT)
13029 // we need to properly re-sign-extend or truncate after adding two int constants above
13030 op2->AsIntCon()->TruncateOrSignExtend32();
13032 #endif //_TARGET_64BIT_
13034 if (cns1->OperGet() == GT_CNS_INT)
13036 op2->gtIntCon.gtFieldSeq =
13037 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
13039 DEBUG_DESTROY_NODE(cns1);
13041 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
13042 DEBUG_DESTROY_NODE(op1);
13043 op1 = tree->gtOp.gtOp1;
13048 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
// If this addition is adding an offset to a null pointer,
// avoid the work and yield the null pointer immediately.
// Dereferencing the pointer in either case will have the same effect.
13056 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
13057 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
13059 op2->gtType = tree->gtType;
13060 DEBUG_DESTROY_NODE(op1);
13061 DEBUG_DESTROY_NODE(tree);
// Remove the addition iff it won't change the tree type to TYP_REF.
13068 if (!gtIsActiveCSE_Candidate(op2) &&
13069 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
13071 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
13072 (op2->gtIntCon.gtFieldSeq != nullptr) &&
13073 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
13075 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
13078 DEBUG_DESTROY_NODE(op2);
13079 DEBUG_DESTROY_NODE(tree);
13086 /* See if we can fold GT_MUL by const nodes */
13087 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
13089 #ifndef _TARGET_64BIT_
13090 noway_assert(typ <= TYP_UINT);
13091 #endif // _TARGET_64BIT_
13092 noway_assert(!tree->gtOverflow());
13094 ssize_t mult = op2->gtIntConCommon.IconValue();
13095 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
13096 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
13098 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
13102 // We may be able to throw away op1 (unless it has side-effects)
13104 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
13106 DEBUG_DESTROY_NODE(op1);
13107 DEBUG_DESTROY_NODE(tree);
13108 return op2; // Just return the "0" node
// We need to keep op1 for its side-effects; hang it off a GT_COMMA node.
13114 tree->ChangeOper(GT_COMMA);
13118 size_t abs_mult = (mult >= 0) ? mult : -mult;
13119 size_t lowestBit = genFindLowestBit(abs_mult);
13120 bool changeToShift = false;
13122 // is it a power of two? (positive or negative)
13123 if (abs_mult == lowestBit)
13125 // if negative negate (min-int does not need negation)
13126 if (mult < 0 && mult != SSIZE_T_MIN)
13128 // The type of the new GT_NEG node cannot just be op1->TypeGet().
13129 // Otherwise we may sign-extend incorrectly in cases where the GT_NEG
// node ends up feeding directly into a cast, for example in
13131 // GT_CAST<ubyte>(GT_MUL(-1, s_1.ubyte))
13132 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1);
13133 fgMorphTreeDone(op1);
13136 // If "op2" is a constant array index, the other multiplicand must be a constant.
13137 // Transfer the annotation to the other one.
13138 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
13139 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
13141 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
13142 GenTree* otherOp = op1;
13143 if (otherOp->OperGet() == GT_NEG)
13145 otherOp = otherOp->gtOp.gtOp1;
13147 assert(otherOp->OperGet() == GT_CNS_INT);
13148 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
13149 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
13154 DEBUG_DESTROY_NODE(op2);
13155 DEBUG_DESTROY_NODE(tree);
13159 /* Change the multiplication into a shift by log2(val) bits */
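// e.g. 'x * 8' becomes 'x << 3'; for 'x * -8', op1 was negated above, giving '(-x) << 3'.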
13160 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
13161 changeToShift = true;
13164 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
13166 int shift = genLog2(lowestBit);
13167 ssize_t factor = abs_mult >> shift;
13169 if (factor == 3 || factor == 5 || factor == 9)
13171 // if negative negate (min-int does not need negation)
13172 if (mult < 0 && mult != SSIZE_T_MIN)
13174 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1);
13175 fgMorphTreeDone(op1);
13178 GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
13179 if (op2IsConstIndex)
13181 factorIcon->AsIntCon()->gtFieldSeq =
13182 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
13185 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
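// e.g. with mult == 40: lowestBit == 8, factor == 5, so 'x * 40' becomes '(x * 5) << 3',
// where 'x * 5' can be encoded with a single scaled LEA.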
13186 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
13187 fgMorphTreeDone(op1);
13189 op2->gtIntConCommon.SetIconValue(shift);
13190 changeToShift = true;
13193 #endif // LEA_AVAILABLE
13196 // vnStore is null before the ValueNumber phase has run
13197 if (vnStore != nullptr)
13199 // Update the ValueNumber for 'op2', as we just changed the constant
13200 fgValueNumberTreeConst(op2);
13203 // Keep the old ValueNumber for 'tree' as the new expr
13204 // will still compute the same value as before
13205 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
13207 goto DONE_MORPHING_CHILDREN;
13210 else if (fgOperIsBitwiseRotationRoot(oper))
13212 tree = fgRecognizeAndMorphBitwiseRotation(tree);
13214 // fgRecognizeAndMorphBitwiseRotation may return a new tree
13215 oper = tree->OperGet();
13216 typ = tree->TypeGet();
13217 op1 = tree->gtOp.gtOp1;
13218 op2 = tree->gtOp.gtOp2;
13226 /* Any constant cases should have been folded earlier */
13227 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13232 noway_assert(varTypeIsFloating(op1->TypeGet()));
13234 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN);
13238 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13239 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13240 // is a local or clsVar, even if it has been address-exposed.
13241 if (op1->OperGet() == GT_ADDR)
13243 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13249 // Can not remove a GT_IND if it is currently a CSE candidate.
13250 if (gtIsActiveCSE_Candidate(tree))
13255 bool foldAndReturnTemp;
13256 foldAndReturnTemp = false;
13260 // Don't remove a volatile GT_IND, even if the address points to a local variable.
13261 if ((tree->gtFlags & GTF_IND_VOLATILE) == 0)
13263 /* Try to Fold *(&X) into X */
13264 if (op1->gtOper == GT_ADDR)
13266 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13267 if (gtIsActiveCSE_Candidate(op1))
13272 temp = op1->gtOp.gtOp1; // X
13274 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13275 // they are the *same* struct type. In fact, they almost certainly aren't. If the
13276 // address has an associated field sequence, that identifies this case; go through
13277 // the "lcl_fld" path rather than this one.
13278 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13279 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13281 foldAndReturnTemp = true;
13283 else if (temp->OperIsLocal())
13285 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
13286 LclVarDsc* varDsc = &lvaTable[lclNum];
// We will try to optimize when we have a promoted struct with a zero lvFldOffset
13289 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13291 noway_assert(varTypeIsStruct(varDsc));
13293 // We will try to optimize when we have a single field struct that is being struct promoted
13294 if (varDsc->lvFieldCnt == 1)
13296 unsigned lclNumFld = varDsc->lvFieldLclStart;
13297 // just grab the promoted field
13298 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
// Also make sure that the tree type matches the fieldVarType and that its lvFldOffset is zero
13302 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13304 // We can just use the existing promoted field LclNum
13305 temp->gtLclVarCommon.SetLclNum(lclNumFld);
13306 temp->gtType = fieldVarDsc->TypeGet();
13308 foldAndReturnTemp = true;
13312 // If the type of the IND (typ) is a "small int", and the type of the local has the
13313 // same width, then we can reduce to just the local variable -- it will be
13314 // correctly normalized, and signed/unsigned differences won't matter.
13316 // The below transformation cannot be applied if the local var needs to be normalized on load.
13317 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13318 !lvaTable[lclNum].lvNormalizeOnLoad())
13320 tree->gtType = typ = temp->TypeGet();
13321 foldAndReturnTemp = true;
13323 else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) &&
13324 !lvaTable[lclNum].lvNormalizeOnLoad())
13326 tree->gtType = typ = temp->TypeGet();
13327 foldAndReturnTemp = true;
// Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. nullptr).
13333 assert(fieldSeq == nullptr);
13334 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13335 assert(b || fieldSeq == nullptr);
13337 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13339 // Append the field sequence, change the type.
13340 temp->AsLclFld()->gtFieldSeq =
13341 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13342 temp->gtType = typ;
13344 foldAndReturnTemp = true;
// Otherwise we will fold this into a GT_LCL_FLD below
13348 // where we check (temp != nullptr)
13350 else // !temp->OperIsLocal()
13352 // We don't try to fold away the GT_IND/GT_ADDR for this case
13356 else if (op1->OperGet() == GT_ADD)
13358 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
13360 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13361 opts.OptimizationEnabled())
13363 // No overflow arithmetic with pointers
13364 noway_assert(!op1->gtOverflow());
13366 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13367 if (!temp->OperIsLocal())
13373 // Can not remove the GT_ADDR if it is currently a CSE candidate.
13374 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13379 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13380 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13382 // Does the address have an associated zero-offset field sequence?
13383 FieldSeqNode* addrFieldSeq = nullptr;
13384 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13386 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13389 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13391 noway_assert(!varTypeIsGC(temp->TypeGet()));
13392 foldAndReturnTemp = true;
13396 // The emitter can't handle large offsets
13397 if (ival1 != (unsigned short)ival1)
13402 // The emitter can get confused by invalid offsets
13403 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13408 #ifdef _TARGET_ARM_
13409 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13411 if (varTypeIsFloating(typ))
13413 if ((ival1 % emitTypeSize(typ)) != 0)
13415 tree->gtFlags |= GTF_IND_UNALIGNED;
13421 // Now we can fold this into a GT_LCL_FLD below
13422 // where we check (temp != nullptr)
13427 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13428 // - We may have a load of a local where the load has a different type than the local
13429 // - We may have a load of a local plus an offset
13431 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13432 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13433 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13434 // out-of-bounds w.r.t. the local).
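// e.g. a GT_IND<int> of '&lcl + 4' on an 8-byte local becomes GT_LCL_FLD<int> on 'lcl'
// with gtLclOffs == 4, since the 4-byte load at offset 4 stays within the local.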
13435 if ((temp != nullptr) && !foldAndReturnTemp)
13437 assert(temp->OperIsLocal());
13439 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13440 LclVarDsc* const varDsc = &lvaTable[lclNum];
13442 const var_types tempTyp = temp->TypeGet();
13443 const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13444 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13446 // Make sure we do not enregister this lclVar.
13447 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13449 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13450 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13451 // lclVar and must not extend beyond the end of the lclVar.
13452 if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
13454 GenTreeLclFld* lclFld;
// We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival',
// or if we already have a GT_LCL_FLD we will adjust its gtLclOffs by adding 'ival'.
// Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13460 if (temp->OperGet() == GT_LCL_FLD)
13462 lclFld = temp->AsLclFld();
13463 lclFld->gtLclOffs += (unsigned short)ival1;
13464 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
13466 else // we have a GT_LCL_VAR
13468 assert(temp->OperGet() == GT_LCL_VAR);
13469 temp->ChangeOper(GT_LCL_FLD); // Note that this typically makes the gtFieldSeq "NotAField",
// unless there is a zero field offset associated with 'temp'.
13471 lclFld = temp->AsLclFld();
13472 lclFld->gtLclOffs = (unsigned short)ival1;
13474 if (lclFld->gtFieldSeq == FieldSeqStore::NotAField())
13476 if (fieldSeq != nullptr)
13478 // If it does represent a field, note that.
13479 lclFld->gtFieldSeq = fieldSeq;
13484 // Append 'fieldSeq' to the existing one
13485 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
13488 temp->gtType = tree->gtType;
13489 foldAndReturnTemp = true;
13493 if (foldAndReturnTemp)
13495 assert(temp != nullptr);
13496 assert(temp->TypeGet() == typ);
13497 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13499 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
13500 // 'temp' because a GT_ADDR always marks it for its operand.
13501 temp->gtFlags &= ~GTF_DONT_CSE;
13502 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13504 if (op1->OperGet() == GT_ADD)
13506 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13507 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13509 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
13510 DEBUG_DESTROY_NODE(tree); // GT_IND
// If the result of the fold is a local var, we may need to perform further adjustments,
// e.g. for normalization.
13514 if (temp->OperIs(GT_LCL_VAR))
13517 // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
13518 // and the node in question must have this bit set (as it has already been morphed).
13519 temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
13521 const bool forceRemorph = true;
13522 temp = fgMorphLocalVar(temp, forceRemorph);
// We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
// caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
// returns.
13534 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13535 // could result in an invalid value number for the newly generated GT_IND node.
13536 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13538 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13539 // TBD: this transformation is currently necessary for correctness -- it might
13540 // be good to analyze the failures that result if we don't do this, and fix them
13541 // in other ways. Ideally, this should be optional.
13542 GenTree* commaNode = op1;
13543 unsigned treeFlags = tree->gtFlags;
13544 commaNode->gtType = typ;
13545 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
// dangerous; clear at least GTF_REVERSE_OPS.
13549 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13551 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13553 commaNode = commaNode->gtOp.gtOp2;
13554 commaNode->gtType = typ;
13555 commaNode->gtFlags =
13556 (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is
// dangerous; clear at least GTF_REVERSE_OPS, GTF_ASG, and GTF_CALL.
13559 commaNode->gtFlags |=
13560 ((commaNode->gtOp.gtOp1->gtFlags | commaNode->gtOp.gtOp2->gtFlags) & (GTF_ASG | GTF_CALL));
13562 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13565 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13569 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13571 GetArrayInfoMap()->Remove(tree);
13574 GenTree* addr = commaNode->gtOp.gtOp2;
13575 op1 = gtNewIndir(typ, addr);
13576 // This is very conservative
13577 op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING;
13578 op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
13582 GetArrayInfoMap()->Set(op1, arrInfo);
13585 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13587 commaNode->gtOp.gtOp2 = op1;
13588 commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
13596 // Can not remove op1 if it is currently a CSE candidate.
13597 if (gtIsActiveCSE_Candidate(op1))
13602 if (op1->OperGet() == GT_IND)
13604 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13606 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13607 if (gtIsActiveCSE_Candidate(tree))
13612 // Perform the transform ADDR(IND(...)) == (...).
13613 GenTree* addr = op1->gtOp.gtOp1;
13615 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13617 DEBUG_DESTROY_NODE(op1);
13618 DEBUG_DESTROY_NODE(tree);
13623 else if (op1->OperGet() == GT_OBJ)
13625 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13626 if (gtIsActiveCSE_Candidate(tree))
13631 // Perform the transform ADDR(OBJ(...)) == (...).
13632 GenTree* addr = op1->AsObj()->Addr();
13634 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13636 DEBUG_DESTROY_NODE(op1);
13637 DEBUG_DESTROY_NODE(tree);
13641 else if (op1->gtOper == GT_CAST)
13643 GenTree* casting = op1->gtCast.CastOp();
13644 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13646 DEBUG_DESTROY_NODE(op1);
13647 tree->gtOp.gtOp1 = op1 = casting;
13650 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13652 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13653 // (Be sure to mark "z" as an l-value...)
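// As a rough sketch (names illustrative): ADDR(COMMA(ASG(t1, x), IND(p))) becomes
// COMMA(ASG(t1, x), ADDR(IND(p))), after which the inner ADDR(IND(p)) can collapse to p.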
13655 GenTreePtrStack commas(getAllocator(CMK_ArrayStack));
13656 for (GenTree* comma = op1; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2())
13658 commas.Push(comma);
13660 GenTree* commaNode = commas.Top();
13662 // The top-level addr might be annotated with a zeroOffset field.
13663 FieldSeqNode* zeroFieldSeq = nullptr;
13664 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13666 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13668 // If the node we're about to put under a GT_ADDR is an indirection, it
13669 // doesn't need to be materialized, since we only want the addressing mode. Because
13670 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13671 // as a side effect.
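// For example, in ADDR(IND(p)) the load through 'p' never actually executes; only the
// address 'p' itself is wanted, so this IND can no longer fault.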
13672 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13673 if (commaOp2->OperIsBlk())
13675 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13677 if (commaOp2->gtOper == GT_IND)
13679 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13680 commaOp2->gtFlags &= ~GTF_EXCEPT;
13681 commaOp2->gtFlags |= (commaOp2->gtOp.gtOp1->gtFlags & GTF_EXCEPT);
13684 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13688 // The "op1" node might already be annotated with a zero-offset field sequence.
13689 FieldSeqNode* existingZeroOffsetFldSeq = nullptr;
13690 if (GetZeroOffsetFieldMap()->Lookup(op1, &existingZeroOffsetFldSeq))
13692 // Append the zero field sequences
13693 zeroFieldSeq = GetFieldSeqStore()->Append(existingZeroOffsetFldSeq, zeroFieldSeq);
13695 // Transfer the annotation to the new GT_ADDR node.
13696 fgAddFieldSeqForZeroOffset(op1, zeroFieldSeq);
13698 commaNode->gtOp.gtOp2 = op1;
13699 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
13700 // might give op1 a type different from byref (like, say, native int). So now go back and give
13701 // all the comma nodes the type of op1.
13702 // TODO: the comma flag update below is conservative and can be improved.
13703 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13704 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13706 while (!commas.Empty())
13708 GenTree* comma = commas.Pop();
13709 comma->gtType = op1->gtType;
13710 comma->gtFlags |= op1->gtFlags;
13712 comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13714 gtUpdateNodeSideEffects(comma);
13720 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13721 op1->gtFlags |= GTF_DONT_CSE;
13727 /* Mark the nodes that are conditionally executed */
13728 fgWalkTreePre(&tree, gtMarkColonCond);
13730 /* Since we're doing this postorder we clear this if it got set by a child */
13731 fgRemoveRestOfBlock = false;
13736 /* Special case: trees that don't produce a value */
13737 if (op2->OperIs(GT_ASG) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2))
13739 typ = tree->gtType = TYP_VOID;
13742 // If we are in the ValueNum CSE phase, don't morph away anything, as these
13743 // nodes may have CSE defs/uses in them.
13745 if (!optValnumCSE_phase)
13747 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is all we need.
13750 GenTree* op1SideEffects = nullptr;
13751 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13752 // hoisted expressions in loops.
13753 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13754 if (op1SideEffects)
13756 // Replace the left hand side with the side effect list.
13757 tree->gtOp.gtOp1 = op1SideEffects;
13758 gtUpdateNodeSideEffects(tree);
13762 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13763 DEBUG_DESTROY_NODE(tree);
13764 DEBUG_DESTROY_NODE(op1);
13768 /* If the right operand is just a void nop node, throw it away */
13769 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13771 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13772 DEBUG_DESTROY_NODE(tree);
13773 DEBUG_DESTROY_NODE(op2);
13782 /* Special case if fgRemoveRestOfBlock is set to true */
13783 if (fgRemoveRestOfBlock)
13785 if (fgIsCommaThrow(op1, true))
13787 GenTree* throwNode = op1->gtOp.gtOp1;
13788 noway_assert(throwNode->gtType == TYP_VOID);
13790 JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n",
13792 DEBUG_DESTROY_NODE(tree);
13797 noway_assert(op1->OperKind() & GTK_RELOP);
13798 noway_assert(op1->gtFlags & GTF_EXCEPT);
13800 // We need to keep op1 for the side-effects. Hang it off
13803 JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n", dspTreeID(tree));
13805 tree->ChangeOper(GT_COMMA);
13806 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13808 // Additionally, since we're eliminating the JTRUE,
13809 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
13810 // So we change it into a GT_COMMA as well.
13811 JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n", dspTreeID(op1));
13812 op1->ChangeOper(GT_COMMA);
13813 op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop
13814 op1->gtType = op1->gtOp.gtOp1->gtType;
13823 assert(oper == tree->gtOper);
13825 // If we are in the ValueNum CSE phase, don't morph away anything, as these
13826 // nodes may have CSE defs/uses in them.
13828 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13830 /* Check for op1 as a GT_COMMA with an unconditional throw node */
13831 if (op1 && fgIsCommaThrow(op1, true))
13833 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13835 /* We can safely throw out the rest of the statements */
13836 fgRemoveRestOfBlock = true;
13839 GenTree* throwNode = op1->gtOp.gtOp1;
13840 noway_assert(throwNode->gtType == TYP_VOID);
13842 if (oper == GT_COMMA)
13844 /* Both tree and op1 are GT_COMMA nodes */
13845 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13846 tree->gtOp.gtOp1 = throwNode;
13848 // Possibly reset the assignment flag
13849 if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
13851 tree->gtFlags &= ~GTF_ASG;
13856 else if (oper != GT_NOP)
13858 if (genActualType(typ) == genActualType(op1->gtType))
13860 /* The types match, so return the comma throw node as the new tree */
13865 if (typ == TYP_VOID)
13867 // Return the throw node
13872 GenTree* commaOp2 = op1->gtOp.gtOp2;
13874 // the type of oper needs to match the tree's type
13875 if (typ == TYP_LONG)
13877 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13878 commaOp2->gtIntConCommon.SetLngValue(0);
13879 /* Change the types of oper and commaOp2 to TYP_LONG */
13880 op1->gtType = commaOp2->gtType = TYP_LONG;
13882 else if (varTypeIsFloating(typ))
13884 commaOp2->ChangeOperConst(GT_CNS_DBL);
13885 commaOp2->gtDblCon.gtDconVal = 0.0;
13886 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13887 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13891 commaOp2->ChangeOperConst(GT_CNS_INT);
13892 commaOp2->gtIntConCommon.SetIconValue(0);
13893 /* Change the types of oper and commaOp2 to TYP_INT */
13894 op1->gtType = commaOp2->gtType = TYP_INT;
13897 /* Return the GT_COMMA node as the new tree */
13904 /* Check for op2 as a GT_COMMA with an unconditional throw */
13906 if (op2 && fgIsCommaThrow(op2, true))
13908 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13910 /* We can safely throw out the rest of the statements */
13911 fgRemoveRestOfBlock = true;
13914 // If op1 has no side-effects
13915 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13917 // If tree is an asg node
13918 if (tree->OperIs(GT_ASG))
13920 /* Return the throw node as the new tree */
13921 return op2->gtOp.gtOp1;
13924 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13926 /* Return the throw node as the new tree */
13927 return op2->gtOp.gtOp1;
13930 // If tree is a comma node
13931 if (tree->OperGet() == GT_COMMA)
13933 /* Return the throw node as the new tree */
13934 return op2->gtOp.gtOp1;
13937 /* for the shift nodes the type of op2 can differ from the tree type */
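// E.g., for a TYP_LONG shift such as "longVal << intCount" the tree is TYP_LONG while
// the shift count (op2) is TYP_INT.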
13938 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13940 noway_assert(GenTree::OperIsShiftOrRotate(oper));
13942 GenTree* commaOp2 = op2->gtOp.gtOp2;
13944 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13945 commaOp2->gtIntConCommon.SetLngValue(0);
13947 /* Change the types of oper and commaOp2 to TYP_LONG */
13948 op2->gtType = commaOp2->gtType = TYP_LONG;
13951 if ((genActualType(typ) == TYP_INT) &&
13952 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13954 // An example case is comparison (say GT_GT) of two longs or floating point values.
13956 GenTree* commaOp2 = op2->gtOp.gtOp2;
13958 commaOp2->ChangeOperConst(GT_CNS_INT);
13959 commaOp2->gtIntCon.gtIconVal = 0;
13960 /* Change the types of oper and commaOp2 to TYP_INT */
13961 op2->gtType = commaOp2->gtType = TYP_INT;
13964 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13966 noway_assert(tree->OperGet() == GT_ADD);
13968 GenTree* commaOp2 = op2->gtOp.gtOp2;
13970 commaOp2->ChangeOperConst(GT_CNS_INT);
13971 commaOp2->gtIntCon.gtIconVal = 0;
13972 /* Change the types of oper and commaOp2 to TYP_BYREF */
13973 op2->gtType = commaOp2->gtType = TYP_BYREF;
13976 /* types should now match */
13977 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13979 /* Return the GT_COMMA node as the new tree */
13985 /*-------------------------------------------------------------------------
13986 * Optional morphing is done if tree transformations are permitted
13989 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13994 tree = fgMorphSmpOpOptional(tree->AsOp());
13999 #pragma warning(pop)
14002 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
14004 genTreeOps oper = tree->gtOper;
14005 GenTree* op1 = tree->gtOp1;
14006 GenTree* op2 = tree->gtOp2;
14007 var_types typ = tree->TypeGet();
14009 if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
14011 /* Swap the operands so that the more expensive one is 'op1' */
14013 if (tree->gtFlags & GTF_REVERSE_OPS)
14021 tree->gtFlags &= ~GTF_REVERSE_OPS;
14024 if (oper == op2->gtOper)
14026 /* Reorder nested operators at the same precedence level to be
14027 left-recursive. For example, change "(a+(b+c))" to the
14028 equivalent expression "((a+b)+c)".
14031 /* Things are handled differently for floating-point operators */
14033 if (!varTypeIsFloating(tree->TypeGet()))
14035 fgMoveOpsLeft(tree);
14044 /* Change "((x+icon)+y)" to "((x+y)+icon)"
14045 Don't reorder floating-point operations */
14047 if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
14048 varTypeIsIntegralOrI(typ))
14050 GenTree* ad1 = op1->gtOp.gtOp1;
14051 GenTree* ad2 = op1->gtOp.gtOp2;
14053 if (!op2->OperIsConst() && ad2->OperIsConst())
14065 // This transform swaps ad2 and op2.
14067 // Don't create a byref pointer that may point outside of the ref object.
14068 // If a GC happens, the byref won't get updated. This can happen if one
14069 // of the int components is negative. It also requires the address generation
14070 // be in a fully-interruptible code region.
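// E.g., for "byref + (i - j)" with i < j, the intermediate sum would point before the
// object; if a GC moved the object at that moment, the stale byref would not be updated.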
14071 if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet()))
14075 op1->gtOp.gtOp2 = op2;
14076 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
14085 /*-------------------------------------------------------------------------
14086 * Perform optional oper-specific postorder morphing
14092 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
14094 if (tree->OperIsCopyBlkOp())
14096 return fgMorphCopyBlock(tree);
14100 return fgMorphInitBlock(tree);
14104 if (typ == TYP_LONG)
14109 /* Make sure we're allowed to do this */
14111 if (optValnumCSE_phase)
14113 // It is not safe to reorder/delete CSE's
14117 if (op2->gtFlags & GTF_ASG)
14122 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
14127 /* Special case: a cast that can be thrown away */
14129 // TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only
14130 // one cast and sometimes there is another one after it that gets removed by this
14131 // code. fgMorphSmp should be improved to remove all redundant casts so this code can be removed.
14134 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
14140 srct = op2->gtCast.CastOp()->TypeGet();
14141 cast = (var_types)op2->CastToType();
14142 dstt = op1->TypeGet();
14144 /* Make sure these are all ints and precision is not lost */
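// E.g., for "*(byte*)p = (short)intVal" (illustrative), the cast to short can be
// dropped: the byte-sized store truncates at least as aggressively as the cast did.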
14146 if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT)
14148 op2 = tree->gtOp2 = op2->gtCast.CastOp();
14156 /* Check for the case "(val + icon) * icon" */
14158 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14160 GenTree* add = op1->gtOp.gtOp2;
14162 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14164 if (tree->gtOverflow() || op1->gtOverflow())
14169 ssize_t imul = op2->gtIntCon.gtIconVal;
14170 ssize_t iadd = add->gtIntCon.gtIconVal;
14172 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
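// E.g., (x + 3) * 5 becomes (x * 5) + 15, folding 3 * 5 into the new constant.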
14175 tree->ChangeOper(oper);
14177 op2->gtIntCon.gtIconVal = iadd * imul;
14179 op1->ChangeOper(GT_MUL);
14181 add->gtIntCon.gtIconVal = imul;
14182 #ifdef _TARGET_64BIT_
14183 if (add->gtType == TYP_INT)
14185 // we need to properly re-sign-extend or truncate after multiplying two int constants above
14186 add->AsIntCon()->TruncateOrSignExtend32();
14188 #endif //_TARGET_64BIT_
14196 /* For "val / 1", just return "val" */
14198 if (op2->IsIntegralConst(1))
14200 DEBUG_DESTROY_NODE(tree);
14208 /* Check for the case "(val + icon) << icon" */
14210 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
14212 GenTree* cns = op1->gtOp.gtOp2;
14214 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14216 ssize_t ishf = op2->gtIntConCommon.IconValue();
14217 ssize_t iadd = cns->gtIntConCommon.IconValue();
14219 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14221 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
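// E.g., (x + 3) << 2 becomes (x << 2) + 12, since 3 << 2 == 12.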
14223 tree->ChangeOper(GT_ADD);
14224 ssize_t result = iadd << ishf;
14225 op2->gtIntConCommon.SetIconValue(result);
14226 #ifdef _TARGET_64BIT_
14227 if (op1->gtType == TYP_INT)
14229 op2->AsIntCon()->TruncateOrSignExtend32();
14231 #endif // _TARGET_64BIT_
14233 // we are reusing the shift amount node here, but the type we want is that of the shift result
14234 op2->gtType = op1->gtType;
14236 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14237 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14239 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14240 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14243 op1->ChangeOper(GT_LSH);
14245 cns->gtIntConCommon.SetIconValue(ishf);
14253 if (!optValnumCSE_phase)
14255 /* "x ^ -1" is "~x" */
14257 if (op2->IsIntegralConst(-1))
14259 tree->ChangeOper(GT_NOT);
14260 tree->gtOp2 = nullptr;
14261 DEBUG_DESTROY_NODE(op2);
14263 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14265 /* "binaryVal ^ 1" is "!binaryVal" */
14266 gtReverseCond(op1);
14267 DEBUG_DESTROY_NODE(op2);
14268 DEBUG_DESTROY_NODE(tree);
14276 // Initialization values for initBlk have special semantics - their lower
14277 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14278 // which enables them to get a VNForZero, and be propagated.
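// E.g., an initBlk fill value of 0x42 stores 0x42 into every byte of the struct,
// whereas a bare 0 simply means the whole struct is zero-initialized.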
14279 if (op1->IsIntegralConst(0))
14291 //------------------------------------------------------------------------
14292 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14293 // (see ECMA III.3.55 and III.3.56).
14296 // tree - The GT_MOD/GT_UMOD tree to morph
14299 // The morphed tree
14302 // For ARM64 we don't have a remainder instruction so this transform is
14303 // always done. For XARCH this transform is done if we know that magic
14304 // division will be used, in that case this transform allows CSE to
14305 // eliminate the redundant div from code like "x = a / 3; y = a % 3;".
14307 // This method will produce the above expression if 'a' and 'b' are
14308 // leaf nodes; otherwise, if either of them is not a leaf, it will spill
14309 // its value into a temporary variable. An example:
14310 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
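// In the simple case where both operands are already leaves, no temps are needed:
// a % b -> a - (a / b) * b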
14312 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14314 if (tree->OperGet() == GT_MOD)
14316 tree->SetOper(GT_DIV);
14318 else if (tree->OperGet() == GT_UMOD)
14320 tree->SetOper(GT_UDIV);
14324 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14327 var_types type = tree->gtType;
14328 GenTree* denominator = tree->gtOp2;
14329 GenTree* numerator = tree->gtOp1;
14331 if (!numerator->OperIsLeaf())
14333 numerator = fgMakeMultiUse(&tree->gtOp1);
14336 if (!denominator->OperIsLeaf())
14338 denominator = fgMakeMultiUse(&tree->gtOp2);
14341 // The numerator and denominator may have been assigned to temps, in which case
14342 // their defining assignments are in the current tree. Therefore, we need to
14343 // set the execution order accordingly on the nodes we create.
14344 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14345 // be set to be evaluated in reverse order.
14347 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14348 assert(!mul->IsReverseOp());
14349 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14350 sub->gtFlags |= GTF_REVERSE_OPS;
14353 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14359 //------------------------------------------------------------------------------
14360 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14364 // oper - Operation to check
14367 // True if the operation can be a root of a bitwise rotation tree; false otherwise.
14369 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14371 return (oper == GT_OR) || (oper == GT_XOR);
14374 //------------------------------------------------------------------------------
14375 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14376 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14379 // tree - tree to check for a rotation pattern
14382 // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14385 // The input is a GT_OR or a GT_XOR tree.
14387 GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
14390 // Check for a rotation pattern, e.g.,
14403 // The patterns recognized:
14404 // (x << (y & M)) op (x >>> ((-y + N) & M))
14405 // (x >>> ((-y + N) & M)) op (x << (y & M))
14407 // (x << y) op (x >>> (-y + N))
14408 // (x >>> (-y + N)) op (x << y)
14410 // (x >>> (y & M)) op (x << ((-y + N) & M))
14411 // (x << ((-y + N) & M)) op (x >>> (y & M))
14413 // (x >>> y) op (x << (-y + N))
14414 // (x << (-y + N)) op (x >>> y)
14416 // (x << c1) op (x >>> c2)
14417 // (x >>> c1) op (x << c2)
14420 // where c1 and c2 are const,
14421 // c1 + c2 == bitsize(x)
14424 // M & (N - 1) == N - 1
14425 // op is either | or ^
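// E.g., with a 32-bit x, "(x << 3) | (x >>> 29)" matches the constant pattern
// (3 + 29 == 32) and becomes ROL(x, 3), while "(x << y) | (x >>> (-y + 32))"
// matches the variable pattern and becomes ROL(x, y).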
14427 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14429 // We can't do anything if the tree has assignments, calls, or volatile
14430 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14431 // thrown by the original tree will be thrown by the transformed tree as well.
14435 genTreeOps oper = tree->OperGet();
14436 assert(fgOperIsBitwiseRotationRoot(oper));
14438 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14439 GenTree* op1 = tree->gtGetOp1();
14440 GenTree* op2 = tree->gtGetOp2();
14441 GenTree* leftShiftTree = nullptr;
14442 GenTree* rightShiftTree = nullptr;
14443 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14445 leftShiftTree = op1;
14446 rightShiftTree = op2;
14448 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14450 leftShiftTree = op2;
14451 rightShiftTree = op1;
14458 // Check if the trees representing the value to shift are identical.
14459 // We already checked that there are no side effects above.
14460 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14462 GenTree* rotatedValue = leftShiftTree->gtGetOp1();
14463 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
14464 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
14465 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14466 GenTree* leftShiftIndex = leftShiftTree->gtGetOp2();
14467 GenTree* rightShiftIndex = rightShiftTree->gtGetOp2();
14469 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
14470 // shouldn't be masked for the transformation to be valid. If additional
14471 // higher bits are not masked, the transformation is still valid since the result
14472 // of MSIL shift instructions is unspecified if the shift amount is greater than
14473 // or equal to the width of the value being shifted.
14474 ssize_t minimalMask = rotatedValueBitSize - 1;
14475 ssize_t leftShiftMask = -1;
14476 ssize_t rightShiftMask = -1;
14478 if ((leftShiftIndex->OperGet() == GT_AND))
14480 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14482 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14483 leftShiftIndex = leftShiftIndex->gtGetOp1();
14491 if ((rightShiftIndex->OperGet() == GT_AND))
14493 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14495 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14496 rightShiftIndex = rightShiftIndex->gtGetOp1();
14504 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14506 // The shift index is overmasked, e.g., we have
14507 // something like (x << y & 15) or
14508 // (x >> (32 - y) & 15) with 32-bit x.
14509 // The transformation is not valid.
14513 GenTree* shiftIndexWithAdd = nullptr;
14514 GenTree* shiftIndexWithoutAdd = nullptr;
14515 genTreeOps rotateOp = GT_NONE;
14516 GenTree* rotateIndex = nullptr;
14518 if (leftShiftIndex->OperGet() == GT_ADD)
14520 shiftIndexWithAdd = leftShiftIndex;
14521 shiftIndexWithoutAdd = rightShiftIndex;
14524 else if (rightShiftIndex->OperGet() == GT_ADD)
14526 shiftIndexWithAdd = rightShiftIndex;
14527 shiftIndexWithoutAdd = leftShiftIndex;
14531 if (shiftIndexWithAdd != nullptr)
14533 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14535 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14537 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14539 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14541 // We found one of these patterns:
14542 // (x << (y & M)) | (x >>> ((-y + N) & M))
14543 // (x << y) | (x >>> (-y + N))
14544 // (x >>> (y & M)) | (x << ((-y + N) & M))
14545 // (x >>> y) | (x << (-y + N))
14546 // where N == bitsize(x), M is const, and
14547 // M & (N - 1) == N - 1
14548 CLANG_FORMAT_COMMENT_ANCHOR;
14550 #ifndef _TARGET_64BIT_
14551 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14553 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14554 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14555 // to add helpers for GT_ROL and GT_ROR.
14560 rotateIndex = shiftIndexWithoutAdd;
14566 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
14568 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14570 // We found this pattern:
14571 // (x << c1) | (x >>> c2)
14572 // where c1 and c2 are const and c1 + c2 == bitsize(x)
14574 rotateIndex = leftShiftIndex;
14578 if (rotateIndex != nullptr)
14580 noway_assert(GenTree::OperIsRotate(rotateOp));
14582 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14584 // We can use the same tree only during global morph; reusing the tree in a later morph
14585 // may invalidate value numbers.
14588 tree->gtOp.gtOp1 = rotatedValue;
14589 tree->gtOp.gtOp2 = rotateIndex;
14590 tree->ChangeOper(rotateOp);
14592 unsigned childFlags = 0;
14593 for (GenTree* op : tree->Operands())
14595 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14598 // The parent's flags should be a superset of its operands' flags
14599 noway_assert((inputTreeEffects & childFlags) == childFlags);
14603 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14604 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14613 #if !CPU_HAS_FP_SUPPORT
14614 GenTree* Compiler::fgMorphToEmulatedFP(GenTree* tree)
14617 genTreeOps oper = tree->OperGet();
14618 var_types typ = tree->TypeGet();
14619 GenTree* op1 = tree->gtOp.gtOp1;
14620 GenTree* op2 = tree->gtGetOp2IfPresent();
14623 We have to use helper calls for all FP operations:
14625 FP operators that operate on FP values
14626 casts to and from FP
14627 comparisons of FP values
14630 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
14635 /* Not all FP operations need helper calls */
14649 /* If the result isn't FP, it better be a compare or cast */
14651 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14654 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14657 /* Keep track of how many arguments we're passing */
14659 /* Is this a binary operator? */
14663 /* What kind of an operator do we have? */
14668 helper = CPX_R4_ADD;
14671 helper = CPX_R4_SUB;
14674 helper = CPX_R4_MUL;
14677 helper = CPX_R4_DIV;
14679 // case GT_MOD: helper = CPX_R4_REM; break;
14682 helper = CPX_R4_EQ;
14685 helper = CPX_R4_NE;
14688 helper = CPX_R4_LT;
14691 helper = CPX_R4_LE;
14694 helper = CPX_R4_GE;
14697 helper = CPX_R4_GT;
14704 noway_assert(!"unexpected FP binary op");
14708 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14718 noway_assert(!"FP cast");
14721 helper = CPX_R4_NEG;
14728 noway_assert(!"unexpected FP unary op");
14732 args = gtNewArgList(tree->gtOp.gtOp1);
14735 /* If we have double result/operands, modify the helper */
14737 if (typ == TYP_DOUBLE)
14739 static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG);
14740 static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD);
14741 static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB);
14742 static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL);
14743 static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV);
14749 noway_assert(tree->OperIsCompare());
14751 static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ);
14752 static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE);
14753 static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT);
14754 static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE);
14755 static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE);
14756 static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT);
14759 tree = fgMorphIntoHelperCall(tree, helper, args);
14768 if (compCurBB == genReturnBB)
14770 /* This is the 'exitCrit' call at the exit label */
14772 noway_assert(op1->gtType == TYP_VOID);
14773 noway_assert(op2 == 0);
14775 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14780 /* This is a (real) return value -- check its type */
14781 CLANG_FORMAT_COMMENT_ANCHOR;
14784 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14786 bool allowMismatch = false;
14788 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14789 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14790 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14791 allowMismatch = true;
14793 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14794 allowMismatch = true;
14796 if (!allowMismatch)
14797 NO_WAY("Return type mismatch");
14807 /*****************************************************************************
14809 * Transform the given tree for code generation and return an equivalent tree.
14812 GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
14815 assert(tree->gtOper != GT_STMT);
14820 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14822 noway_assert(!"JitBreakMorphTree hit");
14828 int thisMorphNum = 0;
14829 if (verbose && treesBeforeAfterMorph)
14831 thisMorphNum = morphNum++;
14832 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14839 // Apply any rewrites for implicit byref arguments before morphing the tree.
14842 if (fgMorphImplicitByRefArgs(tree))
14845 if (verbose && treesBeforeAfterMorph)
14847 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
14854 /*-------------------------------------------------------------------------
14855 * fgMorphTree() can potentially replace a tree with another, and the
14856 * caller has to store the return value correctly.
14857 * Turn this on to always make a copy of "tree" here to shake out
14858 * hidden/unupdated references.
14863 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14867 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14869 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14873 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
14876 copy->ReplaceWith(tree, this);
14878 #if defined(LATE_DISASM)
14879 // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields
14880 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
14882 copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
14886 DEBUG_DESTROY_NODE(tree);
14893 /* Ensure that we haven't morphed this node already */
14894 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
14896 #if LOCAL_ASSERTION_PROP
14897 /* Before morphing the tree, we try to propagate any active assertions */
14898 if (optLocalAssertionProp)
14900 /* Do we have any active assertions? */
14902 if (optAssertionCount > 0)
14904 GenTree* newTree = tree;
14905 while (newTree != nullptr)
14908 /* newTree is non-null if we propagated an assertion */
14909 newTree = optAssertionProp(apFull, tree, nullptr);
14911 assert(tree != nullptr);
14914 PREFAST_ASSUME(tree != nullptr);
14918 /* Save the original un-morphed tree for fgMorphTreeDone */
14920 GenTree* oldTree = tree;
14922 /* Figure out what kind of a node we have */
14924 unsigned kind = tree->OperKind();
14926 /* Is this a constant node? */
14928 if (kind & GTK_CONST)
14930 tree = fgMorphConst(tree);
14934 /* Is this a leaf node? */
14936 if (kind & GTK_LEAF)
14938 tree = fgMorphLeaf(tree);
14942 /* Is it a 'simple' unary/binary operator? */
14944 if (kind & GTK_SMPOP)
14946 tree = fgMorphSmpOp(tree, mac);
14950 /* See what kind of a special operator we have here */
14952 switch (tree->OperGet())
14955 tree = fgMorphField(tree, mac);
14959 if (tree->OperMayThrow(this))
14961 tree->gtFlags |= GTF_EXCEPT;
14965 tree->gtFlags &= ~GTF_EXCEPT;
14967 tree = fgMorphCall(tree->AsCall());
14970 case GT_ARR_BOUNDS_CHECK:
14971 #ifdef FEATURE_SIMD
14973 #endif // FEATURE_SIMD
14974 #ifdef FEATURE_HW_INTRINSICS
14975 case GT_HW_INTRINSIC_CHK:
14976 #endif // FEATURE_HW_INTRINSICS
14978 fgSetRngChkTarget(tree);
14980 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
14981 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
14982 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
14983 // If the index is a comma(throw, x), just return that.
14984 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
14986 tree = bndsChk->gtIndex;
14989 bndsChk->gtFlags &= ~GTF_CALL;
14991 // Propagate effects flags upwards
14992 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
14993 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
14995 // Otherwise, we don't change the tree.
15000 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
15003 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15005 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
15008 tree->gtFlags &= ~GTF_CALL;
15010 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15012 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15014 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
15019 fgSetRngChkTarget(tree, false);
15023 case GT_ARR_OFFSET:
15024 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15025 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15026 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15028 tree->gtFlags &= ~GTF_CALL;
15029 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15030 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15031 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15034 fgSetRngChkTarget(tree, false);
15039 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15040 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15041 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15043 tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL);
15045 tree->gtFlags |= tree->gtCmpXchg.gtOpLocation->gtFlags & GTF_ALL_EFFECT;
15046 tree->gtFlags |= tree->gtCmpXchg.gtOpValue->gtFlags & GTF_ALL_EFFECT;
15047 tree->gtFlags |= tree->gtCmpXchg.gtOpComparand->gtFlags & GTF_ALL_EFFECT;
15050 case GT_STORE_DYN_BLK:
15052 if (tree->OperGet() == GT_STORE_DYN_BLK)
15054 tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
15056 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
15057 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15059 tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL);
15060 tree->SetIndirExceptionFlags(this);
15062 if (tree->OperGet() == GT_STORE_DYN_BLK)
15064 tree->gtFlags |= tree->gtDynBlk.Data()->gtFlags & GTF_ALL_EFFECT;
15066 tree->gtFlags |= tree->gtDynBlk.Addr()->gtFlags & GTF_ALL_EFFECT;
15067 tree->gtFlags |= tree->gtDynBlk.gtDynamicSize->gtFlags & GTF_ALL_EFFECT;
15070 case GT_INDEX_ADDR:
15071 GenTreeIndexAddr* indexAddr;
15072 indexAddr = tree->AsIndexAddr();
15073 indexAddr->Index() = fgMorphTree(indexAddr->Index());
15074 indexAddr->Arr() = fgMorphTree(indexAddr->Arr());
15076 tree->gtFlags &= ~GTF_CALL;
15078 tree->gtFlags |= indexAddr->Index()->gtFlags & GTF_ALL_EFFECT;
15079 tree->gtFlags |= indexAddr->Arr()->gtFlags & GTF_ALL_EFFECT;
15086 noway_assert(!"unexpected operator");
15090 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
15095 #if LOCAL_ASSERTION_PROP
15096 //------------------------------------------------------------------------
15097 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
15100 // lclNum - The varNum of the lclVar for which we're killing assertions.
15101 // tree - (DEBUG only) the tree responsible for killing its assertions.
15103 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree))
15105 /* All dependent assertions are killed here */
15107 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15111 AssertionIndex index = optAssertionCount;
15112 while (killed && (index > 0))
15114 if (BitVecOps::IsMember(apTraits, killed, index - 1))
15117 AssertionDsc* curAssertion = optGetAssertion(index);
15118 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15119 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15122 printf("\nThe assignment ");
15124 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15125 optPrintAssertion(curAssertion);
15128 // Remove this bit from the killed mask
15129 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15131 optAssertionRemove(index);
15137 // killed mask should now be zero
15138 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
15141 //------------------------------------------------------------------------
15142 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
15145 // lclNum - The varNum of the lclVar for which we're killing assertions.
15146 // tree - (DEBUG only) the tree responsible for killing its assertions.
15149 // For structs and struct fields, it will invalidate the children and parent, respectively.
15151 // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
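// E.g., an assignment to a promoted struct local also kills the assertions on each of
// its field locals, and an assignment to a field local kills those of its parent struct.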
15153 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree))
15155 LclVarDsc* varDsc = &lvaTable[lclNum];
15157 if (varDsc->lvPromoted)
15159 noway_assert(varTypeIsStruct(varDsc));
15161 // Kill the field locals.
15162 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
15164 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
15167 // Kill the struct local itself.
15168 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15170 else if (varDsc->lvIsStructField)
15172 // Kill the field local.
15173 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15175 // Kill the parent struct.
15176 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
15180 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15183 #endif // LOCAL_ASSERTION_PROP
15185 /*****************************************************************************
15187 * This function is called to complete the morphing of a tree node
15188 * It should only be called once for each node.
15189 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
15190 * to enforce the invariant that each node is only morphed once.
15191 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
15192 * by an equivalent tree.
15196 void Compiler::fgMorphTreeDone(GenTree* tree,
15197 GenTree* oldTree /* == NULL */
15198 DEBUGARG(int morphNum))
15201 if (verbose && treesBeforeAfterMorph)
15203 printf("\nfgMorphTree (after %d):\n", morphNum);
15205 printf(""); // in our logic this causes a flush
15209 if (!fgGlobalMorph)
15214 if ((oldTree != nullptr) && (oldTree != tree))
15216 /* Ensure that we have morphed this node */
15217 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
15220 TransferTestDataToNode(oldTree, tree);
15225 // Ensure that we haven't morphed this node already
15226 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15229 if (tree->OperKind() & GTK_CONST)
15234 #if LOCAL_ASSERTION_PROP
15236 if (!optLocalAssertionProp)
15241 /* Do we have any active assertions? */
15243 if (optAssertionCount > 0)
15245 /* Is this an assignment to a local variable */
15246 GenTreeLclVarCommon* lclVarTree = nullptr;
15248 // The check below will miss LIR-style assignments.
15250 // But we shouldn't be running local assertion prop on these,
15251 // as local prop gets disabled when we run global prop.
15252 assert(!tree->OperIs(GT_STORE_LCL_VAR, GT_STORE_LCL_FLD));
15254 // DefinesLocal can return true for some BLK op uses, so
15255 // check what gets assigned only when we're at an assignment.
15256 if (tree->OperIs(GT_ASG) && tree->DefinesLocal(this, &lclVarTree))
15258 unsigned lclNum = lclVarTree->gtLclNum;
15259 noway_assert(lclNum < lvaCount);
15260 fgKillDependentAssertions(lclNum DEBUGARG(tree));
15264 /* If this tree makes a new assertion - make it available */
15265 optAssertionGen(tree);
15267 #endif // LOCAL_ASSERTION_PROP
15272 /* Mark this node as being morphed */
15273 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15277 /*****************************************************************************
15279 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15280 * Returns true if we modified the flow graph
15283 bool Compiler::fgFoldConditional(BasicBlock* block)
15285 bool result = false;
15287 // We don't want to make any code unreachable
15288 if (opts.OptimizationDisabled())
15293 if (block->bbJumpKind == BBJ_COND)
15295 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15297 GenTreeStmt* lastStmt = block->lastStmt();
15299 noway_assert(lastStmt->gtNext == nullptr);
15301 if (lastStmt->gtStmtExpr->gtOper == GT_CALL)
15303 noway_assert(fgRemoveRestOfBlock);
15305 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15306 fgConvertBBToThrowBB(block);
15308 /* Remove 'block' from the predecessor list of 'block->bbNext' */
15309 fgRemoveRefPred(block->bbNext, block);
15311 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15312 fgRemoveRefPred(block->bbJumpDest, block);
15317 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15318 printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum);
15324 noway_assert(lastStmt->gtStmtExpr->gtOper == GT_JTRUE);
15326 /* Did we fold the conditional */
15328 noway_assert(lastStmt->gtStmtExpr->gtOp.gtOp1);
15330 cond = lastStmt->gtStmtExpr->gtOp.gtOp1;
15332 if (cond->OperKind() & GTK_CONST)
15334 /* Yupee - we folded the conditional!
15335 * Remove the conditional statement */
15337 noway_assert(cond->gtOper == GT_CNS_INT);
15338 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
15340 /* remove the statement from bbTreeList - No need to update
15341 * the reference counts since there are no lcl vars */
15342 fgRemoveStmt(block, lastStmt);
15344 // block is a BBJ_COND that we are folding the conditional for
15345 // bTaken is the path that will always be taken from block
15346 // bNotTaken is the path that will never be taken from block
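// E.g., after folding to "JTRUE 1" the branch is always taken, so bTaken is
// block->bbJumpDest and bNotTaken is block->bbNext; for "JTRUE 0" the roles swap.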
15348 BasicBlock* bTaken;
15349 BasicBlock* bNotTaken;
15351 if (cond->gtIntCon.gtIconVal != 0)
15353 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15354 block->bbJumpKind = BBJ_ALWAYS;
15355 bTaken = block->bbJumpDest;
15356 bNotTaken = block->bbNext;
15360 /* Unmark the loop if we are removing a backwards branch */
15361 /* dest block must also be marked as a loop head and */
15362 /* We must be able to reach the backedge block */
15363 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15364 fgReachable(block->bbJumpDest, block))
15366 optUnmarkLoopBlocks(block->bbJumpDest, block);
15369 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
15370 block->bbJumpKind = BBJ_NONE;
15371 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15372 bTaken = block->bbNext;
15373 bNotTaken = block->bbJumpDest;
15376 if (fgHaveValidEdgeWeights)
15378 // We are removing an edge from block to bNotTaken
15379 // and we have already computed the edge weights, so
15380 // we will try to adjust some of the weights
15382 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
15383 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
15385 // We examine the taken edge (block -> bTaken)
15386 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
15387 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
15388 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
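// E.g., if block carries a profile weight of 100, bTaken has none, and block is
// bTaken's only predecessor, then bTaken can safely inherit the weight of 100.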
15390 if (block->hasProfileWeight())
15392 // The edge weights for (block -> bTaken) are 100% of block's weight
15393 edgeTaken->flEdgeWeightMin = block->bbWeight;
15394 edgeTaken->flEdgeWeightMax = block->bbWeight;
15396 if (!bTaken->hasProfileWeight())
15398 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15400 // Update the weight of bTaken
15401 bTaken->inheritWeight(block);
15406 else if (bTaken->hasProfileWeight())
15408 if (bTaken->countOfInEdges() == 1)
15410 // There is only one in edge to bTaken
15411 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15412 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15414 // Update the weight of block
15415 block->inheritWeight(bTaken);
15420 if (bUpdated != nullptr)
15423 // Now fix the weights of the edges out of 'bUpdated'
15424 switch (bUpdated->bbJumpKind)
15427 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15428 edge->flEdgeWeightMax = bUpdated->bbWeight;
15431 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15432 edge->flEdgeWeightMax = bUpdated->bbWeight;
15435 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15436 edge->flEdgeWeightMax = bUpdated->bbWeight;
15439 // We don't handle BBJ_SWITCH
15445 /* modify the flow graph */
15447 /* Remove 'block' from the predecessor list of 'bNotTaken' */
15448 fgRemoveRefPred(bNotTaken, block);
15453 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15454 printf(FMT_BB " becomes a %s", block->bbNum,
15455 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15456 if (block->bbJumpKind == BBJ_ALWAYS)
15458 printf(" to " FMT_BB, block->bbJumpDest->bbNum);
15464 /* if the block was a loop condition we may have to modify
15465 * the loop table */
15467 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15469 /* Some loops may already have been removed by
15470 * loop unrolling or conditional folding */
15472 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15477 /* We are only interested in the loop bottom */
15479 if (optLoopTable[loopNum].lpBottom == block)
15481 if (cond->gtIntCon.gtIconVal == 0)
15483 /* This was a bogus loop (condition always false)
15484 * Remove the loop from the table */
15486 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15490 printf("Removing loop L%02u (from " FMT_BB " to " FMT_BB ")\n\n", loopNum,
15491 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
15501 else if (block->bbJumpKind == BBJ_SWITCH)
15503 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15505 GenTreeStmt* lastStmt = block->lastStmt();
15507 noway_assert(lastStmt->gtNext == nullptr);
15509 if (lastStmt->gtStmtExpr->gtOper == GT_CALL)
15511 noway_assert(fgRemoveRestOfBlock);
15513 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15514 fgConvertBBToThrowBB(block);
15516 /* update the flow graph */
15518 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
15519 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15521 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15523 BasicBlock* curJump = *jumpTab;
15525 /* Remove 'block' from the predecessor list of 'curJump' */
15526 fgRemoveRefPred(curJump, block);
15532 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15533 printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum);
15539 noway_assert(lastStmt->gtStmtExpr->gtOper == GT_SWITCH);
15541 /* Did we fold the conditional */
15543 noway_assert(lastStmt->gtStmtExpr->gtOp.gtOp1);
15545 cond = lastStmt->gtStmtExpr->gtOp.gtOp1;
15547 if (cond->OperKind() & GTK_CONST)
15549 /* Yupee - we folded the conditional!
15550 * Remove the conditional statement */
15552 noway_assert(cond->gtOper == GT_CNS_INT);
15554 /* remove the statement from bbTreeList - No need to update
15555 * the reference counts since there are no lcl vars */
15556 fgRemoveStmt(block, lastStmt);
15558 /* modify the flow graph */
15560 /* Find the actual jump target */
15561 unsigned switchVal;
15562 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15564 jumpCnt = block->bbJumpSwt->bbsCount;
15565 BasicBlock** jumpTab;
15566 jumpTab = block->bbJumpSwt->bbsDstTab;
15570 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15572 BasicBlock* curJump = *jumpTab;
15574 assert(curJump->countOfInEdges() > 0);
15576 // If val matches switchVal, or we are at the last entry and
15577 // we never found the switch value, then set the new jump dest
15579 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15581 if (curJump != block->bbNext)
15583 /* transform the basic block into a BBJ_ALWAYS */
15584 block->bbJumpKind = BBJ_ALWAYS;
15585 block->bbJumpDest = curJump;
15587 // if we are jumping backwards, make sure we have a GC Poll.
15588 if (curJump->bbNum > block->bbNum)
15590 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15595 /* transform the basic block into a BBJ_NONE */
15596 block->bbJumpKind = BBJ_NONE;
15597 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15603 /* Remove 'block' from the predecessor list of 'curJump' */
15604 fgRemoveRefPred(curJump, block);
15610 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15611 printf(FMT_BB " becomes a %s", block->bbNum,
15612 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15613 if (block->bbJumpKind == BBJ_ALWAYS)
15615 printf(" to " FMT_BB, block->bbJumpDest->bbNum);
15627 //*****************************************************************************
15629 // Morphs a single statement in a block.
15630 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
15632 // Returns true if 'stmt' was removed from the block.
15633 // Returns false if 'stmt' is still in the block (even if other statements were removed).
15636 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15638 assert(block != nullptr);
15639 assert(stmt != nullptr);
15642 compCurStmt = stmt;
15644 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15646 // Bug 1106830 - During the CSE phase we can't just remove
15647 // morph->gtOp.gtOp2 as it could contain CSE expressions.
15648 // This leads to a noway_assert in OptCSE.cpp when
15649 // searching for the removed CSE ref (using gtFindLink).
15651 if (!optValnumCSE_phase)
15653 // Check for morph as a GT_COMMA with an unconditional throw
15654 if (fgIsCommaThrow(morph, true))
15659 printf("Folding a top-level fgIsCommaThrow stmt\n");
15660 printf("Removing op2 as unreachable:\n");
15661 gtDispTree(morph->gtOp.gtOp2);
15665 // Use the call as the new stmt
15666 morph = morph->gtOp.gtOp1;
15667 noway_assert(morph->gtOper == GT_CALL);
15670 // we can get a throw as a statement root
15671 if (fgIsThrow(morph))
15676 printf("We have a top-level fgIsThrow stmt\n");
15677 printf("Removing the rest of block as unreachable:\n");
15680 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15681 fgRemoveRestOfBlock = true;
15685 stmt->gtStmtExpr = morph;
15687 // Can the entire tree be removed?
15688 bool removedStmt = false;
15690 // Defer removing statements during CSE so we don't inadvertently remove any CSE defs.
15691 if (!optValnumCSE_phase)
15693 removedStmt = fgCheckRemoveStmt(block, stmt);
15696 // Or this is the last statement of a conditional branch that was just folded?
15697 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15699 if (fgFoldConditional(block))
15701 if (block->bbJumpKind != BBJ_THROW)
15703 removedStmt = true;
15710 // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
15711 gtSetStmtInfo(stmt);
15713 // Have to re-link the nodes for this statement
15714 fgSetStmtSeq(stmt);
15720 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
15726 if (fgRemoveRestOfBlock)
15728 // Remove the rest of the stmts in the block
15729 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15731 fgRemoveStmt(block, stmt);
15734 // The rest of block has been removed and we will always throw an exception.
15736 // Update successors of block
15737 fgRemoveBlockAsPred(block);
15739 // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
15740 // We should not convert it to a ThrowBB.
15741 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15743 // Convert block to a throw bb
15744 fgConvertBBToThrowBB(block);
15750 printf("\n%s Block " FMT_BB " becomes a throw block.\n", msg, block->bbNum);
15753 fgRemoveRestOfBlock = false;
15756 return removedStmt;
15759 /*****************************************************************************
15761 * Morph the statements of the given block.
15762 * This function should be called just once for a block. Use fgMorphBlockStmt()
15763 * for reentrant calls.
15766 void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw)
15768 fgRemoveRestOfBlock = false;
15770 /* Make the current basic block address available globally */
15774 *lnot = *loadw = false;
15776 fgCurrentlyInUseArgTemps = hashBv::Create(this);
15778 GenTreeStmt* stmt = block->firstStmt();
15779 GenTree* prev = nullptr;
15780 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15782 if (fgRemoveRestOfBlock)
15784 fgRemoveStmt(block, stmt);
15787 #ifdef FEATURE_SIMD
15788 if (opts.OptimizationEnabled() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT &&
15789 stmt->gtStmtExpr->OperGet() == GT_ASG)
15791 fgMorphCombineSIMDFieldAssignments(block, stmt);
15795 fgMorphStmt = stmt;
15796 compCurStmt = stmt;
15797 GenTree* tree = stmt->gtStmtExpr;
15801 if (stmt == block->bbTreeList)
15803 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
15806 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15810 printf("\nfgMorphTree " FMT_BB ", stmt %d (before)\n", block->bbNum, compCurStmtNum);
15815 /* Morph this statement tree */
15817 GenTree* morph = fgMorphTree(tree);
15819 // mark any outgoing arg temps as free so we can reuse them in the next statement.
15821 fgCurrentlyInUseArgTemps->ZeroAll();
15823 // Has fgMorphStmt been sneakily changed?
15825 if (stmt->gtStmtExpr != tree)
15827 /* This must be a tail call. Ignore 'morph' and carry on with
15828 the tail-call node */
15830 morph = stmt->gtStmtExpr;
15831 noway_assert(compTailCallUsed);
15832 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15833 noway_assert(stmt->gtNextStmt == nullptr);
15835 GenTreeCall* call = morph->AsCall();
15837 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15838 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
15840 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15841 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15842 (compCurBB->bbFlags & BBF_HAS_JMP)));
15844 else if (block != compCurBB)
15846 /* This must be a tail call that caused a GCPoll to get
15847 injected. We haven't actually morphed the call yet
15848 but the flag still got set, clear it here... */
15849 CLANG_FORMAT_COMMENT_ANCHOR;
15852 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15855 noway_assert(compTailCallUsed);
15856 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15857 noway_assert(stmt->gtNextStmt == nullptr);
15859 GenTreeCall* call = morph->AsCall();
15862 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15863 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
15865 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15866 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15867 (compCurBB->bbFlags & BBF_HAS_JMP)));
15871 if (compStressCompile(STRESS_CLONE_EXPR, 30))
15873 // Clone all the trees to stress gtCloneExpr()
15877 printf("\nfgMorphTree (stressClone from):\n");
15881 morph = gtCloneExpr(morph);
15882 noway_assert(morph);
15886 printf("\nfgMorphTree (stressClone to):\n");
15891 /* If the hash value changed, we modified the tree during morphing */
15894 unsigned newHash = gtHashValue(morph);
15895 if (newHash != oldHash)
15897 printf("\nfgMorphTree " FMT_BB ", stmt %d (after)\n", block->bbNum, compCurStmtNum);
15903 /* Check for morph as a GT_COMMA with an unconditional throw */
15904 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15906 /* Use the call as the new stmt */
15907 morph = morph->gtOp.gtOp1;
15908 noway_assert(morph->gtOper == GT_CALL);
15909 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15911 fgRemoveRestOfBlock = true;
15914 stmt->gtStmtExpr = tree = morph;
15916 if (fgRemoveRestOfBlock)
15921 /* Has the statement been optimized away */
15923 if (fgCheckRemoveStmt(block, stmt))
15928 /* Check if this block ends with a conditional branch that can be folded */
15930 if (fgFoldConditional(block))
15935 if (ehBlockHasExnFlowDsc(block))
15941 if (fgRemoveRestOfBlock)
15943 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
15945 GenTreeStmt* first = block->firstStmt();
15946 noway_assert(first);
15947 GenTreeStmt* lastStmt = block->lastStmt();
15948 noway_assert(lastStmt && lastStmt->gtNext == nullptr);
15949 GenTree* last = lastStmt->gtStmtExpr;
15951 if (((block->bbJumpKind == BBJ_COND) && (last->gtOper == GT_JTRUE)) ||
15952 ((block->bbJumpKind == BBJ_SWITCH) && (last->gtOper == GT_SWITCH)))
15954 GenTree* op1 = last->gtOp.gtOp1;
15956 if (op1->OperKind() & GTK_RELOP)
15958 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
15959 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
15962 lastStmt->gtStmtExpr = fgMorphTree(op1);
15966 /* Mark block as a BBJ_THROW block */
15967 fgConvertBBToThrowBB(block);
15970 #if FEATURE_FASTTAILCALL
15971 GenTree* recursiveTailCall = nullptr;
15972 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
15974 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
15979 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
15982 // Reset this back so that it doesn't leak out impacting other blocks
15983 fgRemoveRestOfBlock = false;
/*****************************************************************************
 *
 *  Morph the blocks of the method.
 *  This function should be called just once.
 */

void Compiler::fgMorphBlocks()
{
    printf("\n*************** In fgMorphBlocks()\n");

    /* Since fgMorphTree can be called after various optimizations to re-arrange
     * the nodes we need a global flag to signal whether we are in the one-pass
     * global morphing */

    fgGlobalMorph = true;

#if LOCAL_ASSERTION_PROP
    //
    // Local assertion prop is enabled if we are optimizing.
    //
    optLocalAssertionProp = opts.OptimizationEnabled();

    if (optLocalAssertionProp)
    {
        //
        // Initialize for local assertion prop
        //
        optAssertionInit(true);
    }
#elif ASSERTION_PROP
    //
    // If LOCAL_ASSERTION_PROP is not set
    // and we have global assertion prop
    // then local assertion prop is always off
    //
    optLocalAssertionProp = false;
#endif

    /*-------------------------------------------------------------------------
     * Process all basic blocks in the function
     */

    BasicBlock* block = fgFirstBB;
    noway_assert(block);

    compCurStmtNum = 0;

    do
    {
        bool lnot  = false;
        bool loadw = false;

        printf("\nMorphing " FMT_BB " of '%s'\n", block->bbNum, info.compFullName);

#if LOCAL_ASSERTION_PROP
        if (optLocalAssertionProp)
        {
            //
            // Clear out any currently recorded assertion candidates
            // before processing each basic block;
            // also we must handle QMARK-COLON specially
            //
            optAssertionReset(0);
        }
#endif

        /* Process all statement trees in the basic block */

        fgMorphStmts(block, &lnot, &loadw);

        /* Are we using a single return block? */

        if (block->bbJumpKind == BBJ_RETURN)
        {
            if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
            {
                // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
                // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
                // Such blocks do materialize as part of inlining.
                //
                // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
                // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
                // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
                // is BAD_VAR_NUM.
                //
                // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.

                GenTreeStmt* lastStmt = block->lastStmt();
                GenTree*     ret      = (lastStmt != nullptr) ? lastStmt->gtStmtExpr : nullptr;

                if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0))
                {
                    // This return was generated during epilog merging, so leave it alone
                }
                else
                {
                    /* We'll jump to the genReturnBB */
                    CLANG_FORMAT_COMMENT_ANCHOR;

#if !defined(_TARGET_X86_)
                    if (info.compFlags & CORINFO_FLG_SYNCH)
                    {
                        fgConvertSyncReturnToLeave(block);
                    }
                    else
#endif // !_TARGET_X86_
                    {
                        block->bbJumpKind = BBJ_ALWAYS;
                        block->bbJumpDest = genReturnBB;
                    }

                    if (genReturnLocal != BAD_VAR_NUM)
                    {
                        // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.

                        // Method must be returning a value other than TYP_VOID.
                        noway_assert(compMethodHasRetVal());

                        // This block must be ending with a GT_RETURN
                        noway_assert(lastStmt != nullptr);
                        noway_assert(lastStmt->getNextStmt() == nullptr);
                        noway_assert(ret != nullptr);

                        // GT_RETURN must have a non-null operand as the method is returning the value assigned to
                        // genReturnLocal
                        noway_assert(ret->OperGet() == GT_RETURN);
                        noway_assert(ret->gtGetOp1() != nullptr);

                        GenTreeStmt* pAfterStatement = lastStmt;
                        IL_OFFSETX   offset          = lastStmt->gtStmtILoffsx;
                        GenTree*     tree =
                            gtNewTempAssign(genReturnLocal, ret->gtGetOp1(), &pAfterStatement, offset, block);
                        if (tree->OperIsCopyBlkOp())
                        {
                            tree = fgMorphCopyBlock(tree);
                        }

                        if (pAfterStatement == lastStmt)
                        {
                            lastStmt->gtStmtExpr = tree;
                        }
                        else
                        {
                            // gtNewTempAssign inserted additional statements after last
                            fgRemoveStmt(block, lastStmt);
                            lastStmt = fgInsertStmtAfter(block, pAfterStatement, gtNewStmt(tree, offset));
                        }

                        // Make sure that copy-prop ignores this assignment.
                        lastStmt->gtStmtExpr->gtFlags |= GTF_DONT_CSE;
                    }
                    else if (ret != nullptr && ret->OperGet() == GT_RETURN)
                    {
                        // This block ends with a GT_RETURN
                        noway_assert(lastStmt != nullptr);
                        noway_assert(lastStmt->getNextStmt() == nullptr);

                        // Must be a void GT_RETURN with null operand; delete it as this block branches to
                        // oneReturn block
                        noway_assert(ret->TypeGet() == TYP_VOID);
                        noway_assert(ret->gtGetOp1() == nullptr);

                        fgRemoveStmt(block, lastStmt);
                    }

                    printf("morph " FMT_BB " to point at onereturn. New block is\n", block->bbNum);
                    fgTableDispBasicBlock(block);
                }
            }
        }
        block = block->bbNext;
    } while (block != nullptr);

    /* We are done with the global morphing phase */

    fgGlobalMorph = false;

    fgDispBasicBlocks(true);
}
/*****************************************************************************
 *
 *  Make some decisions about the kind of code to generate.
 */

void Compiler::fgSetOptions()
{
    /* Should we force fully interruptible code ? */
    if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
    {
        noway_assert(!codeGen->isGCTypeFixed());
        genInterruptible = true;
    }

    if (opts.compDbgCode)
    {
        assert(!codeGen->isGCTypeFixed());
        genInterruptible = true; // debugging is easier this way ...
    }

    /* Assume we won't need an explicit stack frame if this is allowed */

    // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
    // the callee-saved registers.
    noway_assert(!compTailCallUsed || !compLocallocUsed);

    if (compLocallocUsed)
    {
        codeGen->setFramePointerRequired(true);
    }

#ifdef _TARGET_X86_

    if (compTailCallUsed)
        codeGen->setFramePointerRequired(true);

#endif // _TARGET_X86_

    if (!opts.genFPopt)
    {
        codeGen->setFramePointerRequired(true);
    }

    // Assert that the EH table has been initialized by now. Note that
    // compHndBBtabAllocCount never decreases; it is a high-water mark
    // of table allocation. In contrast, compHndBBtabCount does shrink
    // if we delete a dead EH region, and if it shrinks to zero, the
    // table pointer compHndBBtab is unreliable.
    assert(compHndBBtabAllocCount >= info.compXcptnsCount);

#ifdef _TARGET_X86_

    // Note: this case, and the !X86 case below, should both use the
    // !X86 path. This would require a few more changes for X86 to use
    // compHndBBtabCount (the current number of EH clauses) instead of
    // info.compXcptnsCount (the number of EH clauses in IL), such as
    // in ehNeedsShadowSPslots(). This is because sometimes the IL has
    // an EH clause that we delete as statically dead code before we
    // get here, leaving no EH clauses left, and thus no requirement
    // to use a frame pointer because of EH. But until all the code uses
    // the same test, leave info.compXcptnsCount here.
    if (info.compXcptnsCount > 0)
    {
        codeGen->setFramePointerRequiredEH(true);
    }

#else // !_TARGET_X86_

    if (compHndBBtabCount > 0)
    {
        codeGen->setFramePointerRequiredEH(true);
    }

#endif // _TARGET_X86_

#ifdef UNIX_X86_ABI
    if (info.compXcptnsCount > 0)
    {
        assert(!codeGen->isGCTypeFixed());
        // Enforce fully interruptible codegen for funclet unwinding
        genInterruptible = true;
    }
#endif // UNIX_X86_ABI

    if (info.compCallUnmanaged)
    {
        codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
    }

    if (info.compPublishStubParam)
    {
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    if (opts.compNeedSecurityCheck)
    {
        codeGen->setFramePointerRequiredGCInfo(true);

#ifndef JIT32_GCENCODER

        // The decoder only reports objects in frames with exceptions if the frame
        // is fully interruptible.
        // Even if there is no catch or other way to resume execution in this frame
        // the VM requires the security object to remain alive until later, so
        // Frames with security objects must be fully interruptible.
        genInterruptible = true;

#endif // JIT32_GCENCODER
    }

    if (compIsProfilerHookNeeded())
    {
        codeGen->setFramePointerRequired(true);
    }

    if (info.compIsVarArgs)
    {
        // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    if (lvaReportParamTypeArg())
    {
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    // printf("method will %s be fully interruptible\n", genInterruptible ? "  " : "not");
}
/*****************************************************************************/

GenTree* Compiler::fgInitThisClass()
{
    noway_assert(!compIsForInlining());

    CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);

    if (!kind.needsRuntimeLookup)
    {
        return fgGetSharedCCtor(info.compClassHnd);
    }
    else
    {
#ifdef FEATURE_READYTORUN_COMPILER
        // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
        if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
        {
            CORINFO_RESOLVED_TOKEN resolvedToken;
            memset(&resolvedToken, 0, sizeof(resolvedToken));

            // We are in a shared method body, but maybe we don't need a runtime lookup after all.
            // This covers the case of a generic method on a non-generic type.
            if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
            {
                resolvedToken.hClass = info.compClassHnd;
                return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
            }

            // We need a runtime lookup.
            GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);

            // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
            // base of the class that owns the method being compiled". If we're in this method, it means we're not
            // inlining and there's no ambiguity.
            return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
                                             gtNewArgList(ctxTree), &kind);
        }
#endif

        // Collectible types require that for shared generic code, if we use the generic context parameter,
        // we report it. (This is a conservative approach; we could detect some cases, particularly when the
        // context parameter is 'this', where we don't need the eager reporting logic.)
        lvaGenericsContextUseCount++;

        switch (kind.runtimeLookupKind)
        {
            case CORINFO_LOOKUP_THISOBJ:
            {
                // This code takes a this pointer; but we need to pass the static method desc to get the right point in
                // the hierarchy
                GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
                // Vtable pointer of this object
                vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
                vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
                GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);

                return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewArgList(vtTree, methodHnd));
            }

            case CORINFO_LOOKUP_CLASSPARAM:
            {
                GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
                return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewArgList(vtTree));
            }

            case CORINFO_LOOKUP_METHODPARAM:
            {
                GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
                return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID,
                                           gtNewArgList(gtNewIconNode(0), methHndTree));
            }

            default:
                noway_assert(!"Unknown LOOKUP_KIND");
        }
    }
}
/*****************************************************************************
 *
 *  Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
 *  except for the allowed ? 1 : 0; pattern.
 */
Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data)
{
    if ((*tree)->OperGet() == GT_QMARK)
    {
        fgCheckQmarkAllowedForm(*tree);
    }
    return WALK_CONTINUE;
}

void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
{
    assert(tree->OperGet() == GT_QMARK);
    assert(!"Qmarks beyond morph disallowed.");
}

/*****************************************************************************
 *
 *  Verify that the importer has created GT_QMARK nodes in a way we can
 *  process them. The following is allowed:
 *
 *  1. A top level qmark. Top level qmark is of the form:
 *      a) (bool) ? (void) : (void) OR
 *      b) V0N = (bool) ? (type) : (type)
 *
 *  2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
 *     of either op1 of colon or op2 of colon but not a child of any other
 *     operator.
 */
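// For illustration (hypothetical trees, not from the original comment), the importer may create:
//
//     V05 = (V02 == 0) ? ((V03 > 1) ? V03 : 0) : V04   // allowed: the nested qmark is an operand of a colon
//
// whereas a qmark buried under any other operator, e.g.
//
//     GT_ADD(GT_QMARK(...), V03)                       // disallowed: trips fgAssertNoQmark
//
// violates the rules checked below.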
void Compiler::fgPreExpandQmarkChecks(GenTree* expr)
{
    GenTree* topQmark = fgGetTopLevelQmark(expr);

    // If the top level Qmark is null, then scan the tree to make sure
    // there are no qmarks within it.
    if (topQmark == nullptr)
    {
        fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
    }
    else
    {
        // We could probably expand the cond node also, but don't think the extra effort is necessary,
        // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
        fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);

        fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
        fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
    }
}
/*****************************************************************************
 *
 *  Get the top level GT_QMARK node in a given "expr", return NULL if such a
 *  node is not present. If the top level GT_QMARK node is assigned to a
 *  GT_LCL_VAR, then return the lcl node in ppDst.
 *
 */
GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */)
{
    if (ppDst != nullptr)
    {
        *ppDst = nullptr;
    }

    GenTree* topQmark = nullptr;
    if (expr->gtOper == GT_QMARK)
    {
        topQmark = expr;
    }
    else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
    {
        topQmark = expr->gtOp.gtOp2;
        if (ppDst != nullptr)
        {
            *ppDst = expr->gtOp.gtOp1;
        }
    }
    return topQmark;
}
/*********************************************************************************
 *
 *  For a castclass helper call,
 *  Importer creates the following tree:
 *      tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
 *
 *  This method splits the qmark expression created by the importer into the
 *  following blocks: (block, asg, cond1, cond2, helper, remainder).
 *  Notice that op1 is the result for both the conditions, so we coalesce these
 *  assignments into a single block instead of two blocks resulting in a nested diamond.
 *
 *                       +---------->-----------+
 *  block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
 *
 *  We expect to achieve the following codegen:
 *       mov rsi, rdx                           tmp = op1                   // asgBlock
 *       test rsi, rsi                          goto skip if tmp == null ?  // cond1Block
 *       mov rcx, 0x76543210                    cns = op2                   // cond2Block
 *       cmp qword ptr [rsi], rcx               goto skip if *tmp == op2
 *       call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp)      // helperBlock
 *  SKIP:                                                                   // remainderBlock
 *       tmp has the result.
 */
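// For illustration (hypothetical C# source, not from the original comment): a cast such as
//
//     obj2 = (MyClass)obj1;     // or "obj1 as MyClass" for the isinst form
//
// is imported as the qmark tree above, where op2 is the class handle for MyClass and
// helper() is the CORINFO_HELP_CHKCASTCLASS_SPECIAL call shown in the expected codegen.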
void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreeStmt* stmt)
{
    printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n", block->bbNum);
    fgDispBasicBlocks(block, block, true);

    GenTree* expr = stmt->gtStmtExpr;

    GenTree* dst   = nullptr;
    GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
    noway_assert(dst != nullptr);

    assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);

    // Get cond, true, false exprs for the qmark.
    GenTree* condExpr  = qmark->gtGetOp1();
    GenTree* trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
    GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();

    // Get cond, true, false exprs for the nested qmark.
    GenTree* nestedQmark = falseExpr;
    GenTree* cond2Expr;
    GenTree* true2Expr;
    GenTree* false2Expr;

    if (nestedQmark->gtOper == GT_QMARK)
    {
        cond2Expr  = nestedQmark->gtGetOp1();
        true2Expr  = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
        false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();

        assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
        cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
    }
    else
    {
        // This is a rare case that arises when we are doing minopts and encounter isinst of null.
        // gtFoldExpr was still able to optimize away part of the tree (but not all of it),
        // which means it does not match our pattern.
        //
        // Rather than write code to handle this case, just fake up some nodes to make it match the common
        // case. Synthesize a comparison that is always true, and for the result-on-true, use the
        // entire subtree we expected to be the nested question op.

        cond2Expr  = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
        true2Expr  = nestedQmark;
        false2Expr = gtNewIconNode(0, TYP_I_IMPL);
    }
    assert(false2Expr->OperGet() == trueExpr->OperGet());

    // Clear flags as they are now going to be part of JTRUE.
    assert(condExpr->gtFlags & GTF_RELOP_QMARK);
    condExpr->gtFlags &= ~GTF_RELOP_QMARK;

    // Create the chain of blocks. See method header comment.
    // The order of blocks after this is the following:
    //     block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
    //
    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
    // remainderBlock will still be GC safe.
    unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
    BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.

    BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
    BasicBlock* cond2Block  = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* cond1Block  = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* asgBlock    = fgNewBBafter(BBJ_NONE, block, true);

    remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;

    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
    if ((block->bbFlags & BBF_INTERNAL) == 0)
    {
        helperBlock->bbFlags &= ~BBF_INTERNAL;
        cond2Block->bbFlags &= ~BBF_INTERNAL;
        cond1Block->bbFlags &= ~BBF_INTERNAL;
        asgBlock->bbFlags &= ~BBF_INTERNAL;
        helperBlock->bbFlags |= BBF_IMPORTED;
        cond2Block->bbFlags |= BBF_IMPORTED;
        cond1Block->bbFlags |= BBF_IMPORTED;
        asgBlock->bbFlags |= BBF_IMPORTED;
    }

    // Chain the flow correctly.
    fgAddRefPred(asgBlock, block);
    fgAddRefPred(cond1Block, asgBlock);
    fgAddRefPred(cond2Block, cond1Block);
    fgAddRefPred(helperBlock, cond2Block);
    fgAddRefPred(remainderBlock, helperBlock);
    fgAddRefPred(remainderBlock, cond1Block);
    fgAddRefPred(remainderBlock, cond2Block);

    cond1Block->bbJumpDest = remainderBlock;
    cond2Block->bbJumpDest = remainderBlock;

    // Set the weights; some are guesses.
    asgBlock->inheritWeight(block);
    cond1Block->inheritWeight(block);
    cond2Block->inheritWeightPercentage(cond1Block, 50);
    helperBlock->inheritWeightPercentage(cond2Block, 50);

    // Append cond1 as JTRUE to cond1Block
    GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
    GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmtILoffsx);
    fgInsertStmtAtEnd(cond1Block, jmpStmt);

    // Append cond2 as JTRUE to cond2Block
    jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
    jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmtILoffsx);
    fgInsertStmtAtEnd(cond2Block, jmpStmt);

    // asgBlock should get the tmp = op1 assignment.
    trueExpr          = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
    GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmtILoffsx);
    fgInsertStmtAtEnd(asgBlock, trueStmt);

    // Since we are adding the helper in the JTRUE false path, reverse the cond2 and add the helper.
    gtReverseCond(cond2Expr);
    GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
    GenTree* helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmtILoffsx);
    fgInsertStmtAtEnd(helperBlock, helperStmt);

    // Finally remove the nested qmark stmt.
    fgRemoveStmt(block, stmt);

    printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n", block->bbNum);
    fgDispBasicBlocks(block, remainderBlock, true);
}
/*****************************************************************************
 *
 *  Expand a statement with a top level qmark node. There are three cases, based
 *  on whether the qmark has both "true" and "false" arms, or just one of them.
 *
 *     S0 -->-- ~C -->-- T   F -->-- S1
 *
 *     -----------------------------------------
 *
 *     S0 -->-- ~C -->-- T -->-- S1
 *
 *          +-->-------------+
 *
 *     -----------------------------------------
 *
 *     S0 -->-- C -->-- F -->-- S1
 *
 *          +-->------------+
 *
 *  If the qmark assigns to a variable, then create tmps for "then"
 *  and "else" results and assign the temp to the variable as a writeback step.
 */
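// For illustration (hypothetical source, not from the original comment):
//
//     x = (a > 0) ? b : c;
//
// expands so that condBlock ends with JTRUE(!(a > 0)) targeting elseBlock, a thenBlock stores
// "x = b" and jumps to the remainder, elseBlock stores "x = c", and remainderBlock resumes the
// statements that followed the qmark.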
void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreeStmt* stmt)
{
    GenTree* expr = stmt->gtStmtExpr;

    // Retrieve the Qmark node to be expanded.
    GenTree* dst   = nullptr;
    GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
    if (qmark == nullptr)
    {
        return;
    }

    if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
    {
        fgExpandQmarkForCastInstOf(block, stmt);
        return;
    }

    printf("\nExpanding top-level qmark in " FMT_BB " (before)\n", block->bbNum);
    fgDispBasicBlocks(block, block, true);

    // Retrieve the operands.
    GenTree* condExpr  = qmark->gtGetOp1();
    GenTree* trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
    GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();

    assert(condExpr->gtFlags & GTF_RELOP_QMARK);
    condExpr->gtFlags &= ~GTF_RELOP_QMARK;

    assert(!varTypeIsFloating(condExpr->TypeGet()));

    bool hasTrueExpr  = (trueExpr->OperGet() != GT_NOP);
    bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
    assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!

    // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
    //     block ... condBlock ... elseBlock ... remainderBlock
    //
    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
    // remainderBlock will still be GC safe.
    unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
    BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.

    BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);

    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
    if ((block->bbFlags & BBF_INTERNAL) == 0)
    {
        condBlock->bbFlags &= ~BBF_INTERNAL;
        elseBlock->bbFlags &= ~BBF_INTERNAL;
        condBlock->bbFlags |= BBF_IMPORTED;
        elseBlock->bbFlags |= BBF_IMPORTED;
    }

    remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;

    condBlock->inheritWeight(block);

    fgAddRefPred(condBlock, block);
    fgAddRefPred(elseBlock, condBlock);
    fgAddRefPred(remainderBlock, elseBlock);

    BasicBlock* thenBlock = nullptr;
    if (hasTrueExpr && hasFalseExpr)
    {
        //     S0 -->-- ~C -->-- T   F -->-- S1
        gtReverseCond(condExpr);
        condBlock->bbJumpDest = elseBlock;

        thenBlock             = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
        thenBlock->bbJumpDest = remainderBlock;
        if ((block->bbFlags & BBF_INTERNAL) == 0)
        {
            thenBlock->bbFlags &= ~BBF_INTERNAL;
            thenBlock->bbFlags |= BBF_IMPORTED;
        }

        elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);

        fgAddRefPred(thenBlock, condBlock);
        fgAddRefPred(remainderBlock, thenBlock);

        thenBlock->inheritWeightPercentage(condBlock, 50);
        elseBlock->inheritWeightPercentage(condBlock, 50);
    }
    else if (hasTrueExpr)
    {
        //     S0 -->-- ~C -->-- T -->-- S1
        //          +-->-------------+
        gtReverseCond(condExpr);
        condBlock->bbJumpDest = remainderBlock;
        fgAddRefPred(remainderBlock, condBlock);
        // Since we have no false expr, use the one we'd already created.
        thenBlock = elseBlock;
        elseBlock = nullptr;

        thenBlock->inheritWeightPercentage(condBlock, 50);
    }
    else if (hasFalseExpr)
    {
        //     S0 -->-- C -->-- F -->-- S1
        //          +-->------------+
        condBlock->bbJumpDest = remainderBlock;
        fgAddRefPred(remainderBlock, condBlock);

        elseBlock->inheritWeightPercentage(condBlock, 50);
    }

    GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
    GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmtILoffsx);
    fgInsertStmtAtEnd(condBlock, jmpStmt);

    // Remove the original qmark statement.
    fgRemoveStmt(block, stmt);

    // Since we have a top level qmark, we either have a dst for it, in which case
    // we need to create tmps for the true and false results, or else we just don't
    // bother assigning.
    unsigned lclNum = BAD_VAR_NUM;
    if (dst != nullptr)
    {
        assert(dst->gtOper == GT_LCL_VAR);
        lclNum = dst->gtLclVar.gtLclNum;
    }
    else
    {
        assert(qmark->TypeGet() == TYP_VOID);
    }

    if (hasTrueExpr)
    {
        if (dst != nullptr)
        {
            trueExpr = gtNewTempAssign(lclNum, trueExpr);
        }
        GenTreeStmt* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmtILoffsx);
        fgInsertStmtAtEnd(thenBlock, trueStmt);
    }

    // Assign the falseExpr into the dst or tmp, insert in elseBlock
    if (hasFalseExpr)
    {
        if (dst != nullptr)
        {
            falseExpr = gtNewTempAssign(lclNum, falseExpr);
        }
        GenTreeStmt* falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmtILoffsx);
        fgInsertStmtAtEnd(elseBlock, falseStmt);
    }

    printf("\nExpanding top-level qmark in " FMT_BB " (after)\n", block->bbNum);
    fgDispBasicBlocks(block, remainderBlock, true);
}
/*****************************************************************************
 *
 *  Expand GT_QMARK nodes from the flow graph into basic blocks.
 *
 */

void Compiler::fgExpandQmarkNodes()
{
    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
    {
        for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
        {
            GenTree* expr = stmt->gtStmtExpr;
            fgPreExpandQmarkChecks(expr);
            fgExpandQmarkStmt(block, stmt);
        }
    }
    fgPostExpandQmarkChecks();

    compQmarkRationalized = true;
}

/*****************************************************************************
 *
 *  Make sure we don't have any more GT_QMARK nodes.
 *
 */

void Compiler::fgPostExpandQmarkChecks()
{
    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
    {
        for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
        {
            GenTree* expr = stmt->gtStmtExpr;
            fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
        }
    }
}
/*****************************************************************************
 *
 *  Transform all basic blocks for codegen.
 */

void Compiler::fgMorph()
{
    noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.

    fgOutgoingArgTemps = nullptr;

    printf("*************** In fgMorph()\n");
    fgDispBasicBlocks(true);

    // Insert call to class constructor as the first basic block if
    // we were asked to do so.
    if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
                                    impTokenLookupContextHandle /* context */) &
        CORINFO_INITCLASS_USE_HELPER)
    {
        fgEnsureFirstBBisScratch();
        fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
    }

    if (opts.compGcChecks)
    {
        for (unsigned i = 0; i < info.compArgsCount; i++)
        {
            if (lvaTable[i].TypeGet() == TYP_REF)
            {
                // confirm that the argument is a GC pointer (for debugging (GC stress))
                GenTree*        op   = gtNewLclvNode(i, TYP_REF);
                GenTreeArgList* args = gtNewArgList(op);
                op                   = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args);

                fgEnsureFirstBBisScratch();
                fgInsertStmtAtEnd(fgFirstBB, op);
            }
        }
    }

#if defined(DEBUG) && defined(_TARGET_XARCH_)
    if (opts.compStackCheckOnRet)
    {
        lvaReturnSpCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnSpCheck"));
        lvaTable[lvaReturnSpCheck].lvType = TYP_I_IMPL;
    }
#endif // defined(DEBUG) && defined(_TARGET_XARCH_)

#if defined(DEBUG) && defined(_TARGET_X86_)
    if (opts.compStackCheckOnCall)
    {
        lvaCallSpCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("CallSpCheck"));
        lvaTable[lvaCallSpCheck].lvType = TYP_I_IMPL;
    }
#endif // defined(DEBUG) && defined(_TARGET_X86_)

    /* Filter out unimported BBs */

    fgRemoveEmptyBlocks();

    /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
    fgDebugCheckBBlist(false, false);

    EndPhase(PHASE_MORPH_INIT);

    JITDUMP("trees after inlining\n");
    DBEXEC(VERBOSE, fgDispBasicBlocks(true));

    RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.

    EndPhase(PHASE_MORPH_INLINE);

    // Transform each GT_ALLOCOBJ node into either an allocation helper call or
    // local variable allocation on the stack.
    ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS

    if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled())
    {
        objectAllocator.EnableObjectStackAllocation();
    }

    objectAllocator.Run();

    /* Add any internal blocks/trees we may need */

    fgAddInternal();

    /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
    fgDebugCheckBBlist(false, false);
    /* Inliner could clone some trees. */
    fgDebugCheckNodesUniqueness();

    fgRemoveEmptyTry();

    EndPhase(PHASE_EMPTY_TRY);

    fgRemoveEmptyFinally();

    EndPhase(PHASE_EMPTY_FINALLY);

    fgMergeFinallyChains();

    EndPhase(PHASE_MERGE_FINALLY_CHAINS);

    fgCloneFinally();

    EndPhase(PHASE_CLONE_FINALLY);

    fgUpdateFinallyTargetFlags();

    /* For x64 and ARM64 we need to mark irregular parameters */
    lvaRefCountState = RCS_EARLY;
    fgResetImplicitByRefRefCount();

    /* Promote struct locals if necessary */
    fgPromoteStructs();

    /* Now it is the time to figure out which locals have their address taken. */
    fgMarkAddressExposedLocals();

    EndPhase(PHASE_STR_ADRLCL);

    /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
       analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
    fgRetypeImplicitByRefArgs();

    /* Now that address-taken locals and implicit byrefs have been marked, we can safely apply stress. */
    fgStress64RsltMul();

    EndPhase(PHASE_MORPH_IMPBYREF);

    /* Morph the trees in all the blocks of the method */

    fgMorphBlocks();

    /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
    fgMarkDemotedImplicitByRefArgs();
    lvaRefCountState = RCS_INVALID;

    EndPhase(PHASE_MORPH_GLOBAL);

    JITDUMP("trees after fgMorphBlocks\n");
    DBEXEC(VERBOSE, fgDispBasicBlocks(true));

#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
    if (fgNeedToAddFinallyTargetBits)
    {
        // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back.
        fgAddFinallyTargetFlags();
        fgNeedToAddFinallyTargetBits = false;
    }
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)

    /* Decide the kind of code we want to generate */

    fgSetOptions();

    fgExpandQmarkNodes();

    compCurBB = nullptr;
}
/*****************************************************************************
 *
 *  Promoting struct locals
 */
void Compiler::fgPromoteStructs()
{
    printf("*************** In fgPromoteStructs()\n");

    if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
    {
        JITDUMP("  promotion opt flag not enabled\n");
        return;
    }

    if (fgNoStructPromotion)
    {
        JITDUMP("  promotion disabled by JitNoStructPromotion\n");
        return;
    }

    // The code in this #if has been useful in debugging struct promotion issues, by
    // enabling selective enablement of the struct promotion optimization according to
    // method hash.
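    //
    // For example (illustrative usage, not from the original comment): setting
    //     set structpromohashlo=0x3000
    //     set structpromohashhi=0x3fff
    // restricts struct promotion to methods whose hash falls in [0x3000, 0x3fff],
    // which helps bisect a promotion issue down to a single method.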
    unsigned methHash   = info.compMethodHash();
    char*    lostr      = getenv("structpromohashlo");
    unsigned methHashLo = 0;
    if (lostr != nullptr)
    {
        sscanf_s(lostr, "%x", &methHashLo);
    }
    char*    histr      = getenv("structpromohashhi");
    unsigned methHashHi = UINT32_MAX;
    if (histr != nullptr)
    {
        sscanf_s(histr, "%x", &methHashHi);
    }
    if (methHash < methHashLo || methHash > methHashHi)
    {
        return;
    }
    else
    {
        printf("Promoting structs for method %s, hash = 0x%x.\n", info.compFullName, info.compMethodHash());
        printf(""); // in our logic this causes a flush
    }

    if (info.compIsVarArgs)
    {
        JITDUMP("  promotion disabled because of varargs\n");
        return;
    }

    printf("\nlvaTable before fgPromoteStructs\n");

    // The lvaTable might grow as we grab temps. Make a local copy here.
    unsigned startLvaCount = lvaCount;

    //
    // Loop through the original lvaTable, looking for struct locals to be promoted.
    //
    lvaStructPromotionInfo structPromotionInfo;
    bool                   tooManyLocalsReported = false;

    for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
    {
        // Whether this var got promoted
        bool       promotedVar = false;
        LclVarDsc* varDsc      = &lvaTable[lclNum];

        // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
        // its fields. Instead, we will attempt to enregister the entire struct.
        if (varDsc->lvIsSIMDType() && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc)))
        {
            varDsc->lvRegStruct = true;
        }
        // Don't promote if we have reached the tracking limit.
        else if (lvaHaveManyLocals())
        {
            // Print the message first time when we detected this condition
            if (!tooManyLocalsReported)
            {
                JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
            }
            tooManyLocalsReported = true;
        }
        else if (varTypeIsStruct(varDsc))
        {
            assert(structPromotionHelper != nullptr);
            promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum);
        }

        if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
        {
            // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
            // we will treat it as a reg struct.
            varDsc->lvRegStruct = true;
        }
    }

#ifdef _TARGET_ARM_
    if (structPromotionHelper->GetRequiresScratchVar())
    {
        // Ensure that the scratch variable is allocated, in case we
        // pass a promoted struct as an argument.
        if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
        {
            lvaPromotedStructAssemblyScratchVar =
                lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
            lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
        }
    }
#endif // _TARGET_ARM_

    printf("\nlvaTable after fgPromoteStructs\n");
}
void Compiler::fgMorphStructField(GenTree* tree, GenTree* parent)
{
    noway_assert(tree->OperGet() == GT_FIELD);

    GenTreeField* field  = tree->AsField();
    GenTree*      objRef = field->gtFldObj;
    GenTree*      obj    = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
    noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));

    /* Is this an instance data member? */

    if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
    {
        unsigned         lclNum = obj->gtLclVarCommon.gtLclNum;
        const LclVarDsc* varDsc = &lvaTable[lclNum];

        if (varTypeIsStruct(obj))
        {
            if (varDsc->lvPromoted)
            {
                // Promoted struct
                unsigned fldOffset     = field->gtFldOffset;
                unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);

                if (fieldLclIndex == BAD_VAR_NUM)
                {
                    // Access a promoted struct's field with an offset that doesn't correspond to any field.
                    // It can happen if the struct was cast to another struct with different offsets.
                    return;
                }

                const LclVarDsc* fieldDsc  = &lvaTable[fieldLclIndex];
                var_types        fieldType = fieldDsc->TypeGet();

                assert(fieldType != TYP_STRUCT); // promoted LCL_VAR can't have a struct type.
                if (tree->TypeGet() != fieldType)
                {
                    if (tree->TypeGet() != TYP_STRUCT)
                    {
                        // This is going to be an incorrect instruction promotion.
                        // For example, when we try to read an int as a long.
                        return;
                    }

                    if (field->gtFldHnd != fieldDsc->lvFieldHnd)
                    {
                        CORINFO_CLASS_HANDLE fieldTreeClass = nullptr, fieldDscClass = nullptr;

                        CorInfoType fieldTreeType = info.compCompHnd->getFieldType(field->gtFldHnd, &fieldTreeClass);
                        CorInfoType fieldDscType  = info.compCompHnd->getFieldType(fieldDsc->lvFieldHnd, &fieldDscClass);
                        if (fieldTreeType != fieldDscType || fieldTreeClass != fieldDscClass)
                        {
                            // Access the promoted field with a different class handle, can't check that types match.
                            return;
                        }
                        // Access the promoted field as a field of a non-promoted struct with the same class handle.
                    }
                    else if (tree->TypeGet() == TYP_STRUCT)
                    {
                        // The field tree accesses it as a struct, but the promoted lcl var for the field
                        // says that it has another type. It can happen only if struct promotion faked
                        // field type for a struct of single field of scalar type aligned at their natural boundary.
                        assert(structPromotionHelper != nullptr);
                        structPromotionHelper->CheckRetypedAsScalar(field->gtFldHnd, fieldType);
                    }
                }

                tree->SetOper(GT_LCL_VAR);
                tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
                tree->gtType = fieldType;
                tree->gtFlags &= GTF_NODE_MASK;
                tree->gtFlags &= ~GTF_GLOB_REF;

                if (parent->gtOper == GT_ASG)
                {
                    if (parent->gtOp.gtOp1 == tree)
                    {
                        tree->gtFlags |= GTF_VAR_DEF;
                        tree->gtFlags |= GTF_DONT_CSE;
                    }

                    // Promotion of struct containing struct fields where the field
                    // is a struct with a single pointer sized scalar type field: in
                    // this case struct promotion uses the type of the underlying
                    // scalar field as the type of struct field instead of recursively
                    // promoting. This can lead to a case where we have a block-asgn
                    // with its RHS replaced with a scalar type. Mark RHS value as
                    // DONT_CSE so that assertion prop will not do const propagation.
                    // The reason this is required is that if RHS of a block-asg is a
                    // constant, then it is interpreted as init-block incorrectly.
                    //
                    // TODO - This can also be avoided if we implement recursive struct
                    // promotion, tracked by #10019.
                    if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
                    {
                        tree->gtFlags |= GTF_DONT_CSE;
                    }
                }

                printf("Replacing the field in promoted struct with local var V%02u\n", fieldLclIndex);
            }
        }
        else
        {
            // Normed struct
            //
            // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
            // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
            // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
            // there is one extremely rare case where that won't be true. An enum type is a special value type
            // that contains exactly one element of a primitive integer type (that, for CLS programs, is named
            // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
            // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
            // ldfld. For example:
            //
            //  .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
            //    .field public specialname rtspecialname int16 value__
            //    .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
            //  .method public hidebysig static void Main() cil managed
            //    .locals init (valuetype mynamespace.e_t V_0)
            //    ldflda int16 mynamespace.e_t::value__
            //
            // Normally, compilers will not generate the ldflda, since it is superfluous.
            //
            // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
            // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
            // mismatch like this, don't do this morphing. The local var may end up getting marked as
            // address taken, and the appropriate SHORT load will be done from memory in that case.

            if (tree->TypeGet() == obj->TypeGet())
            {
                tree->ChangeOper(GT_LCL_VAR);
                tree->gtLclVarCommon.SetLclNum(lclNum);
                tree->gtFlags &= GTF_NODE_MASK;

                if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
                {
                    tree->gtFlags |= GTF_VAR_DEF;
                    tree->gtFlags |= GTF_DONT_CSE;
                }

                printf("Replacing the field in normed struct with local var V%02u\n", lclNum);
            }
        }
    }
}
void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent)
{
    noway_assert(tree->OperGet() == GT_LCL_FLD);

    unsigned   lclNum = tree->gtLclFld.gtLclNum;
    LclVarDsc* varDsc = &lvaTable[lclNum];

    if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
    {
        // Promoted struct
        unsigned   fldOffset     = tree->gtLclFld.gtLclOffs;
        unsigned   fieldLclIndex = 0;
        LclVarDsc* fldVarDsc     = nullptr;

        if (fldOffset != BAD_VAR_NUM)
        {
            fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
            noway_assert(fieldLclIndex != BAD_VAR_NUM);
            fldVarDsc = &lvaTable[fieldLclIndex];
        }

        if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
#ifdef _TARGET_X86_
            && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
#endif
            )
        {
            // There is an existing sub-field we can use.
            tree->gtLclFld.SetLclNum(fieldLclIndex);

            // The field must be an enregisterable type; otherwise it would not be a promoted field.
            // The tree type may not match, e.g. for return types that have been morphed, but both
            // must be enregisterable types.
            var_types treeType  = tree->TypeGet();
            var_types fieldType = fldVarDsc->TypeGet();
            assert((varTypeIsEnregisterable(treeType) || varTypeIsSIMD(treeType)) &&
                   (varTypeIsEnregisterable(fieldType) || varTypeIsSIMD(fieldType)));

            tree->ChangeOper(GT_LCL_VAR);
            assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
            tree->gtType = fldVarDsc->TypeGet();

            printf("Replacing the GT_LCL_FLD in promoted struct with local var V%02u\n", fieldLclIndex);

            if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
            {
                tree->gtFlags |= GTF_VAR_DEF;
                tree->gtFlags |= GTF_DONT_CSE;
            }
        }
        else
        {
            // There is no existing field that has all the parts that we need,
            // so we must ensure that the struct lives in memory.
            lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));

            // We can't convert this guy to a float because he really does have his
            // address taken ...
            varDsc->lvKeepType = 1;
        }
    }
}
//------------------------------------------------------------------------
// fgResetImplicitByRefRefCount: Clear the ref count field of all implicit byrefs

void Compiler::fgResetImplicitByRefRefCount()
{
#if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
    printf("\n*************** In fgResetImplicitByRefRefCount()\n");

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; ++lclNum)
    {
        LclVarDsc* varDsc = lvaGetDesc(lclNum);

        if (varDsc->lvIsImplicitByRef)
        {
            // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
            // appearance of implicit-by-ref param so that call arg morphing can do an
            // optimization for single-use implicit-by-ref params whose single use is as
            // an outgoing call argument.
            varDsc->setLvRefCnt(0, RCS_EARLY);
        }
    }

#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
}
//------------------------------------------------------------------------
// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
//                            struct to pointer). Also choose (based on address-exposed analysis)
//                            which struct promotions of implicit byrefs to keep or discard.
//                            For those which are kept, insert the appropriate initialization code.
//                            For those which are to be discarded, annotate the promoted field locals
//                            so that fgMorphImplicitByRefArgs will know to rewrite their appearances
//                            using indirections off the pointer parameters.
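//
// For illustration (hypothetical signature, not from the original comment): on Windows x64,
// "void M(Pair16 p)" where Pair16 is a 16-byte struct actually passes 'p' by address, so the
// TYP_STRUCT local for 'p' is retyped here to a TYP_BYREF pointer, and later appearances such
// as 'p.a' become indirections off that pointer (or references to the promotion temp, if the
// promotion is kept).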
void Compiler::fgRetypeImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
    printf("\n*************** In fgRetypeImplicitByRefArgs()\n");

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            unsigned size;

            if (varDsc->lvSize() > REGSIZE_BYTES)
            {
                size = varDsc->lvSize();
            }
            else
            {
                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                size                         = info.compCompHnd->getClassSize(typeHnd);
            }

            if (varDsc->lvPromoted)
            {
                // This implicit-by-ref was promoted; create a new temp to represent the
                // promoted struct before rewriting this parameter as a pointer.
                unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
                lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
                if (info.compIsVarArgs)
                {
                    lvaSetStructUsedAsVarArg(newLclNum);
                }

                // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
                varDsc = &lvaTable[lclNum];

                // Copy the struct promotion annotations to the new temp.
                LclVarDsc* newVarDsc       = &lvaTable[newLclNum];
                newVarDsc->lvPromoted      = true;
                newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
                newVarDsc->lvFieldCnt      = varDsc->lvFieldCnt;
                newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
                newVarDsc->lvCustomLayout  = varDsc->lvCustomLayout;

                newVarDsc->lvKeepType = true;

                // Propagate address-taken-ness and do-not-enregister-ness.
                newVarDsc->lvAddrExposed      = varDsc->lvAddrExposed;
                newVarDsc->lvDoNotEnregister  = varDsc->lvDoNotEnregister;
                newVarDsc->lvLclBlockOpAddr   = varDsc->lvLclBlockOpAddr;
                newVarDsc->lvLclFieldExpr     = varDsc->lvLclFieldExpr;
                newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
                newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
                newVarDsc->lvLiveAcrossUCall  = varDsc->lvLiveAcrossUCall;

                // If the promotion is dependent, the promoted temp would just be committed
                // to memory anyway, so we'll rewrite its appearances to be indirections
                // through the pointer parameter, the same as we'd do for this
                // parameter if it weren't promoted at all (otherwise the initialization
                // of the new temp would just be a needless memcpy at method entry).
                bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
                                     (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt);
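
                // For example (illustrative numbers, not from the original comment): a promoted
                // arg with 4 fields but only 3 appearances is cheaper to access through the
                // pointer (3 indirections) than to copy into the temp at method entry
                // (4 field initializations), so its promotion is undone.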
                if (!undoPromotion)
                {
                    // Insert IR that initializes the temp from the parameter.
                    // LHS is a simple reference to the temp.
                    fgEnsureFirstBBisScratch();
                    GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
                    // RHS is an indirection (using GT_OBJ) off the parameter.
                    GenTree* addr   = gtNewLclvNode(lclNum, TYP_BYREF);
                    GenTree* rhs    = gtNewBlockVal(addr, (unsigned)size);
                    GenTree* assign = gtNewAssignNode(lhs, rhs);
                    fgInsertStmtAtBeg(fgFirstBB, assign);
                }

                // Update the locals corresponding to the promoted fields.
                unsigned fieldLclStart = varDsc->lvFieldLclStart;
                unsigned fieldCount    = varDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];

                    if (undoPromotion)
                    {
                        // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
                        // will know to rewrite appearances of this local.
                        assert(fieldVarDsc->lvParentLcl == lclNum);
                    }
                    else
                    {
                        // Set the new parent.
                        fieldVarDsc->lvParentLcl = newLclNum;
                    }
                    // Clear the ref count field; it is used to communicate the number of references
                    // to the implicit byref parameter when morphing calls that pass the implicit byref
                    // out as an outgoing argument value, but that doesn't pertain to this field local
                    // which is now a field of a non-arg local.
                    fieldVarDsc->setLvRefCnt(0, RCS_EARLY);

                    fieldVarDsc->lvIsParam = false;
                    // The fields shouldn't inherit any register preferences from
                    // the parameter which is really a pointer to the struct.
                    fieldVarDsc->lvIsRegArg      = false;
                    fieldVarDsc->lvIsMultiRegArg = false;
                    fieldVarDsc->lvArgReg        = REG_NA;
#if FEATURE_MULTIREG_ARGS
                    fieldVarDsc->lvOtherArgReg = REG_NA;
#endif
                }

                // Hijack lvFieldLclStart to record the new temp number.
                // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
                varDsc->lvFieldLclStart = newLclNum;
                // Go ahead and clear lvFieldCnt -- either we're promoting
                // a replacement temp or we're not promoting this arg, and
                // in either case the parameter is now a pointer that doesn't
                // have these fields.
                varDsc->lvFieldCnt = 0;

                // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
                // whether references to the struct should be rewritten as
                // indirections off the pointer (not promoted) or references
                // to the new struct local (promoted).
                varDsc->lvPromoted = !undoPromotion;
            }
            else
            {
                // The "undo promotion" path above clears lvPromoted for args that struct
                // promotion wanted to promote but that aren't considered profitable to
                // rewrite. It hijacks lvFieldLclStart to communicate to
                // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
                // on such args for fgMorphImplicitByRefArgs to consult in the interim.
                // Here we have an arg that was simply never promoted, so make sure it doesn't
                // have a nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
                // and fgMarkDemotedImplicitByRefArgs.
                assert(varDsc->lvFieldLclStart == 0);
            }

            // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
            varDsc->lvType = TYP_BYREF;

            // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
            // make sure that the following flag is not set, as it would force SSA to
            // exclude these LclVars from tracking/enregistering. (See SsaBuilder::IncludeInSsa.)
            //
            varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.

            // The struct parameter may have had its address taken, but the pointer parameter
            // cannot -- any uses of the struct parameter's address are uses of the pointer
            // parameter's value, and there's no way for the MSIL to reference the pointer
            // parameter's address. So clear the address-taken bit for the parameter.
            varDsc->lvAddrExposed     = 0;
            varDsc->lvDoNotEnregister = 0;

            // This should not be converted to a double in stress mode,
            // because it is really a pointer
            varDsc->lvKeepType = 1;

            printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
        }
    }

#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
}
//------------------------------------------------------------------------
// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
//                                 asked to promote. Appearances of these have now been rewritten
//                                 (by fgMorphImplicitByRefArgs) using indirections from the pointer
//                                 parameter or references to the promotion temp, as appropriate.

void Compiler::fgMarkDemotedImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            if (varDsc->lvPromoted)
            {
                // The parameter is simply a pointer now, so clear lvPromoted. It was left set
                // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
                // appearances of this arg needed to be rewritten to a new promoted struct local.
                varDsc->lvPromoted = false;

                // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
                // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
                varDsc->lvFieldLclStart = 0;
            }
            else if (varDsc->lvFieldLclStart != 0)
            {
                // We created new temps to represent a promoted struct corresponding to this
                // parameter, but decided not to go through with the promotion and have
                // rewritten all uses as indirections off the pointer parameter.
                // We stashed the pointer to the new struct temp in lvFieldLclStart; make
                // note of that and clear the annotation.
                unsigned structLclNum   = varDsc->lvFieldLclStart;
                varDsc->lvFieldLclStart = 0;

                // Clear the arg's ref count; this was set during address-taken analysis so that
                // call morphing could identify single-use implicit byrefs; we're done with
                // that, and want it to be in its default state of zero when we go to set
                // real ref counts for all variables.
                varDsc->setLvRefCnt(0, RCS_EARLY);

                // The temp struct is now unused; set flags appropriately so that we
                // won't allocate space for it on the stack.
                LclVarDsc* structVarDsc = &lvaTable[structLclNum];
                structVarDsc->setLvRefCnt(0, RCS_EARLY);
                structVarDsc->lvAddrExposed = false;

                structVarDsc->lvUnusedStruct = true;

                unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
                unsigned fieldCount    = structVarDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    // Fix the pointer to the parent local.
                    LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
                    assert(fieldVarDsc->lvParentLcl == lclNum);
                    fieldVarDsc->lvParentLcl = structLclNum;

                    // The field local is now unused; set flags appropriately so that
                    // we won't allocate stack space for it.
                    fieldVarDsc->setLvRefCnt(0, RCS_EARLY);
                    fieldVarDsc->lvAddrExposed = false;
                }
            }
        }
    }

#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
}
/*****************************************************************************
 *
 *  Morph irregular parameters: for x64 and ARM64 this means turning them into
 *  byrefs, adding extra indirs.
 */
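// For illustration (hypothetical locals, not from the original comment): after
// fgRetypeImplicitByRefArgs, an appearance of the struct parameter V00 is rewritten below as
// OBJ(V00) (V00 now being a TYP_BYREF pointer), and an appearance of a dependently promoted
// field local V03 (a field 'f' of V00) becomes FIELD(V00, f) off the pointer.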
bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree)
{
#if (!defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)) && !defined(_TARGET_ARM64_)

    return false;

#else // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_

    bool changed = false;

    // Implicit byref morphing needs to know if the reference to the parameter is a
    // child of GT_ADDR or not, so this method looks one level down and does the
    // rewrite whenever a child is a reference to an implicit byref parameter.
    if (tree->gtOper == GT_ADDR)
    {
        if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
        {
            GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true);
            changed              = (morphedTree != nullptr);
            assert(!changed || (morphedTree == tree));
        }
    }
    else
    {
        for (GenTree** pTree : tree->UseEdges())
        {
            GenTree* childTree = *pTree;
            if (childTree->gtOper == GT_LCL_VAR)
            {
                GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false);
                if (newChildTree != nullptr)
                {
                    changed = true;
                    *pTree  = newChildTree;
                }
            }
        }
    }

    return changed;
#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
}
17844 GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr)
17846 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
17847 assert(isAddr == (tree->gtOper == GT_ADDR));
17849 GenTree* lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
17850 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
17851 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
17853 CORINFO_FIELD_HANDLE fieldHnd;
17854 unsigned fieldOffset = 0;
17855 var_types fieldRefType = TYP_UNKNOWN;
17857 if (lvaIsImplicitByRefLocal(lclNum))
17859 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
17860 // re-invoke the traversal to mark address-taken locals.
17861 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
17862 // If we do, leave it as-is.
17863 if (!varTypeIsStruct(lclVarTree))
17865 assert(lclVarTree->TypeGet() == TYP_BYREF);
17869 else if (lclVarDsc->lvPromoted)
17871 // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
17872 // arg. Rewrite this to refer to the new local.
17873 assert(lclVarDsc->lvFieldLclStart != 0);
17874 lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
17878 fieldHnd = nullptr;
17880 else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
17882 // This was a field reference to an implicit-by-reference struct parameter that was
17883 // dependently promoted; update it to a field reference off the pointer.
17884 // Grab the field handle from the struct field lclVar.
17885 fieldHnd = lclVarDsc->lvFieldHnd;
17886 fieldOffset = lclVarDsc->lvFldOffset;
17887 assert(fieldHnd != nullptr);
17888 // Update lclNum/lclVarDsc to refer to the parameter
17889 lclNum = lclVarDsc->lvParentLcl;
17890 lclVarDsc = &lvaTable[lclNum];
17891 fieldRefType = lclVarTree->TypeGet();
17895 // We only need to transform the 'marked' implicit by-ref parameters
17899 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
17900 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
17904 if (fieldHnd == nullptr)
17906 // change &X into just plain X
17907 tree->ReplaceWith(lclVarTree, this);
17908 tree->gtType = TYP_BYREF;
17912 // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
17913 // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
17914 lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
17915 lclVarTree->gtType = TYP_BYREF;
17916 tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
17922 printf("Replacing address of implicit by ref struct parameter with byref:\n");
17928 // Change X into OBJ(X) or FIELD(X, f)
17929 var_types structType = tree->gtType;
17930 tree->gtType = TYP_BYREF;
17934 tree->gtLclVarCommon.SetLclNum(lclNum);
17935 tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
17939 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
17942 if (structType == TYP_STRUCT)
17944 gtSetObjGcInfo(tree->AsObj());
17947 // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
17948 // we could remove TGTANYWHERE
17949 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
17954 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
17969 class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
17971 // During tree traversal every GenTree node produces a "value" that represents:
17972 // - the memory location associated with a local variable, including an offset
17973 // accumulated from GT_LCL_FLD and GT_FIELD nodes.
17974 // - the address of local variable memory location, including an offset as well.
17975 // - an unknown value - the result of a node we don't know how to process. This
17976 // also includes the result of TYP_VOID nodes (or any other nodes that don't
17977 // actually produce values in IR) in order to support the invariant that every
17978 // node produces a value.
17980 // The existence of GT_ADDR nodes and their use together with GT_FIELD to form
17981 // FIELD/ADDR/FIELD/ADDR/LCL_VAR sequences complicates things a bit. A typical
17982 // GT_FIELD node acts like an indirection and should produce an unknown value;
17983 // local address analysis doesn't know or care what value the field stores.
17984 // But a GT_FIELD can also be used as an operand of a GT_ADDR node, and then
17985 // the GT_FIELD node does not perform an indirection - it just represents a
17986 // location, similar to GT_LCL_VAR and GT_LCL_FLD.
17988 // To avoid this issue, the semantics of GT_FIELD (and for simplicity's sake any other
17989 // indirection) nodes slightly deviates from the IR semantics - an indirection does not
17990 // actually produce an unknown value but a location value, if the indirection address
17991 // operand is an address value.
17993 // The actual indirection is performed when the indirection's user node is processed:
17994 // - A GT_ADDR user turns the location value produced by the indirection back
17995 // into an address value.
17996 // - Any other user node performs the indirection and produces an unknown value.
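//
// For example (a sketch; V01 and the field offset are illustrative), the
// tree ADDR(FIELD(ADDR(LCL_VAR V01), F)) with F at offset 8 produces the
// following values bottom-up:
//
//     LCL_VAR V01      => LOCATION(V01, 0)
//     ADDR             => ADDRESS(V01, 0)
//     FIELD (offset 8) => LOCATION(V01, 8)
//     ADDR             => ADDRESS(V01, 8)
//
// whereas in IND(ADDR(LCL_VAR V01)) the IND produces LOCATION(V01, 0),
// which is escaped only when the IND's user performs a real indirection.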
18004 INDEBUG(bool m_consumed;)
18007 // Produce an unknown value associated with the specified node.
18008 Value(GenTree* node)
18010 , m_lclNum(BAD_VAR_NUM)
18014 , m_consumed(false)
18019 // Get the node that produced this value.
18020 GenTree* Node() const
18025 // Does this value represent a location?
18026 bool IsLocation() const
18028 return (m_lclNum != BAD_VAR_NUM) && !m_address;
18031 // Does this value represent the address of a location?
18032 bool IsAddress() const
18034 assert((m_lclNum != BAD_VAR_NUM) || !m_address);
18039 // Get the location's variable number.
18040 unsigned LclNum() const
18042 assert(IsLocation() || IsAddress());
18047 // Get the location's byte offset.
18048 unsigned Offset() const
18050 assert(IsLocation() || IsAddress());
18055 //------------------------------------------------------------------------
18056 // Location: Produce a location value.
18059 // lclNum - the local variable number
18060 // offset - the byte offset of the location (used for GT_LCL_FLD nodes)
18063 // - (lclnum, offset) => LOCATION(lclNum, offset)
18065 void Location(unsigned lclNum, unsigned offset = 0)
18067 assert(!IsLocation() && !IsAddress());
18073 //------------------------------------------------------------------------
18074 // Address: Produce an address value from a location value.
18077 // val - the input value
18080 // - LOCATION(lclNum, offset) => ADDRESS(lclNum, offset)
18081 // - ADDRESS(lclNum, offset) => invalid, we should never encounter something like ADDR(ADDR(...))
18082 // - UNKNOWN => UNKNOWN
18084 void Address(Value& val)
18086 assert(!IsLocation() && !IsAddress());
18087 assert(!val.IsAddress());
18089 if (val.IsLocation())
18092 m_lclNum = val.m_lclNum;
18093 m_offset = val.m_offset;
18096 INDEBUG(val.Consume();)
18099 //------------------------------------------------------------------------
18100 // Field: Produce a location value from an address value.
18103 // val - the input value
18104 // offset - the offset to add to the existing location offset
18107 // `true` if the value was consumed. `false` if the input value
18108 // cannot be consumed because it is itself a location or because
18109 // the offset overflowed. In this case the caller is expected
18110 // to escape the input value.
18113 // - LOCATION(lclNum, offset) => not representable, must escape
18114 // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset + field.Offset)
18115 // if the offset overflows then location is not representable, must escape
18116 // - UNKNOWN => UNKNOWN
18118 bool Field(Value& val, unsigned offset)
18120 assert(!IsLocation() && !IsAddress());
18122 if (val.IsLocation())
18127 if (val.IsAddress())
18129 ClrSafeInt<unsigned> newOffset = ClrSafeInt<unsigned>(val.m_offset) + ClrSafeInt<unsigned>(offset);
18131 if (newOffset.IsOverflow())
18136 m_lclNum = val.m_lclNum;
18137 m_offset = newOffset.Value();
18140 INDEBUG(val.Consume();)
18144 //------------------------------------------------------------------------
18145 // Indir: Produce a location value from an address value.
18148 // val - the input value
18151 // `true` if the value was consumed. `false` if the input value
18152 // cannot be consumed because it is itself a location. In this
18153 // case the caller is expected to escape the input value.
18156 // - LOCATION(lclNum, offset) => not representable, must escape
18157 // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset)
18158 // - UNKNOWN => UNKNOWN
18160 bool Indir(Value& val)
18162 assert(!IsLocation() && !IsAddress());
18164 if (val.IsLocation())
18169 if (val.IsAddress())
18171 m_lclNum = val.m_lclNum;
18172 m_offset = val.m_offset;
18175 INDEBUG(val.Consume();)
18182 assert(!m_consumed);
18183 // Mark the value as consumed so that PopValue can ensure that values
18184 // aren't popped from the stack without being processed appropriately.
18195 ArrayStack<Value> m_valueStack;
18196 INDEBUG(bool m_stmtModified;)
18202 DoPostOrder = true,
18203 ComputeStack = true,
18204 DoLclVarsOnly = false,
18205 UseExecutionOrder = false,
18208 LocalAddressVisitor(Compiler* comp)
18209 : GenTreeVisitor<LocalAddressVisitor>(comp), m_valueStack(comp->getAllocator(CMK_LocalAddressVisitor))
18213 void VisitStmt(GenTreeStmt* stmt)
18216 if (m_compiler->verbose)
18218 printf("LocalAddressVisitor visiting statement:\n");
18219 m_compiler->gtDispTree(stmt);
18220 m_stmtModified = false;
18224 WalkTree(&stmt->gtStmtExpr, nullptr);
18226 // We could have something like STMT(IND(ADDR(LCL_VAR))) so we need to escape
18227 // the location here. This doesn't seem to happen often, if ever. The importer
18228 // tends to wrap such a tree in a COMMA.
18229 if (TopValue(0).IsLocation())
18231 EscapeLocation(TopValue(0), stmt);
18235 // If we have an address on the stack then we don't need to do anything.
18236 // The address tree isn't actually used and it will be discarded during
18237 // morphing. So just mark any value as consumed to keep PopValue happy.
18238 INDEBUG(TopValue(0).Consume();)
18242 assert(m_valueStack.Empty());
18245 if (m_compiler->verbose)
18247 if (m_stmtModified)
18249 printf("LocalAddressVisitor modified statement:\n");
18250 m_compiler->gtDispTree(stmt);
18258 // Morph promoted struct fields and count implicit byref argument occurrences.
18259 // Also create and push the value produced by the visited node. This is done here
18260 // rather than in PostOrderVisit because it makes it easy to handle nodes with an
18261 // arbitrary number of operands - just pop values until the value corresponding
18262 // to the visited node is encountered.
18263 fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
18265 GenTree* node = *use;
18267 if (node->OperIs(GT_FIELD))
18269 MorphStructField(node, user);
18271 else if (node->OperIs(GT_LCL_FLD))
18273 MorphLocalField(node, user);
18276 if (node->OperIsLocal())
18278 unsigned lclNum = node->AsLclVarCommon()->GetLclNum();
18280 LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
18281 if (varDsc->lvIsStructField)
18283 // Promoted field, increase counter for the parent lclVar.
18284 assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum));
18285 unsigned parentLclNum = varDsc->lvParentLcl;
18286 UpdateEarlyRefCountForImplicitByRef(parentLclNum);
18290 UpdateEarlyRefCountForImplicitByRef(lclNum);
18296 return Compiler::WALK_CONTINUE;
18299 // Evaluate a node. Since this is done in postorder, the node's operands have already been
18300 // evaluated and are available on the value stack. The value produced by the visited node
18301 // is left on the top of the evaluation stack.
18302 fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
18304 GenTree* node = *use;
18306 switch (node->OperGet())
18309 assert(TopValue(0).Node() == node);
18311 TopValue(0).Location(node->AsLclVar()->GetLclNum());
18315 assert(TopValue(0).Node() == node);
18317 TopValue(0).Location(node->AsLclFld()->GetLclNum(), node->AsLclFld()->gtLclOffs);
18321 assert(TopValue(1).Node() == node);
18322 assert(TopValue(0).Node() == node->gtGetOp1());
18324 TopValue(1).Address(TopValue(0));
18329 if (node->AsField()->gtFldObj != nullptr)
18331 assert(TopValue(1).Node() == node);
18332 assert(TopValue(0).Node() == node->AsField()->gtFldObj);
18334 if (!TopValue(1).Field(TopValue(0), node->AsField()->gtFldOffset))
18336 // Either the address comes from a location value (e.g. FIELD(IND(...)))
18337 // or the field offset has overflowed.
18338 EscapeValue(TopValue(0), node);
18345 assert(TopValue(0).Node() == node);
18352 assert(TopValue(1).Node() == node);
18353 assert(TopValue(0).Node() == node->gtGetOp1());
18355 if ((node->gtFlags & GTF_IND_VOLATILE) != 0)
18357 // Volatile indirections must not be removed so the address,
18358 // if any, must be escaped.
18359 EscapeValue(TopValue(0), node);
18361 else if (!TopValue(1).Indir(TopValue(0)))
18363 // If the address comes from another indirection (e.g. IND(IND(...)))
18364 // then we need to escape the location.
18365 EscapeLocation(TopValue(0), node);
18372 assert(TopValue(2).Node() == node);
18373 assert(TopValue(1).Node() == node->AsDynBlk()->Addr());
18374 assert(TopValue(0).Node() == node->AsDynBlk()->gtDynamicSize);
18376 // The block size may be the result of an indirection so we need
18377 // to escape the location that may be associated with it.
18378 EscapeValue(TopValue(0), node);
18380 if (!TopValue(2).Indir(TopValue(1)))
18382 // If the address comes from another indirection (e.g. DYN_BLK(IND(...)))
18383 // then we need to escape the location.
18384 EscapeLocation(TopValue(1), node);
18392 while (TopValue(0).Node() != node)
18394 EscapeValue(TopValue(0), node);
18400 assert(TopValue(0).Node() == node);
18401 return Compiler::WALK_CONTINUE;
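// A sketch of the value stack discipline (V02 is illustrative): for
// ASG(IND(ADDR(LCL_VAR V02)), X), LCL_VAR pushes LOCATION(V02, 0), ADDR
// turns it into ADDRESS(V02, 0) and IND turns that back into
// LOCATION(V02, 0). ASG itself is handled by the default case above: it
// pops and escapes its operands' values, and EscapeLocation then marks
// V02 address exposed only if the indirection is wider than the local.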
18405 void PushValue(GenTree* node)
18407 m_valueStack.Push(node);
18410 Value& TopValue(unsigned index)
18412 return m_valueStack.IndexRef(index);
18417 assert(TopValue(0).IsConsumed());
18418 m_valueStack.Pop();
18421 //------------------------------------------------------------------------
18422 // EscapeValue: Process an escaped value
18425 // val - the escaped value
18426 // user - the node that uses the escaped value
18428 void EscapeValue(Value& val, GenTree* user)
18430 if (val.IsLocation())
18432 EscapeLocation(val, user);
18434 else if (val.IsAddress())
18436 EscapeAddress(val, user);
18440 INDEBUG(val.Consume();)
18444 //------------------------------------------------------------------------
18445 // EscapeAddress: Process an escaped address value
18448 // val - the escaped address value
18449 // user - the node that uses the address value
18451 void EscapeAddress(Value& val, GenTree* user)
18453 assert(val.IsAddress());
18455 LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum());
18457 // In general we don't know how an exposed struct field address will be used - it may be used to
18458 // access only that specific field or it may be used to access other fields in the same struct
18459 // by using pointer/ref arithmetic. It seems reasonable to make an exception for the "this" arg
18460 // of calls - it would be highly unusual for a struct member method to attempt to access memory
18461 // beyond "this" instance. And calling struct member methods is common enough that attempting to
18462 // mark the entire struct as address exposed results in CQ regressions.
18463 bool isThisArg = user->IsCall() && (val.Node() == user->AsCall()->gtCallObjp);
18464 bool exposeParentLcl = varDsc->lvIsStructField && !isThisArg;
18466 m_compiler->lvaSetVarAddrExposed(exposeParentLcl ? varDsc->lvParentLcl : val.LclNum());
18468 #ifdef _TARGET_64BIT_
18469 // If the address of a variable is passed in a call and the allocation size of the variable
18470 // is 32 bits we will quirk the size to 64 bits. Some PInvoke signatures incorrectly specify
18471 // a ByRef to an INT32 when they actually write a SIZE_T or INT64. There are cases where
18472 // overwriting these extra 4 bytes corrupts some data (such as a saved register) and leads
18473 // to an A/V, whereas the previous JIT64 codegen did not.
18474 if (!varDsc->lvIsParam && !varDsc->lvIsStructField && (genActualType(varDsc->TypeGet()) == TYP_INT))
18476 // TODO-Cleanup: This should simply check if the user is a call node, not if a call ancestor exists.
18477 if (Compiler::gtHasCallOnStack(&m_ancestors))
18479 varDsc->lvQuirkToLong = true;
18480 JITDUMP("Adding a quirk for the storage size of V%02u of type %s", val.LclNum(),
18481 varTypeName(varDsc->TypeGet()));
18484 #endif // _TARGET_64BIT_
18486 INDEBUG(val.Consume();)
18489 //------------------------------------------------------------------------
18490 // EscapeLocation: Process an escaped location value
18493 // val - the escaped location value
18494 // user - the node that uses the location value
18497 // Unlike EscapeAddress, this does not necessarily mark the lclvar associated
18498 // with the value as address exposed. This is needed only if the indirection
18499 // is wider than the lclvar.
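//
// For example (a sketch; V03 is illustrative): for
// IND<long>(ADDR(LCL_VAR<int> V03)) the escaped value is LOCATION(V03, 0)
// and GetIndirSize returns 8, so the end offset (8) exceeds the 4 byte
// size of V03. The access is wide and V03 (or its parent struct, if V03
// is a promoted field) is marked address exposed.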
18501 void EscapeLocation(Value& val, GenTree* user)
18503 assert(val.IsLocation());
18505 GenTree* node = val.Node();
18507 if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD))
18509 // If the location is accessed directly then we don't need to do anything.
18511 assert(node->AsLclVarCommon()->GetLclNum() == val.LclNum());
18515 // Otherwise it must be accessed through some kind of indirection. Usually this is
18516 // something like IND(ADDR(LCL_VAR)); global morph will change it to GT_LCL_VAR or
18517 // GT_LCL_FLD so the lclvar does not need to be address exposed.
18519 // However, it is possible for the indirection to be wider than the lclvar
18520 // (e.g. *(long*)&int32Var) or to have a field offset that pushes the indirection
18521 // past the end of the lclvar memory location. In such cases morph doesn't do
18522 // anything so the lclvar needs to be address exposed.
18524 // More importantly, if the lclvar is a promoted struct field then the parent lclvar
18525 // also needs to be address exposed so we get dependent struct promotion. Code like
18526 // *(long*)&int32Var has undefined behavior and is practically useless, but reading,
18527 // say, 2 consecutive Int32 struct fields as Int64 has more practical value.
18529 LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum());
18530 unsigned indirSize = GetIndirSize(node, user);
18533 if (indirSize == 0)
18535 // If we can't figure out the indirection size then treat it as a wide indirection.
18540 ClrSafeInt<unsigned> endOffset = ClrSafeInt<unsigned>(val.Offset()) + ClrSafeInt<unsigned>(indirSize);
18542 if (endOffset.IsOverflow())
18546 else if (varDsc->TypeGet() == TYP_STRUCT)
18548 isWide = (endOffset.Value() > varDsc->lvExactSize);
18552 // For small int types use the real type size, not the stack slot size.
18553 // Morph does manage to transform `*(int*)&byteVar` into just byteVar where
18554 // the LCL_VAR node has type TYP_INT. But such code is simply bogus and
18555 // there's no reason to attempt to optimize it. It makes more sense to
18556 // mark the variable address exposed in such circumstances.
18558 // Same for "small" SIMD types - SIMD8/12 have 8/12 bytes, even if the
18559 // stack location may have 16 bytes.
18561 // For TYP_BLK variables the type size is 0 so they're always address
18562 // exposed.
18563 isWide = (endOffset.Value() > genTypeSize(varDsc->TypeGet()));
18569 m_compiler->lvaSetVarAddrExposed(varDsc->lvIsStructField ? varDsc->lvParentLcl : val.LclNum());
18573 INDEBUG(val.Consume();)
18576 //------------------------------------------------------------------------
18577 // GetIndirSize: Return the size (in bytes) of an indirection node.
18580 // indir - the indirection node
18581 // user - the node that uses the indirection
18584 // This returns 0 for indirections of unknown size, typically GT_DYN_BLK.
18585 // GT_IND nodes that have type TYP_STRUCT are expected to appear only
18586 // on the RHS of an assignment, in which case the LHS size will be used instead.
18587 // Otherwise 0 is returned as well.
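// For example (a sketch; V05 is illustrative): GetIndirSize(IND<int>, user)
// returns 4. For a TYP_STRUCT indir on the RHS of
// ASG(LCL_VAR<struct> V05, IND<struct>(...)) the LHS is consulted and the
// lvExactSize of V05 is returned. For GT_DYN_BLK the size is unknown and
// 0 is returned.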
18589 unsigned GetIndirSize(GenTree* indir, GenTree* user)
18591 assert(indir->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_DYN_BLK, GT_FIELD));
18593 if (indir->TypeGet() != TYP_STRUCT)
18595 return genTypeSize(indir->TypeGet());
18598 // A struct indir that is the RHS of an assignment needs special casing:
18599 // - It can be a GT_IND of type TYP_STRUCT, in which case the size is given by the LHS.
18600 // - It can be a GT_OBJ that has a correct size, but different than the size of the LHS.
18601 // The LHS size takes precedence.
18602 // Just take the LHS size in all cases.
18603 if (user->OperIs(GT_ASG) && (indir == user->gtGetOp2()))
18605 indir = user->gtGetOp1();
18607 if (indir->TypeGet() != TYP_STRUCT)
18609 return genTypeSize(indir->TypeGet());
18612 // The LHS may be a LCL_VAR/LCL_FLD, these are not indirections so we need to handle them here.
18613 // It can also be a GT_INDEX, this is an indirection but it never applies to lclvar addresses
18614 // so it needs to be handled here as well.
18616 switch (indir->OperGet())
18619 return m_compiler->lvaGetDesc(indir->AsLclVar())->lvExactSize;
18621 return genTypeSize(indir->TypeGet());
18623 return indir->AsIndex()->gtIndElemSize;
18629 switch (indir->OperGet())
18632 return m_compiler->info.compCompHnd->getClassSize(
18633 m_compiler->info.compCompHnd->getFieldClass(indir->AsField()->gtFldHnd));
18636 return indir->AsBlk()->gtBlkSize;
18638 assert(indir->OperIs(GT_IND, GT_DYN_BLK));
18643 //------------------------------------------------------------------------
18644 // MorphStructField: Replaces a GT_FIELD based promoted/normed struct field access
18645 // (e.g. FIELD(ADDR(LCL_VAR))) with a GT_LCL_VAR that references the struct field.
18648 // node - the GT_FIELD node
18649 // user - the node that uses the field
18652 // This does not do anything if the field access does not denote
18653 // a promoted/normed struct field.
18655 void MorphStructField(GenTree* node, GenTree* user)
18657 assert(node->OperIs(GT_FIELD));
18658 // TODO-Cleanup: Move fgMorphStructField implementation here, it's not used anywhere else.
18659 m_compiler->fgMorphStructField(node, user);
18660 INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);)
18663 //------------------------------------------------------------------------
18664 // MorphLocalField: Replaces a GT_LCL_FLD based promoted struct field access
18665 // with a GT_LCL_VAR that references the struct field.
18668 // node - the GT_LCL_FLD node
18669 // user - the node that uses the field
18672 // This does not do anything if the field access does not involve
18673 // a promoted struct local.
18674 // If the GT_LCL_FLD offset does not have a corresponding promoted struct
18675 // field then no transformation is done and the struct local's enregistration
18676 // is disabled.
18678 void MorphLocalField(GenTree* node, GenTree* user)
18680 assert(node->OperIs(GT_LCL_FLD));
18681 // TODO-Cleanup: Move fgMorphLocalField implementation here, it's not used anywhere else.
18682 m_compiler->fgMorphLocalField(node, user);
18683 INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);)
18686 //------------------------------------------------------------------------
18687 // UpdateEarlyRefCountForImplicitByRef: updates the ref count for implicit byref params.
18690 // lclNum - the local number to update the count for.
18693 // fgMakeOutgoingStructArgCopy checks the ref counts for implicit byref params when it decides
18694 // if it's legal to elide certain copies of them;
18695 // fgRetypeImplicitByRefArgs checks the ref counts when it decides to undo promotions.
18697 void UpdateEarlyRefCountForImplicitByRef(unsigned lclNum)
18699 if (!m_compiler->lvaIsImplicitByRefLocal(lclNum))
18703 LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
18704 JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n", varDsc->lvRefCnt(RCS_EARLY),
18705 varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
18706 varDsc->incLvRefCnt(1, RCS_EARLY);
18710 //------------------------------------------------------------------------
18711 // fgAddFieldSeqForZeroOffset:
18712 // Associate a fieldSeq (with a zero offset) with the GenTree node 'addr'
18715 // addr - A GenTree node
18716 // fieldSeqZero - a fieldSeq (with a zero offset)
18719 // Some GenTree nodes have internal fields that record the field sequence.
18720 // If we have one of these nodes (GT_CNS_INT or GT_LCL_FLD) we can append
18721 // the field sequence using its gtFieldSeq field.
18722 // If we have a GT_ADD with a GT_CNS_INT operand we can use the
18723 // fieldSeq from that child node.
18724 // Otherwise we record 'fieldSeqZero' for the GenTree node using the
18725 // map returned by GetZeroOffsetFieldMap().
18726 // When doing so we take care to preserve any existing zero-offset field sequence.
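// For example (a sketch; V04 and the field names are illustrative): if
// 'addr' is ADD(ADDR(LCL_VAR V04), CNS_INT 0) and the constant already
// carries field sequence [F1], appending fieldSeqZero [F2] leaves the
// constant annotated with [F1, F2]. For node shapes not handled below,
// the sequence is instead recorded in the GetZeroOffsetFieldMap() map.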
18728 void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero)
18730 // We expect 'addr' to be an address at this point.
18731 assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF);
18733 FieldSeqNode* fieldSeqUpdate = fieldSeqZero;
18734 GenTree* fieldSeqNode = addr;
18735 bool fieldSeqRecorded = false;
18736 bool isMapAnnotation = false;
18741 printf("\nfgAddFieldSeqForZeroOffset for");
18742 gtDispFieldSeq(fieldSeqZero);
18744 printf("\naddr (Before)\n");
18745 gtDispNode(addr, nullptr, nullptr, false);
18746 gtDispCommonEndLine(addr);
18750 switch (addr->OperGet())
18753 fieldSeqUpdate = GetFieldSeqStore()->Append(addr->gtIntCon.gtFieldSeq, fieldSeqZero);
18754 addr->gtIntCon.gtFieldSeq = fieldSeqUpdate;
18755 fieldSeqRecorded = true;
18760 GenTreeLclFld* lclFld = addr->AsLclFld();
18761 fieldSeqUpdate = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeqZero);
18762 lclFld->gtFieldSeq = fieldSeqUpdate;
18763 fieldSeqRecorded = true;
18768 if (addr->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
18770 fieldSeqNode = addr->gtOp.gtOp1;
18772 GenTreeLclFld* lclFld = addr->gtOp.gtOp1->AsLclFld();
18773 fieldSeqUpdate = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeqZero);
18774 lclFld->gtFieldSeq = fieldSeqUpdate;
18775 fieldSeqRecorded = true;
18780 if (addr->gtOp.gtOp1->OperGet() == GT_CNS_INT)
18782 fieldSeqNode = addr->gtOp.gtOp1;
18784 fieldSeqUpdate = GetFieldSeqStore()->Append(addr->gtOp.gtOp1->gtIntCon.gtFieldSeq, fieldSeqZero);
18785 addr->gtOp.gtOp1->gtIntCon.gtFieldSeq = fieldSeqUpdate;
18786 fieldSeqRecorded = true;
18788 else if (addr->gtOp.gtOp2->OperGet() == GT_CNS_INT)
18790 fieldSeqNode = addr->gtOp.gtOp2;
18792 fieldSeqUpdate = GetFieldSeqStore()->Append(addr->gtOp.gtOp2->gtIntCon.gtFieldSeq, fieldSeqZero);
18793 addr->gtOp.gtOp2->gtIntCon.gtFieldSeq = fieldSeqUpdate;
18794 fieldSeqRecorded = true;
18802 if (fieldSeqRecorded == false)
18804 // Record in the general zero-offset map.
18806 // The "addr" node might already be annotated with a zero-offset field sequence.
18807 FieldSeqNode* existingFieldSeq = nullptr;
18808 if (GetZeroOffsetFieldMap()->Lookup(addr, &existingFieldSeq))
18810 // Append the zero field sequences
18811 fieldSeqUpdate = GetFieldSeqStore()->Append(existingFieldSeq, fieldSeqZero);
18813 // Overwrite the field sequence annotation for 'addr'
18814 GetZeroOffsetFieldMap()->Set(addr, fieldSeqUpdate, NodeToFieldSeqMap::Overwrite);
18815 fieldSeqRecorded = true;
18821 printf(" (After)\n");
18822 gtDispNode(fieldSeqNode, nullptr, nullptr, false);
18823 gtDispCommonEndLine(fieldSeqNode);
18828 //------------------------------------------------------------------------
18829 // fgMarkAddressExposedLocals: Traverses the entire method and marks address
18830 // exposed locals.
18833 // Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold
18834 // to just LCL_VAR, do not result in the involved local being marked
18835 // address exposed.
18837 void Compiler::fgMarkAddressExposedLocals()
18842 printf("\n*************** In fgMarkAddressExposedLocals()\n");
18846 LocalAddressVisitor visitor(this);
18848 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
18850 // Make the current basic block address available globally
18853 for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
18855 visitor.VisitStmt(stmt);
18860 #ifdef FEATURE_SIMD
18862 //-----------------------------------------------------------------------------------
18863 // fgMorphCombineSIMDFieldAssignments:
18864 // If the RHS of the input stmt is a read of the SIMD vector X field, then this function
18865 // will keep reading the next few stmts, based on the vector size (2, 3 or 4).
18866 // If the LHS of the next stmts are contiguous and their RHS are contiguous
18867 // as well, then we replace those statements with a single copyblk.
18870 // block - BasicBlock*. block which stmt belongs to
18871 // stmt - GenTreeStmt*. the stmt node we want to check
18874 // if this function successfully optimized the stmts, then return true. Otherwise
18875 // return false.
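//
// For example (a sketch in C# terms; the names are illustrative): a
// Vector4 store expanded field by field by the importer,
//
//     dst0 = src.X; dst1 = src.Y; dst2 = src.Z; dst3 = src.W;
//
// where dst0..dst3 are contiguous in memory, is collapsed into a single
// SIMD-typed assignment along the lines of IND<simd16>(addrOfDst0) = src.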
18877 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreeStmt* stmt)
18880 GenTree* tree = stmt->gtStmtExpr;
18881 assert(tree->OperGet() == GT_ASG);
18883 GenTree* originalLHS = tree->gtOp.gtOp1;
18884 GenTree* prevLHS = tree->gtOp.gtOp1;
18885 GenTree* prevRHS = tree->gtOp.gtOp2;
18886 unsigned index = 0;
18887 var_types baseType = TYP_UNKNOWN;
18888 unsigned simdSize = 0;
18889 GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
18891 if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
18893 // if the RHS is not from a SIMD vector field X, then there is no need to check further.
18897 var_types simdType = getSIMDTypeForSize(simdSize);
18898 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
18899 int remainingAssignments = assignmentsCount;
18900 GenTreeStmt* curStmt = stmt->getNextStmt();
18901 GenTreeStmt* lastStmt = stmt;
18903 while (curStmt != nullptr && remainingAssignments > 0)
18905 GenTree* exp = curStmt->gtStmtExpr;
18906 if (exp->OperGet() != GT_ASG)
18910 GenTree* curLHS = exp->gtGetOp1();
18911 GenTree* curRHS = exp->gtGetOp2();
18913 if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
18918 remainingAssignments--;
18922 lastStmt = curStmt;
18923 curStmt = curStmt->getNextStmt();
18926 if (remainingAssignments > 0)
18928 // If the number of remaining assignments is greater than zero, then
18929 // the assignments do not write to contiguous memory locations from
18930 // the same vector.
18936 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
18937 printf("From " FMT_BB ", stmt", block->bbNum);
18939 printf(" to stmt");
18940 printTreeID(lastStmt);
18945 for (int i = 0; i < assignmentsCount; i++)
18947 fgRemoveStmt(block, stmt->getNextStmt());
18950 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
18951 if (simdStructNode->OperIsLocal())
18953 setLclRelatedToSIMDIntrinsic(simdStructNode);
18955 GenTree* copyBlkAddr = copyBlkDst;
18956 if (copyBlkAddr->gtOper == GT_LEA)
18958 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
18960 GenTreeLclVarCommon* localDst = nullptr;
18961 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
18963 setLclRelatedToSIMDIntrinsic(localDst);
18966 if (simdStructNode->TypeGet() == TYP_BYREF)
18968 assert(simdStructNode->OperIsLocal());
18969 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
18970 simdStructNode = gtNewIndir(simdType, simdStructNode);
18974 assert(varTypeIsSIMD(simdStructNode));
18980 printf("\n" FMT_BB " stmt", block->bbNum);
18982 printf("(before)\n");
18987 GenTree* dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst);
18988 tree = gtNewAssignNode(dstNode, simdStructNode);
18990 stmt->gtStmtExpr = tree;
18992 // Since we generated a new address node which didn't exist before,
18993 // we should expose this address manually here.
18994 LocalAddressVisitor visitor(this);
18995 visitor.VisitStmt(stmt);
19000 printf("\nReplaced " FMT_BB " stmt", block->bbNum);
19002 printf("(after)\n");
19009 #endif // FEATURE_SIMD
19011 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
19012 GenTreeStmt* SkipNopStmts(GenTreeStmt* stmt)
19014 while ((stmt != nullptr) && stmt->gtStmtExpr->IsNothingNode())
19016 stmt = stmt->gtNextStmt;
19021 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
19023 //------------------------------------------------------------------------
19024 // fgCheckStmtAfterTailCall: check that the statements after the tail call stmt
19025 // candidate are in one of the expected forms, which are described below.
19028 // 'true' if stmts are in the expected form, else 'false'.
19030 bool Compiler::fgCheckStmtAfterTailCall()
19033 // For void calls, we would have created a GT_CALL in the stmt list.
19034 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
19035 // For calls returning structs, we would have a void call, followed by a void return.
19036 // For debuggable code, it would be an assignment of the call to a temp.
19037 // We want to get rid of any of these extra trees, and just leave the call.
19039 GenTreeStmt* callStmt = fgMorphStmt;
19041 GenTreeStmt* nextMorphStmt = callStmt->gtNextStmt;
19043 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
19044 // Legacy Jit64 Compat:
19045 // There could be any number of GT_NOPs between tail call and GT_RETURN.
19046 // That is, the tail call pattern could be one of the following:
19047 // 1) tail.call, nop*, ret
19048 // 2) tail.call, nop*, pop, nop*, ret
19049 // 3) var=tail.call, nop*, ret(var)
19050 // 4) var=tail.call, nop*, pop, ret
19051 // 5) comma(tail.call, nop), nop*, ret
19053 // See impIsTailCallILPattern() for details on tail call IL patterns
19054 // that are supported.
19055 GenTree* callExpr = callStmt->gtStmtExpr;
19057 if (callExpr->gtOper != GT_RETURN)
19059 // First skip all GT_NOPs after the call
19060 nextMorphStmt = SkipNopStmts(nextMorphStmt);
19062 // Check to see if there is a pop.
19063 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
19064 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
19066 // Note that pop opcode may or may not result in a new stmt (for details see
19067 // impImportBlockCode()). Hence, it is not possible to assert about the IR
19068 // form generated by pop but pop tree must be side-effect free so that we can
19069 // delete it safely.
19070 GenTreeStmt* popStmt = nextMorphStmt;
19072 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
19073 // the constituent nodes.
19074 GenTree* popExpr = popStmt->gtStmtExpr;
19075 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
19076 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
19078 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
19079 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
19081 noway_assert(isSideEffectFree);
19083 nextMorphStmt = popStmt->gtNextStmt;
19086 // Next skip any GT_NOP nodes after the pop
19087 nextMorphStmt = SkipNopStmts(nextMorphStmt);
19089 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
19091 // Check that the remaining stmts in the block are in one of the following patterns:
19092 // 1) ret(void)
19093 // 2) ret(cast*(callResultLclVar))
19094 // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block
19095 if (nextMorphStmt != nullptr)
19097 GenTree* callExpr = callStmt->gtStmtExpr;
19098 if (callExpr->gtOper != GT_ASG)
19100 // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar),
19101 // where lclVar was return buffer in the call for structs or simd.
19102 GenTreeStmt* retStmt = nextMorphStmt;
19103 GenTree* retExpr = retStmt->gtStmtExpr;
19104 noway_assert(retExpr->gtOper == GT_RETURN);
19106 nextMorphStmt = retStmt->gtNextStmt;
19110 noway_assert(callExpr->gtGetOp1()->OperIsLocal());
19111 unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
19113 #if FEATURE_TAILCALL_OPT_SHARED_RETURN
19115 // We can have a move from the call result to an lvaInlineeReturnSpillTemp.
19116 // However, we can't check that this assignment was created there.
19117 if (nextMorphStmt->gtStmtExpr->gtOper == GT_ASG)
19119 GenTreeStmt* moveStmt = nextMorphStmt;
19120 GenTree* moveExpr = nextMorphStmt->gtStmtExpr;
19121 noway_assert(moveExpr->gtGetOp1()->OperIsLocal() && moveExpr->gtGetOp2()->OperIsLocal());
19123 unsigned srcLclNum = moveExpr->gtGetOp2()->AsLclVarCommon()->gtLclNum;
19124 noway_assert(srcLclNum == callResultLclNumber);
19125 unsigned dstLclNum = moveExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
19126 callResultLclNumber = dstLclNum;
19128 nextMorphStmt = moveStmt->gtNextStmt;
19130 if (nextMorphStmt != nullptr)
19133 GenTreeStmt* retStmt = nextMorphStmt;
19134 GenTree* retExpr = nextMorphStmt->gtStmtExpr;
19135 noway_assert(retExpr->gtOper == GT_RETURN);
19137 GenTree* treeWithLcl = retExpr->gtGetOp1();
19138 while (treeWithLcl->gtOper == GT_CAST)
19140 noway_assert(!treeWithLcl->gtOverflow());
19141 treeWithLcl = treeWithLcl->gtGetOp1();
19144 noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->gtLclNum);
19146 nextMorphStmt = retStmt->gtNextStmt;
19150 return nextMorphStmt == nullptr;
19153 static const int numberOfTrackedFlags = 5;
19154 static const unsigned trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF,
19155 GTF_ORDER_SIDEEFF};
19157 //------------------------------------------------------------------------
19158 // fgMorphArgList: morph argument list tree without recursion.
19161 // args - argument list tree to morph;
19162 // mac - morph address context, used to morph children.
19165 // morphed argument list.
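//
// For example (a sketch): for an argument list (a, b, c) where only 'b' has
// GTF_CALL set, the last list node carrying GTF_CALL is the one holding 'b',
// so after the second pass the list nodes for 'a' and 'b' have GTF_CALL set
// while the node for 'c' does not.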
19167 GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac)
19169 // Use a non-recursive algorithm that morphs all actual list values,
19170 // memorizes the last list node carrying each effect flag, and then
19171 // spreads the flags across the list during a second pass.
19172 assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT);
19174 GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr};
19176 for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest())
19178 // Morph actual list values.
19179 GenTree*& arg = listNode->Current();
19180 arg = fgMorphTree(arg, mac);
19182 // Remember the last list node with each flag.
19183 for (int i = 0; i < numberOfTrackedFlags; ++i)
19185 if ((arg->gtFlags & trackedFlags[i]) != 0)
19187 memorizedLastNodes[i] = listNode;
19192 for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest())
19194 // Clear all old effects from the list node.
19195 listNode->gtFlags &= ~GTF_ALL_EFFECT;
19197 // Spread each flag to all list nodes (the prefix) up to and including the memorized last node.
19198 for (int i = 0; i < numberOfTrackedFlags; ++i)
19200 if (memorizedLastNodes[i] != nullptr)
19202 listNode->gtFlags |= trackedFlags[i];
19204 if (listNode == memorizedLastNodes[i])
19206 memorizedLastNodes[i] = nullptr;