1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                          Morph                                            XX
9 XX                                                                           XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 */
19 #include "allocacheck.h" // for alloca
21 // Convert the given node into a call to the specified helper passing
22 // the given argument list.
24 // Tries to fold constants; for an overflow cast it also adds an edge to the
25 // overflow exception block. Returns the morphed tree.
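//
// For example (illustrative), a cast that must go through a helper, such as
// CAST(ulong <- double d), is morphed (roughly) into
// CALL CORINFO_HELP_DBL2ULNG(d), with the original GT_CAST node mutated
// in place into the GT_CALL node.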
26 GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper)
30 /* If the operand is a constant, we'll try to fold it */
31 if (oper->OperIsConst())
33 GenTree* oldTree = tree;
35 tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
39 return fgMorphTree(tree);
41 else if (tree->OperKind() & GTK_CONST)
43 return fgMorphConst(tree);
46 // assert that oper is unchanged and that it is still a GT_CAST node
47 noway_assert(tree->gtCast.CastOp() == oper);
48 noway_assert(tree->gtOper == GT_CAST);
50 result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
51 assert(result == tree);
55 /*****************************************************************************
57 * Convert the given node into a call to the specified helper passing
58 * the given argument list.
61 GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args)
63 // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
64 tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);
66 tree->gtCall.gtCallType = CT_HELPER;
67 tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
68 tree->gtCall.gtCallArgs = args;
69 tree->gtCall.gtCallObjp = nullptr;
70 tree->gtCall.gtCallLateArgs = nullptr;
71 tree->gtCall.fgArgInfo = nullptr;
72 tree->gtCall.gtRetClsHnd = nullptr;
73 tree->gtCall.gtCallMoreFlags = 0;
74 tree->gtCall.gtInlineCandidateInfo = nullptr;
75 tree->gtCall.gtControlExpr = nullptr;
78 tree->gtCall.gtCallRegUsedMask = RBM_NONE;
79 #endif // LEGACY_BACKEND
82 // Helper calls are never inline candidates.
84 tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
87 #ifdef FEATURE_READYTORUN_COMPILER
88 tree->gtCall.gtEntryPoint.addr = nullptr;
89 tree->gtCall.gtEntryPoint.accessType = IAT_VALUE;
92 #if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
93 if (varTypeIsLong(tree))
95 GenTreeCall* callNode = tree->AsCall();
96 ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
98 retTypeDesc->InitializeLongReturnType(this);
99 callNode->ClearOtherRegs();
101 #endif // _TARGET_XXX_
103 if (tree->OperMayThrow(this))
105 tree->gtFlags |= GTF_EXCEPT;
109 tree->gtFlags &= ~GTF_EXCEPT;
111 tree->gtFlags |= GTF_CALL;
114 tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
117 /* Perform the morphing */
119 tree = fgMorphArgs(tree->AsCall());
124 /*****************************************************************************
126 * Determine if a relop must be morphed to a qmark to manifest a boolean value.
127 * This is done when code generation can't create straight-line code to do it.
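 *  For example (illustrative), on the legacy backend a long or floating-point
 *  compare used as a value, e.g. "bool b = (x < y)" with long operands, must
 *  be manifested as QMARK(LT(x, y), 1, 0) rather than as straight-line code.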
129 bool Compiler::fgMorphRelopToQmark(GenTree* tree)
131 #ifndef LEGACY_BACKEND
133 #else // LEGACY_BACKEND
134 return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
135 #endif // LEGACY_BACKEND
138 /*****************************************************************************
140 * Morph a cast node (we perform some very simple transformations here).
144 #pragma warning(push)
145 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
147 GenTree* Compiler::fgMorphCast(GenTree* tree)
149 noway_assert(tree->gtOper == GT_CAST);
150 noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE);
152 /* The first sub-operand is the thing being cast */
154 GenTree* oper = tree->gtCast.CastOp();
156 if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
158 // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
159 // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
160 // morphing code to see that type.
161 fgMorphImplicitByRefArgs(oper);
164 var_types srcType = genActualType(oper->TypeGet());
166 var_types dstType = tree->CastToType();
167 unsigned dstSize = genTypeSize(dstType);
169 // See if the cast has to be done in two steps: R -> I
170 if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
172 // Only x86 must go through TYP_DOUBLE to get to all
173 // integral types; everybody else can get straight there,
174 // except when using helpers.
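// For example (roughly), on AMD64:
//     CAST(ulong <- float f)  =>  CAST(ulong <- CAST(double <- f))
// so that the helper only ever has to handle a double source.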
175 if (srcType == TYP_FLOAT
176 #if !FEATURE_STACK_FP_X87
178 #if defined(_TARGET_ARM64_)
179 // Arm64: src = float, dst is overflow conversion.
180 // This goes through helper and hence src needs to be converted to double.
181 && tree->gtOverflow()
182 #elif defined(_TARGET_AMD64_)
183 // Amd64: src = float, dst = uint64 or overflow conversion.
184 // This goes through helper and hence src needs to be converted to double.
185 && (tree->gtOverflow() || (dstType == TYP_ULONG))
186 #elif defined(_TARGET_ARM_)
187 // Arm: src = float, dst = int64/uint64 or overflow conversion.
188 && (tree->gtOverflow() || varTypeIsLong(dstType))
191 #endif // FEATURE_STACK_FP_X87
194 oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
197 // Do we need to do it in two steps: R -> I, then I -> small type?
198 CLANG_FORMAT_COMMENT_ANCHOR;
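// For example (roughly):
//     CAST(byte <- double d)  =>  CAST(byte <- CAST(int <- d))
// so that only the R -> I step may need a helper call.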
200 #if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
201 if (dstSize < genTypeSize(TYP_INT))
203 oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT);
204 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
205 tree->gtFlags &= ~GTF_UNSIGNED;
208 if (dstSize < TARGET_POINTER_SIZE)
210 oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL);
211 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
216 /* Note that if we need to use a helper call then we can not morph oper */
217 if (!tree->gtOverflow())
219 #ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
225 #ifdef _TARGET_X86_ // there is no rounding convert-to-integer instruction on ARM or x64, so skip this
226 #ifdef LEGACY_BACKEND
227 // the RyuJIT backend does not use the x87 FPU and therefore
228 // does not support folding the cast conv.i4(round.d(d))
229 if ((oper->gtOper == GT_INTRINSIC) &&
230 (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
232 /* optimization: conv.i4(round.d(d)) -> round.i(d) */
233 oper->gtType = dstType;
234 return fgMorphTree(oper);
236 // if SSE2 is not enabled, we need the helper
238 #endif // LEGACY_BACKEND
239 if (!opts.compCanUseSSE2)
241 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
244 #endif // _TARGET_X86_
248 #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
251 #else // _TARGET_ARM_
253 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
254 #endif // _TARGET_ARM_
256 #ifdef _TARGET_AMD64_
257 // SSE2 has instructions to convert a float/double directly to a long
262 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
263 #endif //_TARGET_AMD64_
265 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
269 #endif // _TARGET_ARM64_
276 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
278 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
280 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
282 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
287 noway_assert(!"Unexpected dstType");
290 #ifndef _TARGET_64BIT_
291 // The code generation phase (for x86 & ARM32) does not handle casts
292 // directly from [u]long to anything other than [u]int. Insert an
293 // intermediate cast to native int.
294 else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
296 oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL);
297 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
298 tree->gtFlags &= ~GTF_UNSIGNED;
300 #endif //!_TARGET_64BIT_
303 else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
304 !varTypeIsLong(oper->gtCast.CastOp()))
306 // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
307 // except when the ultimate source is a long, because there is no long-to-float helper and it must stay a two-step conversion.
308 // This happens semi-frequently because there is no IL 'conv.r4.un'.
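// For example (roughly):
//     CAST(float <- CAST(double <- x))  =>  CAST(float <- x)
// provided x is not a long.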
309 oper->gtType = TYP_FLOAT;
310 oper->CastToType() = TYP_FLOAT;
311 return fgMorphTree(oper);
313 // Converts long/ulong --> float/double casts into helper calls.
314 else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
316 if (dstType == TYP_FLOAT)
318 // there is only a double helper, so we
319 // - change the dsttype to double
320 // - insert a cast from double to float
321 // - recurse into the resulting tree
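// For example (roughly):
//     CAST(float <- long x)  =>  CAST(float <- CALL CORINFO_HELP_LNG2DBL(x))
// once the recursive morph rewrites the inner (now double-typed) cast.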
322 tree->CastToType() = TYP_DOUBLE;
323 tree->gtType = TYP_DOUBLE;
325 tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
327 return fgMorphTree(tree);
329 if (tree->gtFlags & GTF_UNSIGNED)
330 return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
331 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
333 #endif //_TARGET_ARM_
335 #ifdef _TARGET_AMD64_
336 // Do we have to do a two-step U4/8 -> R4/8 conversion?
337 // Codegen supports the following conversions as one-step operations:
338 // a) Long -> R4/8
339 // b) U8 -> R8
341 // The following conversions are performed as two-step operations using the above:
342 // U4 -> R4/8 = U4 -> Long -> R4/8
343 // U8 -> R4   = U8 -> R8 -> R4
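// For example (roughly):
//     CAST.un(float <- uint u)  =>  CAST(float <- CAST(long <- u))
// where the resulting long -> float conversion is handled as a one-step
// operation by codegen.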
344 else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
346 srcType = genUnsignedType(srcType);
348 if (srcType == TYP_ULONG)
350 if (dstType == TYP_FLOAT)
352 // Codegen can handle U8 -> R8 conversion.
353 // U8 -> R4 = U8 -> R8 -> R4
354 // - change the dsttype to double
355 // - insert a cast from double to float
356 // - recurse into the resulting tree
357 tree->CastToType() = TYP_DOUBLE;
358 tree->gtType = TYP_DOUBLE;
359 tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
360 return fgMorphTree(tree);
363 else if (srcType == TYP_UINT)
365 oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
366 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
367 tree->gtFlags &= ~GTF_UNSIGNED;
370 #endif // _TARGET_AMD64_
373 // Do we have to do a two-step U4/8 -> R4/8 conversion?
374 else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
376 srcType = genUnsignedType(srcType);
378 if (srcType == TYP_ULONG)
380 return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
382 else if (srcType == TYP_UINT)
384 oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
385 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
386 tree->gtFlags &= ~GTF_UNSIGNED;
387 #ifndef LEGACY_BACKEND
388 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
392 #ifndef LEGACY_BACKEND
393 else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
395 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
398 #endif //_TARGET_XARCH_
399 else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
401 // We are casting away GC information. We would like to just
402 // change the type to int, but this gives the emitter fits because
403 // it believes the variable is a GC variable at the beginning of the
404 // instruction group, and it is not turned non-GC by the code generator.
405 // We fix this by copying the GC pointer to a non-GC pointer temp.
406 noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
408 // We generate an assignment to an int and then do the cast from an int. With this we avoid
409 // the gc problem and we allow casts to bytes, longs, etc...
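// For example (roughly), for "(long)objRef" we produce:
//     COMMA(ASG(LCL_VAR tmp, objRef), CAST(long <- LCL_VAR tmp))
// where tmp is a TYP_I_IMPL (non-GC) temp.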
410 unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
411 oper->gtType = TYP_I_IMPL;
412 GenTree* asg = gtNewTempAssign(lclNum, oper);
413 oper->gtType = srcType;
416 GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType);
418 // Generate the comma tree
419 oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
421 return fgMorphTree(oper);
424 // Look for narrowing casts ([u]long -> [u]int) and try to push them
425 // down into the operand before morphing it.
427 // It doesn't matter if this cast is from ulong or long (i.e. if
428 // GTF_UNSIGNED is set) because the transformation is only applied to
429 // overflow-insensitive narrowing casts, which always silently truncate.
431 // Note that casts from [u]long to small integer types are handled above.
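// For example (roughly):
//     CAST(int <- ADD(long a, long b))  =>  ADD(CAST(int <- a), CAST(int <- b))
// which is safe because the lower 32 bits of the sum depend only on the
// lower 32 bits of the operands.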
432 if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
434 // As a special case, look for overflow-sensitive casts of an AND
435 // expression, and see if the second operand is a small constant. Since
436 // the result of an AND is bound by its smaller operand, it may be
437 // possible to prove that the cast won't overflow, which will in turn
438 // allow the cast's operand to be transformed.
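// For example (illustrative), in "checked((uint)(x & 0xFFL))" the AND result
// is bounded by 0xFF, so the cast can never overflow and the GTF_OVERFLOW and
// GTF_EXCEPT flags can be cleared below.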
439 if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
441 GenTree* andOp2 = oper->gtOp.gtOp2;
443 // Special case to the special case: AND with a casted int.
444 if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
446 // gtFoldExprConst will deal with whether the cast is signed or
447 // unsigned, or overflow-sensitive.
448 andOp2 = gtFoldExprConst(andOp2);
449 oper->gtOp.gtOp2 = andOp2;
452 // Look for a constant less than 2^{32} for a cast to uint, or less
453 // than 2^{31} for a cast to int.
454 int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
456 if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
458 // This cast can't overflow.
459 tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
463 // Only apply this transformation during global morph,
464 // when neither the cast node nor the oper node may throw an exception
465 // based on the upper 32 bits.
467 if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
469 // For these operations the lower 32 bits of the result depend only
470 // upon the lower 32 bits of the operands.
472 bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);
474 // For long LSH cast to int, there is a discontinuity in behavior
475 // when the shift amount is 32 or larger.
477 // CAST(INT, LSH(1LL, 31)) == LSH(1, 31)
478 // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31)
480 // CAST(INT, LSH(1LL, 32)) == 0
481 // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1
483 // So some extra validation is needed.
485 if (oper->OperIs(GT_LSH))
487 GenTree* shiftAmount = oper->gtOp.gtOp2;
489 // Expose constant value for shift, if possible, to maximize the number
490 // of cases we can handle.
491 shiftAmount = gtFoldExpr(shiftAmount);
492 oper->gtOp.gtOp2 = shiftAmount;
495 // We may remorph the shift amount tree again later, so clear any morphed flag.
496 shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
499 if (shiftAmount->IsIntegralConst())
501 const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue();
503 if (shiftAmountValue >= 64)
505 // Shift amount is large enough that result is undefined.
506 // Don't try and optimize.
507 assert(!canPushCast);
509 else if (shiftAmountValue >= 32)
511 // Result of the shift is zero.
512 DEBUG_DESTROY_NODE(tree);
513 GenTree* zero = gtNewZeroConNode(TYP_INT);
514 return fgMorphTree(zero);
516 else if (shiftAmountValue >= 0)
518 // Shift amount is small enough that we can push the cast through.
523 // Shift amount is negative and so result is undefined.
524 // Don't try and optimize.
525 assert(!canPushCast);
530 // Shift amount is unknown. We can't optimize this case.
531 assert(!canPushCast);
537 DEBUG_DESTROY_NODE(tree);
539 // Insert narrowing casts for op1 and op2
540 oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType);
541 if (oper->gtOp.gtOp2 != nullptr)
543 oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType);
546 // Clear the GTF_MUL_64RSLT flag if it is set
547 if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
549 oper->gtFlags &= ~GTF_MUL_64RSLT;
552 // The operation now produces a 32-bit result.
553 oper->gtType = TYP_INT;
555 // Remorph the new tree as the casts that we added may be folded away.
556 return fgMorphTree(oper);
562 noway_assert(tree->gtOper == GT_CAST);
564 /* Morph the operand */
565 tree->gtCast.CastOp() = oper = fgMorphTree(oper);
567 /* Reset the call flag */
568 tree->gtFlags &= ~GTF_CALL;
570 /* Reset the assignment flag */
571 tree->gtFlags &= ~GTF_ASG;
573 /* unless we have an overflow cast, reset the except flag */
574 if (!tree->gtOverflow())
576 tree->gtFlags &= ~GTF_EXCEPT;
579 /* Just in case new side effects were introduced */
580 tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
582 if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
584 srcType = oper->TypeGet();
586 /* See if we can discard the cast */
587 if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
589 if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType))
591 if (varTypeIsSmall(srcType))
593 // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the
594 // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType
595 // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is
596 // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion to u2.
598 srcType = genActualType(srcType);
601 srcType = genUnsignedType(srcType);
604 if (srcType == dstType)
605 { // Certainly if they are identical it is pointless
609 if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
611 unsigned varNum = oper->gtLclVarCommon.gtLclNum;
612 LclVarDsc* varDsc = &lvaTable[varNum];
613 if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
619 bool unsignedSrc = varTypeIsUnsigned(srcType);
620 bool unsignedDst = varTypeIsUnsigned(dstType);
621 bool signsDiffer = (unsignedSrc != unsignedDst);
622 unsigned srcSize = genTypeSize(srcType);
624 // For same-sized casts with the same signedness,
625 // or for non-overflow casts, we discard them as well.
626 if (srcSize == dstSize)
628 /* This should have been handled above */
629 noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
636 if (!tree->gtOverflow())
638 /* For small type casts, when necessary we force
639 the src operand to the dstType and allow the
640 implied load from memory to perform the casting */
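// For example (roughly), for "(ushort)shortLoad" (same size, different signs)
// we can simply re-type the load node to TYP_USHORT and let the implied load
// from memory perform the cast.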
641 if (varTypeIsSmall(srcType))
643 switch (oper->gtOper)
649 oper->gtType = dstType;
661 else if (srcSize < dstSize) // widening cast
663 // Keep any long casts
664 if (dstSize == sizeof(int))
666 // Only keep signed to unsigned widening cast with overflow check
667 if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
673 // Widening casts from unsigned or to signed can never overflow
675 if (unsignedSrc || !unsignedDst)
677 tree->gtFlags &= ~GTF_OVERFLOW;
678 if (!(oper->gtFlags & GTF_EXCEPT))
680 tree->gtFlags &= ~GTF_EXCEPT;
684 else // if (srcSize > dstSize)
686 // Try to narrow the operand of the cast and discard the cast
687 // Note: Do not narrow a cast that is marked as a CSE
688 // And do not narrow if the oper is marked as a CSE either
690 if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
691 optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
693 optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
695 /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
696 if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
698 oper = oper->gtCast.CastOp();
705 switch (oper->gtOper)
707 /* If the operand is a constant, we'll fold it */
713 GenTree* oldTree = tree;
715 tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
717 // Did we get a comma throw as a result of gtFoldExprConst?
718 if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
720 noway_assert(fgIsCommaThrow(tree));
721 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
722 fgMorphTreeDone(tree);
725 else if (tree->gtOper != GT_CAST)
730 noway_assert(tree->gtCast.CastOp() == oper); // unchanged
735 /* Check for two consecutive casts into the same dstType */
736 if (!tree->gtOverflow())
738 var_types dstType2 = oper->CastToType();
739 if (dstType == dstType2)
746 #ifdef LEGACY_BACKEND
748 /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
749 so that the code generator will know not to convert the result
750 of the idiv to a regpair */
752 if (dstType == TYP_INT)
754 tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
759 if (dstType == TYP_UINT)
761 tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
765 #endif // LEGACY_BACKEND
768 // Check for a cast of a GT_COMMA with a throw overflow
769 // Bug 110829: Since this optimization will bash the types,
770 // neither oper nor commaOp2 can be CSE candidates
771 if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
773 GenTree* commaOp2 = oper->gtOp.gtOp2;
775 if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
777 // need type of oper to be same as tree
778 if (tree->gtType == TYP_LONG)
780 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
781 commaOp2->gtIntConCommon.SetLngValue(0);
782 /* Change the types of oper and commaOp2 to TYP_LONG */
783 oper->gtType = commaOp2->gtType = TYP_LONG;
785 else if (varTypeIsFloating(tree->gtType))
787 commaOp2->ChangeOperConst(GT_CNS_DBL);
788 commaOp2->gtDblCon.gtDconVal = 0.0;
789 // Change the types of oper and commaOp2
790 // X87 promotes everything to TYP_DOUBLE,
791 // but others are a little more precise
792 const var_types newTyp
793 #if FEATURE_X87_DOUBLES
795 #else // FEATURE_X87_DOUBLES
797 #endif // FEATURE_X87_DOUBLES
798 oper->gtType = commaOp2->gtType = newTyp;
802 commaOp2->ChangeOperConst(GT_CNS_INT);
803 commaOp2->gtIntCon.gtIconVal = 0;
804 /* Change the types of oper and commaOp2 to TYP_INT */
805 oper->gtType = commaOp2->gtType = TYP_INT;
809 if (vnStore != nullptr)
811 fgValueNumberTreeConst(commaOp2);
814 /* Return the GT_COMMA node as the new tree */
821 } /* end switch (oper->gtOper) */
824 if (tree->gtOverflow())
826 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
833 /* Here we've eliminated the cast, so just return its operand */
834 assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
836 DEBUG_DESTROY_NODE(tree);
843 /*****************************************************************************
845 * Perform an unwrap operation on a Proxy object
848 GenTree* Compiler::fgUnwrapProxy(GenTree* objRef)
850 assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
852 CORINFO_EE_INFO* pInfo = eeGetEEInfo();
855 // Perform the unwrap:
857 // This requires two extra indirections.
858 // We mark these indirections as 'invariant' and
859 // the CSE logic will hoist them when appropriate.
861 // Note that each dereference is a GC pointer
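// The resulting tree is (roughly):
//     IND(ADD(IND(ADD(objRef, offsetOfTransparentProxyRP)), offsetOfRealProxyServer))
// with both GT_IND nodes marked GTF_IND_INVARIANT.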
863 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
865 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
866 objRef->gtFlags |= GTF_IND_INVARIANT;
868 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
870 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
871 objRef->gtFlags |= GTF_IND_INVARIANT;
873 // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
877 /*****************************************************************************
879 * Morph an argument list; compute the pointer argument count in the process.
881 * NOTE: This function can be called from any place in the JIT to perform re-morphing
882 * due to graph altering modifications such as copy / constant propagation
885 unsigned UpdateGT_LISTFlags(GenTree* tree)
887 assert(tree->gtOper == GT_LIST);
890 if (tree->gtOp.gtOp2)
892 flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
895 flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
897 tree->gtFlags &= ~GTF_ALL_EFFECT;
898 tree->gtFlags |= flags;
900 return tree->gtFlags;
904 void fgArgTabEntry::Dump()
906 printf("fgArgTabEntry[arg %u", argNum);
907 printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
908 if (regNum != REG_STK)
910 printf(", %s, regs=%u", getRegName(regNum), numRegs);
914 printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
916 printf(", align=%u", alignment);
917 if (lateArgInx != (unsigned)-1)
919 printf(", lateArgInx=%u", lateArgInx);
927 printf(", tmpNum=V%02u", tmpNum);
931 printf(", needPlace");
939 printf(", processed");
947 printf(", isBackFilled");
951 printf(", isNonStandard");
957 fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
961 argCount = 0; // filled in arg count, starts at zero
962 nextSlotNum = INIT_ARG_STACK_SLOT;
964 #if defined(UNIX_X86_ABI)
965 alignmentDone = false;
969 #if FEATURE_FIXED_OUT_ARGS
973 argTableSize = numArgs; // the allocated table size
976 hasStackArgs = false;
977 argsComplete = false;
980 if (argTableSize == 0)
986 argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
990 /*****************************************************************************
992 * fgArgInfo Copy Constructor
994 * This method needs to act like a copy constructor for fgArgInfo.
995 * The newCall needs to have its fgArgInfo initialized such that
996 * it is an exact copy of the oldCall's.
997 * We have to take care since the argument information
998 * in the argTable contains pointers that must point to the
999 * new arguments and not the old arguments.
1001 fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
1003 fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo;
1005 compiler = oldArgInfo->compiler;
1007 argCount = 0; // filled in arg count, starts at zero
1008 nextSlotNum = INIT_ARG_STACK_SLOT;
1009 stkLevel = oldArgInfo->stkLevel;
1010 #if defined(UNIX_X86_ABI)
1011 alignmentDone = oldArgInfo->alignmentDone;
1012 stkSizeBytes = oldArgInfo->stkSizeBytes;
1013 padStkAlign = oldArgInfo->padStkAlign;
1015 #if FEATURE_FIXED_OUT_ARGS
1016 outArgSize = oldArgInfo->outArgSize;
1018 argTableSize = oldArgInfo->argTableSize;
1019 argsComplete = false;
1021 if (argTableSize > 0)
1023 argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
1024 for (unsigned inx = 0; inx < argTableSize; inx++)
1026 argTable[inx] = nullptr;
1030 assert(oldArgInfo->argsComplete);
1032 // We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first argument,
1033 // so we can iterate over these argument lists more uniformly.
1034 // We need to provide temporary non-null first arguments to these constructors; if we use them, we'll replace them.
1035 GenTreeArgList* newArgs;
1036 GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
1037 GenTreeArgList* oldArgs;
1038 GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
1040 if (newCall->gtCallObjp == nullptr)
1042 assert(oldCall->gtCallObjp == nullptr);
1043 newArgs = newCall->gtCallArgs;
1044 oldArgs = oldCall->gtCallArgs;
1048 assert(oldCall->gtCallObjp != nullptr);
1049 newArgObjp.Current() = newCall->gtCallObjp;
1050 newArgs = &newArgObjp;
1051 oldArgObjp.Current() = oldCall->gtCallObjp;
1052 oldArgs = &oldArgObjp;
1057 GenTreeArgList* newParent = nullptr;
1058 GenTreeArgList* oldParent = nullptr;
1059 fgArgTabEntry** oldArgTable = oldArgInfo->argTable;
1060 bool scanRegArgs = false;
1064 /* Get hold of the next argument values for the oldCall and newCall */
1066 newCurr = newArgs->Current();
1067 oldCurr = oldArgs->Current();
1068 if (newArgs != &newArgObjp)
1070 newParent = newArgs;
1071 oldParent = oldArgs;
1075 assert(newParent == nullptr && oldParent == nullptr);
1077 newArgs = newArgs->Rest();
1078 oldArgs = oldArgs->Rest();
1080 fgArgTabEntry* oldArgTabEntry = nullptr;
1081 fgArgTabEntry* newArgTabEntry = nullptr;
1083 for (unsigned inx = 0; inx < argTableSize; inx++)
1085 oldArgTabEntry = oldArgTable[inx];
1087 if (oldArgTabEntry->parent == oldParent)
1089 assert((oldParent == nullptr) == (newParent == nullptr));
1091 // We have found the matching "parent" field in oldArgTabEntry
1093 newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1095 // First block copy all fields
1097 *newArgTabEntry = *oldArgTabEntry;
1099 // Then update all GenTree* fields in the newArgTabEntry
1101 newArgTabEntry->parent = newParent;
1103 // The node field is likely to have been updated
1104 // to point at a node in the gtCallLateArgs list
1106 if (oldArgTabEntry->node == oldCurr)
1108 // node is not pointing into the gtCallLateArgs list
1109 newArgTabEntry->node = newCurr;
1113 // node must be pointing into the gtCallLateArgs list
1115 // We will fix this pointer up in the next loop
1117 newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
1122 // Now initialize the proper element in the argTable array
1124 argTable[inx] = newArgTabEntry;
1128 // We should have found the matching oldArgTabEntry and created the newArgTabEntry
1130 assert(newArgTabEntry != nullptr);
1135 newArgs = newCall->gtCallLateArgs;
1136 oldArgs = oldCall->gtCallLateArgs;
1140 /* Get hold of the next argument values for the oldCall and newCall */
1142 assert(newArgs->OperIsList());
1144 newCurr = newArgs->Current();
1145 newArgs = newArgs->Rest();
1147 assert(oldArgs->OperIsList());
1149 oldCurr = oldArgs->Current();
1150 oldArgs = oldArgs->Rest();
1152 fgArgTabEntry* oldArgTabEntry = nullptr;
1153 fgArgTabEntry* newArgTabEntry = nullptr;
1155 for (unsigned inx = 0; inx < argTableSize; inx++)
1157 oldArgTabEntry = oldArgTable[inx];
1159 if (oldArgTabEntry->node == oldCurr)
1161 // We have found the matching "node" field in oldArgTabEntry
1163 newArgTabEntry = argTable[inx];
1164 assert(newArgTabEntry != nullptr);
1166 // update the "node" GenTree* fields in the newArgTabEntry
1168 assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
1170 newArgTabEntry->node = newCurr;
1177 argCount = oldArgInfo->argCount;
1178 nextSlotNum = oldArgInfo->nextSlotNum;
1179 hasRegArgs = oldArgInfo->hasRegArgs;
1180 hasStackArgs = oldArgInfo->hasStackArgs;
1181 argsComplete = true;
1185 void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
1187 assert(argCount < argTableSize);
1188 argTable[argCount] = curArgTabEntry;
1192 fgArgTabEntry* fgArgInfo::AddRegArg(
1193 unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
1195 fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1197 curArgTabEntry->argNum = argNum;
1198 curArgTabEntry->node = node;
1199 curArgTabEntry->parent = parent;
1200 curArgTabEntry->regNum = regNum;
1201 curArgTabEntry->slotNum = 0;
1202 curArgTabEntry->numRegs = numRegs;
1203 curArgTabEntry->numSlots = 0;
1204 curArgTabEntry->alignment = alignment;
1205 curArgTabEntry->lateArgInx = (unsigned)-1;
1206 curArgTabEntry->tmpNum = (unsigned)-1;
1207 curArgTabEntry->isSplit = false;
1208 curArgTabEntry->isTmp = false;
1209 curArgTabEntry->needTmp = false;
1210 curArgTabEntry->needPlace = false;
1211 curArgTabEntry->processed = false;
1212 curArgTabEntry->isHfaRegArg = false;
1213 curArgTabEntry->isBackFilled = false;
1214 curArgTabEntry->isNonStandard = false;
1217 AddArg(curArgTabEntry);
1218 return curArgTabEntry;
1221 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1222 fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum,
1228 const bool isStruct,
1229 const regNumber otherRegNum,
1230 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
1232 fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
1233 assert(curArgTabEntry != nullptr);
1235 // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
1236 // PlaceHolder node (in the case of a needed late argument, for example).
1237 // This would require using an extra flag. At creation time the state is right,
1238 // and this assert enforces that.
1239 assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
1240 curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
1241 curArgTabEntry->isStruct = isStruct; // is this a struct arg
1243 if (isStruct && structDescPtr != nullptr)
1245 curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
1248 return curArgTabEntry;
1250 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1252 fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
1256 unsigned alignment FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
1258 fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1260 nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
1262 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1263 // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
1264 // PlaceHolder node (in the case of a needed late argument, for example).
1265 // This would require using an extra flag. At creation time the state is right,
1266 // and this assert enforces that.
1267 assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
1268 curArgTabEntry->isStruct = isStruct; // is this a struct arg
1269 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1271 curArgTabEntry->argNum = argNum;
1272 curArgTabEntry->node = node;
1273 curArgTabEntry->parent = parent;
1274 curArgTabEntry->regNum = REG_STK;
1275 curArgTabEntry->slotNum = nextSlotNum;
1276 curArgTabEntry->numRegs = 0;
1277 curArgTabEntry->numSlots = numSlots;
1278 curArgTabEntry->alignment = alignment;
1279 curArgTabEntry->lateArgInx = (unsigned)-1;
1280 curArgTabEntry->tmpNum = (unsigned)-1;
1281 curArgTabEntry->isSplit = false;
1282 curArgTabEntry->isTmp = false;
1283 curArgTabEntry->needTmp = false;
1284 curArgTabEntry->needPlace = false;
1285 curArgTabEntry->processed = false;
1286 curArgTabEntry->isHfaRegArg = false;
1287 curArgTabEntry->isBackFilled = false;
1288 curArgTabEntry->isNonStandard = false;
1290 hasStackArgs = true;
1291 AddArg(curArgTabEntry);
1293 nextSlotNum += numSlots;
1294 return curArgTabEntry;
1297 void fgArgInfo::RemorphReset()
1299 nextSlotNum = INIT_ARG_STACK_SLOT;
1302 fgArgTabEntry* fgArgInfo::RemorphRegArg(
1303 unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
1305 fgArgTabEntry* curArgTabEntry = nullptr;
1306 unsigned regArgInx = 0;
1309 for (inx = 0; inx < argCount; inx++)
1311 curArgTabEntry = argTable[inx];
1312 if (curArgTabEntry->argNum == argNum)
1319 if (curArgTabEntry->parent != nullptr)
1321 assert(curArgTabEntry->parent->OperIsList());
1322 argx = curArgTabEntry->parent->Current();
1323 isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
1327 argx = curArgTabEntry->node;
1336 // if this was a nonstandard arg the table is definitive
1337 if (curArgTabEntry->isNonStandard)
1339 regNum = curArgTabEntry->regNum;
1342 assert(curArgTabEntry->argNum == argNum);
1343 assert(curArgTabEntry->regNum == regNum);
1344 assert(curArgTabEntry->alignment == alignment);
1345 assert(curArgTabEntry->parent == parent);
1347 if (curArgTabEntry->node != node)
1349 GenTree* argx = nullptr;
1350 unsigned regIndex = 0;
1352 /* process the register argument list */
1353 for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
1355 argx = list->Current();
1356 assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
1357 if (regIndex == regArgInx)
1362 assert(regIndex == regArgInx);
1363 assert(regArgInx == curArgTabEntry->lateArgInx);
1365 if (curArgTabEntry->node != argx)
1367 curArgTabEntry->node = argx;
1370 return curArgTabEntry;
1373 void fgArgInfo::RemorphStkArg(unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment)
1375 fgArgTabEntry* curArgTabEntry = nullptr;
1376 bool isRegArg = false;
1377 unsigned regArgInx = 0;
1381 for (inx = 0; inx < argCount; inx++)
1383 curArgTabEntry = argTable[inx];
1385 if (curArgTabEntry->parent != nullptr)
1387 assert(curArgTabEntry->parent->OperIsList());
1388 argx = curArgTabEntry->parent->Current();
1389 isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
1393 argx = curArgTabEntry->node;
1397 if (curArgTabEntry->argNum == argNum)
1408 nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
1410 assert(curArgTabEntry->argNum == argNum);
1411 assert(curArgTabEntry->slotNum == nextSlotNum);
1412 assert(curArgTabEntry->numSlots == numSlots);
1413 assert(curArgTabEntry->alignment == alignment);
1414 assert(curArgTabEntry->parent == parent);
1415 assert(parent->OperIsList());
1417 #if FEATURE_FIXED_OUT_ARGS
1418 if (curArgTabEntry->node != node)
1422 GenTree* argx = nullptr;
1423 unsigned regIndex = 0;
1425 /* process the register argument list */
1426 for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
1428 argx = list->Current();
1429 assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
1430 if (regIndex == regArgInx)
1435 assert(regIndex == regArgInx);
1436 assert(regArgInx == curArgTabEntry->lateArgInx);
1438 if (curArgTabEntry->node != argx)
1440 curArgTabEntry->node = argx;
1445 assert(parent->Current() == node);
1446 curArgTabEntry->node = node;
1450 curArgTabEntry->node = node;
1453 nextSlotNum += numSlots;
1456 void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
1458 fgArgTabEntry* curArgTabEntry = nullptr;
1459 assert(argNum < argCount);
1460 for (unsigned inx = 0; inx < argCount; inx++)
1462 curArgTabEntry = argTable[inx];
1463 if (curArgTabEntry->argNum == argNum)
1469 assert(numRegs > 0);
1470 assert(numSlots > 0);
1474 assert(curArgTabEntry->isSplit == true);
1475 assert(curArgTabEntry->numRegs == numRegs);
1476 assert(curArgTabEntry->numSlots == numSlots);
1477 assert(hasStackArgs == true);
1481 curArgTabEntry->isSplit = true;
1482 curArgTabEntry->numRegs = numRegs;
1483 curArgTabEntry->numSlots = numSlots;
1484 hasStackArgs = true;
1486 nextSlotNum += numSlots;
1489 void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTree* newNode)
1491 fgArgTabEntry* curArgTabEntry = nullptr;
1492 assert(argNum < argCount);
1493 for (unsigned inx = 0; inx < argCount; inx++)
1495 curArgTabEntry = argTable[inx];
1496 if (curArgTabEntry->argNum == argNum)
1501 assert(curArgTabEntry->parent->Current() == newNode);
1503 curArgTabEntry->node = newNode;
1504 curArgTabEntry->tmpNum = tmpNum;
1505 curArgTabEntry->isTmp = true;
1508 void fgArgInfo::ArgsComplete()
1510 bool hasStackArgs = false;
1511 bool hasStructRegArg = false;
1513 for (unsigned curInx = 0; curInx < argCount; curInx++)
1515 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1516 assert(curArgTabEntry != nullptr);
1517 GenTree* argx = curArgTabEntry->node;
1519 if (curArgTabEntry->regNum == REG_STK)
1521 hasStackArgs = true;
1522 #if !FEATURE_FIXED_OUT_ARGS
1523 // On x86 we use push instructions to pass arguments:
1524 // The non-register arguments are evaluated and pushed in order
1525 // and they are never evaluated into temps
1530 #if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
1531 else if (curArgTabEntry->isSplit)
1533 hasStructRegArg = true;
1534 hasStackArgs = true;
1537 else // we have a register argument, next we look for a struct type.
1539 if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
1541 hasStructRegArg = true;
1545 /* If the argument tree contains an assignment (GTF_ASG) then the argument
1546 and every earlier argument (except constants) must be evaluated into temps
1547 since there may be other arguments that follow and they may use the value being assigned.
1549 EXAMPLE: ArgTab is "a, a=5, a"
1550 -> when we see the second arg "a=5"
1551 we know the first two arguments "a, a=5" have to be evaluated into temps
1553 For the case of an assignment, we only know that there exists some assignment someplace
1554 in the tree. We don't know what is being assigned so we are very conservative here
1555 and assume that any local variable could have been assigned.
1558 if (argx->gtFlags & GTF_ASG)
1560 // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
1561 // a tmp, then we need a temp in the late arg list.
1562 if ((argCount > 1) || argx->OperIsCopyBlkOp()
1563 #ifdef FEATURE_FIXED_OUT_ARGS
1564 || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
1565 // that we only have late non-register args when that feature is on.
1566 #endif // FEATURE_FIXED_OUT_ARGS
1569 curArgTabEntry->needTmp = true;
1572 // For all previous arguments, unless they are a simple constant
1573 // we require that they be evaluated into temps
1574 for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1576 fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
1577 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1579 assert(prevArgTabEntry->node);
1580 if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
1582 prevArgTabEntry->needTmp = true;
1587 #if FEATURE_FIXED_OUT_ARGS
1588 // Like calls, if this argument has a tree that will do an inline throw,
1589 // i.e. a call to a jit helper, then we need to treat it like a call (but only
1590 // if there are/were any stack args).
1591 // This means unnesting, sorting, etc. Technically this is overly
1592 // conservative, but I want to avoid as much special-case debug-only code
1593 // as possible, so leveraging the GTF_CALL flag is the easiest.
1595 if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
1596 compiler->opts.compDbgCode &&
1597 (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
1599 for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
1601 if (otherInx == curInx)
1606 if (argTable[otherInx]->regNum == REG_STK)
1608 argx->gtFlags |= GTF_CALL;
1613 #endif // FEATURE_FIXED_OUT_ARGS
1615 /* If it contains a call (GTF_CALL) then itself and everything before the call
1616 with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
1617 has to be kept in the right order since we will move the call to the first position)
1619 For calls we don't have to be quite as conservative as we are with an assignment
1620 since the call won't be modifying any non-address taken LclVars.
1623 if (argx->gtFlags & GTF_CALL)
1625 if (argCount > 1) // If this is not the only argument
1627 curArgTabEntry->needTmp = true;
1629 else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
1631 // Spill all arguments that are floating point calls
1632 curArgTabEntry->needTmp = true;
1635 // All previous arguments may need to be evaluated into temps
1636 for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1638 fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
1639 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1640 assert(prevArgTabEntry->node);
1642 // For all previous arguments, if they have any GTF_ALL_EFFECT
1643 // we require that they be evaluated into a temp
1644 if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
1646 prevArgTabEntry->needTmp = true;
1648 #if FEATURE_FIXED_OUT_ARGS
1649 // Or, if they are stored into the FIXED_OUT_ARG area
1650 // we require that they be moved to the gtCallLateArgs
1651 // and replaced with a placeholder node
1652 else if (prevArgTabEntry->regNum == REG_STK)
1654 prevArgTabEntry->needPlace = true;
1656 #if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
1657 else if (prevArgTabEntry->isSplit)
1659 prevArgTabEntry->needPlace = true;
1666 #ifndef LEGACY_BACKEND
1667 #if FEATURE_MULTIREG_ARGS
1668 // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
1669 // with multiple indirections, so here we consider spilling it into a tmp LclVar.
1671 CLANG_FORMAT_COMMENT_ANCHOR;
1673 bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1);
1675 bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
1678 if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false))
1680 if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
1682 // Spill multireg struct arguments that have Assignments or Calls embedded in them
1683 curArgTabEntry->needTmp = true;
1687 // We call gtPrepareCost to measure the cost of evaluating this tree
1688 compiler->gtPrepareCost(argx);
1690 if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
1692 // Spill multireg struct arguments that are expensive to evaluate twice
1693 curArgTabEntry->needTmp = true;
1695 #if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_)
1696 else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
1698 // SIMD types do not need the optimization below due to their sizes
1699 if (argx->OperIs(GT_SIMD) || (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
1700 argx->AsObj()->gtOp1->gtOp.gtOp1->OperIs(GT_SIMD)))
1702 curArgTabEntry->needTmp = true;
1706 #ifndef _TARGET_ARM_
1707 // TODO-Arm: This optimization is not implemented for ARM32
1708 // so we skip this for ARM32 until it is ported to use RyuJIT backend
1710 else if (argx->OperGet() == GT_OBJ)
1712 GenTreeObj* argObj = argx->AsObj();
1713 CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
1714 unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
1721 // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
1723 if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
1725 // If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes
1726 // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
1728 curArgTabEntry->needTmp = true;
1735 // Spill any GT_OBJ multireg structs that are difficult to extract
1737 // When we have a GT_OBJ of a struct with the above sizes we would need
1738 // to use 3 or 4 load instructions to load the exact size of this struct.
1739 // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
1740 // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
1741 // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing the argument.
1744 curArgTabEntry->needTmp = true;
1751 #endif // !_TARGET_ARM_
1754 #endif // FEATURE_MULTIREG_ARGS
1755 #endif // LEGACY_BACKEND
1758 // We only care because we can't spill structs and qmarks involve a lot of spilling, but
1759 // if we don't have qmarks, then it doesn't matter.
1760 // So check for Qmarks globally once here, instead of inside the loop.
1762 const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
1764 #if FEATURE_FIXED_OUT_ARGS
1766 // For Arm/x64 we only care because we can't reorder a register
1767 // argument that uses GT_LCLHEAP. This is an optimization to
1768 // save a check inside the below loop.
1770 const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
1774 const bool hasStackArgsWeCareAbout = hasStackArgs;
1776 #endif // FEATURE_FIXED_OUT_ARGS
1778 // If we have any stack args we have to force the evaluation
1779 // of any arguments passed in registers that might throw an exception
1781 // Technically we are only required to handle the following two cases:
1782 // a GT_IND with GTF_IND_RNGCHK (only on x86) or
1783 // a GT_LCLHEAP node that allocates stuff on the stack
1785 if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
1787 for (unsigned curInx = 0; curInx < argCount; curInx++)
1789 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1790 assert(curArgTabEntry != nullptr);
1791 GenTree* argx = curArgTabEntry->node;
1793 // Examine the register args that are currently not marked needTmp
1795 if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
1797 if (hasStackArgsWeCareAbout)
1799 #if !FEATURE_FIXED_OUT_ARGS
1800 // On x86 we previously recorded a stack depth of zero when
1801 // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
1802 // Thus we can not reorder the argument after any stack based argument
1803 // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
1804 // check for it explicitly.)
1806 if (argx->gtFlags & GTF_EXCEPT)
1808 curArgTabEntry->needTmp = true;
1812 // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
1814 if (argx->gtFlags & GTF_EXCEPT)
1816 assert(compiler->compLocallocUsed);
1818 // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
1820 if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
1822 curArgTabEntry->needTmp = true;
1828 if (hasStructRegArgWeCareAbout)
1830 // Returns true if a GT_QMARK node is encountered in the argx tree
1832 if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
1834 curArgTabEntry->needTmp = true;
1842 argsComplete = true;
1845 void fgArgInfo::SortArgs()
1847 assert(argsComplete == true);
1850 if (compiler->verbose)
1852 printf("\nSorting the arguments:\n");
1856 /* Shuffle the arguments around before we build the gtCallLateArgs list.
1857 The idea is to move all "simple" arguments like constants and local vars
1858 to the end of the table, and move the complex arguments towards the beginning
1859 of the table. This will help prevent registers from being spilled by
1860 allowing us to evaluate the more complex arguments before the simpler arguments.
1861 The argTable ends up looking like:
1862 +------------------------------------+ <--- argTable[argCount - 1]
1863 | constants |
1864 +------------------------------------+
1865 | local var / local field |
1866 +------------------------------------+
1867 | remaining arguments sorted by cost |
1868 +------------------------------------+
1869 | temps (argTable[].needTmp = true) |
1870 +------------------------------------+
1871 | args with calls (GTF_CALL) |
1872 +------------------------------------+ <--- argTable[0]
1875 /* Set the beginning and end for the new argument table */
1878 unsigned begTab = 0;
1879 unsigned endTab = argCount - 1;
1880 unsigned argsRemaining = argCount;
1882 // First take care of arguments that are constants.
1883 // [We use a backward iterator pattern]
1890 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1892 if (curArgTabEntry->regNum != REG_STK)
1897 // Skip any already processed args
1899 if (!curArgTabEntry->processed)
1901 GenTree* argx = curArgTabEntry->node;
1903 // put constants at the end of the table
1905 if (argx->gtOper == GT_CNS_INT)
1907 noway_assert(curInx <= endTab);
1909 curArgTabEntry->processed = true;
1911 // place curArgTabEntry at the endTab position by performing a swap
1913 if (curInx != endTab)
1915 argTable[curInx] = argTable[endTab];
1916 argTable[endTab] = curArgTabEntry;
1923 } while (curInx > 0);
1925 if (argsRemaining > 0)
1927 // Next take care of arguments that are calls.
1928 // [We use a forward iterator pattern]
1930 for (curInx = begTab; curInx <= endTab; curInx++)
1932 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1934 // Skip any already processed args
1936 if (!curArgTabEntry->processed)
1938 GenTree* argx = curArgTabEntry->node;
1940 // put calls at the beginning of the table
1942 if (argx->gtFlags & GTF_CALL)
1944 curArgTabEntry->processed = true;
1946 // place curArgTabEntry at the begTab position by performing a swap
1948 if (curInx != begTab)
1950 argTable[curInx] = argTable[begTab];
1951 argTable[begTab] = curArgTabEntry;
1961 if (argsRemaining > 0)
1963 // Next take care of arguments that are temps.
1964 // These temps come before the arguments that are
1965 // ordinary local vars or local fields
1966 // since this will give them a better chance to become
1967 // enregistered into their actual argument register.
1968 // [We use a forward iterator pattern]
1970 for (curInx = begTab; curInx <= endTab; curInx++)
1972 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1974 // Skip any already processed args
1976 if (!curArgTabEntry->processed)
1978 if (curArgTabEntry->needTmp)
1980 curArgTabEntry->processed = true;
1982 // place curArgTabEntry at the begTab position by performing a swap
1984 if (curInx != begTab)
1986 argTable[curInx] = argTable[begTab];
1987 argTable[begTab] = curArgTabEntry;
1997 if (argsRemaining > 0)
1999 // Next take care of local var and local field arguments.
2000 // These are moved towards the end of the argument evaluation.
2001 // [We use a backward iterator pattern]
2003 curInx = endTab + 1;
2008 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2010 // Skip any already processed args
2012 if (!curArgTabEntry->processed)
2014 GenTree* argx = curArgTabEntry->node;
2016 if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
2018 noway_assert(curInx <= endTab);
2020 curArgTabEntry->processed = true;
2022 // place curArgTabEntry at the endTab position by performing a swap
2024 if (curInx != endTab)
2026 argTable[curInx] = argTable[endTab];
2027 argTable[endTab] = curArgTabEntry;
2034 } while (curInx > begTab);
2037 // Finally, take care of all the remaining arguments.
2038 // Note that we fill in one arg at a time using a while loop.
2039 bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
2040 while (argsRemaining > 0)
2042 /* Find the most expensive arg remaining and evaluate it next */
2044 fgArgTabEntry* expensiveArgTabEntry = nullptr;
2045 unsigned expensiveArg = UINT_MAX;
2046 unsigned expensiveArgCost = 0;
2048 // [We use a forward iterator pattern]
2050 for (curInx = begTab; curInx <= endTab; curInx++)
2052 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2054 // Skip any already processed args
2056 if (!curArgTabEntry->processed)
2058 GenTree* argx = curArgTabEntry->node;
2060 // We should have already handled these kinds of args
2061 assert(argx->gtOper != GT_LCL_VAR);
2062 assert(argx->gtOper != GT_LCL_FLD);
2063 assert(argx->gtOper != GT_CNS_INT);
2065 // This arg should either have no persistent side effects or be the last one in our table
2066 // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
2068 if (argsRemaining == 1)
2070 // This is the last arg to place
2071 expensiveArg = curInx;
2072 expensiveArgTabEntry = curArgTabEntry;
2073 assert(begTab == endTab);
2080 /* We call gtPrepareCost to measure the cost of evaluating this tree */
2081 compiler->gtPrepareCost(argx);
2084 if (argx->gtCostEx > expensiveArgCost)
2086 // Remember this arg as the most expensive one that we have yet seen
2087 expensiveArgCost = argx->gtCostEx;
2088 expensiveArg = curInx;
2089 expensiveArgTabEntry = curArgTabEntry;
2095 noway_assert(expensiveArg != UINT_MAX);
2097 // put the most expensive arg towards the beginning of the table
2099 expensiveArgTabEntry->processed = true;
2101 // place expensiveArgTabEntry at the begTab position by performing a swap
2103 if (expensiveArg != begTab)
2105 argTable[expensiveArg] = argTable[begTab];
2106 argTable[begTab] = expensiveArgTabEntry;
2112 costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
2115 // The table should now be completely filled and thus begTab should now be adjacent to endTab
2116 // and argsRemaining should be zero
2117 assert(begTab == (endTab + 1));
2118 assert(argsRemaining == 0);
2120 #if !FEATURE_FIXED_OUT_ARGS
2121 // Finally build the regArgList
2123 callTree->gtCall.regArgList = NULL;
2124 callTree->gtCall.regArgListCount = regCount;
2126 unsigned regInx = 0;
2127 for (curInx = 0; curInx < argCount; curInx++)
2129 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2131 if (curArgTabEntry->regNum != REG_STK)
2133 // Encode the argument register in the register mask
2135 callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
2139 #endif // !FEATURE_FIXED_OUT_ARGS
2145 void fgArgInfo::Dump(Compiler* compiler)
2147 for (unsigned curInx = 0; curInx < ArgCount(); curInx++)
2149 fgArgTabEntry* curArgEntry = ArgTable()[curInx];
2150 curArgEntry->Dump();
2155 //------------------------------------------------------------------------------
2156 // fgMakeTmpArgNode : This function creates a tmp var only if needed.
2157 // We need this to be done in order to enforce ordering
2158 // of the evaluation of arguments.
2161 // tmpVarNum - the var num which we clone into the newly created temp var.
2164 // the newly created temp var tree.
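// A sketch of the two common result shapes (V07 is a hypothetical temp
// number; the exact shape is target-dependent):
//    scalar temp:                     GT_LCL_VAR V07
//    struct temp passed by address:   GT_OBJ(GT_ADDR(GT_LCL_VAR V07))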
2166 GenTree* Compiler::fgMakeTmpArgNode(
2167 unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
2169 LclVarDsc* varDsc = &lvaTable[tmpVarNum];
2170 assert(varDsc->lvIsTemp);
2171 var_types type = varDsc->TypeGet();
2173 // Create a copy of the temp to go into the late argument list
2174 GenTree* arg = gtNewLclvNode(tmpVarNum, type);
2175 GenTree* addrNode = nullptr;
2177 if (varTypeIsStruct(type))
2180 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))
2182 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2184 arg->gtFlags |= GTF_DONT_CSE;
2186 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2187 // Can this type be passed in a single register?
2188 // If so, the following call will return the corresponding primitive type.
2189 // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
2191 bool passedInRegisters = false;
2192 structPassingKind kind;
2193 CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
2194 var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
2196 if (structBaseType != TYP_UNKNOWN)
2198 passedInRegisters = true;
2199 type = structBaseType;
2201 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2203 // If it is passed in registers, don't get the address of the var. Make it a
2204 // field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
2205 if (passedInRegisters)
2207 arg->ChangeOper(GT_LCL_FLD);
2212 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2213 // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
2214 var_types addrType = type;
2216 var_types addrType = TYP_BYREF;
2218 arg = gtNewOperNode(GT_ADDR, addrType, arg);
2221 #if FEATURE_MULTIREG_ARGS
2222 #ifdef _TARGET_ARM64_
2223 assert(varTypeIsStruct(type));
2224 if (lvaIsMultiregStruct(varDsc))
2226 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
2227 // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
2228 // We will create a GT_OBJ for the argument below.
2229 // This will be passed by value in two registers.
2230 assert(addrNode != nullptr);
2232 // Create an Obj of the temp to use it as a call argument.
2233 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2235 // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
2236 // this is only to preserve former behavior (though some CSE'ing of struct
2237 // values can be pessimizing, so enabling this may require some additional tuning).
2238 arg->gtFlags |= GTF_DONT_CSE;
2240 #elif defined(_TARGET_ARM_)
2241 // Always create an Obj of the temp to use it as a call argument.
2242 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2243 arg->gtFlags |= GTF_DONT_CSE;
2244 #endif // _TARGET_ARM_
2245 #endif // FEATURE_MULTIREG_ARGS
2248 #else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))
2250 // On other targets, we pass the struct by value
2251 assert(varTypeIsStruct(type));
2253 addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2255 // Get a new Obj node temp to use it as a call argument.
2256 // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
2257 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
2259 #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))
2261 } // (varTypeIsStruct(type))
2263 if (addrNode != nullptr)
2265 assert(addrNode->gtOper == GT_ADDR);
2267 // This will prevent this LclVar from being optimized away
2268 lvaSetVarAddrExposed(tmpVarNum);
2270 // the child of a GT_ADDR is required to have this flag set
2271 addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
2277 void fgArgInfo::EvalArgsToTemps()
2279 assert(argsSorted == true);
2281 unsigned regArgInx = 0;
2282 // Now go through the argument table and perform the necessary evaluation into temps
2283 GenTreeArgList* tmpRegArgNext = nullptr;
2284 for (unsigned curInx = 0; curInx < argCount; curInx++)
2286 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2288 GenTree* argx = curArgTabEntry->node;
2289 GenTree* setupArg = nullptr;
2292 #if !FEATURE_FIXED_OUT_ARGS
2293 // Only ever set for FEATURE_FIXED_OUT_ARGS
2294 assert(curArgTabEntry->needPlace == false);
2296 // On x86 and other archs that use push instructions to pass arguments:
2297 // Only the register arguments need to be replaced with placeholder nodes.
2298 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2300 if (curArgTabEntry->regNum == REG_STK)
2304 if (curArgTabEntry->needTmp)
2308 if (curArgTabEntry->isTmp == true)
2310 // Create a copy of the temp to go into the late argument list
2311 tmpVarNum = curArgTabEntry->tmpNum;
2312 defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2313 argTable[curInx]->structDesc.passedInRegisters));
2315 // mark the original node as a late argument
2316 argx->gtFlags |= GTF_LATE_ARG;
2320 // Create a temp assignment for the argument
2321 // Put the temp in the gtCallLateArgs list
2322 CLANG_FORMAT_COMMENT_ANCHOR;
2325 if (compiler->verbose)
2327 printf("Argument with 'side effect'...\n");
2328 compiler->gtDispTree(argx);
2332 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2333 noway_assert(argx->gtType != TYP_STRUCT);
2336 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2337 if (argx->gtOper == GT_MKREFANY)
2339 // For GT_MKREFANY, typically the actual struct copying does
2340 // not have any side-effects and can be delayed. So instead
2341 // of using a temp for the whole struct, we can just use a temp
2342 // for the operand that has a side-effect.
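// A sketch of the split (V09 is a hypothetical temp number), for the
// case where op2 has no side-effects:
//    before:     GT_MKREFANY(op1, op2)             // op1 has a side-effect
//    early arg:  GT_ASG(V09, op1)                  // 'setupArg'
//    late arg:   GT_MKREFANY(GT_LCL_VAR V09, op2)  // now side-effect free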
2344 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2346 operand = argx->gtOp.gtOp1;
2348 // In the early argument evaluation, place an assignment to the temp
2349 // from the source operand of the mkrefany
2350 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2352 // Replace the operand for the mkrefany with the new temp.
2353 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2355 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2357 operand = argx->gtOp.gtOp2;
2359 // In the early argument evaluation, place an assignment to the temp
2360 // from the source operand of the mkrefany
2361 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2363 // Replace the operand for the mkrefany with the new temp.
2364 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2368 if (setupArg != nullptr)
2370 // Now keep the mkrefany for the late argument list
2373 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2374 defArg->gtFlags &= ~GTF_ALL_EFFECT;
2378 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2380 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2382 #ifndef LEGACY_BACKEND
2383 if (compiler->fgOrder == Compiler::FGOrderLinear)
2385 // We'll reference this temporary variable just once
2386 // when we perform the function call after
2387 // setting up this argument.
2388 varDsc->lvRefCnt = 1;
2390 #endif // !LEGACY_BACKEND
2392 var_types lclVarType = genActualType(argx->gtType);
2393 var_types scalarType = TYP_UNKNOWN;
2395 if (setupArg->OperIsCopyBlkOp())
2397 setupArg = compiler->fgMorphCopyBlock(setupArg);
2398 #if defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))
2399 // This scalar LclVar widening step is only performed for ARM architectures.
2401 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2402 unsigned structSize = varDsc->lvExactSize;
2404 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2405 #endif // _TARGET_ARM*_
2408 // scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8)
2409 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2411 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2412 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2416 // Create a copy of the temp to go to the late argument list
2417 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2420 curArgTabEntry->isTmp = true;
2421 curArgTabEntry->tmpNum = tmpVarNum;
2424 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2425 // might have left holes in the used registers (see
2426 // fgAddSkippedRegsInPromotedStructArg).
2427 // Too bad we're not that smart for these intermediate temps...
2428 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2430 regNumber argReg = curArgTabEntry->regNum;
2431 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2432 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2434 argReg = genRegArgNext(argReg);
2435 allUsedRegs |= genRegMask(argReg);
2437 #ifdef LEGACY_BACKEND
2438 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2439 #endif // LEGACY_BACKEND
2441 #endif // _TARGET_ARM_
2444 /* mark the assignment as a late argument */
2445 setupArg->gtFlags |= GTF_LATE_ARG;
2448 if (compiler->verbose)
2450 printf("\n Evaluate to a temp:\n");
2451 compiler->gtDispTree(setupArg);
2456 else // curArgTabEntry->needTmp == false
2459 // Only register args are replaced with placeholder nodes
2460 // and the stack based arguments are evaluated and pushed in order.
2462 // On Arm/x64 - When needTmp is false and needPlace is false,
2463 // the non-register arguments are evaluated and stored in order.
2464 // When needPlace is true we have a nested call that comes after
2465 // this argument so we have to replace it in the gtCallArgs list
2466 // (the initial argument evaluation list) with a placeholder.
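// For example (hypothetical trees): in call(a, call2(b)), the nested
// call2 comes after 'a', so 'a' gets needPlace set; 'a' is then replaced
// in gtCallArgs by a GT_ARGPLACE node and the tree that computes it is
// moved to gtCallLateArgs, to be evaluated after call2.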
2468 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2473 /* No temp needed - move the whole node to the gtCallLateArgs list */
2475 /* The argument is deferred and put in the late argument list */
2479 // Create a placeholder node to put in its place in gtCallLateArgs.
2481 // For a struct type we also need to record the class handle of the arg.
2482 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2484 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2486 // All structs are either passed (and retyped) as integral types, OR they
2487 // are passed by reference.
2488 noway_assert(argx->gtType != TYP_STRUCT);
2490 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2492 if (varTypeIsStruct(defArg))
2494 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2495 GenTree* defArgTmp = defArg;
2497 // The GT_OBJ may be a child of a GT_COMMA.
2498 while (defArgTmp->gtOper == GT_COMMA)
2500 defArgTmp = defArgTmp->gtOp.gtOp2;
2502 assert(varTypeIsStruct(defArgTmp));
2504 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2505 if (defArgTmp->gtOper == GT_MKREFANY)
2507 clsHnd = compiler->impGetRefAnyClass();
2509 else if (defArgTmp->gtOper == GT_OBJ)
2511 clsHnd = defArgTmp->AsObj()->gtClass;
2515 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2519 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2521 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2523 /* mark the placeholder node as a late argument */
2524 setupArg->gtFlags |= GTF_LATE_ARG;
2527 if (compiler->verbose)
2529 if (curArgTabEntry->regNum == REG_STK)
2531 printf("Deferred stack argument :\n");
2535 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2538 compiler->gtDispTree(argx);
2539 printf("Replaced with placeholder node:\n");
2540 compiler->gtDispTree(setupArg);
2545 if (setupArg != nullptr)
2547 if (curArgTabEntry->parent)
2549 GenTree* parent = curArgTabEntry->parent;
2550 /* a normal argument from the list */
2551 noway_assert(parent->OperIsList());
2552 noway_assert(parent->gtOp.gtOp1 == argx);
2554 parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT);
2556 parent->gtOp.gtOp1 = setupArg;
2560 /* must be the gtCallObjp */
2561 noway_assert(callTree->gtCall.gtCallObjp == argx);
2563 callTree->gtCall.gtCallObjp = setupArg;
2567 /* deferred arg goes into the late argument list */
2569 if (tmpRegArgNext == nullptr)
2571 tmpRegArgNext = compiler->gtNewArgList(defArg);
2572 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2576 noway_assert(tmpRegArgNext->OperIsList());
2577 noway_assert(tmpRegArgNext->Current());
2578 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2580 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2581 tmpRegArgNext = tmpRegArgNext->Rest();
2584 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2586 curArgTabEntry->node = defArg;
2587 curArgTabEntry->lateArgInx = regArgInx++;
2591 if (compiler->verbose)
2593 printf("\nShuffled argument table: ");
2594 for (unsigned curInx = 0; curInx < argCount; curInx++)
2596 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2598 if (curArgTabEntry->regNum != REG_STK)
2600 printf("%s ", getRegName(curArgTabEntry->regNum));
2608 // Get the late arg for arg at position argIndex.
2609 // argIndex - 0-based position to get late arg for.
2610 // Caller must ensure this position has a late arg.
2611 GenTree* fgArgInfo::GetLateArg(unsigned argIndex)
2613 for (unsigned j = 0; j < this->ArgCount(); j++)
2615 if (this->ArgTable()[j]->argNum == argIndex)
2617 return this->ArgTable()[j]->node;
2620 // Caller must ensure late arg exists.
2624 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2626 assert(!IsUninitialized(stkLvl));
2627 this->stkLevel = stkLvl;
2630 unsigned fgArgInfo::RetrieveStkLevel()
2632 assert(!IsUninitialized(stkLevel));
2636 // Return a conservative estimate of the stack size in bytes.
2637 // It will be used only on the intercepted-for-host code path to copy the arguments.
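// For example, on Windows x64 (MAX_REG_ARG == 4, REGSIZE_BYTES == 8) a
// call with 6 arguments gives numStkArgs == 2 and an estimate of 16 bytes.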
2638 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2642 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2648 if (numArgs > MAX_REG_ARG)
2650 numStkArgs = numArgs - MAX_REG_ARG;
2657 return numStkArgs * REGSIZE_BYTES;
2660 //------------------------------------------------------------------------------
2661 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
2662 // otherwise, insert a comma-form temp.
2665 // pOp - a pointer to the child node we will be replacing with the comma expression that
2666 // evaluates the node to a temp and returns the result
2669 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2672 // The result tree MUST be added to the tree structure since the ref counts are
2673 // already incremented.
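// A sketch of the two cases (V03 is a hypothetical temp number):
//    *pOp is GT_LCL_VAR V03:  a clone of it is returned (ref count bumped);
//    otherwise:               *pOp becomes GT_COMMA(GT_ASG(V03, oldTree), V03)
//                             and a fresh GT_LCL_VAR V03 is returned.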
2675 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2677 GenTree* tree = *pOp;
2678 if (tree->IsLocal())
2680 auto result = gtClone(tree);
2681 if (lvaLocalVarRefCounted)
2683 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2689 GenTree* result = fgInsertCommaFormTemp(pOp);
2691 // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
2692 // Therefore, the ref count has to be incremented 3 times for *pOp and result, assuming the
2693 // caller adds the result to the tree.
2694 if (lvaLocalVarRefCounted)
2696 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2697 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2698 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2705 //------------------------------------------------------------------------------
2706 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2707 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2710 // ppTree - a pointer to the child node we will be replacing with the comma expression that
2711 // evaluates ppTree to a temp and returns the result
2713 // structType - value type handle if the temp created is of TYP_STRUCT.
2716 // A fresh GT_LCL_VAR node referencing the temp which has not been used
2719 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2721 GenTree* subTree = *ppTree;
2723 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2725 if (varTypeIsStruct(subTree))
2727 assert(structType != nullptr);
2728 lvaSetStruct(lclNum, structType, false);
2731 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2732 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2733 // setting the type of the lcl var nodes created.
2734 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2736 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2738 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2742 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2745 //------------------------------------------------------------------------
2746 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2749 // callNode - the call for which we are doing the argument morphing
2752 // Like most morph methods, this method returns the morphed node,
2753 // though in this case there are currently no scenarios where the
2754 // node itself is re-created.
2757 // This method is even less idempotent than most morph methods.
2758 // That is, it makes changes that should not be redone. It uses the existence
2759 // of gtCallLateArgs (the late arguments list) to determine if it has
2760 // already done that work.
2762 // The first time it is called (i.e. during global morphing), this method
2763 // computes the "late arguments". This is when it determines which arguments
2764 // need to be evaluated to temps prior to the main argument setup, and which
2765 // can be directly evaluated into the argument location. It also creates a
2766 // second argument list (gtCallLateArgs) that does the final placement of the
2767 // arguments, e.g. into registers or onto the stack.
2769 // The "non-late arguments", aka the gtCallArgs, are doing the in-order
2770 // evaluation of the arguments that might have side-effects, such as embedded
2771 // assignments, calls or possible throws. In these cases, it and earlier
2772 // arguments must be evaluated to temps.
2774 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2775 // if we have any nested calls, we need to defer the copying of the argument
2776 // into the fixed argument area until after the call. If the argument did not
2777 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2778 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
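// To illustrate (hypothetical trees, on a fixed-out-args target): for
// call(a, f(b)) the nested call is evaluated to a temp early, while the
// simple register arg 'a' is deferred:
//    gtCallArgs (early):  GT_ARGPLACE (for 'a'),  GT_ASG(tmpF, f(b))
//    gtCallLateArgs:      the tree for 'a',       GT_LCL_VAR tmpF
// so side-effects happen in order in the early list, and the late list
// just places values into their final registers or stack slots.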
2781 #pragma warning(push)
2782 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2784 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2789 unsigned flagsSummary = 0;
2790 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2792 unsigned argIndex = 0;
2794 unsigned intArgRegNum = 0;
2795 unsigned fltArgRegNum = 0;
2798 regMaskTP argSkippedRegMask = RBM_NONE;
2799 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2800 #endif // _TARGET_ARM_
2802 #if defined(_TARGET_X86_)
2803 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2805 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2808 unsigned argSlots = 0;
2809 unsigned nonRegPassedStructSlots = 0;
2810 bool reMorphing = call->AreArgsComplete();
2811 bool callHasRetBuffArg = call->HasRetBufArg();
2813 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2814 bool callIsVararg = call->IsVarargs();
2817 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2818 // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2819 // to make sure to call EvalArgsToTemps. fgMakeOutgoingStructArgCopy just marks the argument
2820 // as needing a temp variable, and EvalArgsToTemps actually creates the temp variable node.
2821 bool hasStackArgCopy = false;
2824 #ifndef LEGACY_BACKEND
2825 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2826 // following the normal calling convention or in the normal argument registers. We either mark existing
2827 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2828 // non-standard arguments into the argument list, below.
2829 class NonStandardArgs
2831 struct NonStandardArg
2833 regNumber reg; // The register to be assigned to this non-standard argument.
2834 GenTree* node; // The tree node representing this non-standard argument.
2835 // Note that this must be updated if the tree node changes due to morphing!
2838 ArrayStack<NonStandardArg> args;
2841 NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2845 //-----------------------------------------------------------------------------
2846 // Add: add a non-standard argument to the table of non-standard arguments
2849 // node - a GenTree node that has a non-standard argument.
2850 // reg - the register to assign to this node.
2855 void Add(GenTree* node, regNumber reg)
2857 NonStandardArg nsa = {reg, node};
2861 //-----------------------------------------------------------------------------
2862 // Find: Look for a GenTree* in the set of non-standard args.
2865 // node - a GenTree node to look for
2868 // The index of the non-standard argument (a non-negative, unique, stable number).
2869 // If the node is not a non-standard argument, return -1.
2871 int Find(GenTree* node)
2873 for (int i = 0; i < args.Height(); i++)
2875 if (node == args.Index(i).node)
2883 //-----------------------------------------------------------------------------
2884 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2885 // set the register to use for the node.
2888 // node - a GenTree node to look for
2889 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2890 // 'node' is found in the non-standard argument set.
2893 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the register to use.
2895 // 'false' otherwise (in this case, *pReg is unmodified).
2897 bool FindReg(GenTree* node, regNumber* pReg)
2899 for (int i = 0; i < args.Height(); i++)
2901 NonStandardArg& nsa = args.IndexRef(i);
2902 if (node == nsa.node)
2911 //-----------------------------------------------------------------------------
2912 // Replace: Replace the non-standard argument node at a given index. This is done when
2913 // the original node was replaced via morphing, but we need to continue to assign a
2914 // particular non-standard arg to it.
2917 // index - the index of the non-standard arg. It must exist.
2918 // node - the new GenTree node.
2923 void Replace(int index, GenTree* node)
2925 args.IndexRef(index).node = node;
2928 } nonStandardArgs(this);
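// Usage sketch (mirroring the code further below): an entry is added with
//    nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
// and, because fgMorphTree may replace an argument node, the entry is
// re-pointed afterwards using Find() followed by Replace().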
2929 #endif // !LEGACY_BACKEND
2931 // Count of args. On first morph, this is counted before we've filled in the arg table.
2932 // On remorph, we grab it from the arg table.
2933 unsigned numArgs = 0;
2935 // Process the late arguments (which were determined by a previous caller).
2936 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2937 // may need to refer to it.
2940 // We need to reMorph the gtCallLateArgs early since that is what triggers
2941 // the expression folding and we need to have the final folded gtCallLateArgs
2942 // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2943 // with the folded tree that represents the final optimized argument nodes.
2945 // However if a range-check needs to be generated for any of these late
2946 // arguments we also need to "know" what the stack depth will be when we generate
2947 // code to branch to the throw range check failure block as that is part of the
2948 // GC information contract for that block.
2950 // Since the late arguments are evaluated last we have pushed all of the
2951 // other arguments on the stack before we evaluate these late arguments,
2952 // so we record the stack depth on the first morph call when reMorphing
2953 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2955 if (call->gtCallLateArgs != nullptr)
2957 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2958 fgPtrArgCntCur += callStkLevel;
2959 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2960 flagsSummary |= call->gtCallLateArgs->gtFlags;
2961 fgPtrArgCntCur -= callStkLevel;
2963 assert(call->fgArgInfo != nullptr);
2964 call->fgArgInfo->RemorphReset();
2966 numArgs = call->fgArgInfo->ArgCount();
2970 // First we need to count the args
2971 if (call->gtCallObjp)
2975 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2980 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2981 // arguments registers that don't follow the normal progression of argument registers in the calling
2982 // convention (such as for the ARM64 fixed return buffer argument x8).
2984 // *********** NOTE *************
2985 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2986 // in the implementation of fast tail call.
2987 // *********** END NOTE *********
2988 CLANG_FORMAT_COMMENT_ANCHOR;
2990 #if !defined(LEGACY_BACKEND)
2991 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2992 // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
2993 // Set the argument registers correctly here.
2994 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2996 GenTreeArgList* args = call->gtCallArgs;
2997 GenTree* arg1 = args->Current();
2998 assert(arg1 != nullptr);
2999 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
3001 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3002 #if defined(_TARGET_ARM_)
3003 else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)
3005 GenTree* arg = call->gtCallObjp;
3006 if (arg->OperIsLocal())
3008 arg = gtClone(arg, true);
3012 GenTree* tmp = fgInsertCommaFormTemp(&arg);
3013 call->gtCallObjp = arg;
3014 call->gtFlags |= GTF_ASG;
3017 noway_assert(arg != nullptr);
3019 GenTree* newArg = new (this, GT_ADDR)
3020 GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell);
3022 // Append newArg as the last arg
3023 GenTreeArgList** insertionPoint = &call->gtCallArgs;
3024 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
3027 *insertionPoint = gtNewListNode(newArg, nullptr);
3030 nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg());
3032 #endif // defined(_TARGET_ARM_)
3033 #if defined(_TARGET_X86_)
3034 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
3035 // hi part to be in EDX. This sets the argument registers up correctly.
3036 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
3037 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
3039 GenTreeArgList* args = call->gtCallArgs;
3040 GenTree* arg1 = args->Current();
3041 assert(arg1 != nullptr);
3042 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
3044 args = args->Rest();
3045 GenTree* arg2 = args->Current();
3046 assert(arg2 != nullptr);
3047 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
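// For example (a sketch; names are placeholders): a 64-bit shift that
// reaches here as CALL CORINFO_HELP_LLSH(loHalf, hiHalf, ...) has its
// first two list entries pinned to EAX and EDX respectively by the
// Add() calls above.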
3049 #else // !_TARGET_X86_
3050 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
3051 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
3052 // convention for x86/SSE.
3054 // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
3056 if (hasFixedRetBuffReg() && call->HasRetBufArg())
3058 args = call->gtCallArgs;
3059 assert(args != nullptr);
3060 assert(args->OperIsList());
3062 argx = call->gtCallArgs->Current();
3064 // We don't increment numArgs here, since we already counted this argument above.
3066 nonStandardArgs.Add(argx, theFixedRetBuffReg());
3069 // We are allowed to have a Fixed Return Buffer argument combined
3070 // with any of the remaining non-standard arguments
3072 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
3074 assert(!call->gtCallCookie);
3075 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
3076 // It will be used only on the intercepted-for-host code path to copy the arguments.
3078 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
3079 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
3082 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
3084 else if (call->IsVirtualStub())
3086 if (!call->IsTailCallViaHelper())
3088 GenTree* stubAddrArg = fgGetStubAddrArg(call);
3089 // And push the stub address onto the list of arguments
3090 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
3093 nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum);
3097 // If it is a VSD call getting dispatched via tail call helper,
3098 // fgMorphTailCall() would materialize stub addr as an additional
3099 // parameter added to the original arg list, and hence there is no need to
3100 // add it as a non-standard arg.
3104 #endif // !_TARGET_X86_
3105 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
3107 assert(!call->IsUnmanaged());
3109 GenTree* arg = call->gtCallCookie;
3110 noway_assert(arg != nullptr);
3111 call->gtCallCookie = nullptr;
3113 #if defined(_TARGET_X86_)
3114 // x86 passes the cookie on the stack as the final argument to the call.
3115 GenTreeArgList** insertionPoint = &call->gtCallArgs;
3116 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
3119 *insertionPoint = gtNewListNode(arg, nullptr);
3120 #else // !defined(_TARGET_X86_)
3121 // All other architectures pass the cookie in a register.
3122 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
3123 #endif // defined(_TARGET_X86_)
3125 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
3128 // put destination into R10/EAX
3129 arg = gtClone(call->gtCallAddr, true);
3130 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
3133 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
3135 // finally change this call to a helper call
3136 call->gtCallType = CT_HELPER;
3137 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
3139 #if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_)
3140 // For arm, we dispatch code the same way as for VSD, using virtualStubParamInfo->GetReg()
3141 // for indirection cell address, which ZapIndirectHelperThunk expects.
3142 if (call->IsR2RRelativeIndir())
3144 assert(call->gtEntryPoint.addr != nullptr);
3146 size_t addrValue = (size_t)call->gtEntryPoint.addr;
3147 GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR);
3148 indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM;
3150 // Push the stub address onto the list of arguments.
3151 call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs);
3154 nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum);
3157 #endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_
3158 #endif // !LEGACY_BACKEND
3160 // Allocate the fgArgInfo for the call node;
3162 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
3165 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
3166 * During the first call to fgMorphArgs we also record the
3167 * information about late arguments we have in 'fgArgInfo'.
3168 * This information is used later to construct the gtCallLateArgs */
3170 /* Process the 'this' argument value, if present */
3172 argx = call->gtCallObjp;
3176 argx = fgMorphTree(argx);
3177 call->gtCallObjp = argx;
3178 flagsSummary |= argx->gtFlags;
3180 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
3182 assert(argIndex == 0);
3184 /* We must fill in or update the argInfo table */
3188 /* this is a register argument - possibly update it in the table */
3189 call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
3193 assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
3195 /* this is a register argument - put it in the table */
3196 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3197 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3199 false, REG_STK, nullptr
3200 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3203 // this can't be a struct.
3204 assert(argx->gtType != TYP_STRUCT);
3206 /* Increment the argument register count and argument index */
3207 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3210 #ifdef WINDOWS_AMD64_ABI
3211 // Whenever we pass an integer register argument
3212 // we skip the corresponding floating point register argument
3214 #endif // WINDOWS_AMD64_ABI
3218 noway_assert(!"the 'this' pointer cannot be a floating point type");
3225 // Compute the maximum number of arguments that can be passed in registers.
3226 // For X86 we handle the varargs and unmanaged calling conventions
3228 if (call->gtFlags & GTF_CALL_POP_ARGS)
3230 noway_assert(intArgRegNum < MAX_REG_ARG);
3231 // No more register arguments for varargs (CALL_POP_ARGS)
3232 maxRegArgs = intArgRegNum;
3234 // Add in the ret buff arg
3235 if (callHasRetBuffArg)
3239 if (call->IsUnmanaged())
3241 noway_assert(intArgRegNum == 0);
3243 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3245 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3246 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3247 call->gtCallArgs->gtOp.gtOp1->gtOper ==
3248 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3256 // Add in the ret buff arg
3257 if (callHasRetBuffArg)
3260 #endif // _TARGET_X86_
3262 /* Morph the user arguments */
3263 CLANG_FORMAT_COMMENT_ANCHOR;
3265 #if defined(_TARGET_ARM_)
3267 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3268 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3269 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3270 // appear in a lower-numbered register than floating point argument N. That is, argument
3271 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3272 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3273 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3274 // only back-fill single registers, since there is no way with these types to create
3275 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3276 // available (with 16 FP argument registers). Consider this code:
3278 // struct HFA { float x, y, z; }; // a three element HFA
3279 // void bar(float a1, // passed in f0
3280 // double a2, // passed in f2/f3; skip f1 for alignment
3281 // HFA a3, // passed in f4/f5/f6
3282 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3283 // HFA a5, // passed in f10/f11/f12
3284 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
3286 // float a7, // passed in f1 (back-filled)
3287 // float a8, // passed in f7 (back-filled)
3288 // float a9, // passed in f13 (back-filled)
3289 // float a10) // passed on the stack in [OutArg+0]
3291 // Note that if we ever support FP types with larger alignment requirements, then there could
3292 // be more than single register back-fills.
3294 // Once we assign a floating-point argument to the stack, they all must be on the stack.
3295 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3296 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3297 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3298 // and prevent any additional floating-point arguments from going in registers.
3300 bool anyFloatStackArgs = false;
3302 #endif // _TARGET_ARM_
3304 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3305 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3306 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3308 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3309 // hasMultiregStructArgs is true if there are any structs that are eligible for passing
3310 // in registers; this is true even if they are not actually passed in registers (i.e. because
3311 // previous arguments have used up available argument registers).
3312 bool hasMultiregStructArgs = false;
3313 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3315 GenTree** parentArgx = &args->gtOp.gtOp1;
3317 #if FEATURE_MULTIREG_ARGS
3318 if (!hasStructArgument)
3320 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3322 #endif // FEATURE_MULTIREG_ARGS
3324 #ifndef LEGACY_BACKEND
3325 // Record the index of any nonStandard arg that we may be processing here, as we are
3326 // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3327 GenTree* orig_argx = *parentArgx;
3328 int nonStandard_index = nonStandardArgs.Find(orig_argx);
3329 #endif // !LEGACY_BACKEND
3331 argx = fgMorphTree(*parentArgx);
3334 assert(args->OperIsList());
3335 assert(argx == args->Current());
3337 #ifndef LEGACY_BACKEND
3338 if ((nonStandard_index != -1) && (argx != orig_argx))
3340 // We need to update the node field for this nonStandard arg here
3341 // as it was changed by the call to fgMorphTree
3342 nonStandardArgs.Replace(nonStandard_index, argx);
3344 #endif // !LEGACY_BACKEND
3346 /* Change the node to TYP_I_IMPL so we don't report GC info
3347 * NOTE: We deferred this from the importer because of the inliner */
3349 if (argx->IsVarAddr())
3351 argx->gtType = TYP_I_IMPL;
3354 bool passUsingFloatRegs;
3355 unsigned argAlign = 1;
3356 // Setup any HFA information about 'argx'
3357 var_types hfaType = GetHfaType(argx);
3358 bool isHfaArg = varTypeIsFloating(hfaType);
3359 unsigned hfaSlots = 0;
3363 hfaSlots = GetHfaCount(argx);
3365 // If we have an HFA struct, it's possible we transition from a method that originally
3366 // only had integer types to one that now has FP types. We have to communicate this
3367 // through this flag since LSRA later on will use this flag to determine whether
3368 // or not to track the FP register set.
3370 compFloatingPointUsed = true;
3374 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3375 bool isRegArg = false;
3376 bool isNonStandard = false;
3377 regNumber nonStdRegNum = REG_NA;
3379 fgArgTabEntry* argEntry = nullptr;
3383 argEntry = gtArgEntryByArgNum(call, argIndex);
3388 bool passUsingIntRegs;
3391 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3392 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3396 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3397 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3400 GenTree* curArg = argx;
3401 // If late args have already been computed, use the node in the argument table.
3402 if (argEntry != NULL && argEntry->isTmp)
3404 curArg = argEntry->node;
3409 argAlign = argEntry->alignment;
3413 // We don't use the "size" return value from InferOpSizeAlign().
3414 codeGen->InferOpSizeAlign(curArg, &argAlign);
3416 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3417 argAlign /= TARGET_POINTER_SIZE;
3422 if (passUsingFloatRegs)
3424 if (fltArgRegNum % 2 == 1)
3426 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3430 else if (passUsingIntRegs)
3432 if (intArgRegNum % 2 == 1)
3434 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3439 if (argSlots % 2 == 1)
3445 #elif defined(_TARGET_ARM64_)
3449 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3453 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3456 #elif defined(_TARGET_AMD64_)
3459 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3463 passUsingFloatRegs = varTypeIsFloating(argx);
3465 #elif defined(_TARGET_X86_)
3467 passUsingFloatRegs = false;
3470 #error Unsupported or unset target architecture
3473 bool isBackFilled = false;
3474 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3475 var_types structBaseType = TYP_STRUCT;
3476 unsigned structSize = 0;
3478 bool isStructArg = varTypeIsStruct(argx);
3482 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3483 // Get the struct description for the already completed struct argument.
3484 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, argx);
3485 assert(fgEntryPtr != nullptr);
3487 // As described in a few other places, this can happen when the argx was morphed
3488 // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
3489 // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3490 // was a struct and the struct classification.
3491 isStructArg = fgEntryPtr->isStruct;
3495 structDesc.CopyFrom(fgEntryPtr->structDesc);
3497 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3499 assert(argEntry != nullptr);
3500 if (argEntry->IsBackFilled())
3503 size = argEntry->numRegs;
3504 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3506 isBackFilled = true;
3508 else if (argEntry->regNum == REG_STK)
3511 assert(argEntry->numRegs == 0);
3512 size = argEntry->numSlots;
3517 assert(argEntry->numRegs > 0);
3518 size = argEntry->numRegs + argEntry->numSlots;
3521 // This size has now been computed
3527 // Figure out the size of the argument. This is either in number of registers, or number of
3528 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and the stack.
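// For example, on a 64-bit target a 12-byte struct occupies
// roundUp(12, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE == 2 slots,
// subject to the per-target rules below.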
3531 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3533 #if defined(_TARGET_AMD64_)
3534 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3537 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3541 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3542 TARGET_POINTER_SIZE)) /
3543 TARGET_POINTER_SIZE;
3544 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3547 hasMultiregStructArgs = true;
3550 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3551 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3552 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3553 #elif defined(_TARGET_ARM64_)
3558 size = GetHfaCount(argx);
3559 // HFA structs are passed by value in multiple registers
3560 hasMultiregStructArgs = true;
3564 // Structs are either passed in 1 or 2 (64-bit) slots
3565 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3566 TARGET_POINTER_SIZE)) /
3567 TARGET_POINTER_SIZE;
3571 // Structs that are the size of 2 pointers are passed by value in multiple registers,
3572 // if sufficient registers are available.
3573 hasMultiregStructArgs = true;
3577 size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3578 // reference (to a copy)
3581 // Note that there are some additional rules for multireg structs.
3582 // (i.e. they cannot be split between registers and the stack)
3586 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3588 #elif defined(_TARGET_ARM_)
3591 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3592 TARGET_POINTER_SIZE)) /
3593 TARGET_POINTER_SIZE;
3594 if (isHfaArg || size > 1)
3596 hasMultiregStructArgs = true;
3602 // long/double type argument(s) will be changed to GT_FIELD_LIST in the lowering phase
3603 size = genTypeStSz(argx->gtType);
3605 #elif defined(_TARGET_X86_)
3606 size = genTypeStSz(argx->gtType);
3608 #error Unsupported or unset target architecture
3609 #endif // _TARGET_XXX_
3613 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3614 if (argx->gtOper == GT_MKREFANY)
3616 if (varTypeIsStruct(argx))
3620 #ifdef _TARGET_AMD64_
3621 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3622 if (varTypeIsStruct(argx))
3624 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3625 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3626 size = roundupSize / TARGET_POINTER_SIZE;
3627 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3630 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3638 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3640 GenTree* argObj = argx;
3641 GenTree** parentOfArgObj = parentArgx;
3643 assert(args->OperIsList());
3644 assert(argx == args->Current());
3646 /* The GT_OBJ may be a child of a GT_COMMA */
3647 while (argObj->gtOper == GT_COMMA)
3649 parentOfArgObj = &argObj->gtOp.gtOp2;
3650 argObj = argObj->gtOp.gtOp2;
3653 // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3654 if (argObj->gtOper != GT_OBJ)
3656 BADCODE("illegal argument tree in fgMorphArgs");
3659 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3660 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3661 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3662 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3664 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3665 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3666 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3668 structSize = originalSize;
3670 structPassingKind howToPassStruct;
3671 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3673 #ifdef _TARGET_ARM64_
3674 if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3675 !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
3677 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3679 // For ARM64 structs that are 3, 5, 6 or 7 bytes in size,
3680 // we can read 4 or 8 bytes from the LclVar to pass this arg.
3681 originalSize = genTypeSize(structBaseType);
3684 #endif // _TARGET_ARM64_
3686 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3687 // On System V OS-es a struct is never passed by reference.
3688 // It is either passed by value on the stack or in registers.
3689 bool passStructInRegisters = false;
3690 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3691 bool passStructByRef = false;
3692 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3694 // The following if-then-else needs to be carefully refactored.
3695 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3696 // into a GT_IND of the appropriate size.
3697 // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
3698 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3699 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3700 // It also can't do this if we have a HFA arg,
3701 // unless we have a 1-elem HFA in which case we want to do the optimization.
3702 CLANG_FORMAT_COMMENT_ANCHOR;
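// A sketch of the intended GT_OBJ => GT_IND rewrite (for an 8-byte
// struct whose getArgTypeForStruct answer is TYP_LONG):
//    GT_OBJ(structHnd, addr)  ==>  GT_IND(TYP_LONG, addr)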
3704 #ifndef _TARGET_X86_
3705 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3706 // Check for struct argument with size 1, 2, 4 or 8 bytes
3707 // As we can optimize these by turning them into a GT_IND of the correct type
3709 // Check for cases that we cannot optimize:
3710 CLANG_FORMAT_COMMENT_ANCHOR;
3712 if (((originalSize > TARGET_POINTER_SIZE) && // it is a struct that is larger than a pointer
3713 howToPassStruct != SPK_PrimitiveType) || // and is not passed as a single primitive (one double HFA)
3714 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3715 (isHfaArg && (howToPassStruct != SPK_PrimitiveType))) // it is an HFA that is not a one element HFA
3716 #else // !_TARGET_ARM_
3717 if ((originalSize > TARGET_POINTER_SIZE) || // it is struct that is larger than a pointer
3718 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3719 (isHfaArg && (hfaSlots != 1))) // it is an HFA struct with more than one slot
3720 #endif // !_TARGET_ARM_
3721 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3723 // Normalize 'size' to the number of pointer sized items
3724 // 'size' is the number of register slots that we will use to pass the argument
3725 size = roundupSize / TARGET_POINTER_SIZE;
3726 #if defined(_TARGET_AMD64_)
3727 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3728 size = 1; // This must be copied to a temp and passed by address
3729 passStructByRef = true;
3730 copyBlkClass = objClass;
3731 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3732 if (!structDesc.passedInRegisters)
3734 GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
3735 bool needCpyBlk = false;
3736 if (lclVar != nullptr)
3738 // If the struct is promoted to registers, it has to be materialized
3739 // on the stack. We may want to support promoted structs in
3740 // the codegen of putarg_stk instead of creating a copy here.
3741 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3742 needCpyBlk = varDsc->lvPromoted;
3746 // If a simd16 comes from Vector<T>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3747 // sets structDesc.passedInRegisters to false.
3749 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3750 // by the rationalizer. For now we let a SIMD struct arg be copied to
3751 // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD)
3754 // | \--* addr byref
3755 // | | /--* lclVar simd16 V05 loc4
3756 // | \--* simd simd16 int -
3757 // | \--* lclVar simd16 V08 tmp1
3759 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3760 // so that we don't need to generate a copy here.
3761 GenTree* addr = argObj->gtOp.gtOp1;
3762 if (addr->OperGet() == GT_ADDR)
3764 GenTree* addrChild = addr->gtOp.gtOp1;
3765 if (addrChild->OperIsSIMDorSimdHWintrinsic())
3771 passStructInRegisters = false;
3774 copyBlkClass = objClass;
3778 copyBlkClass = NO_CLASS_HANDLE;
3783 // The objClass is used to materialize the struct on the stack.
3784 // For SystemV, the code below generates copies for struct arguments classified
3785 // as register arguments.
3786 // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3787 // can be passed on registers or can be copied directly to outgoing area.
3788 passStructInRegisters = true;
3789 copyBlkClass = objClass;
3792 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3793 #elif defined(_TARGET_ARM64_)
3794 if ((size > 2) && !isHfaArg)
3796 size = 1; // This must be copied to a temp and passed by address
3797 passStructByRef = true;
3798 copyBlkClass = objClass;
3803 // If we're passing a promoted struct local var,
3804 // we may need to skip some registers due to alignment; record those.
3805 GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
3808 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3809 if (varDsc->lvPromoted)
3811 assert(argObj->OperGet() == GT_OBJ);
3812 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3814 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3816 #if !defined(LEGACY_BACKEND)
3817 copyBlkClass = objClass;
3822 #if !defined(LEGACY_BACKEND)
3823 if (structSize < TARGET_POINTER_SIZE)
3825 copyBlkClass = objClass;
3828 #endif // _TARGET_ARM_
3830 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3831 // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3832 // generated for structs of size 1, 2, 4, or 8 bytes.
3833 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3835 // change our GT_OBJ into a GT_IND of the correct type.
3836 // We've already ensured above that size is a power of 2, and less than or equal to the pointer size.
3839 assert(howToPassStruct == SPK_PrimitiveType);
3841 // ToDo: remove this block, as getArgTypeForStruct properly handles turning one element HFAs into their base type.
3846 // If we reach here with an HFA arg it has to be a one element HFA
3847 // If HFA type is double and it has one element, hfaSlots is 2
3848 assert(hfaSlots == 1 || (hfaSlots == 2 && hfaType == TYP_DOUBLE));
3850 // If we reach here with an HFA arg it has to be a one element HFA
3851 assert(hfaSlots == 1);
3853 structBaseType = hfaType; // change the indirection type to a floating point type
3856 noway_assert(structBaseType != TYP_UNKNOWN);
3858 argObj->ChangeOper(GT_IND);
3860 // Now see if we can fold *(&X) into X
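// e.g. GT_IND(GT_ADDR(GT_LCL_VAR V02))  ==>  GT_LCL_VAR V02
// (V02 is a hypothetical local; GTF_DONT_CSE is kept in sync below)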
3861 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3863 GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3865 // Keep the DONT_CSE flag in sync
3866 // (as the addr always marks it for its op1)
3867 temp->gtFlags &= ~GTF_DONT_CSE;
3868 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3869 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3870 DEBUG_DESTROY_NODE(argObj); // GT_IND
3873 *parentOfArgObj = temp;
3875 // If the OBJ had been the top level node, we've now changed argx.
3876 if (parentOfArgObj == parentArgx)
3881 if (argObj->gtOper == GT_LCL_VAR)
3883 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3884 LclVarDsc* varDsc = &lvaTable[lclNum];
3886 if (varDsc->lvPromoted)
3888 if (varDsc->lvFieldCnt == 1)
3890 // get the first and only promoted field
3891 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3892 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3894 // we will use the first and only promoted field
3895 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3897 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3898 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3900 // Just use the existing field's type
3901 argObj->gtType = fieldVarDsc->TypeGet();
3905 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3907 argObj->ChangeOper(GT_LCL_FLD);
3908 argObj->gtType = structBaseType;
3910 assert(varTypeCanReg(argObj->TypeGet()));
3911 assert(copyBlkClass == NO_CLASS_HANDLE);
3915 // use GT_LCL_FLD to swizzle the single field struct to a new type
3916 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3917 argObj->ChangeOper(GT_LCL_FLD);
3918 argObj->gtType = structBaseType;
3923 // The struct fits into a single register, but it has been promoted into its
3924 // constituent fields, and so we have to re-assemble it
3925 copyBlkClass = objClass;
3927 // Alignment constraints may cause us not to use (to "skip") some argument
3928 // registers. Add those, if any, to the skipped (int) arg reg mask.
3929 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3930 #endif // _TARGET_ARM_
3933 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3935 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3936 argObj->ChangeOper(GT_LCL_FLD);
3937 argObj->gtType = structBaseType;
3942 // Not a GT_LCL_VAR, so we can just change the type on the node
3943 argObj->gtType = structBaseType;
3945 assert(varTypeCanReg(argObj->TypeGet()) ||
3946 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3956 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3958 #endif // not _TARGET_X86_
3959 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3960 if (varTypeIsStruct(structBaseType) &&
3961 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3962 !passStructInRegisters
3963 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3965 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3968 if (isHfaArg && passUsingFloatRegs)
3970 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3974 // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
3975 // we must copyblk to a temp before doing the obj to avoid
3976 // the obj reading memory past the end of the valuetype
3977 CLANG_FORMAT_COMMENT_ANCHOR;
3979 if (roundupSize > originalSize)
3981 copyBlkClass = objClass;
3983 // There are a few special cases where we can omit using a CopyBlk
3984 // where we normally would need to use one.
3986 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3988 copyBlkClass = NO_CLASS_HANDLE;
3992 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
3997 #if defined(_TARGET_64BIT_)
4000 hasMultiregStructArgs = true;
4002 #elif defined(_TARGET_ARM_)
4005 if (size > genTypeStSz(hfaType))
4007 hasMultiregStructArgs = true;
4012 hasMultiregStructArgs = true;
4014 #endif // _TARGET_ARM_
4017 // The 'size' value must now have been set (the original value of zero is an invalid value).
4021 // Figure out if the argument will be passed in a register.
4024 if (isRegParamType(genActualType(argx->TypeGet()))
4025 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4026 && (!isStructArg || structDesc.passedInRegisters)
4031 if (passUsingFloatRegs)
4033 // First, see if it can be back-filled
4034 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
4035 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
4036 (size == 1)) // The size to back-fill is one float register
4038 // Back-fill the register.
4039 isBackFilled = true;
4040 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
4041 fltArgSkippedRegMask &=
4042 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
4043 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
4044 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
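// Illustrative sketch (hypothetical values, not from the original comments): on ARM,
// a float arg in s0 followed by a double forces the double into d1 (s2:s3) for
// 8-byte alignment, recording s1 in fltArgSkippedRegMask; a later float argument
// of size 1 can then back-fill s1 via the code above.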
4047 // Does the entire float, double, or HFA fit in the FP arg registers?
4048 // Check if the last register needed is still in the argument register range.
4049 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
4053 anyFloatStackArgs = true;
4058 isRegArg = intArgRegNum < MAX_REG_ARG;
4060 #elif defined(_TARGET_ARM64_)
4061 if (passUsingFloatRegs)
4063 // Check if the last register needed is still in the fp argument register range.
4064 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
4066 // Do we have an HFA arg that we wanted to pass in registers, but we ran out of FP registers?
4067 if (isHfaArg && !isRegArg)
4069 // Recompute the 'size' so that it represents the number of stack slots rather than the number of registers.
4072 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
4073 size = roundupSize / TARGET_POINTER_SIZE;
4075 // We also must update fltArgRegNum so that we no longer try to
4076 // allocate any new floating point registers for args
4077 // This prevents us from backfilling a subsequent arg into d7
4079 fltArgRegNum = MAX_FLOAT_REG_ARG;
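// Illustrative sketch (hypothetical values): an HFA of three doubles with only
// d6/d7 free does not fit (6 + 3 - 1 is not below MAX_FLOAT_REG_ARG), so 'size'
// becomes roundUp(24)/8 == 3 stack slots, and the FP arg registers are closed off
// here so no later float arg can be back-filled out of order.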
4084 // Check if the last register needed is still in the int argument register range.
4085 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
4087 // Did we run out of registers when we had a 16-byte struct (size == 2)?
4088 // (i.e. we only have one register remaining but we needed two registers to pass this arg)
4089 // This prevents us from backfilling a subsequent arg into x7
4091 if (!isRegArg && (size > 1))
4093 // We also must update intArgRegNum so that we no longer try to
4094 // allocate any new general purpose registers for args
4096 intArgRegNum = maxRegArgs;
4099 #else // not _TARGET_ARM_ or _TARGET_ARM64_
4101 #if defined(UNIX_AMD64_ABI)
4103 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4104 // Here a struct can be passed in registers following the classification of its members and size.
4105 // Now make sure there are actually enough registers to do so.
4108 unsigned int structFloatRegs = 0;
4109 unsigned int structIntRegs = 0;
4110 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4112 if (structDesc.IsIntegralSlot(i))
4116 else if (structDesc.IsSseSlot(i))
4122 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
4123 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
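// Illustrative sketch (hypothetical type): struct { long l; double d; } classifies
// as one INTEGER eightbyte plus one SSE eightbyte, so it stays a register argument
// only while both an int reg and a float reg remain; otherwise the whole struct
// goes to the stack (SysV structs are never split between registers and stack).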
4126 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4128 if (passUsingFloatRegs)
4130 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
4134 isRegArg = intArgRegNum < MAX_REG_ARG;
4137 #else // !defined(UNIX_AMD64_ABI)
4138 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
4139 #endif // !defined(UNIX_AMD64_ABI)
4140 #endif // _TARGET_ARM_
4147 #ifndef LEGACY_BACKEND
4148 // If there are nonstandard args (outside the calling convention), they were inserted above
4149 // and noted in a table so we can recognize them here and build their argInfo.
4151 // They should not affect the placement of any other args or stack space required.
4152 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
4153 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
4154 if (isNonStandard && (nonStdRegNum == REG_STK))
4158 #if defined(_TARGET_X86_)
4159 else if (call->IsTailCallViaHelper())
4161 // We have already (before calling fgMorphArgs()) appended the 4 special args
4162 // required by the x86 tailcall helper. These args are required to go on the
4163 // stack. Force them to the stack here.
4164 assert(numArgs >= 4);
4165 if (argIndex >= numArgs - 4)
4170 #endif // defined(_TARGET_X86_)
4171 #endif // !LEGACY_BACKEND
4172 } // end !reMorphing
4175 // Now we know whether the argument goes in registers and how big it is;
4176 // either we just computed it, or this is a re-morph call and we looked it up.
4178 CLANG_FORMAT_COMMENT_ANCHOR;
4181 // If we ever allocate a floating point argument to the stack, then all
4182 // subsequent HFA/float/double arguments go on the stack.
4183 if (!isRegArg && passUsingFloatRegs)
4185 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
4187 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
4191 // If we think we're going to split a struct between integer registers and the stack, check to
4192 // see if we've already assigned a floating-point arg to the stack.
4193 if (isRegArg && // We decided above to use a register for the argument
4194 !passUsingFloatRegs && // We're using integer registers
4195 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
4196 anyFloatStackArgs) // We've already used the stack for a floating-point argument
4198 isRegArg = false; // Change our mind; don't pass this struct partially in registers
4200 // Skip the rest of the integer argument registers
4201 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
4203 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
4207 #endif // _TARGET_ARM_
4211 regNumber nextRegNum = REG_STK;
4212 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4213 regNumber nextOtherRegNum = REG_STK;
4214 unsigned int structFloatRegs = 0;
4215 unsigned int structIntRegs = 0;
4217 if (isStructArg && structDesc.passedInRegisters)
4219 // It is a struct passed in registers. Assign the next available register.
4220 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
4221 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
4222 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4224 if (structDesc.IsIntegralSlot(i))
4226 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
4229 else if (structDesc.IsSseSlot(i))
4231 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
4237 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4239 // fill in or update the argInfo table
4240 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
4241 : genMapIntRegArgNumToRegNum(intArgRegNum);
4244 #ifdef _TARGET_AMD64_
4245 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4250 fgArgTabEntry* newArgEntry;
4253 // This is a register argument - possibly update it in the table
4254 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4260 nextRegNum = nonStdRegNum;
4263 // This is a register argument - put it in the table
4264 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4265 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4267 isStructArg, nextOtherRegNum, &structDesc
4268 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4271 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
4272 isHfaArg); // Note: on Arm32 an HFA is passed in int regs for varargs
4273 newArgEntry->SetIsBackFilled(isBackFilled);
4274 newArgEntry->isNonStandard = isNonStandard;
4277 if (newArgEntry->isNonStandard)
4279 flagsSummary |= args->Current()->gtFlags;
4283 // Set up the next intArgRegNum and fltArgRegNum values.
4286 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4289 intArgRegNum += structIntRegs;
4290 fltArgRegNum += structFloatRegs;
4293 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4295 if (passUsingFloatRegs)
4297 fltArgRegNum += size;
4299 #ifdef WINDOWS_AMD64_ABI
4300 // Whenever we pass an integer register argument
4301 // we skip the corresponding floating point register argument
4302 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4303 #endif // WINDOWS_AMD64_ABI
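// Illustrative sketch (hypothetical signature): for f(double, int) on Windows x64,
// the double takes XMM0 (slot 0), so the int is advanced past RCX and lands in
// RDX (slot 1); integer and float arg registers share the same positional slots.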
4304 // No struct argument is split between float registers and the stack
4305 // on any supported architecture.
4306 assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
4310 if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4312 // we are setting up the fixed return buffer register argument
4313 // so don't increment intArgRegNum
4318 // Increment intArgRegNum by 'size' registers
4319 intArgRegNum += size;
4322 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4323 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4324 #endif // _TARGET_AMD64_
4326 if (intArgRegNum > MAX_REG_ARG)
4328 // This indicates a partial enregistration of a struct type
4329 assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
4330 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4331 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4332 assert((unsigned char)numRegsPartial == numRegsPartial);
4333 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4334 intArgRegNum = MAX_REG_ARG;
4335 fgPtrArgCntCur += size - numRegsPartial;
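// Illustrative sketch (hypothetical values): on ARM, a 12-byte struct (size == 3)
// arriving with only r2/r3 free is split: numRegsPartial == 2 slots go in r2/r3
// and the remaining slot goes to the stack, bumping fgPtrArgCntCur by one.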
4337 #endif // _TARGET_ARM_
4342 else // We have an argument that is not passed in a register
4344 fgPtrArgCntCur += size;
4346 // If the register arguments have not been determined then we must fill in the argInfo
4350 // This is a stack argument - possibly update it in the table
4351 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4355 // This is a stack argument - put it in the table
4356 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4357 argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4361 if (copyBlkClass != NO_CLASS_HANDLE)
4363 noway_assert(!reMorphing);
4364 fgMakeOutgoingStructArgCopy(call, args, argIndex,
4365 copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4367 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4368 hasStackArgCopy = true;
4372 #ifndef LEGACY_BACKEND
4373 if (argx->gtOper == GT_MKREFANY)
4375 // 'Lower' the MKREFANY tree and insert it.
4376 noway_assert(!reMorphing);
4380 // Build the mkrefany as a GT_FIELD_LIST
4381 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4382 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4383 (void)new (this, GT_FIELD_LIST)
4384 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4385 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4386 fp->node = fieldList;
4387 args->gtOp.gtOp1 = fieldList;
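// Resulting shape (sketch): FIELD_LIST(dataPtr TYP_BYREF @0, FIELD_LIST(type TYP_I_IMPL @pointer-size, nullptr)),
// i.e. the TypedReference is decomposed into its pointer and type-handle pieces
// so each can be passed as its own stack slot on x86.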
4389 #else // !_TARGET_X86_
4392 // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
4393 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4394 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4396 // Build the mkrefany as a comma node:
4397 // (tmp.ptr=argx),(tmp.type=handle)
4398 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4399 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4400 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4401 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4402 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4403 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4405 GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4406 GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4407 GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4409 // Change the expression to "(tmp=val)"
4410 args->gtOp.gtOp1 = asg;
4412 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4413 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4414 lvaSetVarAddrExposed(tmp);
4415 #endif // !_TARGET_X86_
4417 #endif // !LEGACY_BACKEND
4419 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4422 GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4423 if ((lclNode != nullptr) &&
4424 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4426 // Make a GT_FIELD_LIST of the field lclVars.
4427 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4428 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4429 GenTreeFieldList* fieldList = nullptr;
4430 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4431 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4433 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4434 if (fieldList == nullptr)
4436 lcl->SetLclNum(fieldLclNum);
4437 lcl->ChangeOper(GT_LCL_VAR);
4438 lcl->gtType = fieldVarDsc->lvType;
4439 fieldList = new (this, GT_FIELD_LIST)
4440 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4441 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4442 fp->node = fieldList;
4443 args->gtOp.gtOp1 = fieldList;
4447 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4448 fieldList = new (this, GT_FIELD_LIST)
4449 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
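// Sketch of the result for a hypothetical two-field promoted struct V00 {int, int}:
// the original IND(ADDR(V00)) becomes FIELD_LIST(V01 @0, FIELD_LIST(V02 @4, nullptr)),
// where V01/V02 are the promoted field locals, so each field is pushed individually.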
4454 #endif // _TARGET_X86_ && !LEGACY_BACKEND
4456 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4457 if (isStructArg && !isRegArg)
4459 nonRegPassedStructSlots += size;
4462 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4466 flagsSummary |= args->Current()->gtFlags;
4467 } // end foreach argument loop
4471 call->fgArgInfo->ArgsComplete();
4473 #ifdef LEGACY_BACKEND
4474 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4475 #if defined(_TARGET_ARM_)
4476 call->gtCallRegUsedMask &= ~argSkippedRegMask;
4478 if (fltArgRegNum > 0)
4480 #if defined(_TARGET_ARM_)
4481 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4484 #endif // LEGACY_BACKEND
4487 if (call->gtCallArgs)
4489 UpdateGT_LISTFlags(call->gtCallArgs);
4492 /* Process the function address, if indirect call */
4494 if (call->gtCallType == CT_INDIRECT)
4496 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4499 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4501 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4506 /* Remember the maximum value we ever see */
4508 if (fgPtrArgCntMax < fgPtrArgCntCur)
4510 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4511 fgPtrArgCntMax = fgPtrArgCntCur;
4514 assert(fgPtrArgCntCur >= genPtrArgCntSav);
4515 #if defined(UNIX_X86_ABI)
4516 call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4517 #endif // UNIX_X86_ABI
4519 /* The call will pop all the arguments we pushed */
4521 fgPtrArgCntCur = genPtrArgCntSav;
4523 #if FEATURE_FIXED_OUT_ARGS
4525 // Record the outgoing argument size. If the call is a fast tail
4526 // call, it will set up its arguments in the incoming arg area instead
4527 // of the out-going arg area, so we don't need to track the
4528 // outgoing arg size.
4529 if (!call->IsFastTailCall())
4531 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4533 #if defined(UNIX_AMD64_ABI)
4534 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4536 // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4538 // First slots go in registers only, no stack needed.
4539 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4540 // and ignores floating point args (it is overly conservative in that case).
4541 preallocatedArgCount = nonRegPassedStructSlots;
4542 if (argSlots > MAX_REG_ARG)
4544 preallocatedArgCount += argSlots - MAX_REG_ARG;
4546 #endif // UNIX_AMD64_ABI
4548 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4549 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
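// Illustrative sketch (hypothetical counts, assuming the Windows x64 value of
// MIN_ARG_AREA_FOR_CALL covers the callee's four home slots): a call with six
// pointer-sized args yields GetNextSlotNum() == 6, i.e. 48 bytes of out-going
// space, while a call with two args still reserves the 32-byte minimum.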
4554 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4555 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4559 #endif // FEATURE_FIXED_OUT_ARGS
4561 // Clear the ASG and EXCEPT (if possible) flags on the call node
4562 call->gtFlags &= ~GTF_ASG;
4563 if (!call->OperMayThrow(this))
4565 call->gtFlags &= ~GTF_EXCEPT;
4568 // Union in the side effect flags from the call's operands
4569 call->gtFlags |= flagsSummary & GTF_ALL_EFFECT;
4571 // If the register arguments have already been determined
4572 // or we have no register arguments then we don't need to
4573 // call SortArgs() and EvalArgsToTemps()
4575 // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4576 // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4577 // is added to make sure that EvalArgsToTemps() is called in those cases.
4578 if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4579 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4581 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4584 // This is the first time that we morph this call AND it has register arguments.
4585 // Follow into the code below and do the 'defer or eval to temp' analysis.
4587 call->fgArgInfo->SortArgs();
4589 call->fgArgInfo->EvalArgsToTemps();
4591 // We may have updated the arguments
4592 if (call->gtCallArgs)
4594 UpdateGT_LISTFlags(call->gtCallArgs);
4598 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4600 // Rewrite the struct args to be passed by value on stack or in registers.
4601 fgMorphSystemVStructArgs(call, hasStructArgument);
4603 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4605 #ifndef LEGACY_BACKEND
4606 // In the future we can migrate UNIX_AMD64 to use this
4607 // method instead of fgMorphSystemVStructArgs
4609 // We only require morphing of structs that may be passed in multiple registers
4610 // for the RyuJIT backend.
4611 if (hasMultiregStructArgs)
4613 fgMorphMultiregStructArgs(call);
4615 #endif // LEGACY_BACKEND
4617 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4622 call->fgArgInfo->Dump(this);
4628 #pragma warning(pop)
4631 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4632 // fgMorphSystemVStructArgs:
4633 // Rewrite the struct args to be passed by value on stack or in registers.
4636 // call: The call whose arguments need to be morphed.
4637 // hasStructArgument: Whether this call has struct arguments.
4639 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4641 unsigned flagsSummary = 0;
4645 if (hasStructArgument)
4647 fgArgInfo* allArgInfo = call->fgArgInfo;
4649 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4651 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4652 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg),
4653 // while the tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains
4654 // the mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points
4655 // to itself; otherwise it points to the node in the gtCallLateArgs list.
4656 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4657 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4658 assert(fgEntryPtr != nullptr);
4659 GenTree* argx = fgEntryPtr->node;
4660 GenTree* lateList = nullptr;
4661 GenTree* lateNode = nullptr;
4665 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4667 assert(list->OperIsList());
4669 GenTree* argNode = list->Current();
4670 if (argx == argNode)
4677 assert(lateList != nullptr && lateNode != nullptr);
4679 GenTree* arg = argx;
4680 bool argListCreated = false;
4682 var_types type = arg->TypeGet();
4684 if (varTypeIsStruct(type))
4686 var_types originalType = type;
4687 // If we have already processed the arg...
4688 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
4693 // If it is already an OBJ, the type is already set properly.
4694 if (arg->OperGet() == GT_OBJ)
4696 assert(!fgEntryPtr->structDesc.passedInRegisters);
4700 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4701 (arg->OperGet() == GT_ADDR &&
4702 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4704 GenTreeLclVarCommon* lclCommon =
4705 arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4706 if (fgEntryPtr->structDesc.passedInRegisters)
4708 if (fgEntryPtr->structDesc.eightByteCount == 1)
4710 // Change the type; the code below will change the LclVar to a LCL_FLD.
4711 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4712 fgEntryPtr->structDesc.eightByteSizes[0]);
4714 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4716 // Create LCL_FLD for each eightbyte.
4717 argListCreated = true;
4720 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4722 GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4723 fgEntryPtr->structDesc.eightByteSizes[0]);
4724 GenTreeFieldList* fieldList =
4725 new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4726 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4729 // Second eightbyte.
4730 GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4731 GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4732 .eightByteClassifications[1],
4733 fgEntryPtr->structDesc.eightByteSizes[1]),
4734 lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4736 fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4737 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4738 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
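// Resulting shape (sketch): a two-entry FIELD_LIST of LCL_FLDs over the same local,
// at struct offsets 0 and 8, each typed from its SysV classification (for example
// TYP_LONG for an INTEGER eightbyte, TYP_DOUBLE for an SSE eightbyte).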
4742 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
4747 // If we didn't change the type of the struct, it means
4748 // its classification doesn't allow it to be passed directly
4749 // in registers, so we need to pass a pointer to the destination
4750 // where we copied the struct to.
4751 if (!argListCreated)
4753 if (fgEntryPtr->structDesc.passedInRegisters)
4759 // Make sure this is an addr node.
4760 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4762 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4765 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4767 // Create an Obj of the temp to use it as a call argument.
4768 arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4775 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4776 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4777 assert(fgEntryPtr != nullptr);
4778 GenTree* argx = fgEntryPtr->node;
4779 GenTree* lateList = nullptr;
4780 GenTree* lateNode = nullptr;
4783 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4785 assert(list->OperIsList());
4787 GenTree* argNode = list->Current();
4788 if (argx == argNode)
4795 assert(lateList != nullptr && lateNode != nullptr);
4798 fgEntryPtr->node = arg;
4801 lateList->gtOp.gtOp1 = arg;
4805 args->gtOp.gtOp1 = arg;
4812 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4814 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4816 //-----------------------------------------------------------------------------
4817 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4818 // call fgMorphMultiregStructArg on each of them.
4821 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
4824 // We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types.
4825 // It will ensure that the struct arguments are in the correct form.
4826 // If this method fails to find any TYP_STRUCT arguments it will assert.
4828 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4830 bool foundStructArg = false;
4831 unsigned initialFlags = call->gtFlags;
4832 unsigned flagsSummary = 0;
4833 fgArgInfo* allArgInfo = call->fgArgInfo;
4835 // Currently ARM64 and ARM use this method to morph the MultiReg struct args;
4836 // in the future AMD64_UNIX will also use this method.
4837 CLANG_FORMAT_COMMENT_ANCHOR;
4840 assert(!"Logic error: no MultiregStructArgs for X86");
4842 #ifdef _TARGET_AMD64_
4843 #if defined(UNIX_AMD64_ABI)
4844 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4845 #else // WINDOWS_AMD64_ABI
4846 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4847 #endif // !UNIX_AMD64_ABI
4850 for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4852 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4853 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg),
4854 // while the tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains
4855 // the mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points
4856 // to itself; otherwise it points to the node in the gtCallLateArgs list.
4857 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4858 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4859 assert(fgEntryPtr != nullptr);
4860 GenTree* argx = fgEntryPtr->node;
4861 GenTree* lateList = nullptr;
4862 GenTree* lateNode = nullptr;
4866 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4868 assert(list->OperIsList());
4870 GenTree* argNode = list->Current();
4871 if (argx == argNode)
4878 assert(lateList != nullptr && lateNode != nullptr);
4881 GenTree* arg = argx;
4883 if (varTypeIsStruct(arg->TypeGet()))
4885 foundStructArg = true;
4887 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4889 // Did we replace 'argx' with a new tree?
4892 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4894 // link the new arg node into either the late arg list or the gtCallArgs list
4897 lateList->gtOp.gtOp1 = arg;
4901 args->gtOp.gtOp1 = arg;
4907 // We should only call this method when we actually have one or more multireg struct args
4908 assert(foundStructArg);
4911 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4914 //-----------------------------------------------------------------------------
4915 // fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list,
4916 // morph the argument as needed to be passed correctly.
4919 // arg - A GenTree node containing a TYP_STRUCT arg
4920 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4923 // The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT.
4924 // If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the
4925 // stack are marked as doNotEnregister, and then we return.
4927 // If it is passed by register, we mutate the argument into the GT_FIELD_LIST form
4928 // which is only used for struct arguments.
4930 // If arg is a LclVar we check if it is struct promoted and has the right number of fields,
4931 // and if they are at the appropriate offsets we will use the struct promoted fields
4932 // in the GT_FIELD_LIST nodes that we create.
4933 // If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4934 // we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4935 // this also forces the struct to be stack allocated into the local frame.
4936 // For the GT_OBJ case we will clone the address expression and generate two (or more) indirect loads, one per register-sized piece.
4938 // Currently the implementation handles ARM64/ARM and will NYI for other architectures.
4940 GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr)
4942 assert(varTypeIsStruct(arg->TypeGet()));
4944 #ifndef _TARGET_ARMARCH_
4945 NYI("fgMorphMultiregStructArg requires implementation for this target");
4949 if ((fgEntryPtr->isSplit && fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) ||
4950 (!fgEntryPtr->isSplit && fgEntryPtr->regNum == REG_STK))
4952 GenTreeLclVarCommon* lcl = nullptr;
4954 // If it is already an OBJ, the type is already set properly.
4955 if (arg->OperGet() == GT_OBJ)
4957 if (arg->gtGetOp1()->OperIs(GT_ADDR) && arg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
4959 lcl = arg->gtGetOp1()->gtGetOp1()->AsLclVarCommon();
4964 assert(arg->OperGet() == GT_LCL_VAR);
4966 // We need to construct a `GT_OBJ` node for the argument,
4967 // so we need to get the address of the lclVar.
4968 lcl = arg->AsLclVarCommon();
4970 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4972 // Create an Obj of the temp to use it as a call argument.
4973 arg = gtNewObjNode(lvaGetStruct(lcl->gtLclNum), arg);
4977 // Its fields will need to be accessed by address.
4978 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUG_ARG(DNER_IsStructArg));
4985 #if FEATURE_MULTIREG_ARGS
4986 // Examine 'arg' and set up argValue, objClass, and structSize.
4988 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4989 GenTree* argValue = arg; // normally argValue will be arg, but see right below
4990 unsigned structSize = 0;
4992 if (arg->OperGet() == GT_OBJ)
4994 GenTreeObj* argObj = arg->AsObj();
4995 objClass = argObj->gtClass;
4996 structSize = info.compCompHnd->getClassSize(objClass);
4998 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR.
4999 GenTree* op1 = argObj->gtOp1;
5000 if (op1->OperGet() == GT_ADDR)
5002 GenTree* underlyingTree = op1->gtOp.gtOp1;
5004 // Only update to the same type.
5005 if ((underlyingTree->TypeGet() == argValue->TypeGet()) &&
5006 (objClass == gtGetStructHandleIfPresent(underlyingTree)))
5008 argValue = underlyingTree;
5012 else if (arg->OperGet() == GT_LCL_VAR)
5014 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
5015 unsigned varNum = varNode->gtLclNum;
5016 assert(varNum < lvaCount);
5017 LclVarDsc* varDsc = &lvaTable[varNum];
5019 objClass = lvaGetStruct(varNum);
5020 structSize = varDsc->lvExactSize;
5022 noway_assert(objClass != nullptr);
5024 var_types hfaType = TYP_UNDEF;
5025 var_types elemType = TYP_UNDEF;
5026 unsigned elemCount = 0;
5027 unsigned elemSize = 0;
5028 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
5030 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
5031 if (varTypeIsFloating(hfaType))
5034 elemSize = genTypeSize(elemType);
5035 elemCount = structSize / elemSize;
5036 assert(elemSize * elemCount == structSize);
5037 for (unsigned inx = 0; inx < elemCount; inx++)
5039 type[inx] = elemType;
5044 #ifdef _TARGET_ARM64_
5045 assert(structSize <= 2 * TARGET_POINTER_SIZE);
5046 #elif defined(_TARGET_ARM_)
5047 assert(structSize <= 4 * TARGET_POINTER_SIZE);
5050 #ifdef _TARGET_ARM64_
5051 BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
5052 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
5054 type[0] = getJitGCType(gcPtrs[0]);
5055 type[1] = getJitGCType(gcPtrs[1]);
5056 #elif defined(_TARGET_ARM_)
5057 BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
5058 elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
5059 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
5060 for (unsigned inx = 0; inx < elemCount; inx++)
5062 type[inx] = getJitGCType(gcPtrs[inx]);
5064 #endif // _TARGET_ARM_
5066 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5068 elemSize = TARGET_POINTER_SIZE;
5069 // We can safely widen this to aligned bytes since we are loading from
5070 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
5071 // lives in the stack frame or will be a promoted field.
5073 structSize = elemCount * TARGET_POINTER_SIZE;
5075 else // we must have a GT_OBJ
5077 assert(argValue->OperGet() == GT_OBJ);
5079 // We need to load the struct from an arbitrary address,
5080 // and we can't read past the end of the structSize,
5081 // so we adjust the last load type here.
5083 unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
5084 unsigned lastElem = elemCount - 1;
5085 if (remainingBytes != 0)
5087 switch (remainingBytes)
5090 type[lastElem] = TYP_BYTE;
5093 type[lastElem] = TYP_SHORT;
5095 #ifdef _TARGET_ARM64_
5097 type[lastElem] = TYP_INT;
5099 #endif // _TARGET_ARM64_
5101 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
5107 // We should still have a TYP_STRUCT
5108 assert(varTypeIsStruct(argValue->TypeGet()));
5110 GenTreeFieldList* newArg = nullptr;
5112 // Are we passing a struct LclVar?
5114 if (argValue->OperGet() == GT_LCL_VAR)
5116 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5117 unsigned varNum = varNode->gtLclNum;
5118 assert(varNum < lvaCount);
5119 LclVarDsc* varDsc = &lvaTable[varNum];
5121 // At this point any TYP_STRUCT LclVar must be an aligned struct
5122 // or an HFA struct, both of which are passed by value.
5124 assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
5126 varDsc->lvIsMultiRegArg = true;
5131 JITDUMP("Multireg struct argument V%02u : ", varNum);
5136 // This local variable must match the layout of the 'objClass' type exactly
5137 if (varDsc->lvIsHfa())
5139 // We have a HFA struct
5140 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
5141 noway_assert(elemSize == genTypeSize(elemType));
5142 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
5143 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
5145 for (unsigned inx = 0; (inx < elemCount); inx++)
5147 noway_assert(type[inx] == elemType);
5152 #ifdef _TARGET_ARM64_
5153 // We must have a 16-byte struct (non-HFA)
5154 noway_assert(elemCount == 2);
5155 #elif defined(_TARGET_ARM_)
5156 noway_assert(elemCount <= 4);
5159 for (unsigned inx = 0; inx < elemCount; inx++)
5161 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
5163 // We set up the type[inx] value above using the GC info from 'objClass'.
5164 // This GT_LCL_VAR must have the same GC layout info.
5166 if (currentGcLayoutType != TYPE_GC_NONE)
5168 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
5172 // We may have used a small type when we set up the type[inx] values above.
5173 // We can safely widen this to TYP_I_IMPL.
5174 type[inx] = TYP_I_IMPL;
5179 #ifdef _TARGET_ARM64_
5180 // Is this LclVar a promoted struct with exactly 2 fields?
5181 // TODO-ARM64-CQ: Support struct promoted HFA types here
5182 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
5184 // See if we have two promoted fields that start at offsets 0 and 8.
5185 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
5186 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
5188 // Did we find the promoted fields at the necessary offsets?
5189 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
5191 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
5192 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
5194 var_types loType = loVarDsc->lvType;
5195 var_types hiType = hiVarDsc->lvType;
5197 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
5199 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
5200 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
5202 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5205 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5210 // We can use the struct promoted fields as the two arguments
5212 GenTree* loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
5213 GenTree* hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
5215 // Create a new tree for 'arg'
5216 // replace the existing LDOBJ(ADDR(LCLVAR))
5217 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
5219 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
5220 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
5227 // We will create a list of GT_LCL_FLD nodes to pass this struct.
5229 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5231 #elif defined(_TARGET_ARM_)
5232 // Is this LclVar a promoted struct whose field count matches the slot count?
5233 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
5235 // See if the promoted fields are at the expected offsets.
5236 unsigned varNums[4];
5237 bool hasBadVarNum = false;
5238 for (unsigned inx = 0; inx < elemCount; inx++)
5240 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
5241 if (varNums[inx] == BAD_VAR_NUM)
5243 hasBadVarNum = true;
5248 // Did we find the promoted fields at the necessary offsets?
5251 LclVarDsc* varDscs[4];
5252 var_types varType[4];
5253 bool varIsFloat = false;
5255 for (unsigned inx = 0; inx < elemCount; inx++)
5257 varDscs[inx] = &lvaTable[varNums[inx]];
5258 varType[inx] = varDscs[inx]->lvType;
5259 if (varTypeIsFloating(varType[inx]))
5261 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
5263 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
5265 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5268 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5277 unsigned offset = 0;
5278 GenTreeFieldList* listEntry = nullptr;
5279 // We can use the struct promoted fields as arguments
5280 for (unsigned inx = 0; inx < elemCount; inx++)
5282 GenTree* lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
5283 // Create a new tree for 'arg'
5284 // replace the existing LDOBJ(ADDR(LCLVAR))
5285 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
5286 if (newArg == nullptr)
5290 offset += TARGET_POINTER_SIZE;
5298 // We will create a list of GT_LCL_FLD nodes to pass this struct.
5300 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5302 #endif // _TARGET_ARM_
5305 // If we didn't set newArg to a new GT_FIELD_LIST tree
5307 if (newArg == nullptr)
5309 if (fgEntryPtr->regNum == REG_STK)
5311 // We leave this stack passed argument alone
5315 // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
5316 // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
5318 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5320 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5321 unsigned varNum = varNode->gtLclNum;
5322 assert(varNum < lvaCount);
5323 LclVarDsc* varDsc = &lvaTable[varNum];
5325 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
5326 unsigned lastOffset = baseOffset + (elemCount * elemSize);
5328 // The allocated size of our LocalVar must be at least as big as lastOffset
5329 assert(varDsc->lvSize() >= lastOffset);
5331 if (varDsc->lvStructGcCount > 0)
5333 // alignment of the baseOffset is required
5334 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
5335 noway_assert(elemSize == TARGET_POINTER_SIZE);
5336 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
5337 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
5338 for (unsigned inx = 0; (inx < elemCount); inx++)
5340 // The GC information must match what we set up using 'objClass'.
5341 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
5344 else // this varDsc contains no GC pointers
5346 for (unsigned inx = 0; inx < elemCount; inx++)
5348 // The GC information must match what we set up using 'objClass'.
5349 noway_assert(!varTypeIsGC(type[inx]));
5354 // We create a list of GT_LCL_FLD nodes to pass this struct.
5356 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5358 // Create a new tree for 'arg'
5359 // replace the existing LDOBJ(ADDR(LCLVAR))
5360 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
5362 unsigned offset = baseOffset;
5363 GenTreeFieldList* listEntry = nullptr;
5364 for (unsigned inx = 0; inx < elemCount; inx++)
5366 elemSize = genTypeSize(type[inx]);
5367 GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
5368 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
5369 if (newArg == nullptr)
5376 // Are we passing a GT_OBJ struct?
5378 else if (argValue->OperGet() == GT_OBJ)
5380 GenTreeObj* argObj = argValue->AsObj();
5381 GenTree* baseAddr = argObj->gtOp1;
5382 var_types addrType = baseAddr->TypeGet();
5384 if (baseAddr->OperGet() == GT_ADDR)
5386 GenTree* addrTaken = baseAddr->gtOp.gtOp1;
5387 if (addrTaken->IsLocal())
5389 GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon();
5390 unsigned varNum = varNode->gtLclNum;
5391 // We may be accessing a non-struct type (for example, long) as a struct type.
5392 // Make sure the lclVar lives on the stack so that its fields are accessible by address.
5393 lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
5397 // Create a new tree for 'arg'
5398 // replace the existing LDOBJ(EXPR)
5399 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5402 unsigned offset = 0;
5403 GenTreeFieldList* listEntry = nullptr;
5404 for (unsigned inx = 0; inx < elemCount; inx++)
5406 elemSize = genTypeSize(type[inx]);
5407 GenTree* curAddr = baseAddr;
5410 GenTree* baseAddrDup = gtCloneExpr(baseAddr);
5411 noway_assert(baseAddrDup != nullptr);
5412 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5418 GenTree* curItem = gtNewIndir(type[inx], curAddr);
5420 // For safety, all GT_IND nodes should have at least GTF_GLOB_REF set.
5421 curItem->gtFlags |= GTF_GLOB_REF;
5423 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5424 if (newArg == nullptr)
5434 // If we reach here we should have set newArg to something
5435 if (newArg == nullptr)
5437 gtDispTree(argValue);
5438 assert(!"Missing case in fgMorphMultiregStructArg");
5442 noway_assert(newArg != nullptr);
5443 noway_assert(newArg->OperIsFieldList());
5445 // We need to propagate any GTF_ALL_EFFECT flags from the end of the list back to the beginning.
5446 // This is verified in fgDebugCheckFlags().
5448 ArrayStack<GenTree*> stack(this);
5450 for (tree = newArg; (tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsFieldList(); tree = tree->gtGetOp2())
5455 unsigned propFlags = (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
5456 tree->gtFlags |= propFlags;
5458 while (stack.Height() > 0)
5461 propFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
5462 propFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
5463 tree->gtFlags |= propFlags;
5469 printf("fgMorphMultiregStructArg created tree:\n");
5474 arg = newArg; // consider calling fgMorphTree(newArg);
5476 #endif // FEATURE_MULTIREG_ARGS
5481 // Make a copy of a struct variable if necessary, to pass to a callee.
5482 // Installs the tree that computes the outgoing arg back into the arg list entry.
5483 void Compiler::fgMakeOutgoingStructArgCopy(
5487 CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5488 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5490 GenTree* argx = args->Current();
5491 noway_assert(argx->gtOper != GT_MKREFANY);
5492 // See if we need to insert a copy at all
5493 // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
5494 // but if there is only one use and no loops, the use must be last.
5495 GenTreeLclVarCommon* lcl = nullptr;
5496 if (argx->OperIsLocal())
5498 lcl = argx->AsLclVarCommon();
5500 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5502 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5506 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5507 if (lvaIsImplicitByRefLocal(varNum))
5509 LclVarDsc* varDsc = &lvaTable[varNum];
5510 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
5511 // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5512 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5513 // struct parameters if they are passed as arguments to a tail call.
5514 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5516 varDsc->lvRefCnt = 0;
5517 args->gtOp.gtOp1 = lcl;
5518 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
5521 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5527 if (fgOutgoingArgTemps == nullptr)
5529 fgOutgoingArgTemps = hashBv::Create(this);
5535 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5536 // We do not reuse within a statement.
5537 if (!opts.MinOpts())
5540 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5542 LclVarDsc* varDsc = &lvaTable[lclNum];
5543 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5544 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5546 tmp = (unsigned)lclNum;
5548 JITDUMP("reusing outgoing struct arg");
5555 // Create the CopyBlk tree and insert it.
5559 // Here we don't need the unsafe value cls check, since the addr of this temp is used only in copyblk.
5560 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5561 lvaSetStruct(tmp, copyBlkClass, false);
5562 fgOutgoingArgTemps->setBit(tmp);
5565 fgCurrentlyInUseArgTemps->setBit(tmp);
5567 // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
5568 // allocated on the stack and their address passed.
5569 if (lclVarIsSIMDType(tmp))
5571 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5574 // Create a reference to the temp
5575 GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5576 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5578 // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5579 // to ref counting of the lclVars.
5580 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5582 if (argx->gtOper == GT_OBJ)
5584 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5585 argx->SetIndirExceptionFlags(this);
5589 argx->gtFlags |= GTF_DONT_CSE;
5592 // Copy the valuetype to the temp
5593 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5594 GenTree* copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5595 copyBlk = fgMorphCopyBlock(copyBlk);
5597 #if FEATURE_FIXED_OUT_ARGS
5599 // Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
5600 // On Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode.
5601 GenTree* arg = copyBlk;
5603 #else // FEATURE_FIXED_OUT_ARGS
5605 // Structs are always on the stack, and thus never need temps,
5606 // so we have to put the copy and temp all into one expression.
5607 GenTree* arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5609 // Change the expression to "(tmp=val),tmp"
5610 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5612 #endif // FEATURE_FIXED_OUT_ARGS
5614 args->gtOp.gtOp1 = arg;
5615 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5621 // See declaration for specification comment.
5622 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5623 unsigned firstArgRegNum,
5624 regMaskTP* pArgSkippedRegMask)
5626 assert(varDsc->lvPromoted);
5627 // There's no way to do these calculations without breaking abstraction and assuming that
5628 // integer register arguments are consecutive ints. They are on ARM.
5630 // To start, figure out what register contains the last byte of the first argument.
5631 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5632 unsigned lastFldRegOfLastByte =
5633 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5636 // Now we're keeping track of the register that the last field ended in; see what registers
5637 // subsequent fields start in, and whether any are skipped.
5638 // (We assume here the invariant that the fields are sorted in offset order.)
5639 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5641 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5642 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5643 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5644 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5645 // This loop enumerates the offsets of any registers skipped: it starts at the
5646 // register after the one containing the last byte of the previous field and
5647 // stops before the register containing the current field's first byte.
5648 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5649 skippedRegOffsets++)
5651 // If the register number would not be an arg reg, we're done.
5652 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5654 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5656 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
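// Worked sketch (hypothetical layout): struct { int a; double b; } promoted with
// fields at offsets 0 and 8, passed starting at r0: 'a' ends in r0 and 'b' starts
// in r2, so the loop above records r1 in *pArgSkippedRegMask as skipped.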
5660 #endif // _TARGET_ARM_
5662 //****************************************************************************
5663 // fgFixupStructReturn:
5664 // The companion to impFixupCallStructReturn. Now that the importer is done,
5665 // change the gtType to the precomputed native return type.
5666 // Requires that callNode currently has a struct type.
5668 void Compiler::fgFixupStructReturn(GenTree* callNode)
5670 assert(varTypeIsStruct(callNode));
5672 GenTreeCall* call = callNode->AsCall();
5673 bool callHasRetBuffArg = call->HasRetBufArg();
5674 bool isHelperCall = call->IsHelperCall();
5676 // Decide on the proper return type for this call that currently returns a struct
5678 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5679 Compiler::structPassingKind howToReturnStruct;
5680 var_types returnType;
5682 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5683 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5685 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5686 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5687 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5691 assert(!callHasRetBuffArg);
5692 assert(retClsHnd == NO_CLASS_HANDLE);
5694 // Now that we are past the importer, re-type this node
5695 howToReturnStruct = SPK_PrimitiveType;
5696 returnType = (var_types)call->gtReturnType;
5700 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5703 if (howToReturnStruct == SPK_ByReference)
5705 assert(returnType == TYP_UNKNOWN);
5706 assert(callHasRetBuffArg);
5710 assert(returnType != TYP_UNKNOWN);
5712 if (!varTypeIsStruct(returnType))
5714 // Widen the primitive type if necessary
5715 returnType = genActualType(returnType);
5717 call->gtType = returnType;
5720 #if FEATURE_MULTIREG_RET
5721 // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
5722 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5723 #else // !FEATURE_MULTIREG_RET
5724 // No more struct returns
5725 assert(call->TypeGet() != TYP_STRUCT);
5728 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5729 // If it was a struct return, it has been transformed into a call
5730 // with a return buffer (that returns TYP_VOID) or into a return
5731 // of a primitive/enregisterable type
5732 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5736 /*****************************************************************************
5738 * A little helper used to rearrange nested commutative operations. The
5739 * effect is that nested associative, commutative operations are transformed
5740 * into a 'left-deep' tree, i.e. into something like this:
5742 * (((a op b) op c) op d) op...
5747 void Compiler::fgMoveOpsLeft(GenTree* tree)
5755 op1 = tree->gtOp.gtOp1;
5756 op2 = tree->gtOp.gtOp2;
5757 oper = tree->OperGet();
5759 noway_assert(GenTree::OperIsCommutative(oper));
5760 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5761 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5762 noway_assert(oper == op2->gtOper);
5764 // Commutativity doesn't hold if overflow checks are needed
5766 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5771 if (gtIsActiveCSE_Candidate(op2))
5773 // If we have marked op2 as a CSE candidate,
5774 // we can't perform a commutative reordering
5775 // because any value numbers that we computed for op2
5776 // will be incorrect after performing a commutative reordering
5781 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5786 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5787 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5792 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5794 // We could deal with this, but we were always broken and just hit the assert
5795 // below regarding flags, which means it's not frequent, so we just bail out.
5800 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5802 GenTree* ad1 = op2->gtOp.gtOp1;
5803 GenTree* ad2 = op2->gtOp.gtOp2;
5805 // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT.
5806 // We cannot reorder such GT_OR trees.
5808 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5813 #if FEATURE_PREVENT_BAD_BYREFS
5815 // Don't split up a byref calculation and create a new byref. E.g.,
5816 // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int).
5817 // Doing this transformation could create a situation where the first
5818 // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that
5819 // no longer points within the ref object. If a GC happens, the byref won't
5820 // get updated. This can happen, for instance, if one of the int components
5821 // is negative. It also requires the address generation be in a fully-interruptible code region.
5824 if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL)
5826 assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD));
5830 #endif // FEATURE_PREVENT_BAD_BYREFS
5832 /* Change "(x op (y op z))" to "(x op y) op z" */
5833 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5835 GenTree* new_op1 = op2;
5837 new_op1->gtOp.gtOp1 = op1;
5838 new_op1->gtOp.gtOp2 = ad1;
5840 /* Change the flags. */
5842 // Make sure we aren't throwing away any flags.
5843 noway_assert((new_op1->gtFlags &
5844 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5845 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5846 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5849 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5850 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5852 /* Retype new_op1 if it has now become (or is no longer) a GC ptr. */
5854 if (varTypeIsGC(op1->TypeGet()))
5856 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5857 oper == GT_ADD) || // byref(ref + (int+int))
5858 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5859 oper == GT_OR)); // int(gcref | int(gcref|intval))
5861 new_op1->gtType = tree->gtType;
5863 else if (varTypeIsGC(ad2->TypeGet()))
5865 // Neither ad1 nor op1 is GC, so new_op1 isn't either.
5866 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5867 new_op1->gtType = TYP_I_IMPL;
5870 // If new_op1 is a new expression, assign it a new unique value number.
5871 // vnStore is null before the ValueNumber phase has run
5872 if (vnStore != nullptr)
5874 // We can only keep the old value number on new_op1 if both op1 and ad2
5875 // have the same non-NoVN value numbers. Since op is commutative, comparing
5876 // only ad2 and op1 is enough.
5877 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5878 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5879 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5881 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5885 tree->gtOp.gtOp1 = new_op1;
5886 tree->gtOp.gtOp2 = ad2;
5888 /* If 'new_op1' is now the same nested op, process it recursively */
5890 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5892 fgMoveOpsLeft(new_op1);
5895 /* If 'ad2' is now the same nested op, process it
5896 * Instead of recursion, we set up op1 and op2 for the next loop.
5901 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5908 /*****************************************************************************/
5910 void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay)
5912 if (tree->OperIsBoundsCheck())
5914 GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk();
5915 BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay, &boundsChk->gtStkDepth);
5916 if (failBlock != nullptr)
5918 boundsChk->gtIndRngFailBB = gtNewCodeRef(failBlock);
5921 else if (tree->OperIs(GT_INDEX_ADDR))
5923 GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
5924 BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay, &indexAddr->gtStkDepth);
5925 if (failBlock != nullptr)
5927 indexAddr->gtIndRngFailBB = gtNewCodeRef(failBlock);
5932 noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX));
5933 fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay, nullptr);
5937 BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay, unsigned* stkDepth)
5943 #if !FEATURE_FIXED_OUT_ARGS
5944 // we need to initialize this field
5945 if (fgGlobalMorph && (stkDepth != nullptr))
5947 *stkDepth = fgPtrArgCntCur;
5949 #endif // !FEATURE_FIXED_OUT_ARGS
5952 if (!opts.compDbgCode)
5954 if (delay || compIsForInlining())
5956 #if !FEATURE_FIXED_OUT_ARGS
5957 // We delay this until after loop-oriented range check analysis. For now we merely store the current stack
5958 // level in the tree node.
5959 if (stkDepth != nullptr)
5961 *stkDepth = fgPtrArgCntCur;
5963 #endif // !FEATURE_FIXED_OUT_ARGS
5967 #if !FEATURE_FIXED_OUT_ARGS
5968 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5969 noway_assert(fgGlobalMorph || (stkDepth != nullptr));
5970 const unsigned theStkDepth = fgGlobalMorph ? fgPtrArgCntCur : *stkDepth;
5971 #else // FEATURE_FIXED_OUT_ARGS
5972 // only x86 pushes args
5973 const unsigned theStkDepth = 0;
5974 #endif // FEATURE_FIXED_OUT_ARGS
5976 // Create/find the appropriate "range-fail" label
5977 return fgRngChkTarget(compCurBB, theStkDepth, kind);
5984 /*****************************************************************************
5986 * Expand a GT_INDEX node and fully morph the child operands
5988 * The original GT_INDEX node is bashed into the GT_IND node that accesses
5989 * the array element. We expand the GT_INDEX node into a larger tree that
5990 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5991 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
5992 * For complex array or index expressions one or more GT_COMMA assignments
5993 * are inserted so that we only evaluate the array or index expressions once.
5995 * The fully expanded tree is then morphed. This causes gtFoldExpr to
5996 * perform local constant prop and reorder the constants in the tree and fold them.
5999 * We then parse the resulting array element expression in order to locate
6000 * and label the constants and variables that occur in the tree.
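 *
 * As an illustrative sketch (assuming a simple "a[i]" on an int[] with no
 * complex subexpressions, so no temps are needed), the expansion is roughly:
 *
 *    COMMA(GT_ARR_BOUNDS_CHECK(i, GT_ARR_LENGTH(a)),
 *          GT_IND(GT_ADD(a, GT_ADD(GT_MUL(i, elemSize), elemOffs))))
 *
 * where elemSize is 4 for an int[] and elemOffs is the offset of the first element.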
6003 const int MAX_ARR_COMPLEXITY = 4;
6004 const int MAX_INDEX_COMPLEXITY = 4;
6006 GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
6008 noway_assert(tree->gtOper == GT_INDEX);
6009 GenTreeIndex* asIndex = tree->AsIndex();
6011 var_types elemTyp = tree->TypeGet();
6012 unsigned elemSize = tree->gtIndex.gtIndElemSize;
6013 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
6015 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
6018 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= maxSIMDStructBytes())
6020 // If this is a SIMD type, this is the point at which we lose the type information,
6021 // so we need to set the correct type on the GT_IND.
6022 // (We don't care about the base type here, so we only check, but don't retain, the return value).
6023 unsigned simdElemSize = 0;
6024 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
6026 assert(simdElemSize == elemSize);
6027 elemTyp = getSIMDTypeForSize(elemSize);
6028 // This is the new type of the node.
6029 tree->gtType = elemTyp;
6030 // Now set elemStructType to null so that we don't confuse value numbering.
6031 elemStructType = nullptr;
6034 #endif // FEATURE_SIMD
6036 // Set up the array length's offset into lenOffs
6037 // and the first element's offset into elemOffs.
6040 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
6042 lenOffs = offsetof(CORINFO_String, stringLen);
6043 elemOffs = offsetof(CORINFO_String, chars);
6044 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
6046 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
6048 lenOffs = offsetof(CORINFO_RefArray, length);
6049 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
6051 else // We have a standard array
6053 lenOffs = offsetof(CORINFO_Array, length);
6054 elemOffs = offsetof(CORINFO_Array, u1Elems);
6057 #ifndef LEGACY_BACKEND
6058 // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts
6059 // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
6060 // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
6061 // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
6064 // When we *are* optimizing, we fully expand GT_INDEX to:
6065 // 1. Evaluate the array address expression and store the result in a temp if the expression is complex or
6067 // 2. Evaluate the array index expression and store the result in a temp if the expression is complex or
6069 // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array))
6070 // 4. Compute the address of the element that will be accessed:
6071 // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize))
6072 // 5. Dereference the address with a GT_IND.
6074 // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows
6075 // for more straightforward bounds-check removal, CSE, etc.
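// As an illustrative sketch for "a[i]" (node names abbreviated, temps omitted):
//
//    minopts:    IND(INDEX_ADDR(a, i))
//
//    optimizing: COMMA(ARR_BOUNDS_CHK(i, ARR_LENGTH(a)),
//                      IND(ADD(a, ADD(MUL(i, elemSize), firstElementOffset))))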
6078 GenTree* const array = fgMorphTree(asIndex->Arr());
6079 GenTree* const index = fgMorphTree(asIndex->Index());
6081 GenTreeIndexAddr* const indexAddr =
6082 new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize,
6083 static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs));
6084 indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT;
6086 // Mark the indirection node as needing a range check if necessary.
6087 // Note this will always be true unless JitSkipArrayBoundCheck() is used
6088 if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0)
6090 fgSetRngChkTarget(indexAddr);
6093 // Change `tree` into an indirection and return.
6094 tree->ChangeOper(GT_IND);
6095 GenTreeIndir* const indir = tree->AsIndir();
6096 indir->Addr() = indexAddr;
6097 indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT);
6100 indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
6105 #endif // LEGACY_BACKEND
6107 GenTree* arrRef = asIndex->Arr();
6108 GenTree* index = asIndex->Index();
6110 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
6111 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
6113 GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
6114 GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
6115 GenTree* bndsChk = nullptr;
6117 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
6120 GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
6121 GenTree* index2 = nullptr;
6123 // If the arrRef expression involves an assignment, a call or reads from global memory,
6124 // then we *must* allocate a temporary in which to "localize" those values,
6125 // to ensure that the same values are used in the bounds check and the actual
6126 // dereference.
6127 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
6128 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
6129 // complexity is not exposed. (Without that condition there are cases of local struct
6130 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
6131 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
6133 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
6134 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
6136 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
6137 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
6138 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
6139 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
6143 arrRef2 = gtCloneExpr(arrRef);
6144 noway_assert(arrRef2 != nullptr);
6147 // If the index expression involves an assignment, a call or reads from global memory,
6148 // we *must* allocate a temporary in which to "localize" those values,
6149 // to ensure that the same values are used in the bounds check and the actual
6150 // dereference.
6151 // Also we allocate the temporary when the index is sufficiently complex/expensive.
6153 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
6154 (index->OperGet() == GT_FIELD))
6156 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
6157 indexDefn = gtNewTempAssign(indexTmpNum, index);
6158 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
6159 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
6163 index2 = gtCloneExpr(index);
6164 noway_assert(index2 != nullptr);
6167 // Next introduce a GT_ARR_BOUNDS_CHECK node
6168 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
6170 #ifdef _TARGET_64BIT_
6171 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
6172 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
6173 // the comparison will have to be widened to 64 bits.
6174 if (index->TypeGet() == TYP_I_IMPL)
6176 bndsChkType = TYP_I_IMPL;
6178 #endif // _TARGET_64BIT_
6180 GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs);
6182 if (bndsChkType != TYP_INT)
6184 arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType);
6187 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
6188 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
6190 bndsChk = arrBndsChk;
6192 // Make sure to increment ref-counts if already ref-counted.
6193 if (lvaLocalVarRefCounted)
6195 lvaRecursiveIncRefCounts(index);
6196 lvaRecursiveIncRefCounts(arrRef);
6199 // Now we'll switch to using the second copies for arrRef and index
6200 // to compute the address expression.
6202 arrRef = arrRef2;
6203 index = index2;
6206 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
6210 #ifdef _TARGET_64BIT_
6211 // Widen 'index' on 64-bit targets
6212 if (index->TypeGet() != TYP_I_IMPL)
6214 if (index->OperGet() == GT_CNS_INT)
6216 index->gtType = TYP_I_IMPL;
6220 index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL);
6223 #endif // _TARGET_64BIT_
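// For example (roughly): a TYP_INT index "i" becomes CAST(TYP_I_IMPL, i), while a
// constant index such as 5 is simply retyped to TYP_I_IMPL with no cast node.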
6225 /* Scale the index value if necessary */
6228 GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL);
6230 // Fix 392756 WP7 Crossgen
6232 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
6233 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
6234 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
6236 size->gtFlags |= GTF_DONT_CSE;
6238 /* Multiply by the array element size */
6239 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
6246 #if FEATURE_PREVENT_BAD_BYREFS
6248 // Be careful to only create the byref pointer when the full index expression is added to the array reference.
6249 // We don't want to create a partial byref address expression that doesn't include the full index offset:
6250 // a byref must point within the containing object. It is dangerous (especially when optimizations come into
6251 // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that
6252 // the partial byref will not point within the object, and thus not get updated correctly during a GC.
6253 // This is mostly a risk in fully-interruptible code regions.
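// As an illustrative sketch: under FEATURE_PREVENT_BAD_BYREFS we build
//    ADD(TYP_BYREF, arrRef, ADD(TYP_I_IMPL, MUL(index, elemSize), elemOffs))
// so the single byref-producing ADD folds in the entire offset at once, instead of
//    ADD(TYP_BYREF, ADD(TYP_BYREF, arrRef, MUL(index, elemSize)), elemOffs)
// whose inner ADD creates an intermediate byref that need not point into the object.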
6255 /* Add the first element's offset */
6257 GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
6259 addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
6261 /* Add the object ref to the element's offset */
6263 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
6265 #else // !FEATURE_PREVENT_BAD_BYREFS
6267 /* Add the object ref to the element's offset */
6269 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
6271 /* Add the first element's offset */
6273 GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
6275 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
6277 #endif // !FEATURE_PREVENT_BAD_BYREFS
6279 #if SMALL_TREE_NODES
6280 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
6283 // Change the original GT_INDEX node into a GT_IND node
6284 tree->SetOper(GT_IND);
6286 // If the index node is a floating-point type, notify the compiler
6287 // we'll potentially use floating point registers at the time of codegen.
6288 if (varTypeIsFloating(tree->gtType))
6290 this->compFloatingPointUsed = true;
6293 // We've now consumed the GTF_INX_RNGCHK, and the node
6294 // is no longer a GT_INDEX node.
6295 tree->gtFlags &= ~GTF_INX_RNGCHK;
6297 tree->gtOp.gtOp1 = addr;
6299 // This is an array index expression.
6300 tree->gtFlags |= GTF_IND_ARR_INDEX;
6302 /* An indirection will cause a GPF if the address is null */
6303 tree->gtFlags |= GTF_EXCEPT;
6307 tree->gtFlags |= GTF_DONT_CSE;
6310 // Store information about it.
6311 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
6313 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
6315 GenTree* indTree = tree;
6317 // Did we create a bndsChk tree?
6320 // Use a GT_COMMA node to prepend the array bound check
6322 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
6324 /* Mark the indirection node as needing a range check */
6325 fgSetRngChkTarget(bndsChk);
6328 if (indexDefn != nullptr)
6330 // Use a GT_COMMA node to prepend the index assignment
6332 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
6334 if (arrRefDefn != nullptr)
6336 // Use a GT_COMMA node to prepend the arrRef assignment
6338 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
6341 // Currently we morph the tree to perform some folding operations prior
6342 // to attaching fieldSeq info and labeling constant array index contributions.
6344 tree = fgMorphTree(tree);
6346 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
6347 // constant array index contributions, but the morphing operation may have changed
6348 // the 'tree' into something that now unconditionally throws an exception.
6350 // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified
6351 // or it could be left unchanged. If it is unchanged then we should not return,
6352 // instead we should proceed to attaching fieldSeq info, etc...
6354 GenTree* arrElem = tree->gtEffectiveVal();
6356 if (fgIsCommaThrow(tree))
6358 if ((arrElem != indTree) || // A new tree node may have been created
6359 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
6361 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
6365 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
6367 addr = arrElem->gtOp.gtOp1;
6369 assert(addr->TypeGet() == TYP_BYREF);
6371 GenTree* cnsOff = nullptr;
6372 if (addr->OperGet() == GT_ADD)
6375 #if FEATURE_PREVENT_BAD_BYREFS
6377 assert(addr->TypeGet() == TYP_BYREF);
6378 assert(addr->gtOp.gtOp1->TypeGet() == TYP_REF);
6380 addr = addr->gtOp.gtOp2;
6382 // Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
6384 if (addr->gtOper == GT_CNS_INT)
6391 if ((addr->OperGet() == GT_ADD) && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT))
6393 cnsOff = addr->gtOp.gtOp2;
6394 addr = addr->gtOp.gtOp1;
6397 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6398 addr->LabelIndex(this);
6401 #else // !FEATURE_PREVENT_BAD_BYREFS
6403 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
6405 cnsOff = addr->gtOp.gtOp2;
6406 addr = addr->gtOp.gtOp1;
6409 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
6411 assert(addr->TypeGet() == TYP_BYREF);
6412 GenTree* index = addr->gtOp.gtOp2;
6414 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6415 index->LabelIndex(this);
6417 addr = addr->gtOp.gtOp1;
6419 assert(addr->TypeGet() == TYP_REF);
6421 #endif // !FEATURE_PREVENT_BAD_BYREFS
6423 else if (addr->OperGet() == GT_CNS_INT)
6428 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
6430 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
6432 // Assign it the [#FirstElem] field sequence
6434 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
6436 else // We have folded the first element's offset with the index expression
6438 // Build the [#ConstantIndex, #FirstElem] field sequence
6440 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
6441 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
6443 if (cnsOff == nullptr) // It must have folded into a zero offset
6445 // Record in the general zero-offset map.
6446 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6450 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
6458 /*****************************************************************************
6460 * Wrap fixed stack arguments for varargs functions to go through varargs
6461 * cookie to access them, except for the cookie itself.
6463 * Non-x86 platforms are allowed to access all arguments directly
6464 * so we don't need this code.
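 *
 * As an illustrative sketch (x86 only): a load of a fixed stack argument
 * becomes roughly
 *
 *    GT_IND(GT_SUB(GT_LCL_VAR<lvaVarargsBaseOfStkArgs>, offsetCns))
 *
 * where offsetCns folds together the argument's stack offset, the callee
 * register arg area, and lclOffs, exactly as computed in the code below.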
6467 GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6469 /* For the fixed stack arguments of a varargs function, we need to go
6470 through the varargs cookies to access them, except for the
6473 LclVarDsc* varDsc = &lvaTable[lclNum];
6475 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6477 // Create a node representing the local pointing to the base of the args
6478 GenTree* ptrArg =
6479 gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
6480 gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES +
6481 lclOffs));
6483 // Access the argument through the local
6485 if (varTypeIsStruct(varType))
6487 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6491 tree = gtNewOperNode(GT_IND, varType, ptrArg);
6493 tree->gtFlags |= GTF_IND_TGTANYWHERE;
6495 if (varDsc->lvAddrExposed)
6497 tree->gtFlags |= GTF_GLOB_REF;
6500 return fgMorphTree(tree);
6507 /*****************************************************************************
6509 * Transform the given GT_LCL_VAR tree for code generation.
6512 GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph)
6514 assert(tree->gtOper == GT_LCL_VAR);
6516 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
6517 var_types varType = lvaGetRealType(lclNum);
6518 LclVarDsc* varDsc = &lvaTable[lclNum];
6520 if (varDsc->lvAddrExposed)
6522 tree->gtFlags |= GTF_GLOB_REF;
6526 if (info.compIsVarArgs)
6528 GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6529 if (newTree != nullptr)
6531 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6533 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6538 #endif // _TARGET_X86_
6540 /* If not during the global morphing phase bail */
6542 if (!fgGlobalMorph && !forceRemorph)
6547 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6549 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6551 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6553 #if LOCAL_ASSERTION_PROP
6554 /* Assertion prop can tell us to omit adding a cast here */
6555 if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
6560 /* Small-typed arguments and aliased locals are normalized on load;
6561 other small-typed locals are normalized on store.
6562 Locals are also normalized on load under the debugger, since the
6563 debugger could write to the variable. If this is one of the former,
6564 insert a narrowing cast on the load, ie. convert: var-short --> cast-short(var-int) */
6566 tree->gtType = TYP_INT;
6567 fgMorphTreeDone(tree);
6568 tree = gtNewCastNode(TYP_INT, tree, false, varType);
6569 fgMorphTreeDone(tree);
6576 /*****************************************************************************
6577 Grab a temp for big offset morphing.
6578 This method will grab a new temp if no temp of this "type" has been created yet,
6579 or return the same cached one if it has.
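
 As an illustrative sketch: the first request for a TYP_BYREF temp grabs a fresh
 local and caches it in fgBigOffsetMorphingTemps[TYP_BYREF]; every later TYP_BYREF
 request returns that same local, so all big-offset morphs of byref addresses
 share a single temp.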
6581 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6583 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6585 if (lclNum == BAD_VAR_NUM)
6587 // We haven't created a temp for this kind of type. Create one now.
6588 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6589 fgBigOffsetMorphingTemps[type] = lclNum;
6593 // We better get the right type.
6594 noway_assert(lvaTable[lclNum].TypeGet() == type);
6597 noway_assert(lclNum != BAD_VAR_NUM);
6601 /*****************************************************************************
6603 * Transform the given GT_FIELD tree for code generation.
6606 GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
6608 assert(tree->gtOper == GT_FIELD);
6610 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6611 unsigned fldOffset = tree->gtField.gtFldOffset;
6612 GenTree* objRef = tree->gtField.gtFldObj;
6613 bool fieldMayOverlap = false;
6614 bool objIsLocal = false;
6616 if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6618 // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
6619 // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6620 // simd field rewrites are sensitive to.
6621 fgMorphImplicitByRefArgs(objRef);
6624 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6625 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6627 if (tree->gtField.gtFldMayOverlap)
6629 fieldMayOverlap = true;
6630 // Reset the flag because we may reuse the node.
6631 tree->gtField.gtFldMayOverlap = false;
6635 // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6638 GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6639 if (newTree != tree)
6641 newTree = fgMorphSmpOp(newTree);
6645 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6647 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6650 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6655 /* Is this an instance data member? */
6660 objIsLocal = objRef->IsLocal();
6662 if (tree->gtFlags & GTF_IND_TLS_REF)
6664 NO_WAY("instance field can not be a TLS ref.");
6667 /* We'll create the expression "*(objRef + mem_offs)" */
6669 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6671 // An optimization for Contextful classes:
6672 // we unwrap the proxy when we have a 'this reference'
6673 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6675 objRef = fgUnwrapProxy(objRef);
6679 Now we have a tree like this:
6680
6681                          +--------------------+
6682                          |      GT_FIELD      |   tree
6683                          +----------+---------+
6684                                     |
6685                      +--------------+-------------+
6686                      |   tree->gtField.gtFldObj   |
6687                      +--------------+-------------+
6688
6690 We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6691
6692                          +--------------------+
6693                          |   GT_IND/GT_OBJ    |   tree
6694                          +---------+----------+
6695                                    |
6697                          +---------+----------+
6698                          |       GT_ADD       |   addr
6699                          +---------+----------+
6700                                   / \
6702                                  /   \
6704               +-------------------+  +----------------------+
6705               |       objRef      |  |       fldOffset      |
6706               |                   |  | (when fldOffset !=0) |
6707               +-------------------+  +----------------------+
6708
6710 or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6711
6713                          +--------------------+
6714                          |   GT_IND/GT_OBJ    |   tree
6715                          +----------+---------+
6716                                     |
6717                          +----------+---------+
6718                          |      GT_COMMA      |   comma2
6719                          +----------+---------+
6720                                    / \
6722                                   /   \
6725           +---------+----------+   +---------+----------+
6726     comma |      GT_COMMA      |   |  "+" (i.e. GT_ADD) |   addr
6727           +---------+----------+   +---------+----------+
6728                     / \                       / \
6730                    /   \                     /   \
6732       +-----+-----+     +-----+-----+   +---------+   +-----------+
6733   asg |  GT_ASG   | ind |   GT_IND  |   |  tmpLcl |   | fldOffset |
6734       +-----+-----+     +-----+-----+   +---------+   +-----------+
6735             / \               |
6737            /   \              |
6739   +-----------+ +-----------+ +-----------+
6740   |   tmpLcl  | |   objRef  | |   tmpLcl  |
6741   +-----------+ +-----------+ +-----------+
6746 var_types objRefType = objRef->TypeGet();
6748 GenTree* comma = nullptr;
6750 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6751 // and thus is equivalent to a MACK_Ind with zero offset.
6752 MorphAddrContext defMAC(MACK_Ind);
6758 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6759 // This means that we insert an explicit null check whenever we create byref by adding a
6760 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6761 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6762 // small offsets); in this plan, we would transfer some null-checking responsibility to
6763 // callees of methods taking byref parameters. They would have to add explicit null checks
6764 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6765 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6766 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6767 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6768 // This is left here to point out how to implement it.
6769 CLANG_FORMAT_COMMENT_ANCHOR;
6771 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
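// As an illustrative sketch under the conservative scheme: creating "&obj.f" with a
// nonzero field offset (a MACK_Addr context) gets an explicit null check, yielding
// roughly COMMA(NULLCHECK(obj), ADD(obj, fldOffset)) rather than a bare ADD, so a
// null obj faults here instead of wherever the byref is eventually dereferenced.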
6773 bool addExplicitNullCheck = false;
6775 // Implicit byref locals are never null.
6776 if (!((objRef->gtOper == GT_LCL_VAR) && lvaIsImplicitByRefLocal(objRef->gtLclVarCommon.gtLclNum)))
6778 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6779 // whose address is being taken is either a local or static variable, whose address is necessarily
6780 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6781 if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind))
6783 if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset))
6785 addExplicitNullCheck = true;
6789 // In R2R mode the field offset for some fields may change when the code
6790 // is loaded. So we can't rely on a zero offset here to suppress the null check.
6792 // See GitHub issue #16454.
6793 bool fieldHasChangeableOffset = false;
6795 #ifdef FEATURE_READYTORUN_COMPILER
6796 fieldHasChangeableOffset = (tree->gtField.gtFieldLookup.addr != nullptr);
6799 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6800 addExplicitNullCheck = (mac->m_kind == MACK_Addr) &&
6801 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset);
6803 addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
6804 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset));
6810 if (addExplicitNullCheck)
6815 printf("Before explicit null check morphing:\n");
6821 // Create the "comma" subtree
6823 GenTree* asg = nullptr;
6828 if (objRef->gtOper != GT_LCL_VAR)
6830 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6832 // Create the "asg" node
6833 asg = gtNewTempAssign(lclNum, objRef);
6837 lclNum = objRef->gtLclVarCommon.gtLclNum;
6840 // Create the "nullchk" node.
6841 // Make it TYP_BYTE so we only dereference it for 1 byte.
6842 GenTree* lclVar = gtNewLclvNode(lclNum, objRefType);
6843 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6845 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6847 // An indirection will cause a GPF if the address is null.
6848 nullchk->gtFlags |= GTF_EXCEPT;
6850 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6851 optMethodFlags |= OMF_HAS_NULLCHECK;
6855 // Create the "comma" node.
6856 comma = gtNewOperNode(GT_COMMA,
6857 TYP_VOID, // We don't want to return anything from this "comma" node.
6858 // Set the type to TYP_VOID, so we can select "cmp" instruction
6859 // instead of "mov" instruction later on.
6867 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6869 else if (fldOffset == 0)
6871 // Generate the "addr" node.
6872 addr = objRef;
6873 FieldSeqNode* fieldSeq =
6874 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6875 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6882 #ifdef FEATURE_READYTORUN_COMPILER
6883 if (tree->gtField.gtFieldLookup.addr != nullptr)
6885 GenTree* offsetNode = nullptr;
6886 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6888 offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->gtField.gtFieldLookup.addr,
6889 GTF_ICON_FIELD_HDL, false);
6893 noway_assert(!"unexpected accessType for R2R field access");
6896 var_types addType = (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF;
6897 addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode);
6902 // Generate the "addr" node.
6903 /* Add the member offset to the object's address */
6904 FieldSeqNode* fieldSeq =
6905 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6906 addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6907 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6910 // Now let's set the "tree" as a GT_IND tree.
6912 tree->SetOper(GT_IND);
6913 tree->gtOp.gtOp1 = addr;
6915 tree->gtFlags &= (~GTF_EXCEPT | addr->gtFlags);
6916 tree->SetIndirExceptionFlags(this);
6918 if (addExplicitNullCheck)
6921 // Create "comma2" node and link it to "tree".
6924 comma2 = gtNewOperNode(GT_COMMA,
6925 addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6927 tree->gtOp.gtOp1 = comma2;
6933 if (addExplicitNullCheck)
6935 printf("After adding explicit null check:\n");
6941 else /* This is a static data member */
6943 if (tree->gtFlags & GTF_IND_TLS_REF)
6945 // Thread Local Storage static field reference
6947 // Field ref is a TLS 'Thread-Local-Storage' reference
6949 //           Build this tree:  IND(*) #
6950 //                              |
6951 //                             ADD(I_IMPL)
6952 //                             / \
6953 //                            /  CNS(fldOffset)
6954 //                           /
6955 //                          /
6956 //                         /
6957 //           IND(I_IMPL) == [Base of this DLL's TLS]
6958 //                          |
6959 //                         ADD(I_IMPL)
6960 //                         / \
6961 //                        /   CNS(IdValue*4) or MUL
6962 //                       /                      / \
6963 //                      IND(I_IMPL)            /  CNS(4)
6964 //                       |                    /
6965 //                      CNS(TLS_HDL,0x2C)   IND
6966 //                                           |
6967 //                                          CNS(pIdAddr)
6968 //
6969 // # Denotes the original node
6971 void** pIdAddr = nullptr;
6972 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
6975 // If we can access the TLS DLL index ID value directly,
6976 // then pIdAddr will be NULL and
6977 // IdValue will be the actual TLS DLL index ID
6979 GenTree* dllRef = nullptr;
6980 if (pIdAddr == nullptr)
6984 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6989 dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_STATIC_HDL, true);
6991 // Next we multiply by 4
6992 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6995 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6997 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6999 GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
7001 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
7002 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
7004 tree->gtFlags &= ~GTF_FLD_INITCLASS;
7005 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
7008 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
7010 if (dllRef != nullptr)
7012 /* Add the dllRef */
7013 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
7016 /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
7017 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
7021 FieldSeqNode* fieldSeq =
7022 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7023 GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
7025 /* Add the TLS static field offset to the address */
7027 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
7030 // Final indirect to get to actual value of TLS static field
7032 tree->SetOper(GT_IND);
7033 tree->gtOp.gtOp1 = tlsRef;
7035 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
7039 // Normal static field reference
7042 // If we can access the static's address directly,
7043 // then pFldAddr will be NULL and
7044 // fldAddr will be the actual address of the static field
7046 void** pFldAddr = nullptr;
7047 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
7049 if (pFldAddr == nullptr)
7051 #ifdef _TARGET_64BIT_
7052 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
7054 // The address is not directly addressable, so force it into a
7055 // constant, so we handle it properly.
7057 GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
7058 addr->gtType = TYP_I_IMPL;
7059 FieldSeqNode* fieldSeq =
7060 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7061 addr->gtIntCon.gtFieldSeq = fieldSeq;
7062 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
7063 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
7065 tree->gtFlags &= ~GTF_FLD_INITCLASS;
7066 addr->gtFlags |= GTF_ICON_INITCLASS;
7069 tree->SetOper(GT_IND);
7070 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
7071 // We must clear it when we transform the node.
7072 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
7073 // that the logic above does its own checking to determine whether a nullcheck is needed.
7074 tree->gtFlags &= ~GTF_IND_ARR_LEN;
7075 tree->gtOp.gtOp1 = addr;
7077 return fgMorphSmpOp(tree);
7080 #endif // _TARGET_64BIT_
7082 // Only volatile or classinit could be set, and they map over
7083 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
7084 static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
7085 static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
7086 tree->SetOper(GT_CLS_VAR);
7087 tree->gtClsVar.gtClsVarHnd = symHnd;
7088 FieldSeqNode* fieldSeq =
7089 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7090 tree->gtClsVar.gtFieldSeq = fieldSeq;
7097 GenTree* addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
7099 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
7100 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
7102 tree->gtFlags &= ~GTF_FLD_INITCLASS;
7103 addr->gtFlags |= GTF_ICON_INITCLASS;
7106 // There are two cases here, either the static is RVA based,
7107 // in which case the type of the FIELD node is not a GC type
7108 // and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is
7109 // a GC type and the handle to it is a TYP_BYREF in the GC heap
7110 // because handles to statics now go into the large object heap
7112 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
7113 GenTree* op1 = gtNewOperNode(GT_IND, handleTyp, addr);
7114 op1->gtFlags |= GTF_IND_INVARIANT;
7116 tree->SetOper(GT_IND);
7117 tree->gtOp.gtOp1 = op1;
7121 noway_assert(tree->gtOper == GT_IND);
7122 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
7123 // We must clear it when we transform the node.
7124 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
7125 // that the logic above does its own checking to determine whether a nullcheck is needed.
7126 tree->gtFlags &= ~GTF_IND_ARR_LEN;
7128 GenTree* res = fgMorphSmpOp(tree);
7130 // If we have a struct type, this node would previously have been under a GT_ADDR,
7131 // and therefore would have been marked GTF_DONT_CSE.
7132 // TODO-1stClassStructs: revisit this.
7133 if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
7135 res->gtFlags |= GTF_DONT_CSE;
7138 if (fldOffset == 0 && res->OperGet() == GT_IND)
7140 GenTree* addr = res->gtOp.gtOp1;
7141 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
7142 FieldSeqNode* fieldSeq =
7143 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7144 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
7150 //------------------------------------------------------------------------------
7151 // fgMorphCallInline: attempt to inline a call
7154 // call - call expression to inline, inline candidate
7155 // inlineResult - result tracking and reporting
7158 // Attempts to inline the call.
7160 // If successful, callee's IR is inserted in place of the call, and
7161 // is marked with an InlineContext.
7163 // If unsuccessful, the transformations done in anticipation of a
7164 // possible inline are undone, and the candidate flag on the call
7167 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
7169 // The call must be a candidate for inlining.
7170 assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
7172 // Attempt the inline
7173 fgMorphCallInlineHelper(call, inlineResult);
7175 // We should have made up our minds one way or another....
7176 assert(inlineResult->IsDecided());
7178 // If we failed to inline, we have a bit of work to do to cleanup
7179 if (inlineResult->IsFailure())
7184 // Before we do any cleanup, create a failing InlineContext to
7185 // capture details of the inlining attempt.
7186 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
7190 // It was an inline candidate, but we haven't expanded it.
7191 if (call->gtCall.gtReturnType != TYP_VOID)
7193 // Detach the GT_CALL tree from the original statement by
7194 // hanging a "nothing" node to it. Later the "nothing" node will be removed
7195 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
7197 noway_assert(fgMorphStmt->gtStmtExpr == call);
7198 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
7201 // Clear the Inline Candidate flag so we can ensure later we tried
7202 // inlining all candidates.
7204 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
7208 /*****************************************************************************
7209 * Helper to attempt to inline a call
7210 * Sets success/failure in inline result
7211 * If success, modifies current method's IR with inlinee's IR
7212 * If failed, undoes any speculative modifications to current method
7215 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
7217 // Don't expect any surprises here.
7218 assert(result->IsCandidate());
7220 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
7222 // For now, attributing this to call site, though it's really
7223 // more of a budget issue (lvaCount currently includes all
7224 // caller and prospective callee locals). We still might be
7225 // able to inline other callees into this caller, or inline
7226 // this callee in other callers.
7227 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
7231 if (call->IsVirtual())
7233 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
7237 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
7238 // and recursive tail calls as inline candidates.
7239 noway_assert(!call->IsTailPrefixedCall());
7240 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
7242 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
7243 Although we have checked this in impCanInline, it is possible that later IL instructions
7244 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
7247 if (opts.compNeedSecurityCheck)
7249 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
7254 // Calling inlinee's compiler to inline the method.
7257 unsigned startVars = lvaCount;
7262 printf("Expanding INLINE_CANDIDATE in statement ");
7263 printTreeID(fgMorphStmt);
7264 printf(" in BB%02u:\n", compCurBB->bbNum);
7265 gtDispTree(fgMorphStmt);
7266 if (call->IsImplicitTailCall())
7268 printf("Note: candidate is implicit tail call\n");
7273 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
7276 // Invoke the compiler to inline the call.
7279 fgInvokeInlineeCompiler(call, result);
7281 if (result->IsFailure())
7283 // Undo some changes made in anticipation of inlining...
7285 // Zero out the used locals
7286 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
7287 for (unsigned i = startVars; i < lvaCount; i++)
7289 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
7292 lvaCount = startVars;
7297 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
7307 // printf("After inlining lvaCount=%d.\n", lvaCount);
7312 //------------------------------------------------------------------------
7313 // fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp.
7316 // callee - The callee to check
7319 // Returns true or false based on whether the callee can be fastTailCalled
7322 // This function is target specific and each target will make the fastTailCall
7323 // decision differently. See the notes below.
7327 // A fast tail call can be made whenever the number of callee arguments
7328 // is less than or equal to the number of caller arguments, or we have four
7329 // or fewer callee arguments. This is because, on Windows AMD64, each
7330 // argument uses exactly one register or one 8-byte stack slot. Thus, we only
7331 // need to count arguments, and not be concerned with the size of each
7332 // incoming or outgoing argument.
7334 // Can fast tail call examples (amd64 Windows):
7336 // -- Callee will have all register arguments --
7337 // caller(int, int, int, int)
7338 // callee(int, int, float, int)
7340 // -- Callee requires stack space that is equal to the caller --
7341 // caller(struct, struct, struct, struct, struct, struct)
7342 // callee(int, int, int, int, int, int)
7344 // -- Callee requires stack space that is less than the caller --
7345 // caller(struct, double, struct, float, struct, struct)
7346 // callee(int, int, int, int, int)
7348 // -- Callee will have all register arguments --
7350 // callee(int, int, int, int)
7352 // Cannot fast tail call examples (amd64 Windows):
7354 // -- Callee requires stack space that is larger than the caller --
7355 // caller(struct, double, struct, float, struct, struct)
7356 // callee(int, int, int, int, int, double, double, double)
7358 // Unix Amd64 && Arm64:
7359 // A fastTailCall decision can be made whenever the callee's stack space is
7360 // less than or equal to the caller's stack space. There are many permutations
7361 // of when the caller and callee have different stack sizes if there are
7362 // structs being passed to either the caller or callee.
7365 // 1) If the callee has structs which cannot be enregistered it will be
7366 // reported as cannot fast tail call. This is an implementation limitation
7367 // where only the callee is checked for non-enregisterable structs. This is
7368 // tracked with https://github.com/dotnet/coreclr/issues/12644.
7370 // 2) If the caller or callee has stack arguments and the callee has more
7371 // arguments than the caller, it will be reported as cannot fast tail call.
7372 // This is due to a bug in LowerFastTailCall which assumes that
7373 // nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This
7374 // is tracked with https://github.com/dotnet/coreclr/issues/12468.
7376 // 3) If the callee has a 9 to 16 byte struct argument and the callee has
7377 // stack arguments, the decision will be to not fast tail call. This is
7378 // because before fgMorphArgs is done, it is unknown whether the struct
7379 // will be placed on the stack or enregistered. Therefore, the conservative
7380 // decision of not fast tail calling is taken. This limitation should be
7381 // removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
7383 // 4) Arm64 Only, if there are HFA arguments and the callee has stack
7384 // arguments, the decision will be reported as cannot fast tail call.
7385 // This is because before fgMorphArgs is done, it is unknown whether the struct
7386 // will be placed on the stack or enregistered. Therefore, the conservative
7387 // decision of not fast tail calling is taken.
7389 // Can fast tail call examples (amd64 Unix):
7391 // -- Callee will have all register arguments --
7392 // caller(int, int, int, int)
7393 // callee(int, int, float, int)
7395 // -- Callee requires stack space that is equal to the caller --
7396 // caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack
7398 // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
7400 // -- Callee requires stack space that is less than the caller --
7401 // caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack
7403 // callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space
7405 // -- Callee will have all register arguments --
7407 // callee(int, int, int, int)
7409 // Cannot fast tail call examples (amd64 Unix):
7411 // -- Callee requires stack space that is larger than the caller --
7412 // caller(float, float, float, float, float, float, float, float) -- 8 float register arguments
7413 // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
7415 // -- Callee has structs which cannot be enregistered (Implementation Limitation) --
7416 // caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register
7417 // arguments, 24 byte stack space
7418 // callee({ double, double, double }) -- 24 bytes stack space
7420 // -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) --
7421 // caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space
7422 // callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space
7424 // -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) --
7425 // caller({ double, double, double, double, double, double }) // 48 byte stack
7426 // callee(int, int) -- 2 int registers
7428 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
7430 #if FEATURE_FASTTAILCALL
7431 // To reach here means that the return types of the caller and callee are tail call compatible.
7432 // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
7434 // In an implicit tail call case callSig may not be available but it is guaranteed to be available
7435 // for explicit tail call cases. The reason implicit tail case callSig may not be available is that
7436 // a call node might be marked as an in-line candidate and could fail to be in-lined. In that case,
7437 // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which is
7438 // currently not copying/setting callSig.
7439 CLANG_FORMAT_COMMENT_ANCHOR;
7442 if (callee->IsTailPrefixedCall())
7444 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
7445 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
7449 auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) {
7451 if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
7453 if (callee->gtCallType != CT_INDIRECT)
7455 const char* methodName;
7457 methodName = eeGetMethodFullName(callee->gtCallMethHnd);
7459 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
7460 info.compFullName, methodName);
7464 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
7469 if (callerStackSize != -1)
7471 printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n", msg, callerStackSize, calleeStackSize);
7475 printf("%s\n\n", msg);
7480 JITDUMP("[Fast tailcall decision]: %s\n", msg);
7488 // Note on vararg methods:
7489 // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
7490 // But we can be sure that the in-coming arg area of the vararg caller would be sufficient to hold its
7491 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
7492 // out-going area required for callee is bounded by caller's fixed argument space.
7494 // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
7495 unsigned nCallerArgs = info.compArgsCount;
7497 size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount;
7498 size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount;
7500 // Count the callee args including implicit and hidden.
7501 // Note that GenericContext and VarargCookie are added by importer while
7502 // importing the call to gtCallArgs list along with explicit user args.
7503 size_t calleeArgRegCount = 0;
7504 size_t calleeFloatArgRegCount = 0;
7506 if (callee->gtCallObjp) // thisPtr
7508 ++calleeArgRegCount;
7511 if (callee->HasRetBufArg()) // RetBuf
7513 // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs.
7515 // If callee has RetBuf param, caller too must have it.
7516 // Otherwise go the slow route.
7517 if (info.compRetBuffArg == BAD_VAR_NUM)
7519 reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0);
7524 // Count user args while tracking whether any of them is a multi-byte param
7525 // that cannot be passed in a register. Note that we don't need to count
7526 // non-standard and secret params passed in registers (e.g. R10, R11) since
7527 // these won't contribute to out-going arg size.
7528 bool hasMultiByteStackArgs = false;
7529 bool hasTwoSlotSizedStruct = false;
7530 bool hasHfaArg = false;
7531 size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have.
7532 size_t calleeStackSize = 0;
7533 for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
7536 assert(args->OperIsList());
7537 GenTree* argx = args->gtOp.gtOp1;
7539 if (varTypeIsStruct(argx))
7541 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
7542 while (argx->gtOper == GT_COMMA)
7544 argx = argx->gtOp.gtOp2;
7547 // Get the size of the struct and see if it is register passable.
7548 CORINFO_CLASS_HANDLE objClass = nullptr;
7550 if (argx->OperGet() == GT_OBJ)
7552 objClass = argx->AsObj()->gtClass;
7554 else if (argx->IsLocal())
7556 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
7558 if (objClass != nullptr)
7560 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
7562 // hasMultiByteStackArgs will determine if the struct can be passed
7563 // in registers. If it cannot we will break the loop and not
7564 // fastTailCall. This is an implementation limitation
7565 // where only the callee is checked for non-enregisterable structs.
7566 // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
7567 unsigned typeSize = 0;
7568 hasMultiByteStackArgs = hasMultiByteStackArgs ||
7569 !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
7571 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
7572 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
7574 assert(objClass != nullptr);
7575 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
7577 if (structDesc.passedInRegisters)
7579 if (structDesc.eightByteCount == 2)
7581 hasTwoSlotSizedStruct = true;
7584 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
7586 if (structDesc.IsIntegralSlot(i))
7588 ++calleeArgRegCount;
7590 else if (structDesc.IsSseSlot(i))
7592 ++calleeFloatArgRegCount;
7596 assert(false && "Invalid eightbyte classification type.");
7603 calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE);
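// As an illustrative sketch of the classification above: struct { int; int; } forms one
// INTEGER eightbyte (+1 to calleeArgRegCount); struct { double; double; } forms two SSE
// eightbytes (+2 to calleeFloatArgRegCount and counts as a two-slot struct); a 24-byte
// struct is not passed in registers and is accounted against calleeStackSize instead.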
7606 #elif defined(_TARGET_ARM64_) // ARM64
7607 var_types hfaType = GetHfaType(argx);
7608 bool isHfaArg = varTypeIsFloating(hfaType);
7615 calleeFloatArgRegCount += GetHfaCount(argx);
7619 // Structs are either passed in 1 or 2 (64-bit) slots
7620 size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE);
7621 size = roundupSize / TARGET_POINTER_SIZE;
7630 hasTwoSlotSizedStruct = true;
7633 calleeArgRegCount += size;
7636 #elif defined(WINDOWS_AMD64_ABI)
7638 ++calleeArgRegCount;
7640 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
7643 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7645 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7649 hasMultiByteStackArgs = true;
7654 varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount;
7657 // We can break early on multiByte cases.
7658 if (hasMultiByteStackArgs)
7664 const unsigned maxRegArgs = MAX_REG_ARG;
7666 // If we reached here, it means that the callee has only argument types which can be passed in
7667 // a register and which, if passed on the stack, will occupy exactly one stack slot in the out-going arg area.
7668 // If we are passing args on stack for the callee and it has more args passed on stack than
7669 // the caller, then fast tail call cannot be performed.
7671 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7672 // as non-interruptible for fast tail calls.
7674 #ifdef WINDOWS_AMD64_ABI
7675 assert(calleeStackSize == 0);
7676 size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs)
7677 ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs
7678 : 0;
7679 calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE;
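// For example: 7 register-classified args with maxRegArgs == 4 leaves 3 stack slots,
// i.e. calleeStackSize == 3 * TARGET_POINTER_SIZE == 24 bytes on Windows x64.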
7680 size_t callerStackSize = info.compArgStackSize;
7682 bool hasStackArgs = false;
7684 if (callerStackSize > 0 || calleeStackSize > 0)
7686 hasStackArgs = true;
7689 // Go the slow route if it has multi-byte params. This is an implementation
7690 // limitation; see https://github.com/dotnet/coreclr/issues/12644.
7691 if (hasMultiByteStackArgs)
7693 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7697 // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then
7699 // make sure the callee's incoming argument count is no greater than the caller's.
7699 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
7701 reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
7706 #elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
7708 // For *nix Amd64 and Arm64 check to see if all arguments for the callee
7709 // and caller are passing in registers. If not, ensure that the outgoing argument stack size
7710 // requirement for the callee is less than or equal to the caller's entire stack frame usage.
7712 // Also, in the case that we have to pass arguments on the stack, make sure
7713 // that we are not dealing with structs that are >8 bytes.
7715 bool hasStackArgs = false;
7716 size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG;
7718 size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0;
7719 size_t calleeFloatStackArgCount =
7720 calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0;
7722 size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount;
7723 size_t callerStackSize = info.compArgStackSize;
7724 calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE;
7726 if (callerStackSize > 0 || calleeStackSize > 0)
7728 hasStackArgs = true;
7731 // Go the slow route if it has multi-byte params. This is an implementation
7732 // limitation; see https://github.com/dotnet/coreclr/issues/12644.
7733 if (hasMultiByteStackArgs)
7735 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7739     // Callee has a >8 and <=16 byte struct and arguments that have to go on the stack. Do not fastTailCall.
7740 if (calleeStackSize > 0 && hasTwoSlotSizedStruct)
7742 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct",
7743 callerStackSize, calleeStackSize);
7747     // Callee has an HFA struct and arguments that have to go on the stack. Do not fastTailCall.
7748 if (calleeStackSize > 0 && hasHfaArg)
7750 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize,
7758     // LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is
7759     // not true in many cases on x64 Linux; remove this pessimization when
7760     // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468
7761     // for more information.
7762 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
7764 reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
7769 if (calleeStackSize > callerStackSize)
7771 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize,
7778 NYI("fastTailCall not supported on this Architecture.");
7780 #endif // WINDOWS_AMD64_ABI
7782 reportFastTailCallDecision("Will fastTailCall", callerStackSize, calleeStackSize);
7784 #else // FEATURE_FASTTAILCALL
7789 /*****************************************************************************
7791 * Transform the given GT_CALL tree for tail call code generation.
7793 void Compiler::fgMorphTailCall(GenTreeCall* call)
7795 JITDUMP("fgMorphTailCall (before):\n");
7798 #if defined(_TARGET_ARM_)
7799 // For the helper-assisted tail calls, we need to push all the arguments
7800 // into a single list, and then add a few extra at the beginning
7802 // Check for PInvoke call types that we don't handle in codegen yet.
7803 assert(!call->IsUnmanaged());
7804 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7806 // First move the this pointer (if any) onto the regular arg list
7807 GenTree* thisPtr = NULL;
7808 if (call->gtCallObjp)
7810 GenTree* objp = call->gtCallObjp;
7811 call->gtCallObjp = NULL;
7813 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7815 thisPtr = gtClone(objp, true);
7816 var_types vt = objp->TypeGet();
7817 if (thisPtr == NULL)
7819 // Too complex, so use a temp
7820 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7821 GenTree* asg = gtNewTempAssign(lclNum, objp);
7822 if (!call->IsVirtualVtable())
7824 // Add an indirection to get the nullcheck
7825 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7826 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7827 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7829 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7830 thisPtr = gtNewLclvNode(lclNum, vt);
7832 else if (!call->IsVirtualVtable())
7834 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7835 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
7836 thisPtr = gtClone(thisPtr, true);
7839 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7842 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7845 // Add the extra VSD parameter if needed
7846 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
7847 if (call->IsVirtualStub())
7849 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
7850 #ifdef LEGACY_BACKEND
7852 if (call->gtCallType == CT_INDIRECT)
7854 arg = gtClone(call->gtCallAddr, true);
7855 noway_assert(arg != nullptr);
7859 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7860 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7861 arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7863 // Change the call type, so we can add the extra indirection here, rather than in codegen
7864 call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7865 call->gtStubCallStubAddr = NULL;
7866 call->gtCallType = CT_INDIRECT;
7868 arg->gtRegNum = virtualStubParamInfo->GetReg();
7869 // Add the extra indirection to generate the real target
7870 call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
7871 call->gtFlags |= GTF_EXCEPT;
7872 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7874 #else // !LEGACY_BACKEND
7875 GenTree* stubAddrArg = fgGetStubAddrArg(call);
7876 // And push the stub address onto the list of arguments
7877 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7878 #endif // !LEGACY_BACKEND
7880 else if (call->IsVirtualVtable())
7882 // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
7884 noway_assert(thisPtr != NULL);
7886 GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7887 GenTree* vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7888 vtbl->gtFlags |= GTF_EXCEPT;
7890 unsigned vtabOffsOfIndirection;
7891 unsigned vtabOffsAfterIndirection;
7893 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
7896 /* Get the appropriate vtable chunk */
7898 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
7900 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7902 GenTree* indOffTree = nullptr;
7906 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7907 nullptr DEBUGARG("virtual table call"));
7910 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7914 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7918 /* Now the appropriate vtable slot */
7920 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7921 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7923 // Switch this to a plain indirect call
7924 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7925 assert(!call->IsVirtual());
7926 call->gtCallType = CT_INDIRECT;
7928 call->gtCallAddr = vtbl;
7929 call->gtCallCookie = NULL;
7930 call->gtFlags |= GTF_EXCEPT;
7933 // Now inject a placeholder for the real call target that codegen will generate
7934 #ifdef LEGACY_BACKEND
7935 GenTree* arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
7936 codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
7937 #else // !LEGACY_BACKEND
7938 GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
7939 #endif // !LEGACY_BACKEND
7940 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7942 // Lastly inject the pointer for the copy routine
7943 noway_assert(call->callSig != NULL);
7944 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
7945 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7946 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7948 // It is now a varargs tail call
7949 #ifdef LEGACY_BACKEND
7950 call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
7951 #else // !LEGACY_BACKEND
7952 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7953 #endif // !LEGACY_BACKEND
7954 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7956 #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
7958 // x86 classic codegen doesn't require any morphing
7960 // For the helper-assisted tail calls, we need to push all the arguments
7961 // into a single list, and then add a few extra at the beginning or end.
7963 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7965 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7967 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7968 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7969 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7971 // For x86, the tailcall helper is defined as:
7973     //      JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget)
7976 // Note that the special arguments are on the stack, whereas the function arguments follow
7977 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7978 // look like (highest address at the top):
7979 // first normal stack argument
7981 // last normal stack argument
7982 // numberOfOldStackArgs
7983     //      numberOfNewStackArgs
7984     //      flags
7985     //      callTarget
7987 // Each special arg is 4 bytes.
7989 // 'flags' is a bitmask where:
7990 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7991 // callee-saved registers for tailcall functions. Note that the helper assumes
7992 // that the callee-saved registers live immediately below EBP, and must have been
7993 // pushed in this order: EDI, ESI, EBX.
7994 // 2 == call target is a virtual stub dispatch.
7996 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7997 // on the custom calling convention.
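    // Worked example (an illustrative reading of the convention above, not taken from
    // jithelp.asm): a caller with 3 stack argument words tail calling a target with 5
    // stack argument words would pass numberOfOldStackArgsWords == 3 and
    // numberOfNewStackArgsWords == 5; flags would be 1 (restore callee-saves), or
    // 1 | 2 == 3 if the target is reached via virtual stub dispatch.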
7999 // Check for PInvoke call types that we don't handle in codegen yet.
8000 assert(!call->IsUnmanaged());
8001 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
8003 // Don't support tail calling helper methods
8004 assert(call->gtCallType != CT_HELPER);
8006     // We take this route only for tail prefixed calls that cannot be dispatched as fast tail calls.
8008 assert(!call->IsImplicitTailCall());
8009 assert(!fgCanFastTailCall(call));
8011 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
8012 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
8013 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
8014 // addition, for all platforms, we are going to change the call into a helper call. Our code
8015 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
8016 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
8017 // since special 'this' pointer handling will no longer kick in.
8019 // Some call types, such as virtual vtable calls, require creating a call address expression
8020 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
8021 // to create a temporary that is assigned to the "this" pointer expression, and then use
8022 // that temp to create the call address expression. This temp creation embedded statement
8023 // will occur immediately before the "this" pointer argument, and then will be used for both
8024 // the "this" pointer argument as well as the call address expression. In the normal ordering,
8025 // the embedded statement establishing the "this" pointer temp will execute before both uses
8026 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
8027 // normal call argument list, and insert a placeholder which will hold the call address
8028 // expression. For non-x86, things are ok, because the order of execution of these is not
8029 // altered. However, for x86, the call address expression is inserted as the *last* argument
8030 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
8031 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
8032 // for those cases where call lowering creates an embedded form temp of "this", we will
8033 // create a temp here, early, that will later get morphed correctly.
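    // Sketch of the problem and the fix (illustrative): on x86 the call address
    // expression, which reads "this", is evaluated before the embedded statement that
    // would define the "this" temp. By materializing
    //     COMMA(tmp = "this", tmp)
    // here, every later consumer, including the call address expression, reads the
    // temp after its single, early definition.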
8035 if (call->gtCallObjp)
8037 GenTree* thisPtr = nullptr;
8038 GenTree* objp = call->gtCallObjp;
8039 call->gtCallObjp = nullptr;
8042 if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
8045 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
8046 GenTree* asg = gtNewTempAssign(lclNum, objp);
8048 // COMMA(tmp = "this", tmp)
8049 var_types vt = objp->TypeGet();
8050 GenTree* tmp = gtNewLclvNode(lclNum, vt);
8051 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
8055 #endif // _TARGET_X86_
8057 #if defined(_TARGET_X86_)
8058     // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
8059 // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
8060 // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
8061 if (call->NeedsNullCheck() || call->IsVirtualStub())
8063 if (call->NeedsNullCheck())
8064 #endif // defined(_TARGET_X86_)
8066 // clone "this" if "this" has no side effects.
8067 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
8069 thisPtr = gtClone(objp, true);
8072 var_types vt = objp->TypeGet();
8073 if (thisPtr == nullptr)
8075 // create a temp if either "this" has side effects or "this" is too complex to clone.
8078 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
8079 GenTree* asg = gtNewTempAssign(lclNum, objp);
8081 // COMMA(tmp = "this", deref(tmp))
8082 GenTree* tmp = gtNewLclvNode(lclNum, vt);
8083 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
8084 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
8086 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
8087 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
8091 // thisPtr = COMMA(deref("this"), "this")
8092 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
8093 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
8096 call->gtFlags &= ~GTF_CALL_NULLCHECK;
8103         // During rationalization, the tmp = "this" assignment and the null check will
8104         // materialize as embedded stmts in the right execution order.
8105 assert(thisPtr != nullptr);
8106 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
8109 #if defined(_TARGET_AMD64_)
8111 // Add the extra VSD parameter to arg list in case of VSD calls.
8112 // Tail call arg copying thunk will move this extra VSD parameter
8113 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
8114 // in Stublinkerx86.cpp for more details.
8115 CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
8116 if (call->IsVirtualStub())
8118 flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
8120 GenTree* stubAddrArg = fgGetStubAddrArg(call);
8121 // And push the stub address onto the list of arguments
8122 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
8125 // Now inject a placeholder for the real call target that Lower phase will generate.
8126 GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
8127 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
8129 // Inject the pointer for the copy routine to be used for struct copying
8130 noway_assert(call->callSig != nullptr);
8131 void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
8132 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
8133 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
8135 #else // !_TARGET_AMD64_
8137 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
8138 // append to the list.
8139 GenTreeArgList** ppArg = &call->gtCallArgs;
8140 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
8142 ppArg = (GenTreeArgList**)&args->gtOp2;
8144 assert(ppArg != nullptr);
8145 assert(*ppArg == nullptr);
8147 unsigned nOldStkArgsWords =
8148 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
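    // Worked example (illustrative, assuming x86 where REGSIZE_BYTES == 4): with
    // compArgSize == 20 bytes and two register args (ECX, EDX),
    // nOldStkArgsWords == (20 - 2 * 4) / 4 == 3 words of incoming stack arguments.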
8149 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
8150 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
8151 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
8153 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
8154 // The constant will be replaced.
8155 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
8156 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
8157 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
8159 // Inject a placeholder for the flags.
8160 // The constant will be replaced.
8161 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
8162 *ppArg = gtNewListNode(arg1, nullptr);
8163 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
8165 // Inject a placeholder for the real call target that the Lowering phase will generate.
8166 // The constant will be replaced.
8167 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
8168 *ppArg = gtNewListNode(arg0, nullptr);
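    // At this point the argument list mirrors the helper signature sketched earlier
    // (illustrative): <function args>, numberOfOldStackArgsWords, placeholder(9) for
    // numberOfNewStackArgsWords, placeholder(8) for flags, placeholder(7) for the
    // call target. Lowering later overwrites the three placeholder constants with
    // real values.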
8170 #endif // !_TARGET_AMD64_
8172 // It is now a varargs tail call dispatched via helper.
8173 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
8174 call->gtFlags &= ~GTF_CALL_POP_ARGS;
8178 JITDUMP("fgMorphTailCall (after):\n");
8182 //------------------------------------------------------------------------
8183 // fgGetStubAddrArg: Return the virtual stub address for the given call.
8186 // the JIT must place the address of the stub used to load the call target,
8187 // the "stub indirection cell", in a special call argument with a special register.
8190 // call - a call that needs virtual stub dispatching.
8193 //    addr tree with register requirements set.
8195 GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call)
8197 assert(call->IsVirtualStub());
8198 GenTree* stubAddrArg;
8199 if (call->gtCallType == CT_INDIRECT)
8201 stubAddrArg = gtClone(call->gtCallAddr, true);
8205 assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
8206 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
8207 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
8209 assert(stubAddrArg != nullptr);
8210 stubAddrArg->gtRegNum = virtualStubParamInfo->GetReg();
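    // Note (illustrative): virtualStubParamInfo->GetReg() is the per-target VSD
    // parameter register, e.g. R11 on AMD64 as mentioned in the tail call comments
    // elsewhere in this file.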
8214 //------------------------------------------------------------------------------
8215 // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
8219 // block - basic block ending with a recursive fast tail call
8220 // recursiveTailCall - recursive tail call to transform
8223 // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
8225 void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
8227 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
8228 GenTree* last = block->lastStmt();
8229 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
8231 // Transform recursive tail call into a loop.
8233 GenTree* earlyArgInsertionPoint = last;
8234 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
8236 // Hoist arg setup statement for the 'this' argument.
8237 GenTree* thisArg = recursiveTailCall->gtCallObjp;
8238 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
8240 GenTree* thisArgStmt = gtNewStmt(thisArg, callILOffset);
8241 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
8244 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
8245 // then the temps need to be assigned to the method parameters. This is done so that the caller
8246 // parameters are not re-assigned before call arguments depending on them are evaluated.
8247 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
8248 // where the next temp or parameter assignment should be inserted.
8250 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
8251 // while the second call argument (const 1) doesn't.
8252 // Basic block before tail recursion elimination:
8253 // ***** BB04, stmt 1 (top level)
8254 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
8255 // [000033] --C - G------ - \--* call void RecursiveMethod
8256 // [000030] ------------ | / --* const int - 1
8257 // [000031] ------------arg0 in rcx + --* +int
8258 // [000029] ------------ | \--* lclVar int V00 arg1
8259 // [000032] ------------arg1 in rdx \--* const int 1
8262 // Basic block after tail recursion elimination :
8263 // ***** BB04, stmt 1 (top level)
8264 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
8265 // [000030] ------------ | / --* const int - 1
8266 // [000031] ------------ | / --* +int
8267 // [000029] ------------ | | \--* lclVar int V00 arg1
8268 // [000050] - A---------- \--* = int
8269 // [000049] D------N---- \--* lclVar int V02 tmp0
8271 // ***** BB04, stmt 2 (top level)
8272 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
8273 // [000052] ------------ | / --* lclVar int V02 tmp0
8274 // [000054] - A---------- \--* = int
8275 // [000053] D------N---- \--* lclVar int V00 arg0
8277 // ***** BB04, stmt 3 (top level)
8278 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
8279 // [000032] ------------ | / --* const int 1
8280 // [000057] - A---------- \--* = int
8281 // [000056] D------N---- \--* lclVar int V01 arg1
8283 GenTree* tmpAssignmentInsertionPoint = last;
8284 GenTree* paramAssignmentInsertionPoint = last;
8286 // Process early args. They may contain both setup statements for late args and actual args.
8287 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
8288 // below has the correct second argument.
8289 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
8290 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
8291 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
8293 GenTree* earlyArg = earlyArgs->Current();
8294 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
8296 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
8298 // This is a setup node so we need to hoist it.
8299 GenTree* earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
8300 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
8304 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
8305 fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
8306 GenTree* paramAssignStmt =
8307 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
8308 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
8309 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
8311 // All temp assignments will happen before the first param assignment.
8312 tmpAssignmentInsertionPoint = paramAssignStmt;
8318 // Process late args.
8319 int lateArgIndex = 0;
8320 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
8321 (lateArgIndex++, lateArgs = lateArgs->Rest()))
8323 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
8324 GenTree* lateArg = lateArgs->Current();
8325 fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
8326 GenTree* paramAssignStmt =
8327 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
8328 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
8330 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
8332 // All temp assignments will happen before the first param assignment.
8333 tmpAssignmentInsertionPoint = paramAssignStmt;
8337     // If the method has starg.s 0 or ldarga.s 0, a special local (lvaArg0Var) is created so that
8338     // compThisArg stays immutable. Normally it's assigned in the fgFirstBBScratch block. Since that
8339 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
8340 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
8342 var_types thisType = lvaTable[info.compThisArg].TypeGet();
8343 GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType);
8344 GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
8345 GenTree* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
8346 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
8349     // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog,
8350 // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization
8351 // for all non-parameter IL locals as well as temp structs with GC fields.
8352 // Liveness phase will remove unnecessary initializations.
8353 if (info.compInitMem)
8357 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
8359 if (!varDsc->lvIsParam)
8361 var_types lclType = varDsc->TypeGet();
8362 bool isUserLocal = (varNum < info.compLocalsCount);
8363 bool structWithGCFields = ((lclType == TYP_STRUCT) && (varDsc->lvStructGcCount > 0));
8364 if (isUserLocal || structWithGCFields)
8366 GenTree* lcl = gtNewLclvNode(varNum, lclType);
8367 GenTree* init = nullptr;
8368 if (lclType == TYP_STRUCT)
8370 const bool isVolatile = false;
8371 const bool isCopyBlock = false;
8372 init = gtNewBlkOpNode(lcl, gtNewIconNode(0), varDsc->lvSize(), isVolatile, isCopyBlock);
8373 init = fgMorphInitBlock(init);
8377 GenTree* zero = gtNewZeroConNode(genActualType(lclType));
8378 init = gtNewAssignNode(lcl, zero);
8380 GenTree* initStmt = gtNewStmt(init, callILOffset);
8381 fgInsertStmtBefore(block, last, initStmt);
8388 fgRemoveStmt(block, last);
8390 // Set the loop edge. Ensure we have a scratch block and then target the
8391 // next block. Loop detection needs to see a pred out of the loop, so
8392 // mark the scratch block BBF_DONT_REMOVE to prevent empty block removal
8394 fgEnsureFirstBBisScratch();
8395 fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
8396 block->bbJumpKind = BBJ_ALWAYS;
8397 block->bbJumpDest = fgFirstBB->bbNext;
8398 fgAddRefPred(block->bbJumpDest, block);
8399 block->bbFlags &= ~BBF_HAS_JMP;
8402 //------------------------------------------------------------------------------
8403 // fgAssignRecursiveCallArgToCallerParam : Assign an argument of a recursive call to the corresponding caller parameter.
8407 // arg - argument to assign
8408 // argTabEntry - argument table entry corresponding to arg
8409 //    block - basic block the call is in
8410 // callILOffset - IL offset of the call
8411 // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
8412 // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
8415 // parameter assignment statement if one was inserted; nullptr otherwise.
8417 GenTree* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg,
8418 fgArgTabEntry* argTabEntry,
8420 IL_OFFSETX callILOffset,
8421 GenTree* tmpAssignmentInsertionPoint,
8422 GenTree* paramAssignmentInsertionPoint)
8424 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
8425 // some argument trees may reference parameters directly.
8427 GenTree* argInTemp = nullptr;
8428 unsigned originalArgNum = argTabEntry->argNum;
8429 bool needToAssignParameter = true;
8431 // TODO-CQ: enable calls with struct arguments passed in registers.
8432 noway_assert(!varTypeIsStruct(arg->TypeGet()));
8434 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
8436 // The argument is already assigned to a temp or is a const.
8439 else if (arg->OperGet() == GT_LCL_VAR)
8441 unsigned lclNum = arg->AsLclVar()->gtLclNum;
8442 LclVarDsc* varDsc = &lvaTable[lclNum];
8443 if (!varDsc->lvIsParam)
8445 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
8448 else if (lclNum == originalArgNum)
8450 // The argument is the same parameter local that we were about to assign so
8451 // we can skip the assignment.
8452 needToAssignParameter = false;
8456 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
8457 // any caller parameters. Some common cases are handled above but we may be able to eliminate
8458 // more temp assignments.
8460 GenTree* paramAssignStmt = nullptr;
8461 if (needToAssignParameter)
8463 if (argInTemp == nullptr)
8465 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
8466 // TODO: we can avoid a temp assignment if we can prove that the argument tree
8467 // doesn't involve any caller parameters.
8468 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
8469 GenTree* tempSrc = arg;
8470 GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
8471 GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
8472 GenTree* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
8473 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
8474 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
8477 // Now assign the temp to the parameter.
8478 LclVarDsc* paramDsc = lvaTable + originalArgNum;
8479 assert(paramDsc->lvIsParam);
8480 GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
8481 GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
8482 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
8484 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
8486 return paramAssignStmt;
8489 /*****************************************************************************
8491 * Transform the given GT_CALL tree for code generation.
8494 GenTree* Compiler::fgMorphCall(GenTreeCall* call)
8496 if (varTypeIsStruct(call))
8498 fgFixupStructReturn(call);
8500 if (call->CanTailCall())
8502 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
8503 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
8505 // It cannot be an inline candidate
8506 assert(!call->IsInlineCandidate());
8508 const char* szFailReason = nullptr;
8509 bool hasStructParam = false;
8510 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
8512 szFailReason = "Might turn into an intrinsic";
8515 if (opts.compNeedSecurityCheck)
8517 szFailReason = "Needs security check";
8519 else if (compLocallocUsed || compLocallocOptimized)
8521 szFailReason = "Localloc used";
8523 #ifdef _TARGET_AMD64_
8524 // Needed for Jit64 compat.
8525         // In the future, enabling tail calls from methods that need a GS cookie check
8526         // would require codegen-side work to emit the GS cookie check before a tail call.
8528 else if (getNeedsGSSecurityCookie())
8530 szFailReason = "GS Security cookie check";
8534 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
8535 else if (opts.compGcChecks)
8537 szFailReason = "GcChecks";
8540 #if FEATURE_TAILCALL_OPT
8543             // We are still not sure whether it can be a tail call, because when converting
8544             // a call to an implicit tail call we must check that there are no locals with
8545             // their address taken. If there are, we have to assume that the address
8546             // has been leaked and the current stack frame must live until after the final call.
8549             // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
8550             // that lvHasLdAddrOp is much more conservative. We cannot just base it on
8551             // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
8552             // during the morph stage. The reason for also checking lvAddrExposed is that in the case
8553             // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
8554             // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
8555             // never to be incorrect.
8557 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
8558             // address is taken. Such a flag could be set whenever lvHasLdAddrOp or lvAddrExposed
8559 // is set. This avoids the need for iterating through all lcl vars of the current
8560 // method. Right now throughout the code base we are not consistently using 'set'
8561 // method to set lvHasLdAddrOp and lvAddrExposed flags.
8564 bool hasAddrExposedVars = false;
8565 bool hasStructPromotedParam = false;
8566 bool hasPinnedVars = false;
8568 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
8570 // If the method is marked as an explicit tail call we will skip the
8571 // following three hazard checks.
8572 // We still must check for any struct parameters and set 'hasStructParam'
8573 // so that we won't transform the recursive tail call into a loop.
8575 if (call->IsImplicitTailCall())
8577 if (varDsc->lvHasLdAddrOp)
8579 hasAddrExposedVars = true;
8582 if (varDsc->lvAddrExposed)
8584 if (lvaIsImplicitByRefLocal(varNum))
8586 // The address of the implicit-byref is a non-address use of the pointer parameter.
8588 else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
8590                         // The address of the implicit-byref's field is likewise a non-address use of the pointer parameter.
8593 else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
8595 // This temp was used for struct promotion bookkeeping. It will not be used, and will have
8596 // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
8597 assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
8598 assert(fgGlobalMorph);
8602 hasAddrExposedVars = true;
8606 if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
8608 hasStructPromotedParam = true;
8611 if (varDsc->lvPinned)
8613 // A tail call removes the method from the stack, which means the pinning
8614 // goes away for the callee. We can't allow that.
8615 hasPinnedVars = true;
8619 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
8621 hasStructParam = true;
8622 // This prevents transforming a recursive tail call into a loop
8623 // but doesn't prevent tail call optimization so we need to
8624                     // look at the rest of the parameters.
8629 if (hasAddrExposedVars)
8631 szFailReason = "Local address taken";
8633 if (hasStructPromotedParam)
8635 szFailReason = "Has Struct Promoted Param";
8639 szFailReason = "Has Pinned Vars";
8642 #endif // FEATURE_TAILCALL_OPT
8644 var_types callType = call->TypeGet();
8646         // We have to ensure that we pass the incoming retValBuf as the
8647         // outgoing one. Using a temp will not do, as this function will
8648         // not regain control to do the copy.
8650 if (info.compRetBuffArg != BAD_VAR_NUM)
8652 noway_assert(callType == TYP_VOID);
8653 GenTree* retValBuf = call->gtCallArgs->gtOp.gtOp1;
8654 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
8656 szFailReason = "Need to copy return buffer";
8660         // If this is an opportunistic tail call and cannot be dispatched as a
8661         // fast tail call, go the non-tail call route. This is done for perf reasons.
8664         // Avoid the cost of determining whether it can be dispatched as a fast tail
8665         // call if we already know that the tail call cannot be honored for other reasons.
8667 bool canFastTailCall = false;
8668 if (szFailReason == nullptr)
8670 canFastTailCall = fgCanFastTailCall(call);
8671 if (!canFastTailCall)
8673 // Implicit or opportunistic tail calls are always dispatched via fast tail call
8674 // mechanism and never via tail call helper for perf.
8675 if (call->IsImplicitTailCall())
8677 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
8679 #ifndef LEGACY_BACKEND
8680 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
8682                 // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
8683 // dispatched as a fast tail call.
8685                 // Methods with non-standard args will have an indirection cell or cookie param passed
8686                 // in a callee trash register (e.g. R11). The tail call helper doesn't preserve it before
8687                 // tail calling the target method, and hence the ".tail" prefix on such calls needs to be honored.
8690 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
8691 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
8692                 // This is done by adding stubAddr as an additional arg before the original list of
8693 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
8694 // in Stublinkerx86.cpp.
8695 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
8696 "called via helper";
8698 #ifdef _TARGET_ARM64_
8701 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
8702 // So, bail out if we can't make fast tail call.
8703 szFailReason = "Non-qualified fast tail call";
8706 #endif // LEGACY_BACKEND
8710 // Clear these flags before calling fgMorphCall() to avoid recursion.
8711 bool isTailPrefixed = call->IsTailPrefixedCall();
8712 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
8714 #if FEATURE_TAILCALL_OPT
8715 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
8719 if (!canFastTailCall && szFailReason == nullptr)
8721 szFailReason = "Non fast tail calls disabled for PAL based systems.";
8723 #endif // FEATURE_PAL
8725 if (szFailReason == nullptr)
8727 if (!fgCheckStmtAfterTailCall())
8729 szFailReason = "Unexpected statements after the tail call";
8733 if (szFailReason != nullptr)
8738 printf("\nRejecting tail call late for call ");
8740 printf(": %s\n", szFailReason);
8744 // for non user funcs, we have no handles to report
8745 info.compCompHnd->reportTailCallDecision(nullptr,
8746 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8747 isTailPrefixed, TAILCALL_FAIL, szFailReason);
8752 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
8753 // We enable shared-ret tail call optimization for recursive calls even if
8754 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
8755 if (gtIsRecursiveCall(call))
8758 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
8759 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
8760 if (compCurBB->bbJumpKind != BBJ_RETURN)
8762 compCurBB->bbJumpKind = BBJ_RETURN;
8766 // Set this flag before calling fgMorphCall() to prevent inlining this call.
8767 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
8769 bool fastTailCallToLoop = false;
8770 #if FEATURE_TAILCALL_OPT
8771 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
8772 // or return type is a struct that can be passed in a register.
8774 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
8775 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
8776 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
8777 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
8778 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
8779 // generic type parameters of both caller and callee generic method are the same.
8780 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
8781 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam &&
8782 !varTypeIsStruct(call->TypeGet()) && ((info.compClassAttr & CORINFO_FLG_MARSHAL_BYREF) == 0))
8784 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
8785 fastTailCallToLoop = true;
8789 // Do some target-specific transformations (before we process the args, etc.)
8790         // This is needed only for tail prefixed calls that cannot be dispatched as fast tail calls.
8792 if (!canFastTailCall)
8794 fgMorphTailCall(call);
8797         // Implementation note: If we optimize tailcall to do a direct jump
8798 // to the target function (after stomping on the return address, etc),
8799 // without using CORINFO_HELP_TAILCALL, we have to make certain that
8800 // we don't starve the hijacking logic (by stomping on the hijacked
8801 // return address etc).
8803 // At this point, we are committed to do the tailcall.
8804 compTailCallUsed = true;
8806 CorInfoTailCall tailCallResult;
8808 if (fastTailCallToLoop)
8810 tailCallResult = TAILCALL_RECURSIVE;
8812 else if (canFastTailCall)
8814 tailCallResult = TAILCALL_OPTIMIZED;
8818 tailCallResult = TAILCALL_HELPER;
8821 // for non user funcs, we have no handles to report
8822 info.compCompHnd->reportTailCallDecision(nullptr,
8823 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8824 isTailPrefixed, tailCallResult, nullptr);
8826         // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
8827         // to avoid doing any extra work for the return value.
8828 call->gtType = TYP_VOID;
8833 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
8836 if (fastTailCallToLoop)
8838 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8845 GenTree* stmtExpr = fgMorphStmt->gtStmtExpr;
8848         // A tail call needs to be in one of the following IR forms:
8849 // Either a call stmt or
8850 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8851 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8852 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8854         // GT_CASTs may be nested.
8855 genTreeOps stmtOper = stmtExpr->gtOper;
8856 if (stmtOper == GT_CALL)
8858 assert(stmtExpr == call);
8862 assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8863 GenTree* treeWithCall;
8864 if (stmtOper == GT_RETURN)
8866 treeWithCall = stmtExpr->gtGetOp1();
8868 else if (stmtOper == GT_COMMA)
8870 // Second operation must be nop.
8871 assert(stmtExpr->gtGetOp2()->IsNothingNode());
8872 treeWithCall = stmtExpr->gtGetOp1();
8876 treeWithCall = stmtExpr->gtGetOp2();
8880 while (treeWithCall->gtOper == GT_CAST)
8882 assert(!treeWithCall->gtOverflow());
8883 treeWithCall = treeWithCall->gtGetOp1();
8886 assert(treeWithCall == call);
8889 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8890 // Remove all stmts after the call.
8891 while (nextMorphStmt != nullptr)
8893 GenTreeStmt* stmtToRemove = nextMorphStmt;
8894 nextMorphStmt = stmtToRemove->gtNextStmt;
8895 fgRemoveStmt(compCurBB, stmtToRemove);
8898 fgMorphStmt->gtStmtExpr = call;
8900 // Tail call via helper: The VM can't use return address hijacking if we're
8901 // not going to return and the helper doesn't have enough info to safely poll,
8902 // so we poll before the tail call, if the block isn't already safe. Since
8903         // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
8904         // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
8905         // size increase if almost all methods are expected to be tail calls (e.g. F#).
8907         // Note that we can avoid emitting a GC poll if we know that the current BB is
8908         // dominated by a GC-SafePoint block. But we don't have dominator info at this
8909         // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
8910         // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
8911         // now it is not clear whether optimizing slow tail calls is worth the effort. As a
8912         // low cost check, we check whether the first and current basic blocks are GC safe points.
8915 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
8916 // is going to mark the method as fully interruptible if the block containing this tail
8917 // call is reachable without executing any call.
8918 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8919 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8921 // We didn't insert a poll block, so we need to morph the call now
8922 // (Normally it will get morphed when we get to the split poll block)
8923 GenTree* temp = fgMorphCall(call);
8924 noway_assert(temp == call);
8927 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8928 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8930 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8931 // hence mark it as BBJ_RETURN with BBF_JMP flag set.
8932 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8934 if (canFastTailCall)
8936 compCurBB->bbFlags |= BBF_HAS_JMP;
8940 compCurBB->bbJumpKind = BBJ_THROW;
8943         // For non-void calls, we return a placeholder which will be
8944 // used by the parent GT_RETURN node of this call.
8946 GenTree* result = call;
8947 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8950 // Return a dummy node, as the return is already removed.
8951 if (callType == TYP_STRUCT)
8953             // This is an HFA; use float 0.
8954 callType = TYP_FLOAT;
8956 #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8957 // Return a dummy node, as the return is already removed.
8958 if (varTypeIsStruct(callType))
8960 // This is a register-returned struct. Return a 0.
8961 // The actual return registers are hacked in lower and the register allocator.
8966 // Return a dummy node, as the return is already removed.
8967 if (varTypeIsSIMD(callType))
8969 callType = TYP_DOUBLE;
8972 result = gtNewZeroConNode(genActualType(callType));
8973 result = fgMorphTree(result);
8981 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8982 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8983 #ifdef FEATURE_READYTORUN_COMPILER
8984 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8987 (call == fgMorphStmt->gtStmtExpr))
8989         // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8990 // Transform it into a null check.
8992 GenTree* thisPtr = call->gtCallArgs->gtOp.gtOp1;
8994 GenTree* nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8995 nullCheck->gtFlags |= GTF_EXCEPT;
8997 return fgMorphTree(nullCheck);
9000 noway_assert(call->gtOper == GT_CALL);
9003 // Only count calls once (only in the global morph phase)
9007 if (call->gtCallType == CT_INDIRECT)
9010 optIndirectCallCount++;
9012 else if (call->gtCallType == CT_USER_FUNC)
9015 if (call->IsVirtual())
9017 optIndirectCallCount++;
9022 // Couldn't inline - remember that this BB contains method calls
9024 // If this is a 'regular' call, mark the basic block as
9025 // having a call (for computing full interruptibility).
9026 CLANG_FORMAT_COMMENT_ANCHOR;
9028 if (IsGcSafePoint(call))
9030 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
9033 // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
9035 // We need to do these before the arguments are morphed
9036 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
9038 // See if this is foldable
9039 GenTree* optTree = gtFoldExprCall(call);
9041 // If we optimized, morph the result
9042 if (optTree != call)
9044 return fgMorphTree(optTree);
9048     // Make sure that return buffers for structs that contain GC pointers (and aren't too large) are pointers into the stack.
9049     GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require the copy-back).
9051 unsigned retValTmpNum = BAD_VAR_NUM;
9052 CORINFO_CLASS_HANDLE structHnd = nullptr;
9053 if (call->HasRetBufArg() &&
9054 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
9056         // We're enforcing the invariant that return buffer pointers (at least for
9057         // struct return types containing GC pointers) are never pointers into the heap.
9058 // The large majority of cases are address of local variables, which are OK.
9059 // Otherwise, allocate a local of the given struct type, pass its address,
9060 // then assign from that into the proper destination. (We don't need to do this
9061 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
9062 // will maintain the same invariant.)
9064 GenTree* dest = call->gtCallArgs->gtOp.gtOp1;
9065 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
9066 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
9068 // We'll exempt helper calls from this, assuming that the helper implementation
9069 // follows the old convention, and does whatever barrier is required.
9070 if (call->gtCallType != CT_HELPER)
9072 structHnd = call->gtRetClsHnd;
9073 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
9074 !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
9075 dest->gtLclVar.gtLclNum == info.compRetBuffArg))
9079 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
9080 lvaSetStruct(retValTmpNum, structHnd, true);
9081 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
9086 call->gtCallArgs->gtOp.gtOp1 = dest;
9089 /* Process the "normal" argument list */
9090 call = fgMorphArgs(call);
9091 noway_assert(call->gtOper == GT_CALL);
9093     // Morph a stelem.ref helper call that stores a null value into a plain array store without the helper.
9094 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
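    // For example (illustrative): for an object[] 'arr', the IL for "arr[i] = null"
    // imports as a call to CORINFO_HELP_ARRADDR_ST(arr, i, null); since storing null
    // can never fail the array covariance check, the call is morphed back into a
    // plain array store below.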
9095 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
9097 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
9098 if (value->IsIntegralConst(0))
9100 assert(value->OperGet() == GT_CNS_INT);
9102 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
9103 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
9105 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
9106 // the spill trees as well if necessary.
9107 GenTreeOp* argSetup = nullptr;
9108 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
9110 GenTree* const arg = earlyArgs->Current();
9111 if (arg->OperGet() != GT_ASG)
9117 assert(arg != index);
9119 arg->gtFlags &= ~GTF_LATE_ARG;
9121 GenTree* op1 = argSetup;
9124 op1 = gtNewNothingNode();
9126 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9130 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
9133 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9138 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
9139 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
9140 return WALK_CONTINUE;
9143 fgWalkTreePost(&arr, resetMorphedFlag);
9144 fgWalkTreePost(&index, resetMorphedFlag);
9145 fgWalkTreePost(&value, resetMorphedFlag);
9148 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
9149 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
9150 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
9151 arrStore->gtFlags |= GTF_ASG;
9153 GenTree* result = fgMorphTree(arrStore);
9154 if (argSetup != nullptr)
9156 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
9158 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9166 // Optimize get_ManagedThreadId(get_CurrentThread)
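    // i.e. fold the source pattern Thread.CurrentThread.ManagedThreadId, which
    // imports as get_ManagedThreadId(get_CurrentThread()), into the single
    // CORINFO_HELP_GETCURRENTMANAGEDTHREADID helper call built below (illustrative
    // description of the code that follows).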
9167 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
9168 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
9170 noway_assert(origDest == nullptr);
9171 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
9173 GenTree* innerCall = call->gtCallLateArgs->gtOp.gtOp1;
9175 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
9176 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
9177 CORINFO_INTRINSIC_GetCurrentManagedThread)
9179             // Substitute the expression with a call to the helper.
9180 GenTree* newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT);
9181 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
9182 return fgMorphTree(newCall);
9186 if (origDest != nullptr)
9188 GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
9189 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
9190 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
9191         // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to work.
9193 if (origDest->OperGet() == GT_ASG)
9195 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
9197 GenTree* var = origDest->gtOp.gtOp1;
9198 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
9199 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
9202 GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
9203 copyBlk = fgMorphTree(copyBlk);
9204 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
9206 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9211 if (call->IsNoReturn())
9214 // If we know that the call does not return then we can set fgRemoveRestOfBlock
9215 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
9216 // As a result the compiler won't need to preserve live registers across the call.
9218         // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
9219 // Besides, the tail call code is part of the epilog and converting the block to
9220 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
9221 // only for BBJ_RETURN blocks.
9223 // Currently this doesn't work for non-void callees. Some of the code that handles
9224 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
9225 // do not have this flag by default. We could add the flag here but the proper solution
9226 // would be to replace the return expression with a local var node during inlining
9227 // so the rest of the call tree stays in a separate statement. That statement can then
9228 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
9231 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
9233 fgRemoveRestOfBlock = true;
9240 /*****************************************************************************
9242 * Transform the given GTK_CONST tree for code generation.
9245 GenTree* Compiler::fgMorphConst(GenTree* tree)
9247 assert(tree->OperKind() & GTK_CONST);
9249 /* Clear any exception flags or other unnecessary flags
9250 * that may have been set before folding this node to a constant */
9252 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
9254 if (tree->OperGet() != GT_CNS_STR)
9259         // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
9260         // guarantee slow performance for that block. Instead, cache the return value
9261         // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
9263 if (compCurBB->bbJumpKind == BBJ_THROW)
9265 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
9266 if (helper != CORINFO_HELP_UNDEF)
9268                 // For unimportant blocks, we want to construct the string lazily
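                // For example (illustrative): a literal used only on a throw path, such as
                //     if (failed) throw new Exception("bad input");
                // becomes a CORINFO_HELP_STRCNS* helper call carrying the string's RID, so
                // the string object is only constructed if the throw block actually runs.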
9270 GenTreeArgList* args;
9271 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
9273 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
9277 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
9278 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
9281 tree = gtNewHelperCallNode(helper, TYP_REF, args);
9282 return fgMorphTree(tree);
9286 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
9289 InfoAccessType iat =
9290 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
9292 tree = gtNewStringLiteralNode(iat, pValue);
9294 return fgMorphTree(tree);
9297 /*****************************************************************************
9299 * Transform the given GTK_LEAF tree for code generation.
9302 GenTree* Compiler::fgMorphLeaf(GenTree* tree)
9304 assert(tree->OperKind() & GTK_LEAF);
9306 if (tree->gtOper == GT_LCL_VAR)
9308 const bool forceRemorph = false;
9309 return fgMorphLocalVar(tree, forceRemorph);
9312 else if (tree->gtOper == GT_LCL_FLD)
9314 if (info.compIsVarArgs)
9317 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
9318 if (newTree != nullptr)
9320 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
9322 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
9328 #endif // _TARGET_X86_
9329 else if (tree->gtOper == GT_FTN_ADDR)
9331 CORINFO_CONST_LOOKUP addrInfo;
9333 #ifdef FEATURE_READYTORUN_COMPILER
9334 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
9336 addrInfo = tree->gtFptrVal.gtEntryPoint;
9341 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
9344 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
9346 tree->SetOper(GT_CNS_INT);
9347 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
9348 tree->gtFlags |= GTF_ICON_FTN_ADDR;
9350 switch (addrInfo.accessType)
9353 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
9354 tree->gtFlags |= GTF_IND_INVARIANT;
9359 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
9363 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
9367 noway_assert(!"Unknown addrInfo.accessType");
9370 return fgMorphTree(tree);
9376 void Compiler::fgAssignSetVarDef(GenTree* tree)
9378 GenTreeLclVarCommon* lclVarCmnTree;
9379 bool isEntire = false;
9380 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
9384 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
9388 // We consider partial definitions to be modeled as uses followed by definitions.
9389 // This captures the idea that preceding defs are not necessarily made redundant
9390 // by this definition.
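// For example (illustrative): for a local 'struct { int a; int b; } s', an assignment that
// writes all of 's' is an entire def and gets only GTF_VAR_DEF, while a store through
// 'LCL_FLD s.b' leaves 's.a' untouched, so it is tagged GTF_VAR_DEF | GTF_VAR_USEASG.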
9391 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
9396 //------------------------------------------------------------------------
9397 // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
9400 // tree - The block assignment to be possibly morphed
9403 // The modified tree if successful, nullptr otherwise.
9406 // 'tree' must be a block assignment.
9409 // If successful, this method always returns the incoming tree, modifying only
9412 GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
9414 // This must be a block assignment.
9415 noway_assert(tree->OperIsBlkOp());
9416 var_types asgType = tree->TypeGet();
9418 GenTree* asg = tree;
9419 GenTree* dest = asg->gtGetOp1();
9420 GenTree* src = asg->gtGetOp2();
9421 unsigned destVarNum = BAD_VAR_NUM;
9422 LclVarDsc* destVarDsc = nullptr;
9423 GenTree* lclVarTree = nullptr;
9424 bool isCopyBlock = asg->OperIsCopyBlkOp();
9425 bool isInitBlock = !isCopyBlock;
9428 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
9430 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic)
9431 // The SIMD type in question could be Vector2f which is 8-bytes in size.
9432 // The check below makes sure that we don't turn that copyblk
9433 // into a scalar assignment, since the rationalizer logic will transform the
9434 // copyblk appropriately. Otherwise, the transformation made in this
9435 // routine would bypass the rationalizer logic, and we might end up with a
9436 // GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post-rationalization, leading to a noway assert
9438 // TODO-1stClassStructs: This is here to preserve old behavior.
9439 // It should be eliminated.
9440 if (src->OperIsSIMDorSimdHWintrinsic())
9446 if (dest->gtEffectiveVal()->OperIsBlk())
9448 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
9449 size = lhsBlk->Size();
9450 if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
9452 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
9453 destVarDsc = &(lvaTable[destVarNum]);
9455 if (lhsBlk->OperGet() == GT_OBJ)
9457 clsHnd = lhsBlk->AsObj()->gtClass;
9462 // Is this an enregisterable struct that is already a simple assignment?
9463 // This can happen if we are re-morphing.
9464 if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
9468 noway_assert(dest->OperIsLocal());
9470 destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
9471 destVarDsc = &(lvaTable[destVarNum]);
9474 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
9475 size = info.compCompHnd->getClassSize(clsHnd);
9479 size = destVarDsc->lvExactSize;
9484 // See if we can do a simple transformation:
9486 //        GT_ASG <TYP_size>
9487 //        /           \
9488 //    GT_IND      GT_IND or CNS_INT
9493 if (size == REGSIZE_BYTES)
9495 if (clsHnd == NO_CLASS_HANDLE)
9497 // A register-sized cpblk can be treated as an integer assignment.
9498 asgType = TYP_I_IMPL;
9503 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
9504 asgType = getJitGCType(gcPtr);
9515 asgType = TYP_SHORT;
9518 #ifdef _TARGET_64BIT_
9522 #endif // _TARGET_64BIT_
9526 // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
9527 if (!varTypeIsStruct(asgType))
9529 // For initBlk, a non-constant source is not going to allow us to fiddle
9530 // with the bits to create a single assignment.
9531 noway_assert(size <= REGSIZE_BYTES);
9533 if (isInitBlock && !src->IsConstInitVal())
9538 if (destVarDsc != nullptr)
9540 #if LOCAL_ASSERTION_PROP
9541 // Kill everything about dest
9542 if (optLocalAssertionProp)
9544 if (optAssertionCount > 0)
9546 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
9549 #endif // LOCAL_ASSERTION_PROP
9551 // A previous incarnation of this code also required the local not to be
9552 // address-exposed(=taken). That seems orthogonal to the decision of whether
9553 // to do field-wise assignments: being address-exposed will cause it to be
9554 // "dependently" promoted, so it will be in the right memory location. One possible
9555 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
9556 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
9557 // concern, then we could compromise, and say that being address-exposed, together with fields that do
9558 // not completely cover the memory of the struct, prevents field-wise assignments. The same situation exists for the "src" decision.
9559 if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
9561 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
9564 else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
9566 // Use the dest local var directly, as well as its type.
9568 asgType = destVarDsc->lvType;
9570 // If the block operation had been a write to a local var of a small int type,
9571 // of the exact size of the small int type, and the var is NormalizeOnStore,
9572 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
9573 // have done that normalization. If we're now making it into an assignment,
9574 // the NormalizeOnStore will work, and it can be a full def.
9575 if (destVarDsc->lvNormalizeOnStore())
9577 dest->gtFlags &= (~GTF_VAR_USEASG);
9582 // Could be a non-promoted struct, or a floating point type local, or
9583 // an int subject to a partial write. Don't enregister.
9584 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
9586 // Mark the local var tree as a definition point of the local.
9587 lclVarTree->gtFlags |= GTF_VAR_DEF;
9588 if (size < destVarDsc->lvExactSize)
9589 { // If it's not a full-width assignment....
9590 lclVarTree->gtFlags |= GTF_VAR_USEASG;
9593 if (dest == lclVarTree)
9595 dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
9600 // Check to ensure we don't have a reducible *(& ... )
9601 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
9603 // If dest is an Indir or Block, and it has a child that is an Addr node
9605 GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR
9607 // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'?
9609 GenTree* destOp = addrNode->gtGetOp1();
9610 var_types destOpType = destOp->TypeGet();
9612 // We can if we have a primitive integer type and the sizes are exactly the same.
9614 if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType))))
9617 asgType = destOpType;
9621 if (dest->gtEffectiveVal()->OperIsIndir())
9623 // If we have no information about the destination, we have to assume it could
9624 // live anywhere (not just in the GC heap).
9625 // Mark the GT_IND node so that we use the correct write barrier helper in case
9626 // the field is a GC ref.
9628 if (!fgIsIndirOfAddrOfLocal(dest))
9630 dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9631 tree->gtFlags |= GTF_GLOB_REF;
9634 dest->gtFlags &= (~GTF_EXCEPT | dest->AsIndir()->Addr()->gtFlags);
9635 dest->SetIndirExceptionFlags(this);
9636 tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT);
9639 LclVarDsc* srcVarDsc = nullptr;
9642 if (src->OperGet() == GT_LCL_VAR)
9645 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
9647 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
9649 srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
9651 if (srcVarDsc != nullptr)
9653 if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
9655 // Let fgMorphCopyBlock handle it.
9658 else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
9659 size == genTypeSize(genActualType(lclVarTree->TypeGet())))
9661 // Use the src local var directly.
9666 #ifndef LEGACY_BACKEND
9668 // The source argument of the copyblk can potentially
9669 // be accessed only through indir(addr(lclVar))
9670 // or indir(lclVarAddr) in rational form and liveness
9671 // won't account for these uses. That said,
9672 // we have to mark this local as address exposed so
9673 // we don't delete it as a dead store later on.
9674 unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
9675 lvaTable[lclVarNum].lvAddrExposed = true;
9676 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
9678 #else // LEGACY_BACKEND
9679 lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
9680 #endif // LEGACY_BACKEND
9682 if (src == lclVarTree)
9684 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
9685 src = gtNewOperNode(GT_IND, asgType, srcAddr);
9689 assert(src->OperIsIndir());
9694 if (src->OperIsIndir())
9696 if (!fgIsIndirOfAddrOfLocal(src))
9698 // If we have no information about the src, we have to assume it could
9699 // live anywhere (not just in the GC heap).
9700 // Mark the GT_IND node so that we use the correct write barrier helper in case
9701 // the field is a GC ref.
9702 src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9705 src->gtFlags &= (~GTF_EXCEPT | src->AsIndir()->Addr()->gtFlags);
9706 src->SetIndirExceptionFlags(this);
9713 if (varTypeIsSIMD(asgType))
9715 assert(!isCopyBlock); // Else we would have returned the tree above.
9716 noway_assert(src->IsIntegralConst(0));
9717 noway_assert(destVarDsc != nullptr);
9719 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
9720 tree->gtOp.gtOp2 = src;
9726 if (src->OperIsInitVal())
9728 src = src->gtGetOp1();
9730 assert(src->IsCnsIntOrI());
9731 // This will mutate the integer constant, in place, to be the correct
9732 // value for the type we are using in the assignment.
9733 src->AsIntCon()->FixupInitBlkValue(asgType);
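// For example (illustrative): an init value of 0x3F being assigned as a TYP_INT becomes
// 0x3F3F3F3F, and as a TYP_LONG becomes 0x3F3F3F3F3F3F3F3F - the single init byte is
// replicated across every byte of the scalar type.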
9737 // Ensure that the dest is set up appropriately.
9738 if (dest->gtEffectiveVal()->OperIsIndir())
9740 dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
9743 // Ensure that the rhs is set up appropriately.
9746 src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
9749 // Set the lhs and rhs on the assignment.
9750 if (dest != tree->gtOp.gtOp1)
9752 asg->gtOp.gtOp1 = dest;
9754 if (src != asg->gtOp.gtOp2)
9756 asg->gtOp.gtOp2 = src;
9759 asg->ChangeType(asgType);
9760 dest->gtFlags |= GTF_DONT_CSE;
9761 asg->gtFlags &= ~GTF_EXCEPT;
9762 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9763 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9764 asg->gtFlags &= ~GTF_REVERSE_OPS;
9769 printf("fgMorphOneAsgBlock (after):\n");
9779 //------------------------------------------------------------------------
9780 // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
9783 // tree - a tree node with a gtOper of GT_INITBLK
9784 // the child nodes for tree have already been Morphed
9787 // We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9788 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
9789 // If we have performed struct promotion of the Dest() then we will try to
9790 // perform a field by field assignment for each of the promoted struct fields
9793 // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
9794 // if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9795 // cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
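// For example (an illustrative sketch): zero-initializing a promoted local
// 'struct { int i; float f; }' V03 would be morphed from one initBlk into a comma of
// per-field assignments, roughly
//
//     COMMA void
//       ASG(lclVar int   V04 /* V03.i */, CNS_INT int 0)
//       ASG(lclVar float V05 /* V03.f */, CNS_DBL float 0.0)
//
// where V04 and V05 are the promoted field locals.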
9797 GenTree* Compiler::fgMorphInitBlock(GenTree* tree)
9799 // We must have the GT_ASG form of InitBlkOp.
9800 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9802 bool morphed = false;
9805 GenTree* asg = tree;
9806 GenTree* src = tree->gtGetOp2();
9807 GenTree* origDest = tree->gtGetOp1();
9809 GenTree* dest = fgMorphBlkNode(origDest, true);
9810 if (dest != origDest)
9812 tree->gtOp.gtOp1 = dest;
9814 tree->gtType = dest->TypeGet();
9815 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
9816 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
9817 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9819 src->gtType = TYP_INT;
9821 JITDUMP("\nfgMorphInitBlock:");
9823 GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
9826 JITDUMP(" using oneAsgTree.\n");
9831 GenTree* destAddr = nullptr;
9832 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
9833 GenTree* blockSize = nullptr;
9834 unsigned blockWidth = 0;
9835 FieldSeqNode* destFldSeq = nullptr;
9836 LclVarDsc* destLclVar = nullptr;
9837 bool destDoFldAsg = false;
9838 unsigned destLclNum = BAD_VAR_NUM;
9839 bool blockWidthIsConst = false;
9840 GenTreeLclVarCommon* lclVarTree = nullptr;
9841 if (dest->IsLocal())
9843 lclVarTree = dest->AsLclVarCommon();
9847 if (dest->OperIsBlk())
9849 destAddr = dest->AsBlk()->Addr();
9850 blockWidth = dest->AsBlk()->gtBlkSize;
9854 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
9855 destAddr = dest->gtGetOp1();
9856 blockWidth = genTypeSize(dest->TypeGet());
9859 if (lclVarTree != nullptr)
9861 destLclNum = lclVarTree->gtLclNum;
9862 destLclVar = &lvaTable[destLclNum];
9863 blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
9864 blockWidthIsConst = true;
9868 if (dest->gtOper == GT_DYN_BLK)
9870 // The size must be an integer type
9871 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
9872 assert(varTypeIsIntegral(blockSize->gtType));
9876 assert(blockWidth != 0);
9877 blockWidthIsConst = true;
9880 if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9882 destLclNum = lclVarTree->gtLclNum;
9883 destLclVar = &lvaTable[destLclNum];
9886 if (destLclNum != BAD_VAR_NUM)
9888 #if LOCAL_ASSERTION_PROP
9889 // Kill everything about destLclNum (and its field locals)
9890 if (optLocalAssertionProp)
9892 if (optAssertionCount > 0)
9894 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9897 #endif // LOCAL_ASSERTION_PROP
9899 if (destLclVar->lvPromoted && blockWidthIsConst)
9901 assert(initVal->OperGet() == GT_CNS_INT);
9902 noway_assert(varTypeIsStruct(destLclVar));
9903 noway_assert(!opts.MinOpts());
9904 if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
9906 JITDUMP(" dest is address exposed and contains holes");
9910 if (blockWidth == destLclVar->lvExactSize)
9912 JITDUMP(" (destDoFldAsg=true)");
9913 // We may decide later that a copyblk is required when this struct has holes
9914 destDoFldAsg = true;
9918 JITDUMP(" with mismatched size");
9924 // Can we use field by field assignment for the dest?
9925 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9927 JITDUMP(" dest contains holes");
9928 destDoFldAsg = false;
9931 JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9933 // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9934 // we need to change it back.
9935 if (!destDoFldAsg && !dest->OperIsBlk())
9937 noway_assert(blockWidth != 0);
9938 tree->gtOp.gtOp1 = origDest;
9939 tree->gtType = origDest->gtType;
9942 if (!destDoFldAsg && (destLclVar != nullptr))
9944 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9945 if (!destLclVar->lvRegStruct)
9947 // Mark it as DoNotEnregister.
9948 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9952 // Mark the dest struct as DoNotEnreg
9953 // when it is a LclVar struct and we are keeping the block initialization
9954 // or the struct is not promoted
9958 #if CPU_USES_BLOCK_MOVE
9959 compBlkOpUsed = true;
9961 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9962 tree->gtOp.gtOp1 = dest;
9963 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9967 // The initVal must be a constant of TYP_INT
9968 noway_assert(initVal->OperGet() == GT_CNS_INT);
9969 noway_assert(genActualType(initVal->gtType) == TYP_INT);
9971 // The dest must be of a struct type.
9972 noway_assert(varTypeIsStruct(destLclVar));
9975 // Now, convert InitBlock to individual assignments
9979 INDEBUG(morphed = true);
9983 unsigned fieldLclNum;
9984 unsigned fieldCnt = destLclVar->lvFieldCnt;
9986 for (unsigned i = 0; i < fieldCnt; ++i)
9988 fieldLclNum = destLclVar->lvFieldLclStart + i;
9989 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9991 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9992 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9993 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9995 srcCopy = gtCloneExpr(initVal);
9996 noway_assert(srcCopy != nullptr);
9998 // need type of oper to be same as tree
9999 if (dest->gtType == TYP_LONG)
10001 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
10002 // copy and extend the value
10003 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
10004 /* Change the type of srcCopy to TYP_LONG */
10005 srcCopy->gtType = TYP_LONG;
10007 else if (varTypeIsFloating(dest->gtType))
10009 srcCopy->ChangeOperConst(GT_CNS_DBL);
10010 // set up the bit pattern
10011 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
10012 sizeof(srcCopy->gtDblCon.gtDconVal));
10013 /* Change the type of srcCopy to TYP_DOUBLE */
10014 srcCopy->gtType = TYP_DOUBLE;
10018 noway_assert(srcCopy->gtOper == GT_CNS_INT);
10019 noway_assert(srcCopy->TypeGet() == TYP_INT);
10020 // set up the bit pattern
10021 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
10022 sizeof(srcCopy->gtIntCon.gtIconVal));
10025 srcCopy->gtType = dest->TypeGet();
10027 asg = gtNewAssignNode(dest, srcCopy);
10029 #if LOCAL_ASSERTION_PROP
10030 if (optLocalAssertionProp)
10032 optAssertionGen(asg);
10034 #endif // LOCAL_ASSERTION_PROP
10038 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10051 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10055 printf("fgMorphInitBlock (after):\n");
10064 //------------------------------------------------------------------------
10065 // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
10068 // tree - the node to be modified.
10069 // type - the type of indirection to change it to.
10072 // Returns the node, modified in place.
10075 // This doesn't really warrant a separate method, but is here to abstract
10076 // the fact that these nodes can be modified in-place.
10078 GenTree* Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
10080 tree->SetOper(GT_IND);
10081 tree->gtType = type;
10085 //------------------------------------------------------------------------
10086 // fgMorphGetStructAddr: Gets the address of a struct object
10089 // pTree - the parent's pointer to the struct object node
10090 // clsHnd - the class handle for the struct type
10091 // isRValue - true if this is a source (not dest)
10094 // Returns the address of the struct value, possibly modifying the existing tree to
10095 // sink the address below any comma nodes (this is to canonicalize for value numbering).
10096 // If this is a source, it will morph it to an GT_IND before taking its address,
10097 // since it may not be remorphed (and we don't want blk nodes as rvalues).
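// For example (illustrative): for a struct rvalue 'COMMA(sideEffect, OBJ(addr))', the
// recursive call replaces the comma's op2 with the OBJ's address operand, yielding
// 'COMMA(sideEffect, addr)' retyped to TYP_BYREF, so value numbering sees a canonical
// address-shaped comma.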
10099 GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
10102 GenTree* tree = *pTree;
10103 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
10104 // need to hang onto that for the purposes of value numbering.
10105 if (tree->OperIsIndir())
10107 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
10109 addr = tree->gtOp.gtOp1;
10113 if (isRValue && tree->OperIsBlk())
10115 tree->ChangeOper(GT_IND);
10117 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
10120 else if (tree->gtOper == GT_COMMA)
10122 // If this is a comma, we're going to "sink" the GT_ADDR below it.
10123 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
10124 tree->gtType = TYP_BYREF;
10129 switch (tree->gtOper)
10136 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
10138 case GT_INDEX_ADDR:
10143 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
10144 // not going to use "temp"
10145 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
10146 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
10155 //------------------------------------------------------------------------
10156 // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
10159 // tree - The struct type node
10160 // isDest - True if this is the destination of the assignment
10163 // Returns the possibly-morphed node. The caller is responsible for updating
10164 // the parent of this node.
10166 GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest)
10168 GenTree* handleTree = nullptr;
10169 GenTree* addr = nullptr;
10170 if (tree->OperIs(GT_COMMA))
10172 // In order to CSE and value number array index expressions and bounds checks,
10173 // the commas in which they are contained need to match.
10174 // The pattern is that the COMMA should be the address expression.
10175 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
10176 // TODO-1stClassStructs: Consider whether this can be improved.
10177 // Also consider whether some of this can be included in gtNewBlockVal (though note
10178 // that doing so may cause us to query the type system before we otherwise would).
10180 // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct
10181 // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct
10184 GenTree* effectiveVal = tree->gtEffectiveVal();
10186 GenTreePtrStack commas(this);
10187 for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2())
10189 commas.Push(comma);
10192 GenTree* lastComma = commas.Top();
10193 noway_assert(lastComma->gtGetOp2() == effectiveVal);
10194 GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
10196 effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10198 lastComma->gtOp.gtOp2 = effectiveValAddr;
10200 while (commas.Height() > 0)
10202 GenTree* comma = commas.Pop();
10203 comma->gtType = TYP_BYREF;
10204 gtUpdateNodeSideEffects(comma);
10207 handleTree = effectiveVal;
10209 else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR))
10212 addr = tree->AsIndir()->Addr();
10215 if (addr != nullptr)
10217 var_types structType = handleTree->TypeGet();
10218 if (structType == TYP_STRUCT)
10220 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree);
10221 if (structHnd == NO_CLASS_HANDLE)
10223 tree = gtNewOperNode(GT_IND, structType, addr);
10227 tree = gtNewObjNode(structHnd, addr);
10228 if (tree->OperGet() == GT_OBJ)
10230 gtSetObjGcInfo(tree->AsObj());
10236 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
10239 gtUpdateNodeSideEffects(tree);
10241 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10245 if (!tree->OperIsBlk())
10249 GenTreeBlk* blkNode = tree->AsBlk();
10250 if (blkNode->OperGet() == GT_DYN_BLK)
10252 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
10254 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
10255 // A GT_BLK with size of zero is not supported,
10256 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
10259 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
10260 blkNode->ChangeOper(GT_BLK);
10261 blkNode->gtBlkSize = size;
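// For example (illustrative): a GT_DYN_BLK whose gtDynamicSize is a CNS_INT of 16 is
// rewritten in place to a GT_BLK with gtBlkSize == 16, making the size visible to later
// phases as a compile-time constant.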
10273 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
10274 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
10276 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
10277 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
10279 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
10286 //------------------------------------------------------------------------
10287 // fgMorphBlockOperand: Canonicalize an operand of a block assignment
10290 // tree - The block operand
10291 // asgType - The type of the assignment
10292 // blockWidth - The size of the block
10293 // isDest - true iff this is the destination of the assignment
10296 // Returns the morphed block operand
10299 // This does the following:
10300 // - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
10301 // - Ensures that any COMMAs are above ADDR nodes.
10302 // Although 'tree' WAS an operand of a block assignment, the assignment
10303 // may have been retyped to be a scalar assignment.
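// For example (illustrative): for an 8-byte copy whose operand is 'OBJ(ADDR(lclVar V02))',
// where V02 is a struct local of exactly 8 bytes and its type matches asgType, the wrapping
// indirection is discarded and the bare 'lclVar V02' becomes the operand
// (non-LEGACY_BACKEND only).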
10305 GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
10307 GenTree* effectiveVal = tree->gtEffectiveVal();
10309 if (!varTypeIsStruct(asgType))
10311 if (effectiveVal->OperIsIndir())
10313 GenTree* addr = effectiveVal->AsIndir()->Addr();
10314 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
10316 effectiveVal = addr->gtGetOp1();
10318 else if (effectiveVal->OperIsBlk())
10320 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
10324 effectiveVal->gtType = asgType;
10327 else if (effectiveVal->TypeGet() != asgType)
10329 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
10330 effectiveVal = gtNewIndir(asgType, addr);
10335 GenTreeIndir* indirTree = nullptr;
10336 GenTreeLclVarCommon* lclNode = nullptr;
10337 bool needsIndirection = true;
10339 if (effectiveVal->OperIsIndir())
10341 indirTree = effectiveVal->AsIndir();
10342 GenTree* addr = effectiveVal->AsIndir()->Addr();
10343 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
10345 lclNode = addr->gtGetOp1()->AsLclVarCommon();
10348 else if (effectiveVal->OperGet() == GT_LCL_VAR)
10350 lclNode = effectiveVal->AsLclVarCommon();
10352 #ifdef FEATURE_SIMD
10353 if (varTypeIsSIMD(asgType))
10355 if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
10356 (indirTree->Addr()->gtGetOp1()->OperIsSIMDorSimdHWintrinsic()))
10359 needsIndirection = false;
10360 effectiveVal = indirTree->Addr()->gtGetOp1();
10362 if (effectiveVal->OperIsSIMDorSimdHWintrinsic())
10364 needsIndirection = false;
10367 #endif // FEATURE_SIMD
10368 if (lclNode != nullptr)
10370 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
10371 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType))
10373 #ifndef LEGACY_BACKEND
10374 if (effectiveVal != lclNode)
10376 JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->gtLclNum);
10377 effectiveVal = lclNode;
10379 needsIndirection = false;
10380 #endif // !LEGACY_BACKEND
10384 // This may be a lclVar that was determined to be address-exposed.
10385 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
10388 if (needsIndirection)
10390 if (indirTree != nullptr)
10392 // We should never find a struct indirection on the lhs of an assignment.
10393 assert(!isDest || indirTree->OperIsBlk());
10394 if (!isDest && indirTree->OperIsBlk())
10396 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
10402 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
10405 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
10406 if (clsHnd == NO_CLASS_HANDLE)
10408 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
10412 newTree = gtNewObjNode(clsHnd, addr);
10413 if (isDest && (newTree->OperGet() == GT_OBJ))
10415 gtSetObjGcInfo(newTree->AsObj());
10417 if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
10419 // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
10420 // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
10421 // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
10422 // separately now to avoid excess diffs.
10423 newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
10429 newTree = gtNewIndir(asgType, addr);
10431 effectiveVal = newTree;
10435 tree = effectiveVal;
10439 //------------------------------------------------------------------------
10440 // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
10443 // dest - the GT_OBJ or GT_STORE_OBJ
10446 // The destination must be known (by the caller) to be on the stack.
10449 // If we have a CopyObj with a dest on the stack, and its size is small enough
10450 // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
10451 // GC Unsafe CopyBlk that is non-interruptible.
10452 // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
10454 void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
10456 #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) && !defined(LEGACY_BACKEND)
10457 assert(dest->gtGcPtrCount != 0);
10458 unsigned blockWidth = dest->AsBlk()->gtBlkSize;
10460 bool destOnStack = false;
10461 GenTree* destAddr = dest->Addr();
10462 assert(destAddr->IsLocalAddrExpr() != nullptr);
10464 if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
10466 genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
10467 dest->SetOper(newOper);
10468 dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
10470 #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
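// Usage note (illustrative): a 32-byte CopyObj whose destination is known to be on the
// stack passes the [2 * TARGET_POINTER_SIZE .. CPBLK_UNROLL_LIMIT] size check above, so its
// GT_OBJ becomes a GT_BLK with gtBlkOpGcUnsafe set, and codegen may emit the unrolled copy
// inside a single non-interruptible region.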
10473 //------------------------------------------------------------------------
10474 // fgMorphCopyBlock: Perform the Morphing of block copy
10477 // tree - a block copy (i.e. an assignment with a block op on the lhs).
10480 // We can return the original block copy unmodified (least desirable, but always correct)
10481 // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
10482 // If we have performed struct promotion of the Source() or the Dest() then we will try to
10483 // perform a field by field assignment for each of the promoted struct fields.
10486 // The child nodes for tree have already been Morphed.
10489 // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
10490 // When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
10491 // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
10492 // if the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
10493 // cannot use a field by field assignment and must leave the original block copy unmodified.
10495 GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
10497 noway_assert(tree->OperIsCopyBlkOp());
10499 JITDUMP("\nfgMorphCopyBlock:");
10501 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
10503 GenTree* asg = tree;
10504 GenTree* rhs = asg->gtGetOp2();
10505 GenTree* dest = asg->gtGetOp1();
10507 #if FEATURE_MULTIREG_RET
10508 // If this is a multi-reg return, we will not do any morphing of this node.
10509 if (rhs->IsMultiRegCall())
10511 assert(dest->OperGet() == GT_LCL_VAR);
10512 JITDUMP(" not morphing a multireg call return\n");
10515 #endif // FEATURE_MULTIREG_RET
10517 // If we have an array index on the lhs, we need to create an obj node.
10519 dest = fgMorphBlkNode(dest, true);
10520 if (dest != asg->gtGetOp1())
10522 asg->gtOp.gtOp1 = dest;
10523 if (dest->IsLocal())
10525 dest->gtFlags |= GTF_VAR_DEF;
10528 asg->gtType = dest->TypeGet();
10529 rhs = fgMorphBlkNode(rhs, false);
10531 asg->gtOp.gtOp2 = rhs;
10533 GenTree* oldTree = tree;
10534 GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
10538 JITDUMP(" using oneAsgTree.\n");
10543 unsigned blockWidth;
10544 bool blockWidthIsConst = false;
10545 GenTreeLclVarCommon* lclVarTree = nullptr;
10546 GenTreeLclVarCommon* srcLclVarTree = nullptr;
10547 unsigned destLclNum = BAD_VAR_NUM;
10548 LclVarDsc* destLclVar = nullptr;
10549 FieldSeqNode* destFldSeq = nullptr;
10550 bool destDoFldAsg = false;
10551 GenTree* destAddr = nullptr;
10552 GenTree* srcAddr = nullptr;
10553 bool destOnStack = false;
10554 bool hasGCPtrs = false;
10556 JITDUMP("block assignment to morph:\n");
10559 if (dest->IsLocal())
10561 blockWidthIsConst = true;
10562 destOnStack = true;
10563 if (dest->gtOper == GT_LCL_VAR)
10565 lclVarTree = dest->AsLclVarCommon();
10566 destLclNum = lclVarTree->gtLclNum;
10567 destLclVar = &lvaTable[destLclNum];
10568 if (destLclVar->lvType == TYP_STRUCT)
10570 // It would be nice if lvExactSize always corresponded to the size of the struct,
10571 // but it doesn't always for the temps that the importer creates when it spills side
10572 // effects.
10573 // TODO-Cleanup: Determine when this happens, and whether it can be changed.
10574 blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
10578 blockWidth = genTypeSize(destLclVar->lvType);
10580 hasGCPtrs = destLclVar->lvStructGcCount != 0;
10584 assert(dest->TypeGet() != TYP_STRUCT);
10585 assert(dest->gtOper == GT_LCL_FLD);
10586 blockWidth = genTypeSize(dest->TypeGet());
10587 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10588 destFldSeq = dest->AsLclFld()->gtFieldSeq;
10593 GenTree* effectiveDest = dest->gtEffectiveVal();
10594 if (effectiveDest->OperGet() == GT_IND)
10596 assert(dest->TypeGet() != TYP_STRUCT);
10597 blockWidth = genTypeSize(effectiveDest->TypeGet());
10598 blockWidthIsConst = true;
10599 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
10601 destAddr = dest->gtGetOp1();
10606 assert(effectiveDest->OperIsBlk());
10607 GenTreeBlk* blk = effectiveDest->AsBlk();
10609 blockWidth = blk->gtBlkSize;
10610 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
10611 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
10613 destAddr = blk->Addr();
10616 if (destAddr != nullptr)
10618 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
10619 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
10621 destOnStack = true;
10622 destLclNum = lclVarTree->gtLclNum;
10623 destLclVar = &lvaTable[destLclNum];
10628 if (destLclVar != nullptr)
10630 #if LOCAL_ASSERTION_PROP
10631 // Kill everything about destLclNum (and its field locals)
10632 if (optLocalAssertionProp)
10634 if (optAssertionCount > 0)
10636 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
10639 #endif // LOCAL_ASSERTION_PROP
10641 if (destLclVar->lvPromoted && blockWidthIsConst)
10643 noway_assert(varTypeIsStruct(destLclVar));
10644 noway_assert(!opts.MinOpts());
10646 if (blockWidth == destLclVar->lvExactSize)
10648 JITDUMP(" (destDoFldAsg=true)");
10649 // We may decide later that a copyblk is required when this struct has holes
10650 destDoFldAsg = true;
10654 JITDUMP(" with mismatched dest size");
10659 FieldSeqNode* srcFldSeq = nullptr;
10660 unsigned srcLclNum = BAD_VAR_NUM;
10661 LclVarDsc* srcLclVar = nullptr;
10662 bool srcDoFldAsg = false;
10664 if (rhs->IsLocal())
10666 srcLclVarTree = rhs->AsLclVarCommon();
10667 srcLclNum = srcLclVarTree->gtLclNum;
10668 if (rhs->OperGet() == GT_LCL_FLD)
10670 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
10673 else if (rhs->OperIsIndir())
10675 if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
10677 srcLclNum = srcLclVarTree->gtLclNum;
10681 srcAddr = rhs->gtOp.gtOp1;
10685 if (srcLclNum != BAD_VAR_NUM)
10687 srcLclVar = &lvaTable[srcLclNum];
10689 if (srcLclVar->lvPromoted && blockWidthIsConst)
10691 noway_assert(varTypeIsStruct(srcLclVar));
10692 noway_assert(!opts.MinOpts());
10694 if (blockWidth == srcLclVar->lvExactSize)
10696 JITDUMP(" (srcDoFldAsg=true)");
10697 // We may decide later that a copyblk is required when this struct has holes
10698 srcDoFldAsg = true;
10702 JITDUMP(" with mismatched src size");
10707 // Check to see if we are required to do a copy block because the struct contains holes
10708 // and either the src or dest is externally visible
10710 bool requiresCopyBlock = false;
10711 bool srcSingleLclVarAsg = false;
10712 bool destSingleLclVarAsg = false;
10714 if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
10716 // Self-assign; no effect.
10717 GenTree* nop = gtNewNothingNode();
10718 INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
10722 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
10723 if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
10725 requiresCopyBlock = true;
10728 // Can we use field by field assignment for the dest?
10729 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
10731 JITDUMP(" dest contains custom layout and contains holes");
10732 // C++ style CopyBlock with holes
10733 requiresCopyBlock = true;
10736 // Can we use field by field assignment for the src?
10737 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
10739 JITDUMP(" src contains custom layout and contains holes");
10740 // C++ style CopyBlock with holes
10741 requiresCopyBlock = true;
10744 #if defined(_TARGET_ARM_)
10745 if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
10747 JITDUMP(" rhs is unaligned");
10748 requiresCopyBlock = true;
10751 if (asg->gtFlags & GTF_BLK_UNALIGNED)
10753 JITDUMP(" asg is unaligned");
10754 requiresCopyBlock = true;
10756 #endif // _TARGET_ARM_
10758 if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
10760 requiresCopyBlock = true;
10763 // Can't use field by field assignment if the src is a call.
10764 if (rhs->OperGet() == GT_CALL)
10766 JITDUMP(" src is a call");
10767 // A call produces its result as a whole block, so leave this as a CopyBlock.
10768 requiresCopyBlock = true;
10771 // If we passed the above checks, then we will check these two
10772 if (!requiresCopyBlock)
10774 // Are both dest and src promoted structs?
10775 if (destDoFldAsg && srcDoFldAsg)
10777 // Both structs should be of the same type, or each have a single field of the same type.
10778 // If not we will use a copy block.
10779 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10780 lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10782 unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10783 unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
10784 if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10785 (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10787 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10788 JITDUMP(" with mismatched types");
10792 // Are neither dest or src promoted structs?
10793 else if (!destDoFldAsg && !srcDoFldAsg)
10795 requiresCopyBlock = true; // Leave as a CopyBlock
10796 JITDUMP(" with no promoted structs");
10798 else if (destDoFldAsg)
10800 // Match the following kinds of trees:
10801 // fgMorphTree BB01, stmt 9 (before)
10802 // [000052] ------------ const int 8
10803 // [000053] -A--G------- copyBlk void
10804 // [000051] ------------ addr byref
10805 // [000050] ------------ lclVar long V07 loc5
10806 // [000054] --------R--- <list> void
10807 // [000049] ------------ addr byref
10808 // [000048] ------------ lclVar struct(P) V06 loc4
10809 // long V06.h (offs=0x00) -> V17 tmp9
10810 // Yields this transformation
10811 // fgMorphCopyBlock (after):
10812 // [000050] ------------ lclVar long V07 loc5
10813 // [000085] -A---------- = long
10814 // [000083] D------N---- lclVar long V17 tmp9
10816 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10817 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10819 // Reject the following tree:
10820 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
10822 // fgMorphTree BB01, stmt 6 (before)
10823 // [000038] ------------- const int 4
10824 // [000039] -A--G-------- copyBlk void
10825 // [000037] ------------- addr byref
10826 // [000036] ------------- lclVar int V05 loc3
10827 // [000040] --------R---- <list> void
10828 // [000035] ------------- addr byref
10829 // [000034] ------------- lclVar struct(P) V04 loc2
10830 // float V04.f1 (offs=0x00) -> V13 tmp6
10831 // As this would transform into
10832 // float V13 = int V05
10834 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10835 var_types destType = lvaTable[fieldLclNum].TypeGet();
10836 if (srcLclVar->TypeGet() == destType)
10838 srcSingleLclVarAsg = true;
10844 assert(srcDoFldAsg);
10845 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10847 // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
10848 // /--* byref V18._value (offs=0x00) -> V30 tmp21
10849 // [000245] -A------R--- * = struct (copy)
10850 // [000244] -----+------ \--* obj(8) struct
10851 // [000243] -----+------ \--* addr byref
10852 // [000242] D----+-N---- \--* lclVar byref V28 tmp19
10854 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10855 (blockWidth == genTypeSize(destLclVar->TypeGet())))
10857 // Check for type agreement
10858 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10859 var_types srcType = lvaTable[fieldLclNum].TypeGet();
10860 if (destLclVar->TypeGet() == srcType)
10862 destSingleLclVarAsg = true;
10868 // If we require a copy block, then set both of the field assign bools to false
10869 if (requiresCopyBlock)
10871 // If a copy block is required then we won't do field by field assignments
10872 destDoFldAsg = false;
10873 srcDoFldAsg = false;
10876 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10878 // Mark the dest/src structs as DoNotEnreg
10879 // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
10880 // or the struct is not promoted
10882 if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10884 if (!destLclVar->lvRegStruct)
10886 // Mark it as DoNotEnregister.
10887 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10891 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10893 if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet()))
10895 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10899 if (requiresCopyBlock)
10901 #if CPU_USES_BLOCK_MOVE
10902 compBlkOpUsed = true;
10904 var_types asgType = dest->TypeGet();
10905 dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
10906 asg->gtOp.gtOp1 = dest;
10907 asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10909 // Note that the unrolling of CopyBlk is only implemented on some platforms.
10910 // Currently that includes x64 and ARM but not x86: the code generation for this
10911 // construct requires the ability to mark certain regions of the generated code
10912 // as non-interruptible, and the GC encoding for the latter platform does not
10913 // have this capability.
10915 // If we have a CopyObj with a dest on the stack
10916 // we will convert it into an GC Unsafe CopyBlk that is non-interruptible
10917 // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10918 // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10920 if (destOnStack && (dest->OperGet() == GT_OBJ))
10922 fgMorphUnsafeBlk(dest->AsObj());
10925 // Eliminate the "OBJ or BLK" node on the rhs.
10926 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
10927 asg->gtOp.gtOp2 = rhs;
10929 #ifdef LEGACY_BACKEND
10930 if (!rhs->OperIsIndir())
10932 noway_assert(rhs->gtOper == GT_LCL_VAR);
10933 GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
10934 rhs = gtNewIndir(TYP_STRUCT, rhsAddr);
10936 #endif // LEGACY_BACKEND
10937 // Formerly, liveness did not consider copyblk arguments of simple types as being
10938 // a use or def, so these variables were marked as address-exposed.
10939 // TODO-1stClassStructs: This should no longer be needed.
10940 if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
10942 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10943 lvaTable[srcLclNum].lvAddrExposed = true;
10946 if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
10948 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10949 lvaTable[destLclNum].lvAddrExposed = true;
10956 // Otherwise we convert this CopyBlock into individual field by field assignments
10961 GenTree* addrSpill = nullptr;
10962 unsigned addrSpillTemp = BAD_VAR_NUM;
10963 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10965 unsigned fieldCnt = DUMMY_INIT(0);
10967 if (destDoFldAsg && srcDoFldAsg)
10969 // To do fieldwise assignments for both sides, they'd better be the same struct type!
10970 // All of these conditions were checked above...
10971 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10972 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10974 fieldCnt = destLclVar->lvFieldCnt;
10975 goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10976                     // by field assignments.
10978 else if (destDoFldAsg)
10980 fieldCnt = destLclVar->lvFieldCnt;
10981 rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10982 if (srcAddr == nullptr)
10984 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10989 assert(srcDoFldAsg);
10990 fieldCnt = srcLclVar->lvFieldCnt;
10991 dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10992 if (dest->OperIsBlk())
10994 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10996 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
11001 noway_assert(!srcDoFldAsg);
11002 if (gtClone(srcAddr))
11004 // srcAddr is simple expression. No need to spill.
11005 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
11009 // srcAddr is complex expression. Clone and spill it (unless the destination is
11010 // a struct local that only has one field, in which case we'd only use the
11011 // address value once...)
11012 if (destLclVar->lvFieldCnt > 1)
11014 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
11015 noway_assert(addrSpill != nullptr);
11022 noway_assert(!destDoFldAsg);
11024 // If we're doing field-wise stores, to an address within a local, and we copy
11025 // the address into "addrSpill", do *not* declare the original local var node in the
11026 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
11027 // field-wise assignments as an "indirect" assignment to the local.
11028 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
11030 if (lclVarTree != nullptr)
11032 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
11035 if (gtClone(destAddr))
11037 // destAddr is simple expression. No need to spill
11038 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
11042 // destAddr is complex expression. Clone and spill it (unless
11043 // the source is a struct local that only has one field, in which case we'd only
11044 // use the address value once...)
11045 if (srcLclVar->lvFieldCnt > 1)
11047 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
11048 noway_assert(addrSpill != nullptr);
11051 // TODO-CQ: this should be based on a more general
11052 // "BaseAddress" method, that handles fields of structs, before or after
11054 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
11056 if (addrSpill->gtOp.gtOp1->IsLocal())
11058 // We will *not* consider this to define the local, but rather have each individual field assign
11059 // be a definition.
11060 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
11061 assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
11062 PROMOTION_TYPE_INDEPENDENT);
11063 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
11064 // local stack frame
11070 if (addrSpill != nullptr)
11072 // Spill the (complex) address to a BYREF temp.
11073 // Note, at most one address may need to be spilled.
11074 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
11076 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
11078 if (addrSpillIsStackDest)
11080 lvaTable[addrSpillTemp].lvStackByref = true;
11083 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
11085 // If we are assigning the address of a LclVar here
11086 // liveness does not account for this kind of address taken use.
11088 // We have to mark this local as address exposed so
11089 // that we don't delete the definition for this LclVar
11090 // as a dead store later on.
11092 if (addrSpill->OperGet() == GT_ADDR)
11094 GenTree* addrOp = addrSpill->gtOp.gtOp1;
11095 if (addrOp->IsLocal())
11097 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
11098 lvaTable[lclVarNum].lvAddrExposed = true;
11099 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
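// For example (an illustrative sketch): copying a promoted two-field struct into
// '*(p + i * 16)' spills the complex destination address once and reuses it:
//
//     tmpByref = p + i * 16                        // the addrSpillTemp, TYP_BYREF
//     IND(tmpByref + 0 /* offset of f0 */) = V<f0>
//     IND(tmpByref + 4 /* offset of f1 */) = V<f1>
//
// rather than re-evaluating the address expression for every field.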
11106 for (unsigned i = 0; i < fieldCnt; ++i)
11108 FieldSeqNode* curFieldSeq = nullptr;
11111 noway_assert(destLclNum != BAD_VAR_NUM);
11112 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
11113 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
11114 // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
11115 if (destAddr != nullptr)
11117 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
11118 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
11122 noway_assert(lclVarTree != nullptr);
11123 dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
11125 // Don't CSE the lhs of an assignment.
11126 dest->gtFlags |= GTF_DONT_CSE;
11130 noway_assert(srcDoFldAsg);
11131 noway_assert(srcLclNum != BAD_VAR_NUM);
11132 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
11134 if (destSingleLclVarAsg)
11136 noway_assert(fieldCnt == 1);
11137 noway_assert(destLclVar != nullptr);
11138 noway_assert(addrSpill == nullptr);
11140 dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
11146 assert(addrSpillTemp != BAD_VAR_NUM);
11147 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
11151 dest = gtCloneExpr(destAddr);
11152 noway_assert(dest != nullptr);
11154 // Is the address of a local?
11155 GenTreeLclVarCommon* lclVarTree = nullptr;
11156 bool isEntire = false;
11157 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
11158 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
11160 lclVarTree->gtFlags |= GTF_VAR_DEF;
11163 lclVarTree->gtFlags |= GTF_VAR_USEASG;
11168 GenTree* fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
11169 // Have to set the field sequence -- which means we need the field handle.
11170 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
11171 CORINFO_FIELD_HANDLE fieldHnd =
11172 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
11173 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
11174 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
11176 dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
11178 dest = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), dest);
11180 // !!! The destination could be on stack. !!!
11181 // This flag will let us choose the correct write barrier.
11182 dest->gtFlags |= GTF_IND_TGTANYWHERE;
11188 noway_assert(srcLclNum != BAD_VAR_NUM);
11189 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
11190 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
11192 noway_assert(srcLclVarTree != nullptr);
11193 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
11194 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
11195 // but they are when they are under a GT_ADDR.
11196 src->gtFlags |= GTF_DONT_CSE;
11200 noway_assert(destDoFldAsg);
11201 noway_assert(destLclNum != BAD_VAR_NUM);
11202 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
11204 if (srcSingleLclVarAsg)
11206 noway_assert(fieldCnt == 1);
11207 noway_assert(srcLclVar != nullptr);
11208 noway_assert(addrSpill == nullptr);
11210 src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
11216 assert(addrSpillTemp != BAD_VAR_NUM);
11217 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
11221 src = gtCloneExpr(srcAddr);
11222 noway_assert(src != nullptr);
11225 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
11226 CORINFO_FIELD_HANDLE fieldHnd =
11227 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
11228 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
11230 src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
11231 new (this, GT_CNS_INT)
11232 GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
11234 src = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), src);
11238 noway_assert(dest->TypeGet() == src->TypeGet());
11240 asg = gtNewAssignNode(dest, src);
11242 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
11243 // and it was of a local, record the assignment as an indirect update of a local.
11244 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
11246 curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
11247 bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
11248 IndirectAssignmentAnnotation* pIndirAnnot =
11249 new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
11250 GetIndirAssignMap()->Set(asg, pIndirAnnot);
11253 #if LOCAL_ASSERTION_PROP
11254 if (optLocalAssertionProp)
11256 optAssertionGen(asg);
11258 #endif // LOCAL_ASSERTION_PROP
11262 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
11273 tree->gtFlags |= GTF_LATE_ARG;
11277 if (tree != oldTree)
11279 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11284 printf("\nfgMorphCopyBlock (after):\n");
11293 // Insert conversions and normalize to make the tree amenable to register-based
11294 // FP architectures.
11295 GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
11297 if (tree->OperIsArithmetic())
11299 if (varTypeIsFloating(tree))
11301 GenTree* op1 = tree->gtOp.gtOp1;
11302 GenTree* op2 = tree->gtGetOp2();
11304 assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet()));
11306 if (op1->TypeGet() != tree->TypeGet())
11308 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet());
11310 if (op2->TypeGet() != tree->TypeGet())
11312 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet());
11316 else if (tree->OperIsCompare())
11318 GenTree* op1 = tree->gtOp.gtOp1;
11320 if (varTypeIsFloating(op1))
11322 GenTree* op2 = tree->gtGetOp2();
11323 assert(varTypeIsFloating(op2));
11325 if (op1->TypeGet() != op2->TypeGet())
11327 // both had better be floating, just one bigger than the other
11328 if (op1->TypeGet() == TYP_FLOAT)
11330 assert(op2->TypeGet() == TYP_DOUBLE);
11331 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
11333 else if (op2->TypeGet() == TYP_FLOAT)
11335 assert(op1->TypeGet() == TYP_DOUBLE);
11336 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
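// For example (illustrative): for the C# compare 'f < d' with 'float f; double d;', the
// float operand is widened so both sides agree:
//
//     GT_LT
//       GT_CAST double <- float (f)
//       lclVar double (d)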
11345 //--------------------------------------------------------------------------------------------------------------
11346 // fgMorphRecognizeBoxNullable:
11347 // Recognize this pattern:
11349 // stmtExpr void (IL 0x000... ???)
11351 // CNS_INT ref null
11353 // CALL help ref HELPER.CORINFO_HELP_BOX_NULLABLE
11354 // CNS_INT(h) long 0x7fed96836c8 class
11356 // FIELD struct value
11357 // LCL_VAR ref V00 this
11359 // which comes from this code:
11361 // return this.value==null;
11363 // and transform it into
11365 // stmtExpr void (IL 0x000... ???)
11367 // CNS_INT ref null
11371 // FIELD struct value
11372 // LCL_VAR ref V00 this
11375 // compare - Compare tree to optimize.
11378 // A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found;
11379 // the original tree otherwise.
11382 GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
11384 GenTree* op1 = compare->gtOp.gtOp1;
11385 GenTree* op2 = compare->gtOp.gtOp2;
11387 GenTreeCall* opCall;
11389 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
11392 opCall = op2->AsCall();
11394 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
11397 opCall = op1->AsCall();
11404 if (!opCns->IsIntegralConst(0))
11409 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
11414 // Get the nullable struct argument
11415 GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
11417 // Check for cases that are unsafe to optimize and return the unchanged tree
11418 if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0))
11423 // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset
11424 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg);
11428 compare->gtOp.gtOp1 = newOp;
11432 compare->gtOp.gtOp2 = newOp;
11435 opCns->gtType = TYP_INT;
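// Illustrative sketch: for a field "Nullable<int> value", the IL for
// "this.value == null" boxes the nullable via CORINFO_HELP_BOX_NULLABLE and
// compares the result against null. Since the helper returns null exactly
// when 'hasValue' is false, we can instead read the bool at offset 0 of the
// struct and compare it against 0, avoiding the box allocation entirely.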
11440 #ifdef FEATURE_SIMD
11442 //--------------------------------------------------------------------------------------------------------------
11443 // getSIMDStructFromField:
11444 //    Check whether the field belongs to a SIMD struct. If it does, return the GenTree* for
11445 //    the struct node, along with the base type, field index, and SIMD size. If it does not, return nullptr.
11446 //    Usually, if the tree node comes from a SIMD local that is not used in any SIMD intrinsic, we
11447 //    return nullptr, since in that case the SIMD struct should be treated as a regular struct.
11448 //    However, if the caller wants the SIMD struct node regardless, it can set ignoreUsedInSIMDIntrinsic
11449 //    to true. The IsUsedInSIMDIntrinsic check is then skipped, and the SIMD struct node is returned
11450 //    whenever the struct is a SIMD struct.
11451 //
11452 // Arguments:
11453 //    tree         - GenTree*. This node is checked to see whether it is a field that belongs to a SIMD
11454 //                   struct used in a SIMD intrinsic.
11455 //    pBaseTypeOut - var_types pointer; if the tree node is the one we want, *pBaseTypeOut is set
11456 //                   to the SIMD local's base type.
11457 //    indexOut     - unsigned pointer; if the tree is used in a SIMD intrinsic, *indexOut is set
11458 //                   to the index number of this field.
11459 //    simdSizeOut  - unsigned pointer; if the tree is used in a SIMD intrinsic, *simdSizeOut is set
11460 //                   to the size of the SIMD struct this tree belongs to.
11461 //    ignoreUsedInSIMDIntrinsic - bool; if set to true, this function skips
11462 //                   the UsedInSIMDIntrinsic check.
11463 //
11464 // Return Value:
11465 //    The GenTree* for the SIMD local that the field belongs to. If the tree is not a
11466 //    SIMD-intrinsic-related field, return nullptr.
11469 GenTree* Compiler::getSIMDStructFromField(GenTree* tree,
11470 var_types* pBaseTypeOut,
11471 unsigned* indexOut,
11472 unsigned* simdSizeOut,
11473 bool ignoreUsedInSIMDIntrinsic /*false*/)
11475 GenTree* ret = nullptr;
11476 if (tree->OperGet() == GT_FIELD)
11478 GenTree* objRef = tree->gtField.gtFldObj;
11479 if (objRef != nullptr)
11481 GenTree* obj = nullptr;
11482 if (objRef->gtOper == GT_ADDR)
11484 obj = objRef->gtOp.gtOp1;
11486 else if (ignoreUsedInSIMDIntrinsic)
11487 {
11488     obj = objRef;
11489 }
11490 else
11491 {
11492     return nullptr;
11493 }
11495 if (isSIMDTypeLocal(obj))
11497 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
11498 LclVarDsc* varDsc = &lvaTable[lclNum];
11499 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
11501 *simdSizeOut = varDsc->lvExactSize;
11502 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
11503 ret = obj;
11506 else if (obj->OperGet() == GT_SIMD)
11509 GenTreeSIMD* simdNode = obj->AsSIMD();
11510 *simdSizeOut = simdNode->gtSIMDSize;
11511 *pBaseTypeOut = simdNode->gtSIMDBaseType;
11512 ret = obj;
11513 #ifdef FEATURE_HW_INTRINSICS
11514 else if (obj->OperIsSimdHWIntrinsic())
11517 GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic();
11518 *simdSizeOut = simdNode->gtSIMDSize;
11519 *pBaseTypeOut = simdNode->gtSIMDBaseType;
11520 ret = obj;
11521 #endif // FEATURE_HW_INTRINSICS
11524 if (ret != nullptr)
11526 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
11527 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
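// Illustrative sketch: assuming a System.Numerics.Vector4 local "v"
// (a 16-byte SIMD struct with base type float), for the access "v.Z"
// gtFldOffset is 8 and genTypeSize(TYP_FLOAT) is 4, so this returns the
// local's tree with *indexOut = 2, *simdSizeOut = 16, and
// *pBaseTypeOut = TYP_FLOAT.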
11532 /*****************************************************************************
11533 * If a read operation tries to access a SIMD struct field, transform the
11534 * operation into the SIMD intrinsic SIMDIntrinsicGetItem and return the new tree.
11535 * Otherwise, return the old tree.
11536 * Argument:
11537 *    tree - GenTree*. If this pointer points to a SIMD struct that is used in a SIMD
11538 *           intrinsic, we will morph it into the SIMD intrinsic SIMDIntrinsicGetItem.
11539 * Return Value:
11540 *    A GenTree* pointing to the new tree. If the tree is not for a SIMD intrinsic,
11541 *    return the old tree.
11544 GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree)
11546 unsigned index = 0;
11547 var_types baseType = TYP_UNKNOWN;
11548 unsigned simdSize = 0;
11549 GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
11550 if (simdStructNode != nullptr)
11552 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
11553 GenTree* op2 = gtNewIconNode(index);
11554 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
11556 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
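// Illustrative sketch: for a Vector4 local "v", a read of "v.Y" morphs into
// SIMDIntrinsicGetItem(v, 1) with base type TYP_FLOAT and simd size 16,
// which codegen can turn into a lane extract instead of a memory access.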
11562 /*****************************************************************************
11563 * Transform an assignment of a SIMD struct field to SIMD intrinsic
11564 * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
11565 * then return the old tree.
11566 * Argument:
11567 *    tree - GenTree*. If this pointer points to a SIMD struct that is used in a SIMD
11568 *           intrinsic, we will morph it into a SIMD intrinsic set.
11569 * Return Value:
11570 *    A GenTree* pointing to the new tree. If the tree is not for a SIMD intrinsic,
11571 *    return the old tree.
11574 GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree)
11576 assert(tree->OperGet() == GT_ASG);
11577 GenTree* op1 = tree->gtGetOp1();
11578 GenTree* op2 = tree->gtGetOp2();
11580 unsigned index = 0;
11581 var_types baseType = TYP_UNKNOWN;
11582 unsigned simdSize = 0;
11583 GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
11584 if (simdOp1Struct != nullptr)
11586 // Generate the simd set intrinsic
11587 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
11589 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
11590 switch (index)
11591 {
11592     case 0:
11593         simdIntrinsicID = SIMDIntrinsicSetX;
11594         break;
11595     case 1:
11596         simdIntrinsicID = SIMDIntrinsicSetY;
11597         break;
11598     case 2:
11599         simdIntrinsicID = SIMDIntrinsicSetZ;
11600         break;
11601     case 3:
11602         simdIntrinsicID = SIMDIntrinsicSetW;
11603         break;
11604     default:
11605         noway_assert(!"There is no set intrinsic for index bigger than 3");
11606 }
11608 GenTree* target = gtClone(simdOp1Struct);
11609 assert(target != nullptr);
11610 GenTree* simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
11611 tree->gtOp.gtOp1 = target;
11612 tree->gtOp.gtOp2 = simdTree;
11614 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
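// Illustrative sketch: for a Vector4 local "v", the assignment "v.Y = f"
// morphs into "v = SIMDIntrinsicSetY(v, f)": op1 becomes a clone of the
// struct (the target) and op2 becomes the SetY intrinsic over the original
// struct tree and the new value.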
11621 #endif // FEATURE_SIMD
11623 /*****************************************************************************
11625 * Transform the given GTK_SMPOP tree for code generation.
11629 #pragma warning(push)
11630 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11632 GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
11635 assert(tree->OperKind() & GTK_SMPOP);
11637 /* The steps in this function are :
11638 o Perform required preorder processing
11639 o Process the first, then second operand, if any
11640 o Perform required postorder morphing
11641 o Perform optional postorder morphing if optimizing
11644 bool isQmarkColon = false;
11646 #if LOCAL_ASSERTION_PROP
11647 AssertionIndex origAssertionCount = DUMMY_INIT(0);
11648 AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
11650 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
11651 AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
11656 tree = fgMorphForRegisterFP(tree);
11659 genTreeOps oper = tree->OperGet();
11660 var_types typ = tree->TypeGet();
11661 GenTree* op1 = tree->gtOp.gtOp1;
11662 GenTree* op2 = tree->gtGetOp2IfPresent();
11664 /*-------------------------------------------------------------------------
11665 * First do any PRE-ORDER processing
11670 // Some arithmetic operators need to use a helper call to the EE
11674 tree = fgDoNormalizeOnStore(tree);
11675 /* fgDoNormalizeOnStore can change op2 */
11676 noway_assert(op1 == tree->gtOp.gtOp1);
11677 op2 = tree->gtOp.gtOp2;
11679 #ifdef FEATURE_SIMD
11681 // We should check whether op2 should be assigned to a SIMD field or not.
11682 // If it is, we should translate the tree into a SIMD intrinsic.
11683 assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
11684 GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
11685 typ = tree->TypeGet();
11686 op1 = tree->gtGetOp1();
11687 op2 = tree->gtGetOp2();
11689 assert((tree == newTree) && (tree->OperGet() == oper));
11690 if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
11692 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
11698 #ifdef LEGACY_BACKEND
11717 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
11718 // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
11719 // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type).
11720 // TODO-1stClassStructs: improve this.
11721 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
11723 op1->gtFlags |= GTF_DONT_CSE;
11729 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
11730 op1->gtFlags |= GTF_DONT_CSE;
11738 if (op1->OperKind() & GTK_RELOP)
11740 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
11741 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
11742 not need to materialize the result as a 0 or 1. */
11744 /* We also mark it as DONT_CSE, as we don't handle QMARKs with non-RELOP op1s */
11745 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
11747 // Request that the codegen for op1 sets the condition flags
11748 // when it generates the code for op1.
11750 // Codegen for op1 must set the condition flags if
11751 // this method returns true.
11753 op1->gtRequestSetFlags();
11757 GenTree* effOp1 = op1->gtEffectiveVal();
11758 noway_assert((effOp1->gtOper == GT_CNS_INT) &&
11759 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
11764 #if LOCAL_ASSERTION_PROP
11765 if (optLocalAssertionProp)
11768 isQmarkColon = true;
11773 return fgMorphArrayIndex(tree);
11776 return fgMorphCast(tree);
11780 #ifndef _TARGET_64BIT_
11781 if (typ == TYP_LONG)
11783 /* For (long)int1 * (long)int2, we don't actually do the
11784 casts, and just multiply the 32-bit values, which yields
11785 the 64-bit result in edx:eax */
11788 if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
11789 genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) &&
11790 !op1->gtOverflow() && !op2->gtOverflow())
11792 // The casts have to be of the same signedness.
11793 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11795 // We see if we can force an int constant to change its signedness
11796 GenTree* constOp;
11797 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11798     constOp = op1;
11799 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11800     constOp = op2;
11801 else
11802     goto NO_MUL_64RSLT;
11804 if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11805 constOp->gtFlags ^= GTF_UNSIGNED;
11807 goto NO_MUL_64RSLT;
11810 // The only combination that can overflow
11811 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11812 goto NO_MUL_64RSLT;
11814 /* Remaining combinations can never overflow during long mul. */
11816 tree->gtFlags &= ~GTF_OVERFLOW;
11818 /* Do unsigned mul only if the casts were unsigned */
11820 tree->gtFlags &= ~GTF_UNSIGNED;
11821 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11823 /* Since we are committing to GTF_MUL_64RSLT, we don't want
11824 the casts to be folded away. So morph the castees directly */
11826 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11827 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11829 // Propagate side effect flags up the tree
11830 op1->gtFlags &= ~GTF_ALL_EFFECT;
11831 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11832 op2->gtFlags &= ~GTF_ALL_EFFECT;
11833 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11835 // If the GT_MUL can be altogether folded away, we should do that.
11837 if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11838 opts.OptEnabled(CLFLG_CONSTANTFOLD))
11840 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11841 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11842 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11843 tree = gtFoldExprConst(tree);
11844 noway_assert(tree->OperIsConst());
11848 tree->gtFlags |= GTF_MUL_64RSLT;
11850 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11851 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
11853 // Insert GT_NOP nodes for the cast operands so that they do not get folded
11854 // And propagate the new flags. We don't want to CSE the casts because
11855 // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11857 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11859 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11860 op1->gtFlags &= ~GTF_ALL_EFFECT;
11861 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11864 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11866 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11867 op2->gtFlags &= ~GTF_ALL_EFFECT;
11868 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11871 op1->gtFlags |= GTF_DONT_CSE;
11872 op2->gtFlags |= GTF_DONT_CSE;
11874 tree->gtFlags &= ~GTF_ALL_EFFECT;
11875 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11877 goto DONE_MORPHING_CHILDREN;
11879 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11882 if (tree->gtOverflow())
11883 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11885 helper = CORINFO_HELP_LMUL;
11887 goto USE_HELPER_FOR_ARITH;
11891 /* We are seeing this node again. We have decided to use
11892 GTF_MUL_64RSLT, so leave it alone. */
11894 assert(tree->gtIsValid64RsltMul());
11897 #endif // !_TARGET_64BIT_
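// Illustrative sketch: on a 32-bit target, "(long)a * (long)b" with int
// operands a and b keeps the GT_MUL with its two GT_CAST children, gets
// GTF_MUL_64RSLT, and codegen emits a single 32x32->64 multiply (e.g. the
// result in edx:eax on x86) instead of calling the CORINFO_HELP_LMUL helper.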
11902 #ifndef _TARGET_64BIT_
11903 if (typ == TYP_LONG)
11905 helper = CORINFO_HELP_LDIV;
11906 goto USE_HELPER_FOR_ARITH;
11909 #if USE_HELPERS_FOR_INT_DIV
11911 #if defined(LEGACY_BACKEND)
11912 && !fgIsSignedDivOptimizable(op2)
11913 #endif // LEGACY_BACKEND
11916 helper = CORINFO_HELP_DIV;
11917 goto USE_HELPER_FOR_ARITH;
11920 #endif // !_TARGET_64BIT_
11922 #ifndef LEGACY_BACKEND
11923 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11925 op2 = gtFoldExprConst(op2);
11927 #endif // !LEGACY_BACKEND
11932 #ifndef _TARGET_64BIT_
11933 if (typ == TYP_LONG)
11935 helper = CORINFO_HELP_ULDIV;
11936 goto USE_HELPER_FOR_ARITH;
11938 #if USE_HELPERS_FOR_INT_DIV
11940 #if defined(LEGACY_BACKEND)
11941 && !fgIsUnsignedDivOptimizable(op2)
11942 #endif // LEGACY_BACKEND
11945 helper = CORINFO_HELP_UDIV;
11946 goto USE_HELPER_FOR_ARITH;
11949 #endif // _TARGET_64BIT_
11954 if (varTypeIsFloating(typ))
11956 helper = CORINFO_HELP_DBLREM;
11958 if (op1->TypeGet() == TYP_FLOAT)
11960 if (op2->TypeGet() == TYP_FLOAT)
11962 helper = CORINFO_HELP_FLTREM;
11966 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
11969 else if (op2->TypeGet() == TYP_FLOAT)
11971 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
11973 goto USE_HELPER_FOR_ARITH;
11976 // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
11977 // A similar optimization for signed mod will not work for a negative perfectly divisible
11978 // HI-word. To make it correct, we would need to divide without the sign and then flip the
11979 // result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
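// For example, the unsigned identity "x % 8 == x & 7" does not hold for
// signed values: -1 % 8 is -1 under C#/IL semantics, while -1 & 7 is 7.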
11980 goto ASSIGN_HELPER_FOR_MOD;
11984 #ifdef _TARGET_ARMARCH_
11986 // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11988 #else // _TARGET_XARCH
11989 /* If this is an unsigned long mod with an op2 that is a cast to long from a
11990 constant int, then don't morph to a call to the helper. This can be done
11991 faster inline using idiv.
11995 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11996 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11997 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11999 if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
12000 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
12001 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
12002 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
12004 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
12005 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
12008 if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
12009 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
12011 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
12012 noway_assert(op1->TypeGet() == TYP_LONG);
12014 // Update flags for op1 morph
12015 tree->gtFlags &= ~GTF_ALL_EFFECT;
12017 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
12019 // If op1 is a constant, then do constant folding of the division operator
12020 if (op1->gtOper == GT_CNS_NATIVELONG)
12022 tree = gtFoldExpr(tree);
12027 #endif // _TARGET_XARCH
12029 ASSIGN_HELPER_FOR_MOD:
12031 // For "val % 1", return 0 if op1 doesn't have any side effects
12032 // and we are not in the CSE phase, we cannot discard 'tree'
12033 // because it may contain CSE expressions that we haven't yet examined.
12035 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
12037 if (op2->IsIntegralConst(1))
12039 GenTree* zeroNode = gtNewZeroConNode(typ);
12041 zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
12043 DEBUG_DESTROY_NODE(tree);
12048 #ifndef _TARGET_64BIT_
12049 if (typ == TYP_LONG)
12051 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
12052 goto USE_HELPER_FOR_ARITH;
12055 #if USE_HELPERS_FOR_INT_DIV
12056 if (typ == TYP_INT)
12058 if (oper == GT_UMOD
12059 #if defined(LEGACY_BACKEND)
12060 && !fgIsUnsignedModOptimizable(op2)
12061 #endif // LEGACY_BACKEND
12064 helper = CORINFO_HELP_UMOD;
12065 goto USE_HELPER_FOR_ARITH;
12067 else if (oper == GT_MOD
12068 #if defined(LEGACY_BACKEND)
12069 && !fgIsSignedModOptimizable(op2)
12070 #endif // LEGACY_BACKEND
12073 helper = CORINFO_HELP_MOD;
12074 goto USE_HELPER_FOR_ARITH;
12078 #endif // !_TARGET_64BIT_
12080 #ifndef LEGACY_BACKEND
12081 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
12083 op2 = gtFoldExprConst(op2);
12086 #ifdef _TARGET_ARM64_
12087 // For ARM64 we don't have a remainder instruction,
12088 // The architecture manual suggests the following transformation to
12089 // generate code for such operator:
12091 // a % b = a - (a / b) * b;
12093 // We will use the suggested transform except in the special case
12094 // when the modulo operation is unsigned and the divisor is an
12095 // integer constant power of two. In this case, we will rely on lower
12096 // to make the transform:
12098 // a % b = a & (b - 1);
12100 // Note: We must always perform one or the other of these transforms.
12101 // Therefore we must also detect the special cases where lower does not do the
12102 // % to & transform. In our case there is currently only one extra condition:
12104 // * The dividend must not be constant. Lower disables this rare const % const case.
12107 // Do "a % b = a - (a / b) * b" morph if ...........................
12108 bool doMorphModToSubMulDiv = (tree->OperGet() == GT_MOD) || // Modulo operation is signed
12109 !op2->IsIntegralConst() || // Divisor is not an integer constant
12110 !isPow2(op2->AsIntCon()->IconValue()) || // Divisor is not a power of two
12111 op1->IsCnsIntOrI(); // Dividend is constant
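// Illustrative sketch: "a % 16" with an unsigned GT_UMOD and constant
// power-of-two divisor is left alone here so lower can emit "a & 15";
// "a % b" with a variable divisor is rewritten by fgMorphModToSubMulDiv
// into "a - (a / b) * b".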
12113 if (doMorphModToSubMulDiv)
12115 assert(!optValnumCSE_phase);
12117 tree = fgMorphModToSubMulDiv(tree->AsOp());
12118 op1 = tree->gtOp.gtOp1;
12119 op2 = tree->gtOp.gtOp2;
12122 #else // !_TARGET_ARM64_
12123 // If b is not a power of 2 constant then lowering replaces a % b
12124 // with a - (a / b) * b and applies magic division optimization to
12125 // a / b. The code may already contain an a / b expression (e.g.
12126 // x = a / 10; y = a % 10;) and then we end up with redundant code.
12127 // If we convert % to / here we give CSE the opportunity to eliminate
12128 // the redundant division. If there's no redundant division then
12129 // nothing is lost, lowering would have done this transform anyway.
12131 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
12133 ssize_t divisorValue = op2->AsIntCon()->IconValue();
12134 size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
12135 : static_cast<size_t>(abs(divisorValue));
12137 if (!isPow2(absDivisorValue))
12139 tree = fgMorphModToSubMulDiv(tree->AsOp());
12140 op1 = tree->gtOp.gtOp1;
12141 op2 = tree->gtOp.gtOp2;
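// Illustrative sketch: given "x = a / 10; y = a % 10;", rewriting the
// GT_MOD here as "a - (a / 10) * 10" exposes a second "a / 10" tree that
// CSE can common up with the existing division.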
12144 #endif // !_TARGET_ARM64_
12145 #endif // !LEGACY_BACKEND
12148 USE_HELPER_FOR_ARITH:
12150 /* We have to morph these arithmetic operations into helper calls
12151 before morphing the arguments (preorder), else the arguments
12152 won't get correct values of fgPtrArgCntCur.
12153 However, try to fold the tree first in case we end up with a
12154 simple node which won't need a helper call at all */
12156 noway_assert(tree->OperIsBinary());
12158 GenTree* oldTree = tree;
12160 tree = gtFoldExpr(tree);
12162 // Were we able to fold it ?
12163 // Note that gtFoldExpr may return a non-leaf even if successful
12164 // e.g. for something like "expr / 1" - see also bug #290853
12165 if (tree->OperIsLeaf() || (oldTree != tree))
12167 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
12170 // Did we fold it into a comma node with throw?
12171 if (tree->gtOper == GT_COMMA)
12173 noway_assert(fgIsCommaThrow(tree));
12174 return fgMorphTree(tree);
12177 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
12180 // normalize small integer return values
12181 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) &&
12182 fgCastNeeded(op1, info.compRetType))
12184 // Small-typed return values are normalized by the callee
12185 op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType);
12187 // Propagate GTF_COLON_COND
12188 op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
12190 tree->gtOp.gtOp1 = fgMorphCast(op1);
12192 // Propagate side effect flags
12193 tree->gtFlags &= ~GTF_ALL_EFFECT;
12194 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
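// Illustrative sketch: for "static sbyte F(int x) { return (sbyte)x; }",
// if the cast is still needed, the RETURN operand is wrapped in a cast to
// the small return type so the callee hands back a properly sign- or
// zero-extended value; callees are responsible for normalizing small
// return types.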
12203 GenTree* optimizedTree = gtFoldTypeCompare(tree);
12205 if (optimizedTree != tree)
12207 return fgMorphTree(optimizedTree);
12215 // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT.
12216 if ((oper != GT_GT) || tree->IsUnsigned())
12218 fgMorphRecognizeBoxNullable(tree);
12221 op1 = tree->gtOp.gtOp1;
12222 op2 = tree->gtGetOp2IfPresent();
12226 case GT_RUNTIMELOOKUP:
12227 return fgMorphTree(op1);
12229 #ifdef _TARGET_ARM_
12231 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
12233 switch (tree->TypeGet())
12236 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
12238 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
12250 #if !CPU_HAS_FP_SUPPORT
12251 tree = fgMorphToEmulatedFP(tree);
12254 /*-------------------------------------------------------------------------
12255 * Process the first operand, if any
12261 #if LOCAL_ASSERTION_PROP
12262 // If we are entering the "then" part of a Qmark-Colon we must
12263 // save the state of the current copy assignment table
12264 // so that we can restore this state when entering the "else" part
12267 noway_assert(optLocalAssertionProp);
12268 if (optAssertionCount)
12270 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
12271 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
12272 origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
12273 origAssertionCount = optAssertionCount;
12274 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
12278 origAssertionCount = 0;
12279 origAssertionTab = nullptr;
12282 #endif // LOCAL_ASSERTION_PROP
12284 // We might need a new MorphAddressContext context. (These are used to convey
12285 // parent context about how addresses being calculated will be used; see the
12286 // specification comment for MorphAddrContext for full details.)
12287 // Assume it's an Ind context to start.
12288 MorphAddrContext subIndMac1(MACK_Ind);
12289 MorphAddrContext* subMac1 = mac;
12290 if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
12292 switch (tree->gtOper)
12295 if (subMac1 == nullptr)
12297 subMac1 = &subIndMac1;
12298 subMac1->m_kind = MACK_Addr;
12302 // In a comma, the incoming context only applies to the rightmost arg of the
12303 // comma list. The left arg (op1) gets a fresh context.
12310 subMac1 = &subIndMac1;
12317 // For additions, if we're in an IND context keep track of whether
12318 // all offsets added to the address are constant, and their sum.
12319 if (tree->gtOper == GT_ADD && subMac1 != nullptr)
12321 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
12322 GenTree* otherOp = tree->gtOp.gtOp2;
12323 // Is the other operator a constant?
12324 if (otherOp->IsCnsIntOrI())
12326 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
12327 totalOffset += otherOp->gtIntConCommon.IconValue();
12328 if (totalOffset.IsOverflow())
12330 // We will consider an offset so large as to overflow as "not a constant" --
12331 // we will do a null check.
12332 subMac1->m_allConstantOffsets = false;
12336 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
12341 subMac1->m_allConstantOffsets = false;
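// Illustrative sketch: while morphing IND(ADD(ADD(p, 8), 4)) in an Ind
// context, m_totalOffset accumulates to 12; if every added offset is a
// small constant, the eventual indirection can rely on the hardware trap
// for its null check, whereas a large or non-constant offset forces
// m_allConstantOffsets to false and an explicit null check.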
12345 // If gtOp1 is a GT_FIELD, we need to pass down the mac if
12346 // its parent is GT_ADDR, since the address of the field
12347 // is part of an ongoing address computation. Otherwise
12348 // op1 represents the value of the field and so any address
12349 // calculations it does are in a new context.
12350 if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
12354 // The impact of this field's value to any ongoing
12355 // address computation is handled below when looking
12359 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
12361 #if LOCAL_ASSERTION_PROP
12362 // If we are exiting the "then" part of a Qmark-Colon we must
12363 // save the state of the current copy assignment table
12364 // so that we can merge this state with the "else" part exit
12367 noway_assert(optLocalAssertionProp);
12368 if (optAssertionCount)
12370 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
12371 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
12372 thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
12373 thenAssertionCount = optAssertionCount;
12374 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
12378 thenAssertionCount = 0;
12379 thenAssertionTab = nullptr;
12382 #endif // LOCAL_ASSERTION_PROP
12384 /* Morphing along with folding and inlining may have changed the
12385 * side effect flags, so we have to reset them
12387 * NOTE: Don't reset the exception flags on nodes that may throw */
12389 assert(tree->gtOper != GT_CALL);
12391 if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
12393 tree->gtFlags &= ~GTF_CALL;
12396 /* Propagate the new flags */
12397 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
12399 // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar itself does.
12400 // Similarly for clsVar
12401 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
12403 tree->gtFlags &= ~GTF_GLOB_REF;
12407 /*-------------------------------------------------------------------------
12408 * Process the second operand, if any
12414 #if LOCAL_ASSERTION_PROP
12415 // If we are entering the "else" part of a Qmark-Colon we must
12416 // reset the state of the current copy assignment table
12419 noway_assert(optLocalAssertionProp);
12420 optAssertionReset(0);
12421 if (origAssertionCount)
12423 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
12424 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
12425 optAssertionReset(origAssertionCount);
12428 #endif // LOCAL_ASSERTION_PROP
12430 // We might need a new MorphAddressContext context to use in evaluating op2.
12431 // (These are used to convey parent context about how addresses being calculated
12432 // will be used; see the specification comment for MorphAddrContext for full details.)
12433 // Assume it's an Ind context to start.
12434 switch (tree->gtOper)
12437 if (mac != nullptr && mac->m_kind == MACK_Ind)
12439 GenTree* otherOp = tree->gtOp.gtOp1;
12440 // Is the other operator a constant?
12441 if (otherOp->IsCnsIntOrI())
12443 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
12447 mac->m_allConstantOffsets = false;
12455 // If gtOp2 is a GT_FIELD, we must be taking its value,
12456 // so it should evaluate its address in a new context.
12457 if (op2->gtOper == GT_FIELD)
12459 // The impact of this field's value to any ongoing
12460 // address computation is handled above when looking
12465 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
12467 /* Propagate the side effect flags from op2 */
12469 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
12471 #if LOCAL_ASSERTION_PROP
12472 // If we are exiting the "else" part of a Qmark-Colon we must
12473 // merge the state of the current copy assignment table with
12474 // that of the exit of the "then" part.
12477 noway_assert(optLocalAssertionProp);
12478 // If either exit table has zero entries then
12479 // the merged table also has zero entries
12480 if (optAssertionCount == 0 || thenAssertionCount == 0)
12482 optAssertionReset(0);
12486 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
12487 if ((optAssertionCount != thenAssertionCount) ||
12488 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
12490 // Yes they are different so we have to find the merged set
12491 // Iterate over the copy asgn table removing any entries
12492 // that do not have an exact match in the thenAssertionTab
12493 AssertionIndex index = 1;
12494 while (index <= optAssertionCount)
12496 AssertionDsc* curAssertion = optGetAssertion(index);
12498 for (unsigned j = 0; j < thenAssertionCount; j++)
12500 AssertionDsc* thenAssertion = &thenAssertionTab[j];
12502 // Do the left sides match?
12503 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
12504 (curAssertion->assertionKind == thenAssertion->assertionKind))
12506 // Do the right sides match?
12507 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
12508 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
12519 // If we fall out of the loop above then we didn't find
12520 // any matching entry in the thenAssertionTab, so it must
12521 // have been killed on that path; we remove it here.
12524 // The data at optAssertionTabPrivate[i] is to be removed
12525 CLANG_FORMAT_COMMENT_ANCHOR;
12529 printf("The QMARK-COLON ");
12531 printf(" removes assertion candidate #%d\n", index);
12534 optAssertionRemove(index);
12537 // The data at optAssertionTabPrivate[i] is to be kept
12543 #endif // LOCAL_ASSERTION_PROP
12546 DONE_MORPHING_CHILDREN:
12548 if (tree->OperMayThrow(this))
12550 // Mark the tree node as potentially throwing an exception
12551 tree->gtFlags |= GTF_EXCEPT;
12555 if (tree->OperIsIndirOrArrLength())
12557 tree->gtFlags |= GTF_IND_NONFAULTING;
12559 if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) &&
12560 ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0)))
12562 tree->gtFlags &= ~GTF_EXCEPT;
12566 if (tree->OperRequiresAsgFlag())
12568 tree->gtFlags |= GTF_ASG;
12572 if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) &&
12573 ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
12575 tree->gtFlags &= ~GTF_ASG;
12578 /*-------------------------------------------------------------------------
12579 * Now do POST-ORDER processing
12582 #if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
12583 // Variable shifts of a long end up being helper calls, so mark the tree as such. This
12584 // is potentially too conservative, since they'll get treated as having side effects.
12585 // It is important to mark them as calls so if they are part of an argument list,
12586 // they will get sorted and processed properly (for example, it is important to handle
12587 // all nested calls before putting struct arguments in the argument registers). We
12588 // could mark the trees just before argument processing, but it would require a full
12589 // tree walk of the argument tree, so we just do it here, instead, even though we'll
12590 // mark non-argument trees (that will still get converted to calls, anyway).
12591 if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
12593 tree->gtFlags |= GTF_CALL;
12595 #endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
12597 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet())))
12599 // The tree is really not GC but was marked as such. Now that the
12600 // children have been unmarked, unmark the tree too.
12602 // Remember that GT_COMMA inherits its type only from op2
12603 if (tree->gtOper == GT_COMMA)
12605 tree->gtType = genActualType(op2->TypeGet());
12609 tree->gtType = genActualType(op1->TypeGet());
12613 GenTree* oldTree = tree;
12615 GenTree* qmarkOp1 = nullptr;
12616 GenTree* qmarkOp2 = nullptr;
12618 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
12620 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
12621 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
12624 // Try to fold it, maybe we get lucky,
12625 tree = gtFoldExpr(tree);
12627 if (oldTree != tree)
12629 /* if gtFoldExpr returned op1 or op2 then we are done */
12630 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
12635 /* If we created a comma-throw tree then we need to morph op1 */
12636 if (fgIsCommaThrow(tree))
12638 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
12639 fgMorphTreeDone(tree);
12645 else if (tree->OperKind() & GTK_CONST)
12650 /* gtFoldExpr could have used setOper to change the oper */
12651 oper = tree->OperGet();
12652 typ = tree->TypeGet();
12654 /* gtFoldExpr could have changed op1 and op2 */
12655 op1 = tree->gtOp.gtOp1;
12656 op2 = tree->gtGetOp2IfPresent();
12658 // Do we have an integer compare operation?
12660 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
12662 // Are we comparing against zero?
12664 if (op2->IsIntegralConst(0))
12666 // Request that the codegen for op1 sets the condition flags
12667 // when it generates the code for op1.
12669 // Codegen for op1 must set the condition flags if
12670 // this method returns true.
12672 op1->gtRequestSetFlags();
12675 /*-------------------------------------------------------------------------
12676 * Perform the required oper-specific postorder morphing
12682 size_t ival1, ival2;
12683 GenTree* lclVarTree;
12684 FieldSeqNode* fieldSeq = nullptr;
12690 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
12691 if (lclVarTree != nullptr)
12693 lclVarTree->gtFlags |= GTF_VAR_DEF;
12696 if (op1->gtEffectiveVal()->OperIsConst())
12698 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
12699 tree->gtOp.gtOp1 = op1;
12702 /* If we are storing a small type, we might be able to omit a cast */
12703 if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
12705 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
12707 var_types castType = op2->CastToType();
12709 // If we are performing a narrowing cast and
12710 // castType is larger or the same as op1's type
12711 // then we can discard the cast.
12713 if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(op1->TypeGet())))
12715 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
12718 else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
12720 /* We don't need to zero extend the setcc instruction */
12721 op2->gtType = TYP_BYTE;
12724 // If we introduced a CSE we may need to undo the optimization above
12725 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
12726 // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
12727 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
12729 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
12730 LclVarDsc* varDsc = &lvaTable[varNum];
12732 /* We again need to zero extend the setcc instruction */
12733 op2->gtType = varDsc->TypeGet();
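// Illustrative sketch: for "*pByte = (byte)(x + y)" the narrowing cast can
// be dropped because the byte-sized store only writes the low 8 bits
// anyway; and when the stored value is a compare, the setcc result needs
// no zero-extension, so its type can be shrunk to TYP_BYTE.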
12735 fgAssignSetVarDef(tree);
12737 #ifdef LEGACY_BACKEND
12755 /* We can't CSE the LHS of an assignment */
12756 /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
12757 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12759 op1->gtFlags |= GTF_DONT_CSE;
12766 /* Make sure we're allowed to do this */
12768 if (optValnumCSE_phase)
12770 // It is not safe to reorder/delete CSE's
12776 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12778 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12780 op1 = tree->gtOp.gtOp1;
12782 /* Since this can occur repeatedly we use a while loop */
12784 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
12785 (op1->gtType == TYP_INT) && (op1->gtOverflow() == false))
12787 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
12789 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12790 ival2 = cns2->gtIntCon.gtIconVal;
12792 if (op1->gtOper == GT_ADD)
12800 cns2->gtIntCon.gtIconVal = ival2;
12802 #ifdef _TARGET_64BIT_
12803 // we need to properly re-sign-extend or truncate as needed.
12804 cns2->AsIntCon()->TruncateOrSignExtend32();
12805 #endif // _TARGET_64BIT_
12807 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
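// Worked example: "(x + 5) == 8" becomes "x == 3" (for GT_ADD the new
// constant is icon2 - icon1; for GT_SUB it is icon2 + icon1), and the loop
// repeats in case op1 is itself another ADD/SUB of a constant.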
12812 // Here we look for the following tree
12813 //
12814 //                        EQ/NE
12815 //                        /  \
12816 //                      op1   CNS 0/1
12817 //
12818 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12820 // cast to unsigned allows test for both 0 and 1
12821 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12823 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12825 else // cast to UINT64 allows test for both 0 and 1
12826 if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12828 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12831 if (ival2 != INT_MAX)
12833 // If we don't have a comma and relop, we can't do this optimization
12835 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
12837 // Here we look for the following transformation
12838 //
12839 //                  EQ/NE                    Possible REVERSE(RELOP)
12840 //                  /  \                       /      \
12841 //               COMMA CNS 0/1      ->      COMMA   relop_op2
12842 //               /   \                       /    \
12843 //              x  RELOP                    x     relop_op1
12844 //                 /    \
12845 //           relop_op1  relop_op2
12846 //
12849 GenTree* comma = op1;
12850 GenTree* relop = comma->gtOp.gtOp2;
12852 GenTree* relop_op1 = relop->gtOp.gtOp1;
12854 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12858 gtReverseCond(relop);
12861 relop->gtOp.gtOp1 = comma;
12862 comma->gtOp.gtOp2 = relop_op1;
12864 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12865 comma->gtFlags &= ~GTF_ALL_EFFECT;
12866 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12867 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12869 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12870 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
12872 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12877 if (op1->gtOper == GT_COMMA)
12879 // Here we look for the following tree
12880 // and when the LCL_VAR is a temp we can fold the tree:
12881 //
12882 //                        EQ/NE                  EQ/NE
12883 //                        /  \                   /  \
12884 //                     COMMA  CNS 0/1    ->   RELOP  CNS 0/1
12885 //                     /   \                   / \
12886 //                   ASG  LCL_VAR
12887 //                  /  \
12888 //               LCL_VAR  RELOP
12889 //                         / \
12890 //
12892 GenTree* asg = op1->gtOp.gtOp1;
12893 GenTree* lcl = op1->gtOp.gtOp2;
12895 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
12896 if (asg->gtOper != GT_ASG)
12901 /* The right side of the comma must be a LCL_VAR temp */
12902 if (lcl->gtOper != GT_LCL_VAR)
12907 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12908 noway_assert(lclNum < lvaCount);
12910 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12911 if (!lvaTable[lclNum].lvIsTemp)
12917 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12918 // Fix 383856 X86/ARM ILGEN
12919 if (lclNumIsCSE(lclNum))
12925 /* We also must be assigning the result of a RELOP */
12926 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12931 /* Both of the LCL_VAR must match */
12932 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12937 /* If right side of asg is not a RELOP then skip */
12938 if (!asg->gtOp.gtOp2->OperIsCompare())
12943 LclVarDsc* varDsc = lvaTable + lclNum;
12945 /* Set op1 to the right side of asg, (i.e. the RELOP) */
12946 op1 = asg->gtOp.gtOp2;
12948 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12949 DEBUG_DESTROY_NODE(lcl);
12951 /* This local variable should never be used again */
12953 // VSW 184221: Set RefCnt to zero to indicate that this local var
12954 // is not used any more. (Keep the lvType as is.)
12955 // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
12956 // And then emitter::emitEndCodeGen will assert in the following line:
12957 // noway_assert( dsc->lvTracked);
12959 noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
12960 varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
12961 // and it only shows up twice.
12963 lvaTable[lclNum].lvRefCnt = 0;
12964 lvaTable[lclNum].lvaResetSortAgainFlag(this);
12967 if (op1->OperIsCompare())
12969 // Here we look for the following tree
12970 //
12971 //                        EQ/NE           ->      RELOP/!RELOP
12972 //                        /  \                       /    \
12973 //                     RELOP  CNS 0/1
12974 //                     /   \
12975 //
12976 // Note that we will remove/destroy the EQ/NE node and move
12977 // the RELOP up into its location.
12979 /* Here we reverse the RELOP if necessary */
12981 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12985 gtReverseCond(op1);
12988 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12989 op1->gtType = tree->gtType;
12991 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12992 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12994 DEBUG_DESTROY_NODE(tree);
12999 // Now we check for a compare with the result of an '&' operator
13001 // Here we look for the following transformation:
13002 //
13003 //                        EQ/NE                  EQ/NE
13004 //                        /  \                   /  \
13005 //                      AND   CNS 0/1    ->    AND   CNS 0
13006 //                     /   \                  /   \
13007 //                RSZ/RSH   CNS 1            x     CNS (1 << y)
13008 //                  /  \
13009 //                 x   CNS_INT +y
13010 //
13011 if (op1->gtOper == GT_AND)
13013 GenTree* andOp = op1;
13014 GenTree* rshiftOp = andOp->gtOp.gtOp1;
13016 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
13021 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
13026 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
13028 if (shiftAmount < 0)
13033 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
13038 if (andOp->gtType == TYP_INT)
13040 if (shiftAmount > 31)
13045 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
13047 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
13049 // Reverse the cond if necessary
13052 gtReverseCond(tree);
13053 cns2->gtIntCon.gtIconVal = 0;
13054 oper = tree->gtOper;
13057 else if (andOp->gtType == TYP_LONG)
13059 if (shiftAmount > 63)
13064 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
13066 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
13068 // Reverse the cond if necessary
13071 gtReverseCond(tree);
13072 cns2->gtIntConCommon.SetLngValue(0);
13073 oper = tree->gtOper;
13077 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
13079 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
13080 DEBUG_DESTROY_NODE(rshiftOp);
13082 } // END if (ival2 != INT_MAX)
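// Worked example: "((x >> 3) & 1) == 1" becomes "(x & 8) != 0": the shift
// is removed, the AND mask becomes 1 << 3, and the compare is reversed so
// it tests against zero.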
13085 /* Now check for compares with small constant longs that can be cast to int */
13087 if (!cns2->OperIsConst())
13092 if (cns2->TypeGet() != TYP_LONG)
13097 /* Is the constant 31 bits or smaller? */
13099 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
13104 /* Is the first comparand mask operation of type long ? */
13106 if (op1->gtOper != GT_AND)
13108 /* Another interesting case: cast from int */
13110 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
13111 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
13112 !op1->gtOverflow()) // cannot be an overflow checking cast
13114 /* Simply make this into an integer comparison */
13116 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
13117 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
13123 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
13125 /* Is the result of the mask effectively an INT ? */
13128 andMask = op1->gtOp.gtOp2;
13129 if (andMask->gtOper != GT_CNS_NATIVELONG)
13133 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
13138 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
13140 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, false, TYP_INT);
13142 /* now replace the mask node (gtOp.gtOp2 of AND node) */
13144 noway_assert(andMask == op1->gtOp.gtOp2);
13146 ival1 = (int)andMask->gtIntConCommon.LngValue();
13147 andMask->SetOper(GT_CNS_INT);
13148 andMask->gtType = TYP_INT;
13149 andMask->gtIntCon.gtIconVal = ival1;
13151 /* now change the type of the AND node */
13153 op1->gtType = TYP_INT;
13155 /* finally we replace the comparand */
13157 ival2 = (int)cns2->gtIntConCommon.LngValue();
13158 cns2->SetOper(GT_CNS_INT);
13159 cns2->gtType = TYP_INT;
13161 noway_assert(cns2 == op2);
13162 cns2->gtIntCon.gtIconVal = ival2;
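// Worked example: "(longVal & 0xFF) == 10" becomes the int compare
// "((int)longVal & 0xFF) == 10", which is valid because the mask
// guarantees the masked value fits in 32 bits.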
13171 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
13173 if (op2->gtOper == GT_CNS_INT)
13176 /* Check for "expr relop 1" */
13177 if (cns2->IsIntegralConst(1))
13179 /* Check for "expr >= 1" */
13182 /* Change to "expr > 0" */
13186 /* Check for "expr < 1" */
13187 else if (oper == GT_LT)
13189 /* Change to "expr <= 0" */
13194 /* Check for "expr relop -1" */
13195 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
13197 /* Check for "expr <= -1" */
13200 /* Change to "expr < 0" */
13204 /* Check for "expr > -1" */
13205 else if (oper == GT_GT)
13207 /* Change to "expr >= 0" */
13211 // If we get here we should be changing 'oper'
13212 assert(tree->OperGet() != oper);
13214 // Keep the old ValueNumber for 'tree' as the new expr
13215 // will still compute the same value as before
13216 tree->SetOper(oper, GenTree::PRESERVE_VN);
13217 cns2->gtIntCon.gtIconVal = 0;
13219 // vnStore is null before the ValueNumber phase has run
13220 if (vnStore != nullptr)
13222 // Update the ValueNumber for 'cns2', as we just changed it to 0
13223 fgValueNumberTreeConst(cns2);
13226 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
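// Worked examples (signed compares only): "x >= 1" becomes "x > 0",
// "x < 1" becomes "x <= 0", "x <= -1" becomes "x < 0", and "x > -1"
// becomes "x >= 0"; comparing against zero is typically cheaper to encode
// and keeps the same result for all inputs.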
13231 else // we have an unsigned comparison
13233 if (op2->IsIntegralConst(0))
13235 if ((oper == GT_GT) || (oper == GT_LE))
13237 // IL doesn't have a cne instruction, so compilers use cgt.un instead. The JIT
13238 // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
13239 // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
13240 // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
13241 // occurs as a result of branch inversion.
13242 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
13243 tree->SetOper(oper, GenTree::PRESERVE_VN);
13244 tree->gtFlags &= ~GTF_UNSIGNED;
13251 noway_assert(tree->OperKind() & GTK_RELOP);
13253 #ifdef LEGACY_BACKEND
13254 /* Check if the result of the comparison is used for a jump.
13255 * If not then only the int (i.e. 32 bit) case is handled in
13256 * the code generator through the (x86) "set" instructions.
13257 * For the rest of the cases, the simplest way is to
13258 * "simulate" the comparison with ?:
13260 * On ARM, we previously used the IT instruction, but the IT instructions
13261 * have mostly been declared obsolete and off-limits, so all cases on ARM
13262 * get converted to ?: */
13264 if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
13266 /* We convert it to "(CMP_TRUE) ? (1):(0)" */
13269 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
13270 op1->gtRequestSetFlags();
13272 op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
13273 op2 = fgMorphTree(op2);
13275 tree = gtNewQmarkNode(TYP_INT, op1, op2);
13277 fgMorphTreeDone(tree);
13281 #endif // LEGACY_BACKEND
13284 #ifdef LEGACY_BACKEND
13287 /* If op1 is a comma throw node then we won't be keeping op2 */
13288 if (fgIsCommaThrow(op1))
13293 /* Get hold of the two branches */
13295 noway_assert(op2->OperGet() == GT_COLON);
13296 GenTree* thenNode = op2->AsColon()->ThenNode();
13297 GenTree* elseNode = op2->AsColon()->ElseNode();
13299 /* Try to hoist assignments out of qmark colon constructs.
13300 ie. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
13302 if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
13303 thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
13304 thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
13306 noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
13308 GenTree* asg = thenNode;
13309 GenTree* colon = op2;
13310 colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
13311 colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
13312 tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
13313 asg->gtOp.gtOp2 = tree;
13315 // Asg will have all the flags that the QMARK had
13316 asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
13318 // Colon flag won't have the flags that x had.
13319 colon->gtFlags &= ~GTF_ALL_EFFECT;
13320 colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
13322 DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
13323 DEBUG_DESTROY_NODE(elseNode);
13328 /* If the 'else' branch is empty swap the two branches and reverse the condition */
13330 if (elseNode->IsNothingNode())
13332 /* This can only happen for VOID ?: */
13333 noway_assert(op2->gtType == TYP_VOID);
13335 /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
13336 if (thenNode->IsNothingNode())
13338 // We may be able to throw away op1 (unless it has side-effects)
13340 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
13342 /* Just return a Nop node */
13347 /* Just return the relop, but clear the special flags. Note
13348 that we can't do that for longs and floats (see code under
13349 COMPARE label above) */
13351 if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
13353 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
13360 GenTree* tmp = elseNode;
13362 op2->AsColon()->ElseNode() = elseNode = thenNode;
13363 op2->AsColon()->ThenNode() = thenNode = tmp;
13364 gtReverseCond(op1);
13368 #if !defined(_TARGET_ARM_)
13369 // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
13371 // Don't do this optimization for ARM: we always require assignment
13372 // to boolean to remain ?:, since we don't have any way to generate
13373 // this with straight-line code, like x86 does using setcc (at least
13374 // after the IT instruction is deprecated).
13376 if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
13377 thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
13379 ival1 = thenNode->gtIntCon.gtIconVal;
13380 ival2 = elseNode->gtIntCon.gtIconVal;
13382 // Is one constant 0 and the other 1?
13383 if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
13385 // If the constants are {1, 0}, reverse the condition
13388 gtReverseCond(op1);
13391 // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
13392 // needs to materialize the result as a 0 or 1.
13393 noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
13394 op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
13396 DEBUG_DESTROY_NODE(tree);
13397 DEBUG_DESTROY_NODE(op2);
13402 #endif // !_TARGET_ARM_
13404 break; // end case GT_QMARK
13405 #endif // LEGACY_BACKEND
13409 #ifndef _TARGET_64BIT_
13410 if (typ == TYP_LONG)
13412 // This must be GTF_MUL_64RSLT
13413 assert(tree->gtIsValid64RsltMul());
13416 #endif // _TARGET_64BIT_
13421 if (tree->gtOverflow())
13426 // TODO #4104: there are a lot of other places where
13427 // this condition is not checked before transformations.
13430 /* Check for "op1 - cns2"; we change it to "op1 + (-cns2)" */
13433 if (op2->IsCnsIntOrI())
13435 /* Negate the constant and change the node to be "+" */
13437 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
13439 tree->ChangeOper(oper);
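// Worked example: "x - 7" becomes "x + (-7)", letting the GT_ADD
// folding and commuting logic below see it.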
13443 /* Check for "cns1 - op2"; we change it to "(cns1 + (-op2))" */
13446 if (op1->IsCnsIntOrI())
13448 noway_assert(varTypeIsIntOrI(tree));
13450 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
13451 // node should be the same
13452 // as the type of the tree, i.e. tree->gtType.
13453 fgMorphTreeDone(op2);
13456 tree->ChangeOper(oper);
13460 /* No match - exit */
13464 #ifdef _TARGET_ARM64_
13466 if (!varTypeIsFloating(tree->gtType))
13468 // Codegen for this instruction needs to be able to throw two exceptions:
13469 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
13470 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
13474 // Codegen for this instruction needs to be able to throw one exception:
13475 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
13482 if (tree->gtOverflow())
13484 tree->gtRequestSetFlags();
13486 // Add the exception-throwing basic block to jump to on overflow
13488 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
13490 // We can't do any commutative morphing for overflow instructions
13501 /* Commute any non-REF constants to the right */
13504 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
13506 // TODO-Review: We used to assert here that
13507 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
13508 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
13509 // and would sometimes hit this assertion. This may indicate a missed "remorph".
13510 // Task is to re-enable this assertion and investigate.
13512 /* Swap the operands */
13513 tree->gtOp.gtOp1 = op2;
13514 tree->gtOp.gtOp2 = op1;
13517 op2 = tree->gtOp.gtOp2;
13520 /* See if we can fold GT_ADD nodes. */
13522 if (oper == GT_ADD)
13524 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
13526 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
13527 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
13528 !op1->gtOverflow() && !op2->gtOverflow())
13530 cns1 = op1->gtOp.gtOp2;
13531 cns2 = op2->gtOp.gtOp2;
13532 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
13533 #ifdef _TARGET_64BIT_
13534 if (cns1->TypeGet() == TYP_INT)
13536 // we need to properly re-sign-extend or truncate after adding two int constants above
13537 cns1->AsIntCon()->TruncateOrSignExtend32();
13539 #endif //_TARGET_64BIT_
13541 tree->gtOp.gtOp2 = cns1;
13542 DEBUG_DESTROY_NODE(cns2);
13544 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
13545 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
13546 DEBUG_DESTROY_NODE(op2);
13547 op2 = tree->gtOp.gtOp2;
13550 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
13552 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
13553 CLANG_FORMAT_COMMENT_ANCHOR;
13555 #if FEATURE_PREVENT_BAD_BYREFS
13557 if (op1->gtOper == GT_ADD && //
13558 !gtIsActiveCSE_Candidate(op1) && //
13559 !op1->gtOverflow() && //
13560 op1->gtOp.gtOp2->IsCnsIntOrI() && //
13561 (op1->gtOp.gtOp2->OperGet() == op2->OperGet()) && //
13562 (op1->gtOp.gtOp2->TypeGet() != TYP_REF) && // Don't fold REFs
13563 (op2->TypeGet() != TYP_REF)) // Don't fold REFs
13565 #else // !FEATURE_PREVENT_BAD_BYREFS
13567 if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
13568 !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
13570 #endif // !FEATURE_PREVENT_BAD_BYREFS
13573 cns1 = op1->gtOp.gtOp2;
13574 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
13575 op2->gtIntConCommon.IconValue());
13576 #ifdef _TARGET_64BIT_
13577 if (op2->TypeGet() == TYP_INT)
13579 // we need to properly re-sign-extend or truncate after adding two int constants above
13580 op2->AsIntCon()->TruncateOrSignExtend32();
13582 #endif //_TARGET_64BIT_
13584 if (cns1->OperGet() == GT_CNS_INT)
13586 op2->gtIntCon.gtFieldSeq =
13587 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
13589 DEBUG_DESTROY_NODE(cns1);
13591 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
13592 DEBUG_DESTROY_NODE(op1);
13593 op1 = tree->gtOp.gtOp1;
13598 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
13601 // If this addition is adding an offset to a null pointer,
13602 // avoid the work and yield the null pointer immediately.
13603 // Dereferencing the pointer in either case will have the same effect.
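// A minimal sketch of this fold (illustrative only): a tree such as
//     ADD(offset, CNS(null))
// collapses to the null constant itself, provided 'offset' (op1) carries
// no side effects that would need to be preserved.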
13606 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
13607 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
13609 op2->gtType = tree->gtType;
13610 DEBUG_DESTROY_NODE(op1);
13611 DEBUG_DESTROY_NODE(tree);
13615 // Remove the addition iff it won't change the tree type
13618 if (!gtIsActiveCSE_Candidate(op2) &&
13619 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
13621 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
13622 (op2->gtIntCon.gtFieldSeq != nullptr) &&
13623 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
13625 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
13628 DEBUG_DESTROY_NODE(op2);
13629 DEBUG_DESTROY_NODE(tree);
13636 /* See if we can fold GT_MUL by const nodes */
13637 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
13639 #ifndef _TARGET_64BIT_
13640 noway_assert(typ <= TYP_UINT);
13641 #endif // _TARGET_64BIT_
13642 noway_assert(!tree->gtOverflow());
13644 ssize_t mult = op2->gtIntConCommon.IconValue();
13645 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
13646 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
13648 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
13652 // We may be able to throw away op1 (unless it has side-effects)
13654 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
13656 DEBUG_DESTROY_NODE(op1);
13657 DEBUG_DESTROY_NODE(tree);
13658 return op2; // Just return the "0" node
13661 // We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
13664 tree->ChangeOper(GT_COMMA);
13668 size_t abs_mult = (mult >= 0) ? mult : -mult;
13669 size_t lowestBit = genFindLowestBit(abs_mult);
13670 bool changeToShift = false;
13672 // is it a power of two? (positive or negative)
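// Illustrative examples (assuming no overflow check):
//     x * 8   =>  x << 3
//     x * -8  =>  (-x) << 3   // negate first, then shift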
13673 if (abs_mult == lowestBit)
13675 // If negative, negate (min-int does not need negation)
13676 if (mult < 0 && mult != SSIZE_T_MIN)
13678 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
13679 fgMorphTreeDone(op1);
13682 // If "op2" is a constant array index, the other multiplicand must be a constant.
13683 // Transfer the annotation to the other one.
13684 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
13685 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
13687 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
13688 GenTree* otherOp = op1;
13689 if (otherOp->OperGet() == GT_NEG)
13691 otherOp = otherOp->gtOp.gtOp1;
13693 assert(otherOp->OperGet() == GT_CNS_INT);
13694 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
13695 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
13700 DEBUG_DESTROY_NODE(op2);
13701 DEBUG_DESTROY_NODE(tree);
13705 /* Change the multiplication into a shift by log2(val) bits */
13706 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
13707 changeToShift = true;
13710 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
13712 int shift = genLog2(lowestBit);
13713 ssize_t factor = abs_mult >> shift;
13715 if (factor == 3 || factor == 5 || factor == 9)
13717 // If negative, negate (min-int does not need negation)
13718 if (mult < 0 && mult != SSIZE_T_MIN)
13720 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
13721 fgMorphTreeDone(op1);
13724 GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
13725 if (op2IsConstIndex)
13727 factorIcon->AsIntCon()->gtFieldSeq =
13728 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
13731 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
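// Illustrative example (not from the source): x * 24 has lowestBit == 8,
// so shift == 3 and factor == 24 >> 3 == 3; the tree becomes (x * 3) << 3,
// where the multiply by 3 can be encoded as a scaled-index LEA.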
13732 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
13733 fgMorphTreeDone(op1);
13735 op2->gtIntConCommon.SetIconValue(shift);
13736 changeToShift = true;
13739 #endif // LEA_AVAILABLE
13742 // vnStore is null before the ValueNumber phase has run
13743 if (vnStore != nullptr)
13745 // Update the ValueNumber for 'op2', as we just changed the constant
13746 fgValueNumberTreeConst(op2);
13749 // Keep the old ValueNumber for 'tree' as the new expr
13750 // will still compute the same value as before
13751 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
13753 goto DONE_MORPHING_CHILDREN;
13756 else if (fgOperIsBitwiseRotationRoot(oper))
13758 tree = fgRecognizeAndMorphBitwiseRotation(tree);
13760 // fgRecognizeAndMorphBitwiseRotation may return a new tree
13761 oper = tree->OperGet();
13762 typ = tree->TypeGet();
13763 op1 = tree->gtOp.gtOp1;
13764 op2 = tree->gtOp.gtOp2;
13769 #ifdef LEGACY_BACKEND
13775 /* Any constant cases should have been folded earlier */
13776 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13781 noway_assert(varTypeIsFloating(op1->TypeGet()));
13783 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
13787 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13788 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13789 // is a local or clsVar, even if it has been address-exposed.
13790 if (op1->OperGet() == GT_ADDR)
13792 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13798 // Can not remove a GT_IND if it is currently a CSE candidate.
13799 if (gtIsActiveCSE_Candidate(tree))
13804 bool foldAndReturnTemp;
13805 foldAndReturnTemp = false;
13809 /* Try to Fold *(&X) into X */
13810 if (op1->gtOper == GT_ADDR)
13812 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13813 if (gtIsActiveCSE_Candidate(op1))
13818 temp = op1->gtOp.gtOp1; // X
13820 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13821 // they are the *same* struct type. In fact, they almost certainly aren't. If the
13822 // address has an associated field sequence, that identifies this case; go through
13823 // the "lcl_fld" path rather than this one.
13824 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13825 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13827 foldAndReturnTemp = true;
13829 else if (temp->OperIsLocal())
13831 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
13832 LclVarDsc* varDsc = &lvaTable[lclNum];
13834 // We will try to optimize when we have a promoted struct with a zero lvFldOffset
13835 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13837 noway_assert(varTypeIsStruct(varDsc));
13839 // We will try to optimize when we have a single field struct that is being struct promoted
13840 if (varDsc->lvFieldCnt == 1)
13842 unsigned lclNumFld = varDsc->lvFieldLclStart;
13843 // just grab the promoted field
13844 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
13846 // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset is zero
13848 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13850 // We can just use the existing promoted field LclNum
13851 temp->gtLclVarCommon.SetLclNum(lclNumFld);
13852 temp->gtType = fieldVarDsc->TypeGet();
13854 foldAndReturnTemp = true;
13858 // If the type of the IND (typ) is a "small int", and the type of the local has the
13859 // same width, then we can reduce to just the local variable -- it will be
13860 // correctly normalized, and signed/unsigned differences won't matter.
13862 // The below transformation cannot be applied if the local var needs to be normalized on load.
13863 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13864 !lvaTable[lclNum].lvNormalizeOnLoad())
13866 tree->gtType = typ = temp->TypeGet();
13867 foldAndReturnTemp = true;
13871 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. nullptr).
13873 assert(fieldSeq == nullptr);
13874 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13875 assert(b || fieldSeq == nullptr);
13877 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13879 // Append the field sequence, change the type.
13880 temp->AsLclFld()->gtFieldSeq =
13881 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13882 temp->gtType = typ;
13884 foldAndReturnTemp = true;
13887 // Otherwise we will fold this into a GT_LCL_FLD below
13888 // where we check (temp != nullptr)
13890 else // !temp->OperIsLocal()
13892 // We don't try to fold away the GT_IND/GT_ADDR for this case
13896 else if (op1->OperGet() == GT_ADD)
13898 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
13900 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13901 (!(opts.MinOpts() || opts.compDbgCode)))
13903 // No overflow arithmetic with pointers
13904 noway_assert(!op1->gtOverflow());
13906 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13907 if (!temp->OperIsLocal())
13913 // Can not remove the GT_ADDR if it is currently a CSE candidate.
13914 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13919 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13920 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13922 // Does the address have an associated zero-offset field sequence?
13923 FieldSeqNode* addrFieldSeq = nullptr;
13924 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13926 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13929 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13931 noway_assert(!varTypeIsGC(temp->TypeGet()));
13932 foldAndReturnTemp = true;
13936 // The emitter can't handle large offsets
13937 if (ival1 != (unsigned short)ival1)
13942 // The emitter can get confused by invalid offsets
13943 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13948 #ifdef _TARGET_ARM_
13949 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13951 if (varTypeIsFloating(typ))
13953 if ((ival1 % emitTypeSize(typ)) != 0)
13955 tree->gtFlags |= GTF_IND_UNALIGNED;
13961 // Now we can fold this into a GT_LCL_FLD below
13962 // where we check (temp != nullptr)
13966 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13967 // - We may have a load of a local where the load has a different type than the local
13968 // - We may have a load of a local plus an offset
13970 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13971 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13972 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13973 // out-of-bounds w.r.t. the local).
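// A minimal sketch of the legal case (illustrative only): for a local
// "long v", the tree IND(int)(ADD(ADDR(v), 4)) has ival1 == 4 and
// genTypeSize(typ) == 4, so ival1 + genTypeSize(typ) == 8 <= varSize == 8
// holds and the load is folded to LCL_FLD int v [+4].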
13974 if ((temp != nullptr) && !foldAndReturnTemp)
13976 assert(temp->OperIsLocal());
13978 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13979 LclVarDsc* const varDsc = &lvaTable[lclNum];
13981 const var_types tempTyp = temp->TypeGet();
13982 const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13983 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13985 // Make sure we do not enregister this lclVar.
13986 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13988 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13989 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13990 // lclVar and must not extend beyond the end of the lclVar.
13991 if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
13993 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1',
13994 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival1'.
13995 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13997 if (temp->OperGet() == GT_LCL_FLD)
13999 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
14000 temp->AsLclFld()->gtFieldSeq =
14001 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
14005 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
14006 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
14007 if (fieldSeq != nullptr)
14008 { // If it does represent a field, note that.
14009 temp->AsLclFld()->gtFieldSeq = fieldSeq;
14012 temp->gtType = tree->gtType;
14013 foldAndReturnTemp = true;
14017 if (foldAndReturnTemp)
14019 assert(temp != nullptr);
14020 assert(temp->TypeGet() == typ);
14021 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
14023 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
14024 // 'temp' because a GT_ADDR always marks it for its operand.
14025 temp->gtFlags &= ~GTF_DONT_CSE;
14026 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
14028 if (op1->OperGet() == GT_ADD)
14030 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
14031 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
14033 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
14034 DEBUG_DESTROY_NODE(tree); // GT_IND
14036 // If the result of the fold is a local var, we may need to perform further adjustments e.g. for normalization.
14038 if (temp->OperIs(GT_LCL_VAR))
14041 // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
14042 // and the node in question must have this bit set (as it has already been morphed).
14043 temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
14045 const bool forceRemorph = true;
14046 temp = fgMorphLocalVar(temp, forceRemorph);
14048 // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
14049 // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function returns.
14051 temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14058 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
14059 // could result in an invalid value number for the newly generated GT_IND node.
14060 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
14062 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
14063 // TBD: this transformation is currently necessary for correctness -- it might
14064 // be good to analyze the failures that result if we don't do this, and fix them
14065 // in other ways. Ideally, this should be optional.
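// A small sketch of the transform, assuming a two-operand comma:
//     IND(COMMA(sideEffect, addr))  =>  COMMA(sideEffect, IND(addr))
// i.e. the indirection is pushed down onto the last comma operand, so the
// address computation is still evaluated in its original position.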
14066 GenTree* commaNode = op1;
14067 unsigned treeFlags = tree->gtFlags;
14068 commaNode->gtType = typ;
14069 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
14070 // dangerous, clear the GTF_REVERSE_OPS at least.
14073 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14075 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
14077 commaNode = commaNode->gtOp.gtOp2;
14078 commaNode->gtType = typ;
14079 commaNode->gtFlags =
14080 (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG); // Bashing the GT_COMMA flags here is
14081 // dangerous, clear the GTF_REVERSE_OPS at least.
14083 commaNode->gtFlags |=
14084 ((commaNode->gtOp.gtOp1->gtFlags & GTF_ASG) | (commaNode->gtOp.gtOp2->gtFlags & GTF_ASG));
14086 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14089 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
14093 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
14095 GetArrayInfoMap()->Remove(tree);
14098 GenTree* addr = commaNode->gtOp.gtOp2;
14099 op1 = gtNewIndir(typ, addr);
14100 // This is very conservative
14101 op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING;
14102 op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
14106 GetArrayInfoMap()->Set(op1, arrInfo);
14109 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14111 commaNode->gtOp.gtOp2 = op1;
14112 commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
14120 // Can not remove op1 if it is currently a CSE candidate.
14121 if (gtIsActiveCSE_Candidate(op1))
14126 if (op1->OperGet() == GT_IND)
14128 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
14130 // Can not remove a GT_ADDR if it is currently a CSE candidate.
14131 if (gtIsActiveCSE_Candidate(tree))
14136 // Perform the transform ADDR(IND(...)) == (...).
14137 GenTree* addr = op1->gtOp.gtOp1;
14139 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
14141 DEBUG_DESTROY_NODE(op1);
14142 DEBUG_DESTROY_NODE(tree);
14147 else if (op1->OperGet() == GT_OBJ)
14149 // Can not remove a GT_ADDR if it is currently a CSE candidate.
14150 if (gtIsActiveCSE_Candidate(tree))
14155 // Perform the transform ADDR(OBJ(...)) == (...).
14156 GenTree* addr = op1->AsObj()->Addr();
14158 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
14160 DEBUG_DESTROY_NODE(op1);
14161 DEBUG_DESTROY_NODE(tree);
14165 else if (op1->gtOper == GT_CAST)
14167 GenTree* casting = op1->gtCast.CastOp();
14168 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
14170 DEBUG_DESTROY_NODE(op1);
14171 tree->gtOp.gtOp1 = op1 = casting;
14174 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
14176 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
14177 // (Be sure to mark "z" as an l-value...)
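// A small sketch, mirroring the IND(COMMA(...)) case above:
//     ADDR(COMMA(sideEffect, z))  =>  COMMA(sideEffect, ADDR(z))
// where "z" is marked GTF_DONT_CSE because it is now used as a location.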
14178 GenTree* commaNode = op1;
14179 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
14181 commaNode = commaNode->gtOp.gtOp2;
14183 // The top-level addr might be annotated with a zeroOffset field.
14184 FieldSeqNode* zeroFieldSeq = nullptr;
14185 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
14187 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
14189 // If the node we're about to put under a GT_ADDR is an indirection, it
14190 // doesn't need to be materialized, since we only want the addressing mode. Because
14191 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
14192 // as a side effect.
14193 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
14194 if (commaOp2->OperIsBlk())
14196 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
14198 if (commaOp2->gtOper == GT_IND)
14200 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
14201 commaOp2->gtFlags &= ~GTF_EXCEPT;
14202 commaOp2->gtFlags |= (commaOp2->gtOp.gtOp1->gtFlags & GTF_EXCEPT);
14205 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
14209 // Transfer the annotation to the new GT_ADDR node.
14210 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
14212 commaNode->gtOp.gtOp2 = op1;
14213 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
14214 // might give op1 a type different from byref (like, say, native int). So now go back and give
14215 // all the comma nodes the type of op1.
14216 // TODO: the comma flag update below is conservative and can be improved.
14217 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
14218 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
14220 while (commaNode->gtOper == GT_COMMA)
14222 commaNode->gtType = op1->gtType;
14223 commaNode->gtFlags |= op1->gtFlags;
14225 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14227 commaNode = commaNode->gtOp.gtOp2;
14230 tree->gtFlags &= ~GTF_EXCEPT;
14232 // Propagate the new flags
14233 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_EXCEPT);
14234 tree->gtFlags |= (tree->gtOp.gtOp2->gtFlags & GTF_EXCEPT);
14239 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
14240 op1->gtFlags |= GTF_DONT_CSE;
14246 /* Mark the nodes that are conditionally executed */
14247 fgWalkTreePre(&tree, gtMarkColonCond);
14249 /* Since we're doing this postorder we clear this if it got set by a child */
14250 fgRemoveRestOfBlock = false;
14255 /* Special case: trees that don't produce a value */
14256 if (op2->OperIsAssignment() || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2))
14258 typ = tree->gtType = TYP_VOID;
14261 // If we are in the Valuenum CSE phase then don't morph away anything as these
14262 // nodes may have CSE defs/uses in them.
14264 if (!optValnumCSE_phase)
14266 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is all we need.
14269 GenTree* op1SideEffects = nullptr;
14270 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
14271 // hoisted expressions in loops.
14272 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
14273 if (op1SideEffects)
14275 // Replace the left hand side with the side effect list.
14276 tree->gtOp.gtOp1 = op1SideEffects;
14277 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
14281 /* The left operand is worthless, throw it away */
14282 if (lvaLocalVarRefCounted)
14284 lvaRecursiveDecRefCounts(op1);
14286 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
14287 DEBUG_DESTROY_NODE(tree);
14288 DEBUG_DESTROY_NODE(op1);
14292 /* If the right operand is just a void nop node, throw it away */
14293 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
14295 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
14296 DEBUG_DESTROY_NODE(tree);
14297 DEBUG_DESTROY_NODE(op2);
14306 /* Special case if fgRemoveRestOfBlock is set to true */
14307 if (fgRemoveRestOfBlock)
14309 if (fgIsCommaThrow(op1, true))
14311 GenTree* throwNode = op1->gtOp.gtOp1;
14312 noway_assert(throwNode->gtType == TYP_VOID);
14317 noway_assert(op1->OperKind() & GTK_RELOP);
14318 noway_assert(op1->gtFlags & GTF_EXCEPT);
14320 // We need to keep op1 for the side-effects. Hang it off
14323 tree->ChangeOper(GT_COMMA);
14324 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
14326 // Additionally, since we're eliminating the JTRUE,
14327 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
14328 // So we change it into a GT_COMMA as well.
14329 op1->ChangeOper(GT_COMMA);
14330 op1->gtType = op1->gtOp.gtOp1->gtType;
14339 assert(oper == tree->gtOper);
14341 // If we are in the Valuenum CSE phase then don't morph away anything as these
14342 // nodes may have CSE defs/uses in them.
14344 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
14346 /* Check for op1 as a GT_COMMA with an unconditional throw node */
14347 if (op1 && fgIsCommaThrow(op1, true))
14349 if ((op1->gtFlags & GTF_COLON_COND) == 0)
14351 /* We can safely throw out the rest of the statements */
14352 fgRemoveRestOfBlock = true;
14355 GenTree* throwNode = op1->gtOp.gtOp1;
14356 noway_assert(throwNode->gtType == TYP_VOID);
14358 if (oper == GT_COMMA)
14360 /* Both tree and op1 are GT_COMMA nodes */
14361 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
14362 tree->gtOp.gtOp1 = throwNode;
14364 // Possibly reset the assignment flag
14365 if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
14367 tree->gtFlags &= ~GTF_ASG;
14372 else if (oper != GT_NOP)
14374 if (genActualType(typ) == genActualType(op1->gtType))
14376 /* The types match, so return the comma throw node as the new tree */
14381 if (typ == TYP_VOID)
14383 // Return the throw node
14388 GenTree* commaOp2 = op1->gtOp.gtOp2;
14390 // need type of oper to be same as tree
14391 if (typ == TYP_LONG)
14393 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
14394 commaOp2->gtIntConCommon.SetLngValue(0);
14395 /* Change the types of oper and commaOp2 to TYP_LONG */
14396 op1->gtType = commaOp2->gtType = TYP_LONG;
14398 else if (varTypeIsFloating(typ))
14400 commaOp2->ChangeOperConst(GT_CNS_DBL);
14401 commaOp2->gtDblCon.gtDconVal = 0.0;
14402 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
14403 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
14407 commaOp2->ChangeOperConst(GT_CNS_INT);
14408 commaOp2->gtIntConCommon.SetIconValue(0);
14409 /* Change the types of oper and commaOp2 to TYP_INT */
14410 op1->gtType = commaOp2->gtType = TYP_INT;
14413 /* Return the GT_COMMA node as the new tree */
14420 /* Check for op2 as a GT_COMMA with an unconditional throw */
14422 if (op2 && fgIsCommaThrow(op2, true))
14424 if ((op2->gtFlags & GTF_COLON_COND) == 0)
14426 /* We can safely throw out the rest of the statements */
14427 fgRemoveRestOfBlock = true;
14430 // If op1 has no side-effects
14431 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
14433 // If tree is an asg node
14434 if (tree->OperIsAssignment())
14436 /* Return the throw node as the new tree */
14437 return op2->gtOp.gtOp1;
14440 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
14442 /* Return the throw node as the new tree */
14443 return op2->gtOp.gtOp1;
14446 // If tree is a comma node
14447 if (tree->OperGet() == GT_COMMA)
14449 /* Return the throw node as the new tree */
14450 return op2->gtOp.gtOp1;
14453 /* for the shift nodes the type of op2 can differ from the tree type */
14454 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
14456 noway_assert(GenTree::OperIsShiftOrRotate(oper));
14458 GenTree* commaOp2 = op2->gtOp.gtOp2;
14460 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
14461 commaOp2->gtIntConCommon.SetLngValue(0);
14463 /* Change the types of oper and commaOp2 to TYP_LONG */
14464 op2->gtType = commaOp2->gtType = TYP_LONG;
14467 if ((genActualType(typ) == TYP_INT) &&
14468 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
14470 // An example case is comparison (say GT_GT) of two longs or floating point values.
14472 GenTree* commaOp2 = op2->gtOp.gtOp2;
14474 commaOp2->ChangeOperConst(GT_CNS_INT);
14475 commaOp2->gtIntCon.gtIconVal = 0;
14476 /* Change the types of oper and commaOp2 to TYP_INT */
14477 op2->gtType = commaOp2->gtType = TYP_INT;
14480 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
14482 noway_assert(tree->OperGet() == GT_ADD);
14484 GenTree* commaOp2 = op2->gtOp.gtOp2;
14486 commaOp2->ChangeOperConst(GT_CNS_INT);
14487 commaOp2->gtIntCon.gtIconVal = 0;
14488 /* Change the types of oper and commaOp2 to TYP_BYREF */
14489 op2->gtType = commaOp2->gtType = TYP_BYREF;
14492 /* types should now match */
14493 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
14495 /* Return the GT_COMMA node as the new tree */
14501 /*-------------------------------------------------------------------------
14502 * Optional morphing is done if tree transformations are permitted
14505 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
14510 tree = fgMorphSmpOpOptional(tree->AsOp());
14515 #pragma warning(pop)
14518 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
14520 genTreeOps oper = tree->gtOper;
14521 GenTree* op1 = tree->gtOp1;
14522 GenTree* op2 = tree->gtOp2;
14523 var_types typ = tree->TypeGet();
14525 if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
14527 /* Swap the operands so that the more expensive one is 'op1' */
14529 if (tree->gtFlags & GTF_REVERSE_OPS)
14537 tree->gtFlags &= ~GTF_REVERSE_OPS;
14540 if (oper == op2->gtOper)
14542 /* Reorder nested operators at the same precedence level to be
14543 left-recursive. For example, change "(a+(b+c))" to the
14544 equivalent expression "((a+b)+c)".
14547 /* Things are handled differently for floating-point operators */
14549 if (!varTypeIsFloating(tree->TypeGet()))
14551 fgMoveOpsLeft(tree);
14560 /* Change "((x+icon)+y)" to "((x+y)+icon)"
14561 Don't reorder floating-point operations */
14563 if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
14564 varTypeIsIntegralOrI(typ))
14566 GenTree* ad2 = op1->gtOp.gtOp2;
14568 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
14580 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
14581 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
14582 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same type as (tree).
14585 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is needed.
14588 if (varTypeIsGC(op2->TypeGet()))
14590 noway_assert(varTypeIsGC(typ));
14595 op1->gtOp.gtOp2 = op2;
14596 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
14604 /*-------------------------------------------------------------------------
14605 * Perform optional oper-specific postorder morphing
14610 #ifdef LEGACY_BACKEND
14612 bool dstIsSafeLclVar;
14616 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
14618 if (tree->OperIsCopyBlkOp())
14620 return fgMorphCopyBlock(tree);
14624 return fgMorphInitBlock(tree);
14628 if (typ == TYP_LONG)
14633 /* Make sure we're allowed to do this */
14635 if (optValnumCSE_phase)
14637 // It is not safe to reorder/delete CSE's
14641 #ifdef LEGACY_BACKEND
14642 /* We'll convert "a = a <op> x" into "a <op>= x" */
14643 /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
14645 /* Are we assigning to a GT_LCL_VAR ? */
14647 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
14649 /* If we have a GT_LCL_VAR, then is the address taken? */
14650 if (dstIsSafeLclVar)
14652 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
14653 LclVarDsc* varDsc = lvaTable + lclNum;
14655 noway_assert(lclNum < lvaCount);
14657 /* Is the address taken? */
14658 if (varDsc->lvAddrExposed)
14660 dstIsSafeLclVar = false;
14662 else if (op2->gtFlags & GTF_ASG)
14668 if (!dstIsSafeLclVar)
14669 #endif // LEGACY_BACKEND
14671 if (op2->gtFlags & GTF_ASG)
14676 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
14682 /* Special case: a cast that can be thrown away */
14684 // TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only
14685 // one cast and sometimes there is another one after it that gets removed by this
14686 // code. fgMorphSmp should be improved to remove all redundant casts so this code can be removed.
14689 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
14695 srct = op2->gtCast.CastOp()->TypeGet();
14696 cast = (var_types)op2->CastToType();
14697 dstt = op1->TypeGet();
14699 /* Make sure these are all ints and precision is not lost */
14701 if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT)
14703 op2 = tree->gtOp2 = op2->gtCast.CastOp();
14707 #ifdef LEGACY_BACKEND
14708 /* Make sure we have the operator range right */
14710 static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
14711 static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
14712 static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
14713 static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
14714 static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
14715 static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");
14717 static_assert(GT_OR == GT_ADD + 7, "bad oper value");
14718 static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
14719 static_assert(GT_AND == GT_ADD + 9, "bad oper value");
14721 static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
14722 static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
14723 static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");
14725 /* Check for a suitable operator on the RHS */
14727 cmop = op2->OperGet();
14732 // GT_CHS only supported for integer types
14733 if (varTypeIsFloating(tree->TypeGet()))
14741 // GT_ASG_MUL only supported for floating point types
14742 if (!varTypeIsFloating(tree->TypeGet()))
14751 if (op2->gtOverflow())
14753 /* Disable folding into "<op>=" if the result can be
14754 visible to anyone as <op> may throw an exception and
14755 the assignment should not proceed
14756 We are safe with an assignment to a local variable
14758 if (ehBlockHasExnFlowDsc(compCurBB))
14762 if (!dstIsSafeLclVar)
14767 #ifndef _TARGET_AMD64_
14768 // This is hard for byte-operations as we need to make
14769 // sure both operands are in RBM_BYTE_REGS.
14770 if (varTypeIsByte(op2->TypeGet()))
14772 #endif // _TARGET_AMD64_
14777 // GT_ASG_DIV only supported for floating point types
14778 if (!varTypeIsFloating(tree->TypeGet()))
14791 bool bReverse = false;
14792 bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
14793 if (bAsgOpFoldable)
14797 // We will transform this from "a = x <op> a" to "a <op>= x"
14798 // so we can now destroy the duplicate "a"
14799 DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
14800 op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
14803 /* Special case: "x |= -1" and "x &= 0" */
14804 if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
14805 ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
14807 /* Simply change to an assignment */
14808 tree->gtOp2 = op2->gtOp.gtOp2;
14812 if (cmop == GT_NEG)
14814 /* This is "x = -x;", use the flipsign operator */
14816 tree->ChangeOper(GT_CHS);
14818 if (op1->gtOper == GT_LCL_VAR)
14820 op1->gtFlags |= GTF_VAR_USEASG;
14823 tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
14828 if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
14830 // Changing from x = x op y to x op= y when x is a small integer type
14831 // makes the op size smaller (originally the op size was 32 bits, after
14832 // sign or zero extension of x, and there is an implicit truncation in the assignment).
14834 // This is ok in most cases because the upper bits were
14835 // lost when assigning the op result to a small type var,
14836 // but it may not be ok for the right shift operation where the higher bits
14837 // could be shifted into the lower bits and preserved.
14838 // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
14839 // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
14840 // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the wrong result:
14843 // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
14844 // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
14845 // The result becomes correct if we use >>unsigned instead of >>signed.
14846 noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
14850 /* Replace with an assignment operator */
14851 noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
14852 noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
14853 noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
14854 noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
14855 noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
14856 noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
14857 noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
14858 noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
14860 tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
14861 tree->gtOp2 = op2->gtOp.gtOp2;
14863 /* Propagate GTF_OVERFLOW */
14865 if (op2->gtOverflowEx())
14867 tree->gtType = op2->gtType;
14868 tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
14871 #if FEATURE_SET_FLAGS
14873 /* Propagate GTF_SET_FLAGS */
14874 if (op2->gtSetFlags())
14876 tree->gtRequestSetFlags();
14879 #endif // FEATURE_SET_FLAGS
14881 DEBUG_DESTROY_NODE(op2);
14884 /* The target is used as well as being defined */
14885 if (op1->OperIsLocal())
14887 op1->gtFlags |= GTF_VAR_USEASG;
14890 #if CPU_HAS_FP_SUPPORT
14891 /* Check for the special case "x += y * x;" */
14893 // GT_ASG_MUL only supported for floating point types
14894 if (cmop != GT_ADD && cmop != GT_SUB)
14899 if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
14901 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14903 /* Change "x += x * y" into "x *= (y + 1)" */
14905 op2 = op2->gtOp.gtOp2;
14907 else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
14909 /* Change "x += y * x" into "x *= (y + 1)" */
14911 op2 = op2->gtOp.gtOp1;
14918 op1 = gtNewDconNode(1.0);
14920 /* Now make the "*=" node */
14922 if (cmop == GT_ADD)
14924 /* Change "x += x * y" into "x *= (y + 1)" */
14926 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
14930 /* Change "x -= x * y" into "x *= (1 - y)" */
14932 noway_assert(cmop == GT_SUB);
14933 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
14935 tree->ChangeOper(GT_ASG_MUL);
14937 #endif // CPU_HAS_FP_SUPPORT
14945 /* Is the destination identical to the first RHS sub-operand? */
14947 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14949 /* This is "x = ~x" which is the same as "x ^= -1"
14950 * Transform the node into a GT_ASG_XOR */
14952 noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
14954 op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
14964 #endif // LEGACY_BACKEND
14969 /* Check for the case "(val + icon) * icon" */
14971 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14973 GenTree* add = op1->gtOp.gtOp2;
14975 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14977 if (tree->gtOverflow() || op1->gtOverflow())
14982 ssize_t imul = op2->gtIntCon.gtIconVal;
14983 ssize_t iadd = add->gtIntCon.gtIconVal;
14985 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
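// For example (illustrative only): (x + 2) * 4 becomes (x * 4) + 8, i.e.
// MUL(ADD(x, 2), 4) => ADD(MUL(x, 4), 8), assuming no overflow checks and
// an imul that is a valid scaled-index multiplier.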
14988 tree->ChangeOper(oper);
14990 op2->gtIntCon.gtIconVal = iadd * imul;
14992 op1->ChangeOper(GT_MUL);
14994 add->gtIntCon.gtIconVal = imul;
14995 #ifdef _TARGET_64BIT_
14996 if (add->gtType == TYP_INT)
14998 // we need to properly re-sign-extend or truncate after multiplying two int constants above
14999 add->AsIntCon()->TruncateOrSignExtend32();
15001 #endif //_TARGET_64BIT_
15009 /* For "val / 1", just return "val" */
15011 if (op2->IsIntegralConst(1))
15013 DEBUG_DESTROY_NODE(tree);
15021 /* Check for the case "(val + icon) << icon" */
15023 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
15025 GenTree* cns = op1->gtOp.gtOp2;
15027 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
15029 ssize_t ishf = op2->gtIntConCommon.IconValue();
15030 ssize_t iadd = cns->gtIntConCommon.IconValue();
15032 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
15034 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
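// For example (illustrative only): (x + 3) << 2 becomes (x << 2) + 12,
// i.e. LSH(ADD(x, 3), 2) => ADD(LSH(x, 2), 12).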
15036 tree->ChangeOper(GT_ADD);
15037 ssize_t result = iadd << ishf;
15038 op2->gtIntConCommon.SetIconValue(result);
15039 #ifdef _TARGET_64BIT_
15040 if (op1->gtType == TYP_INT)
15042 op2->AsIntCon()->TruncateOrSignExtend32();
15044 #endif // _TARGET_64BIT_
15046 // we are reusing the shift amount node here, but the type we want is that of the shift result
15047 op2->gtType = op1->gtType;
15049 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
15050 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
15052 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
15053 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
15056 op1->ChangeOper(GT_LSH);
15058 cns->gtIntConCommon.SetIconValue(ishf);
15066 if (!optValnumCSE_phase)
15068 /* "x ^ -1" is "~x" */
15070 if (op2->IsIntegralConst(-1))
15072 tree->ChangeOper(GT_NOT);
15073 tree->gtOp2 = nullptr;
15074 DEBUG_DESTROY_NODE(op2);
15076 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
15078 /* "binaryVal ^ 1" is "!binaryVal" */
15079 gtReverseCond(op1);
15080 DEBUG_DESTROY_NODE(op2);
15081 DEBUG_DESTROY_NODE(tree);
15089 // Initialization values for initBlk have special semantics - their lower
15090 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
15091 // which enables them to get a VNForZero, and be propagated.
15092 if (op1->IsIntegralConst(0))
15104 //------------------------------------------------------------------------
15105 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
15106 // (see ECMA III 3.55 and III.3.56).
15109 // tree - The GT_MOD/GT_UMOD tree to morph
15112 // The morphed tree
15115 // For ARM64 we don't have a remainder instruction so this transform is
15116 // always done. For XARCH this transform is done if we know that magic
15117 // division will be used; in that case this transform allows CSE to
15118 // eliminate the redundant div from code like "x = a / 3; y = a % 3;".
15120 // This method will produce the above expression if 'a' and 'b' are
15121 // leaf nodes; otherwise, if either of them is not a leaf, it will spill
15122 // its value into a temporary variable, for example:
15123 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
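// As a rough sketch, the simple leaf-node case "a % b" becomes:
//     SUB(a, MUL(DIV(a, b), b))
// with GTF_REVERSE_OPS set on the SUB so that any temp-defining comma
// subtrees are evaluated in the correct order.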
15125 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
15127 if (tree->OperGet() == GT_MOD)
15129 tree->SetOper(GT_DIV);
15131 else if (tree->OperGet() == GT_UMOD)
15133 tree->SetOper(GT_UDIV);
15137 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
15140 var_types type = tree->gtType;
15141 GenTree* denominator = tree->gtOp2;
15142 GenTree* numerator = tree->gtOp1;
15144 if (!numerator->OperIsLeaf())
15146 numerator = fgMakeMultiUse(&tree->gtOp1);
15148 else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
15150 // Morphing introduces new lclVar references. Increase ref counts
15151 lvaIncRefCnts(numerator);
15154 if (!denominator->OperIsLeaf())
15156 denominator = fgMakeMultiUse(&tree->gtOp2);
15158 else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
15160 // Morphing introduces new lclVar references. Increase ref counts
15161 lvaIncRefCnts(denominator);
15164 // The numerator and denominator may have been assigned to temps, in which case
15165 // their defining assignments are in the current tree. Therefore, we need to
15166 // set the execution order accordingly on the nodes we create.
15167 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
15168 // be set to be evaluated in reverse order.
15170 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
15171 assert(!mul->IsReverseOp());
15172 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
15173 sub->gtFlags |= GTF_REVERSE_OPS;
15176 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15182 //------------------------------------------------------------------------------
15183 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
15187 // oper - Operation to check
15190 // True if the operation can be a root of a bitwise rotation tree; false otherwise.
15192 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
15194 return (oper == GT_OR) || (oper == GT_XOR);
15197 //------------------------------------------------------------------------------
15198 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
15199 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
15202 // tree - tree to check for a rotation pattern
15205 // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
15208 // The input is a GT_OR or a GT_XOR tree.
15210 GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
15212 #ifndef LEGACY_BACKEND
15214 // Check for a rotation pattern.
15227 // The patterns recognized:
15228 // (x << (y & M)) op (x >>> ((-y + N) & M))
15229 // (x >>> ((-y + N) & M)) op (x << (y & M))
15231 // (x << y) op (x >>> (-y + N))
15232 // (x >>> (-y + N)) op (x << y)
15234 // (x >>> (y & M)) op (x << ((-y + N) & M))
15235 // (x << ((-y + N) & M)) op (x >>> (y & M))
15237 // (x >>> y) op (x << (-y + N))
15238 // (x << (-y + N)) op (x >>> y)
15240 // (x << c1) op (x >>> c2)
15241 // (x >>> c1) op (x << c2)
15244 // c1 and c2 are const
15245 // c1 + c2 == bitsize(x); N == bitsize(x); M is const
15248 // M & (N - 1) == N - 1
15249 // op is either | or ^
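// Concrete instances (illustrative, 32-bit x):
//     (x << 3) | (x >>> 29)                        =>  GT_ROL(x, 3)   // c1 + c2 == 32
//     (x << (y & 31)) | (x >>> ((-y + 32) & 31))   =>  GT_ROL(x, y)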
15251 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
15253 // We can't do anything if the tree has assignments, calls, or volatile
15254 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
15255 // thrown by the original tree will be thrown by the transformed tree as well.
15259 genTreeOps oper = tree->OperGet();
15260 assert(fgOperIsBitwiseRotationRoot(oper));
15262 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
15263 GenTree* op1 = tree->gtGetOp1();
15264 GenTree* op2 = tree->gtGetOp2();
15265 GenTree* leftShiftTree = nullptr;
15266 GenTree* rightShiftTree = nullptr;
15267 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
15269 leftShiftTree = op1;
15270 rightShiftTree = op2;
15272 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
15274 leftShiftTree = op2;
15275 rightShiftTree = op1;
15282 // Check if the trees representing the value to shift are identical.
15283 // We already checked that there are no side effects above.
15284 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
15286 GenTree* rotatedValue = leftShiftTree->gtGetOp1();
15287 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
15288 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
15289 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
15290 GenTree* leftShiftIndex = leftShiftTree->gtGetOp2();
15291 GenTree* rightShiftIndex = rightShiftTree->gtGetOp2();
15293 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
15294 // shouldn't be masked for the transformation to be valid. If additional
15295 // higher bits are not masked, the transformation is still valid since the result
15296 // of MSIL shift instructions is unspecified if the shift amount is greater or equal
15297 // than the width of the value being shifted.
15298 ssize_t minimalMask = rotatedValueBitSize - 1;
15299 ssize_t leftShiftMask = -1;
15300 ssize_t rightShiftMask = -1;
15302 if ((leftShiftIndex->OperGet() == GT_AND))
15304 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
15306 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
15307 leftShiftIndex = leftShiftIndex->gtGetOp1();
15315 if ((rightShiftIndex->OperGet() == GT_AND))
15317 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
15319 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
15320 rightShiftIndex = rightShiftIndex->gtGetOp1();
15328 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
15330 // The shift index is overmasked, e.g., we have
15331 // something like (x << (y & 15)) or
15332 // (x >>> ((32 - y) & 15)) with 32 bit x.
15333 // The transformation is not valid.
15337 GenTree* shiftIndexWithAdd = nullptr;
15338 GenTree* shiftIndexWithoutAdd = nullptr;
15339 genTreeOps rotateOp = GT_NONE;
15340 GenTree* rotateIndex = nullptr;
15342 if (leftShiftIndex->OperGet() == GT_ADD)
15344 shiftIndexWithAdd = leftShiftIndex;
15345 shiftIndexWithoutAdd = rightShiftIndex;
15348 else if (rightShiftIndex->OperGet() == GT_ADD)
15350 shiftIndexWithAdd = rightShiftIndex;
15351 shiftIndexWithoutAdd = leftShiftIndex;
15355 if (shiftIndexWithAdd != nullptr)
15357 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
15359 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
15361 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
15363 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
15365 // We found one of these patterns:
15366 // (x << (y & M)) | (x >>> ((-y + N) & M))
15367 // (x << y) | (x >>> (-y + N))
15368 // (x >>> (y & M)) | (x << ((-y + N) & M))
15369 // (x >>> y) | (x << (-y + N))
15370 // where N == bitsize(x), M is const, and
15371 // M & (N - 1) == N - 1
15372 CLANG_FORMAT_COMMENT_ANCHOR;
15374 #ifndef _TARGET_64BIT_
15375 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
15377 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
15378 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
15379 // to add helpers for GT_ROL and GT_ROR.
15384 rotateIndex = shiftIndexWithoutAdd;
15390 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
15392 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
15394 // We found this pattern:
15395 // (x << c1) | (x >>> c2)
15396 // where c1 and c2 are const and c1 + c2 == bitsize(x)
15398 rotateIndex = leftShiftIndex;
15402 if (rotateIndex != nullptr)
15404 noway_assert(GenTree::OperIsRotate(rotateOp));
15406 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
15408 // We can use the same tree only during global morph; reusing the tree in a later morph
15409 // may invalidate value numbers.
15412 tree->gtOp.gtOp1 = rotatedValue;
15413 tree->gtOp.gtOp2 = rotateIndex;
15414 tree->ChangeOper(rotateOp);
15416 unsigned childFlags = 0;
15417 for (GenTree* op : tree->Operands())
15419 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
15422 // The parent's flags should be a superset of its operands' flags
15423 noway_assert((inputTreeEffects & childFlags) == childFlags);
15427 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
15428 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
15434 #endif // LEGACY_BACKEND
15438 #if !CPU_HAS_FP_SUPPORT
15439 GenTree* Compiler::fgMorphToEmulatedFP(GenTree* tree)
15442 genTreeOps oper = tree->OperGet();
15443 var_types typ = tree->TypeGet();
15444 GenTree* op1 = tree->gtOp.gtOp1;
15445 GenTree* op2 = tree->gtGetOp2IfPresent();
15448 We have to use helper calls for all FP operations:
15450 FP operators that operate on FP values
15451 casts to and from FP
15452 comparisons of FP values
15455 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
15459 size_t argc = genTypeStSz(typ);
15461 /* Not all FP operations need helper calls */
15475 /* If the result isn't FP, it better be a compare or cast */
15477 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
15480 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
15483 /* Keep track of how many arguments we're passing */
15485 fgPtrArgCntCur += argc;
15487 /* Is this a binary operator? */
15491 /* Add the second operand to the argument count */
15493 fgPtrArgCntCur += argc;
15496 /* What kind of an operator do we have? */
15501 helper = CPX_R4_ADD;
15504 helper = CPX_R4_SUB;
15507 helper = CPX_R4_MUL;
15510 helper = CPX_R4_DIV;
15512 // case GT_MOD: helper = CPX_R4_REM; break;
15515 helper = CPX_R4_EQ;
15518 helper = CPX_R4_NE;
15521 helper = CPX_R4_LT;
15524 helper = CPX_R4_LE;
15527 helper = CPX_R4_GE;
15530 helper = CPX_R4_GT;
15537 noway_assert(!"unexpected FP binary op");
15541 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
15551 noway_assert(!"FP cast");
15554 helper = CPX_R4_NEG;
15561 noway_assert(!"unexpected FP unary op");
15565 args = gtNewArgList(tree->gtOp.gtOp1);
15568 /* If we have double result/operands, modify the helper */
15570 if (typ == TYP_DOUBLE)
15572 static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG);
15573 static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD);
15574 static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB);
15575 static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL);
15576 static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV);
15582 noway_assert(tree->OperIsCompare());
15584 static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ);
15585 static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE);
15586 static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT);
15587 static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE);
15588 static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE);
15589 static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT);
15592 tree = fgMorphIntoHelperCall(tree, helper, args);
15594 if (fgPtrArgCntMax < fgPtrArgCntCur)
15596 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
15597 fgPtrArgCntMax = fgPtrArgCntCur;
15600 fgPtrArgCntCur -= argc;
15608 if (compCurBB == genReturnBB)
15610 /* This is the 'exitCrit' call at the exit label */
15612 noway_assert(op1->gtType == TYP_VOID);
15613 noway_assert(op2 == 0);
15615 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
15620 /* This is a (real) return value -- check its type */
15621 CLANG_FORMAT_COMMENT_ANCHOR;
15624 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
15626 bool allowMismatch = false;
15628 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
15629 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
15630 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
15631 allowMismatch = true;
15633 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
15634 allowMismatch = true;
15636 if (!allowMismatch)
15637 NO_WAY("Return type mismatch");
15647 /*****************************************************************************
15649 * Transform the given tree for code generation and return an equivalent tree.
15652 GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
15655 assert(tree->gtOper != GT_STMT);
15660 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
15662 noway_assert(!"JitBreakMorphTree hit");
15668 int thisMorphNum = 0;
15669 if (verbose && treesBeforeAfterMorph)
15671 thisMorphNum = morphNum++;
15672 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
15679 // Apply any rewrites for implicit byref arguments before morphing the tree.
15682 if (fgMorphImplicitByRefArgs(tree))
15685 if (verbose && treesBeforeAfterMorph)
15687 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
15694 /*-------------------------------------------------------------------------
15695 * fgMorphTree() can potentially replace a tree with another, and the
15696 * caller has to store the return value correctly.
15697 * Turn this on to always make a copy of "tree" here to shake out
15698 * hidden/unupdated references.
15703 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
15707 #ifdef SMALL_TREE_NODES
15708 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
15710 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
15715 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
15718 copy->ReplaceWith(tree, this);
15720 #if defined(LATE_DISASM)
15721 // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields
15722 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
15724 copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
15728 DEBUG_DESTROY_NODE(tree);
15735 /* Ensure that we haven't morphed this node already */
15736 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15738 #if LOCAL_ASSERTION_PROP
15739 /* Before morphing the tree, we try to propagate any active assertions */
15740 if (optLocalAssertionProp)
15742 /* Do we have any active assertions? */
15744 if (optAssertionCount > 0)
15746 GenTree* newTree = tree;
15747 while (newTree != nullptr)
15750 /* newTree is non-Null if we propagated an assertion */
15751 newTree = optAssertionProp(apFull, tree, nullptr);
15753 assert(tree != nullptr);
15756 PREFAST_ASSUME(tree != nullptr);
15760 /* Save the original un-morphed tree for fgMorphTreeDone */
15762 GenTree* oldTree = tree;
15764 /* Figure out what kind of a node we have */
15766 unsigned kind = tree->OperKind();
15768 /* Is this a constant node? */
15770 if (kind & GTK_CONST)
15772 tree = fgMorphConst(tree);
15776 /* Is this a leaf node? */
15778 if (kind & GTK_LEAF)
15780 tree = fgMorphLeaf(tree);
15784 /* Is it a 'simple' unary/binary operator? */
15786 if (kind & GTK_SMPOP)
15788 tree = fgMorphSmpOp(tree, mac);
15792 /* See what kind of a special operator we have here */
15794 switch (tree->OperGet())
15797 tree = fgMorphField(tree, mac);
15801 if (tree->OperMayThrow(this))
15803 tree->gtFlags |= GTF_EXCEPT;
15807 tree->gtFlags &= ~GTF_EXCEPT;
15809 tree = fgMorphCall(tree->AsCall());
15812 case GT_ARR_BOUNDS_CHECK:
15813 #ifdef FEATURE_SIMD
15815 #endif // FEATURE_SIMD
15816 #ifdef FEATURE_HW_INTRINSICS
15817 case GT_HW_INTRINSIC_CHK:
15818 #endif // FEATURE_HW_INTRINSICS
15820 fgSetRngChkTarget(tree);
15822 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
15823 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
15824 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
15825 // If the index is a comma(throw, x), just return that.
15826 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
15828 tree = bndsChk->gtIndex;
15831 // Propagate effects flags upwards
15832 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
15833 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
15835 // Otherwise, we don't change the tree.
            tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);

            unsigned dim;
            for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
            {
                tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
            }

            tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;

            for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
            {
                tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
            }

            if (fgGlobalMorph)
            {
                fgSetRngChkTarget(tree, false);
            }
            break;
15861 case GT_ARR_OFFSET:
15862 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15863 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15864 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15866 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15867 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15868 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
            if (fgGlobalMorph)
            {
                fgSetRngChkTarget(tree, false);
            }
            break;

        case GT_CMPXCHG:
            tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15877 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15878 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15880 tree->gtFlags &= ~GTF_EXCEPT;
15882 tree->gtFlags |= tree->gtCmpXchg.gtOpLocation->gtFlags & GTF_ALL_EFFECT;
15883 tree->gtFlags |= tree->gtCmpXchg.gtOpValue->gtFlags & GTF_ALL_EFFECT;
            tree->gtFlags |= tree->gtCmpXchg.gtOpComparand->gtFlags & GTF_ALL_EFFECT;
            break;

        case GT_STORE_DYN_BLK:
        case GT_DYN_BLK:
            if (tree->OperGet() == GT_STORE_DYN_BLK)
            {
                tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
            }
15893 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
15894 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15896 tree->gtFlags &= ~GTF_EXCEPT;
15897 tree->SetIndirExceptionFlags(this);
            if (tree->OperGet() == GT_STORE_DYN_BLK)
            {
                tree->gtFlags |= tree->gtDynBlk.Data()->gtFlags & GTF_ALL_EFFECT;
            }
            tree->gtFlags |= tree->gtDynBlk.Addr()->gtFlags & GTF_ALL_EFFECT;
            tree->gtFlags |= tree->gtDynBlk.gtDynamicSize->gtFlags & GTF_ALL_EFFECT;
            break;
15907 case GT_INDEX_ADDR:
15908 tree->AsIndexAddr()->Index() = fgMorphTree(tree->AsIndexAddr()->Index());
15909 tree->AsIndexAddr()->Arr() = fgMorphTree(tree->AsIndexAddr()->Arr());
            break;

        default:
            noway_assert(!"unexpected operator");
    }

DONE:

    fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));

    return tree;
}
15925 #if LOCAL_ASSERTION_PROP
15926 //------------------------------------------------------------------------
// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum.
//
// Arguments:
//    lclNum - The varNum of the lclVar for which we're killing assertions.
//    tree   - (DEBUG only) the tree responsible for killing its assertions.
//
15933 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree))
15935 /* All dependent assertions are killed here */
15937 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15941 AssertionIndex index = optAssertionCount;
15942 while (killed && (index > 0))
15944 if (BitVecOps::IsMember(apTraits, killed, index - 1))
15947 AssertionDsc* curAssertion = optGetAssertion(index);
15948 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15949 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15952 printf("\nThe assignment ");
15954 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15955 optPrintAssertion(curAssertion);
15958 // Remove this bit from the killed mask
15959 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15961 optAssertionRemove(index);
15967 // killed mask should now be zero
15968 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
15971 //------------------------------------------------------------------------
// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
//
// Arguments:
//    lclNum - The varNum of the lclVar for which we're killing assertions.
//    tree   - (DEBUG only) the tree responsible for killing its assertions.
//
// Notes:
//    For structs and struct fields, it will invalidate the children and parent
//    respectively.
//    Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
//
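// For example (illustrative): if V03 is a promoted struct with field locals
// V04 and V05, a def of V03 kills the assertions that depend on V04 and V05
// as well as those on V03 itself, while a def of the field V04 kills the
// assertions on V04 and on its parent struct V03.
//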
15983 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree))
15985 LclVarDsc* varDsc = &lvaTable[lclNum];
15987 if (varDsc->lvPromoted)
15989 noway_assert(varTypeIsStruct(varDsc));
15991 // Kill the field locals.
15992 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
15994 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
15997 // Kill the struct local itself.
15998 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
16000 else if (varDsc->lvIsStructField)
16002 // Kill the field local.
16003 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
16005 // Kill the parent struct.
16006 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
16010 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
16013 #endif // LOCAL_ASSERTION_PROP
16015 /*****************************************************************************
16017 * This function is called to complete the morphing of a tree node
16018 * It should only be called once for each node.
16019 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
16020 * to enforce the invariant that each node is only morphed once.
16021 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
16022 * by an equivalent tree.
16026 void Compiler::fgMorphTreeDone(GenTree* tree,
16027 GenTree* oldTree /* == NULL */
16028 DEBUGARG(int morphNum))
16031 if (verbose && treesBeforeAfterMorph)
16033 printf("\nfgMorphTree (after %d):\n", morphNum);
16035 printf(""); // in our logic this causes a flush
    if (!fgGlobalMorph)
    {
        return;
    }
16044 if ((oldTree != nullptr) && (oldTree != tree))
16046 /* Ensure that we have morphed this node */
16047 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
16050 TransferTestDataToNode(oldTree, tree);
16055 // Ensure that we haven't morphed this node already
16056 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
    if (tree->OperKind() & GTK_CONST)
    {
        goto DONE;
    }
16064 #if LOCAL_ASSERTION_PROP
    if (!optLocalAssertionProp)
    {
        goto DONE;
    }
16071 /* Do we have any active assertions? */
16073 if (optAssertionCount > 0)
16075 /* Is this an assignment to a local variable */
16076 GenTreeLclVarCommon* lclVarTree = nullptr;
16077 if (tree->DefinesLocal(this, &lclVarTree))
16079 unsigned lclNum = lclVarTree->gtLclNum;
16080 noway_assert(lclNum < lvaCount);
16081 fgKillDependentAssertions(lclNum DEBUGARG(tree));
16085 /* If this tree makes a new assertion - make it available */
16086 optAssertionGen(tree);
16088 #endif // LOCAL_ASSERTION_PROP
DONE:;

    /* Mark this node as being morphed */
    tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
}
16098 /*****************************************************************************
16100 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
16101 * Returns true if we modified the flow graph
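 *
 *  For example (illustrative): once the condition of a BBJ_COND block has
 *  been folded to a constant, a trailing
 *      JTRUE(GT_CNS_INT 1)     // condition is always true
 *  turns the block into a BBJ_ALWAYS to bbJumpDest and the edge to bbNext is
 *  removed, while JTRUE(GT_CNS_INT 0) turns it into a BBJ_NONE and the edge
 *  to bbJumpDest is removed.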
16104 bool Compiler::fgFoldConditional(BasicBlock* block)
16106 bool result = false;
16108 // We don't want to make any code unreachable
    if (opts.compDbgCode || opts.MinOpts())
    {
        return false;
    }
16114 if (block->bbJumpKind == BBJ_COND)
16116 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
16118 GenTree* stmt = block->bbTreeList->gtPrev;
16120 noway_assert(stmt->gtNext == nullptr);
16122 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
16124 noway_assert(fgRemoveRestOfBlock);
16126 /* Unconditional throw - transform the basic block into a BBJ_THROW */
16127 fgConvertBBToThrowBB(block);
16129 /* Remove 'block' from the predecessor list of 'block->bbNext' */
16130 fgRemoveRefPred(block->bbNext, block);
16132 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
16133 fgRemoveRefPred(block->bbJumpDest, block);
16138 printf("\nConditional folded at BB%02u\n", block->bbNum);
16139 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
16145 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
16147 /* Did we fold the conditional */
        noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);

        GenTree* cond;
        cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
16153 if (cond->OperKind() & GTK_CONST)
            /* Yippee - we folded the conditional!
             * Remove the conditional statement */
16158 noway_assert(cond->gtOper == GT_CNS_INT);
16159 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
            /* remove the statement from bbTreeList - No need to update
             * the reference counts since there are no lcl vars */
16163 fgRemoveStmt(block, stmt);
16165 // block is a BBJ_COND that we are folding the conditional for
16166 // bTaken is the path that will always be taken from block
16167 // bNotTaken is the path that will never be taken from block
16169 BasicBlock* bTaken;
16170 BasicBlock* bNotTaken;
16172 if (cond->gtIntCon.gtIconVal != 0)
16174 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
16175 block->bbJumpKind = BBJ_ALWAYS;
16176 bTaken = block->bbJumpDest;
                bNotTaken = block->bbNext;
            }
            else
            {
                /* Unmark the loop if we are removing a backwards branch */
16182 /* dest block must also be marked as a loop head and */
16183 /* We must be able to reach the backedge block */
16184 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
16185 fgReachable(block->bbJumpDest, block))
16187 optUnmarkLoopBlocks(block->bbJumpDest, block);
16190 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
16191 block->bbJumpKind = BBJ_NONE;
16192 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
16193 bTaken = block->bbNext;
16194 bNotTaken = block->bbJumpDest;
16197 if (fgHaveValidEdgeWeights)
16199 // We are removing an edge from block to bNotTaken
16200 // and we have already computed the edge weights, so
16201 // we will try to adjust some of the weights
16203 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
16204 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
16206 // We examine the taken edge (block -> bTaken)
16207 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
16208 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
16209 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
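                    //
                    // For example (illustrative numbers): if 'block' has profile weight
                    // 100 and 'bTaken' has none, the edge (block -> bTaken) gets min/max
                    // weight 100; and if that edge is bTaken's only in-edge, bTaken
                    // inherits the weight 100 from 'block'.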
16211 if (block->hasProfileWeight())
16213 // The edge weights for (block -> bTaken) are 100% of block's weight
16214 edgeTaken->flEdgeWeightMin = block->bbWeight;
16215 edgeTaken->flEdgeWeightMax = block->bbWeight;
16217 if (!bTaken->hasProfileWeight())
16219 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
16221 // Update the weight of bTaken
16222 bTaken->inheritWeight(block);
16227 else if (bTaken->hasProfileWeight())
16229 if (bTaken->countOfInEdges() == 1)
16231 // There is only one in edge to bTaken
16232 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
16233 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
16235 // Update the weight of block
16236 block->inheritWeight(bTaken);
                if (bUpdated != nullptr)
                {
                    flowList* edge;
                    // Now fix the weights of the edges out of 'bUpdated'
                    switch (bUpdated->bbJumpKind)
                    {
                        case BBJ_NONE:
                            edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
                            edge->flEdgeWeightMax = bUpdated->bbWeight;
                            break;

                        case BBJ_COND:
                            edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
                            edge->flEdgeWeightMax = bUpdated->bbWeight;
                            __fallthrough;

                        case BBJ_ALWAYS:
                            edge                  = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
                            edge->flEdgeWeightMax = bUpdated->bbWeight;
                            break;

                        default:
                            // We don't handle BBJ_SWITCH
                            break;
                    }
                }
16266 /* modify the flow graph */
16268 /* Remove 'block' from the predecessor list of 'bNotTaken' */
16269 fgRemoveRefPred(bNotTaken, block);
16274 printf("\nConditional folded at BB%02u\n", block->bbNum);
16275 printf("BB%02u becomes a %s", block->bbNum,
16276 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
16277 if (block->bbJumpKind == BBJ_ALWAYS)
16279 printf(" to BB%02u", block->bbJumpDest->bbNum);
16285 /* if the block was a loop condition we may have to modify
16286 * the loop table */
16288 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
16290 /* Some loops may have been already removed by
16291 * loop unrolling or conditional folding */
                if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
                {
                    continue;
                }
16298 /* We are only interested in the loop bottom */
16300 if (optLoopTable[loopNum].lpBottom == block)
16302 if (cond->gtIntCon.gtIconVal == 0)
16304 /* This was a bogus loop (condition always false)
16305 * Remove the loop from the table */
16307 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
16311 printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
16312 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
16322 else if (block->bbJumpKind == BBJ_SWITCH)
16324 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
16326 GenTree* stmt = block->bbTreeList->gtPrev;
16328 noway_assert(stmt->gtNext == nullptr);
16330 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
16332 noway_assert(fgRemoveRestOfBlock);
16334 /* Unconditional throw - transform the basic block into a BBJ_THROW */
16335 fgConvertBBToThrowBB(block);
16337 /* update the flow graph */
16339 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
16340 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
16342 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
16344 BasicBlock* curJump = *jumpTab;
16346 /* Remove 'block' from the predecessor list of 'curJump' */
16347 fgRemoveRefPred(curJump, block);
16353 printf("\nConditional folded at BB%02u\n", block->bbNum);
16354 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
16360 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
16362 /* Did we fold the conditional */
        noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);

        GenTree* cond;
        cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
16368 if (cond->OperKind() & GTK_CONST)
            /* Yippee - we folded the conditional!
             * Remove the conditional statement */
16373 noway_assert(cond->gtOper == GT_CNS_INT);
            /* remove the statement from bbTreeList - No need to update
             * the reference counts since there are no lcl vars */
16377 fgRemoveStmt(block, stmt);
16379 /* modify the flow graph */
16381 /* Find the actual jump target */
16382 unsigned switchVal;
16383 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
            unsigned jumpCnt;
            jumpCnt = block->bbJumpSwt->bbsCount;
16386 BasicBlock** jumpTab;
            jumpTab = block->bbJumpSwt->bbsDstTab;

            bool foundVal = false;

            for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
16393 BasicBlock* curJump = *jumpTab;
16395 assert(curJump->countOfInEdges() > 0);
16397 // If val matches switchVal or we are at the last entry and
16398 // we never found the switch value then set the new jump dest
16400 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
16402 if (curJump != block->bbNext)
16404 /* transform the basic block into a BBJ_ALWAYS */
16405 block->bbJumpKind = BBJ_ALWAYS;
16406 block->bbJumpDest = curJump;
                        // if we are jumping backwards, make sure we have a GC Poll.
                        if (curJump->bbNum > block->bbNum)
                        {
                            block->bbFlags &= ~BBF_NEEDS_GCPOLL;
                        }
                    }
                    else
                    {
                        /* transform the basic block into a BBJ_NONE */
                        block->bbJumpKind = BBJ_NONE;
                        block->bbFlags &= ~BBF_NEEDS_GCPOLL;
                    }
                    foundVal = true;
                }
                else
                {
                    /* Remove 'block' from the predecessor list of 'curJump' */
                    fgRemoveRefPred(curJump, block);
                }
16431 printf("\nConditional folded at BB%02u\n", block->bbNum);
16432 printf("BB%02u becomes a %s", block->bbNum,
16433 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
16434 if (block->bbJumpKind == BBJ_ALWAYS)
16436 printf(" to BB%02u", block->bbJumpDest->bbNum);
16448 //*****************************************************************************
16450 // Morphs a single statement in a block.
16451 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
16453 // Returns true if 'stmt' was removed from the block.
16454 // Returns false if 'stmt' is still in the block (even if other statements were removed).
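//
// A typical (illustrative) call site looks like:
//
//     if (fgMorphBlockStmt(block, stmt DEBUGARG("test morph")))
//     {
//         // 'stmt' was removed from 'block'; do not touch it again.
//     }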
16457 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
16459 assert(block != nullptr);
16460 assert(stmt != nullptr);
16463 compCurStmt = stmt;
16465 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
16467 // Bug 1106830 - During the CSE phase we can't just remove
16468 // morph->gtOp.gtOp2 as it could contain CSE expressions.
16469 // This leads to a noway_assert in OptCSE.cpp when
    // searching for the removed CSE ref (using gtFindLink).
16472 if (!optValnumCSE_phase)
16474 // Check for morph as a GT_COMMA with an unconditional throw
16475 if (fgIsCommaThrow(morph, true))
16480 printf("Folding a top-level fgIsCommaThrow stmt\n");
16481 printf("Removing op2 as unreachable:\n");
16482 gtDispTree(morph->gtOp.gtOp2);
16486 // Use the call as the new stmt
16487 morph = morph->gtOp.gtOp1;
16488 noway_assert(morph->gtOper == GT_CALL);
16491 // we can get a throw as a statement root
16492 if (fgIsThrow(morph))
16497 printf("We have a top-level fgIsThrow stmt\n");
16498 printf("Removing the rest of block as unreachable:\n");
16501 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
16502 fgRemoveRestOfBlock = true;
16506 stmt->gtStmtExpr = morph;
16508 if (lvaLocalVarRefCounted)
16510 // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
16511 lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
16514 // Can the entire tree be removed?
16515 bool removedStmt = false;
16517 // Defer removing statements during CSE so we don't inadvertently remove any CSE defs.
16518 if (!optValnumCSE_phase)
16520 removedStmt = fgCheckRemoveStmt(block, stmt);
16523 // Or this is the last statement of a conditional branch that was just folded?
16524 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
16526 if (fgFoldConditional(block))
16528 if (block->bbJumpKind != BBJ_THROW)
16530 removedStmt = true;
16537 // Have to re-do the evaluation order since for example some later code does not expect constants as op1
16538 gtSetStmtInfo(stmt);
16540 // Have to re-link the nodes for this statement
16541 fgSetStmtSeq(stmt);
16547 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
16553 if (fgRemoveRestOfBlock)
16555 // Remove the rest of the stmts in the block
16556 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
16558 fgRemoveStmt(block, stmt);
16561 // The rest of block has been removed and we will always throw an exception.
        // Update successors of block.
16564 fgRemoveBlockAsPred(block);
        // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
        // We should not convert it to a ThrowBB.
16568 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
16570 // Convert block to a throw bb
16571 fgConvertBBToThrowBB(block);
16577 printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
16580 fgRemoveRestOfBlock = false;
16583 return removedStmt;
16586 /*****************************************************************************
16588 * Morph the statements of the given block.
16589 * This function should be called just once for a block. Use fgMorphBlockStmt()
16590 * for reentrant calls.
16593 #ifdef LEGACY_BACKEND
void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
#else  // !LEGACY_BACKEND
void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw)
#endif // !LEGACY_BACKEND
{
16599 fgRemoveRestOfBlock = false;
    /* Make the current basic block address available globally */

    compCurBB = block;
16605 *lnot = *loadw = false;
#ifdef LEGACY_BACKEND
    *mult = false;
#endif

    fgCurrentlyInUseArgTemps = hashBv::Create(this);
16612 GenTreeStmt* stmt = block->firstStmt();
16613 GenTree* prev = nullptr;
16614 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
16616 assert(stmt->gtOper == GT_STMT);
        if (fgRemoveRestOfBlock)
        {
            fgRemoveStmt(block, stmt);
            continue;
        }
16623 #ifdef FEATURE_SIMD
16624 if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
16626 fgMorphCombineSIMDFieldAssignments(block, stmt);
16630 fgMorphStmt = stmt;
16631 compCurStmt = stmt;
16632 GenTree* tree = stmt->gtStmtExpr;
        compCurStmtNum++;
        if (stmt == block->bbTreeList)
16638 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
16641 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
16645 printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
16650 /* Morph this statement tree */
16652 GenTree* morph = fgMorphTree(tree);
16654 // mark any outgoing arg temps as free so we can reuse them in the next statement.
16656 fgCurrentlyInUseArgTemps->ZeroAll();
        // Has fgMorphStmt been sneakily changed?
16660 if (stmt->gtStmtExpr != tree)
            /* This must be a tail call. Ignore 'morph' and carry on with
               the tail-call node */
16665 morph = stmt->gtStmtExpr;
16666 noway_assert(compTailCallUsed);
16667 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
16668 noway_assert(stmt->gtNextStmt == nullptr);
16670 GenTreeCall* call = morph->AsCall();
            // Could either be
            //  - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
            //  - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
            //    a jmp.
16675 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
16676 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
16677 (compCurBB->bbFlags & BBF_HAS_JMP)));
16679 else if (block != compCurBB)
16681 /* This must be a tail call that caused a GCPoll to get
16682 injected. We haven't actually morphed the call yet
16683 but the flag still got set, clear it here... */
16684 CLANG_FORMAT_COMMENT_ANCHOR;
16687 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
16690 noway_assert(compTailCallUsed);
16691 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
16692 noway_assert(stmt->gtNextStmt == nullptr);
16694 GenTreeCall* call = morph->AsCall();
            // Could either be
            //  - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
            //  - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
            //    a jmp.
16700 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
16701 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
16702 (compCurBB->bbFlags & BBF_HAS_JMP)));
16706 if (compStressCompile(STRESS_CLONE_EXPR, 30))
16708 // Clone all the trees to stress gtCloneExpr()
16712 printf("\nfgMorphTree (stressClone from):\n");
16716 morph = gtCloneExpr(morph);
16717 noway_assert(morph);
16721 printf("\nfgMorphTree (stressClone to):\n");
        /* If the hash value changed, we modified the tree during morphing */
16729 unsigned newHash = gtHashValue(morph);
16730 if (newHash != oldHash)
16732 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
16738 /* Check for morph as a GT_COMMA with an unconditional throw */
16739 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
16741 /* Use the call as the new stmt */
16742 morph = morph->gtOp.gtOp1;
16743 noway_assert(morph->gtOper == GT_CALL);
16744 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
16746 fgRemoveRestOfBlock = true;
16749 stmt->gtStmtExpr = tree = morph;
16751 noway_assert(fgPtrArgCntCur == 0);
16753 if (fgRemoveRestOfBlock)
16758 /* Has the statement been optimized away */
        if (fgCheckRemoveStmt(block, stmt))
        {
            continue;
        }
16765 /* Check if this block ends with a conditional branch that can be folded */
        if (fgFoldConditional(block))
        {
            continue;
        }
        if (ehBlockHasExnFlowDsc(block))
        {
            continue;
        }
16777 #ifdef LEGACY_BACKEND
16778 /* Note whether we have two or more +=/-= operators in a row */
16780 if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
            if (prev && prev->gtOper == tree->gtOper)
            {
                *mult = true;
            }
        }
16788 /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
        if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
        {
            *loadw = true;
        }
16794 #endif // LEGACY_BACKEND
16797 if (fgRemoveRestOfBlock)
16799 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
16801 GenTree* first = block->bbTreeList;
16802 noway_assert(first);
16803 GenTree* last = first->gtPrev;
16804 noway_assert(last && last->gtNext == nullptr);
16805 GenTree* lastStmt = last->gtStmt.gtStmtExpr;
16807 if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
16808 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
16810 GenTree* op1 = lastStmt->gtOp.gtOp1;
16812 if (op1->OperKind() & GTK_RELOP)
16814 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
16815 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
16818 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
16822 /* Mark block as a BBJ_THROW block */
16823 fgConvertBBToThrowBB(block);
16826 #if FEATURE_FASTTAILCALL
16827 GenTree* recursiveTailCall = nullptr;
16828 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
16830 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
16835 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
16838 // Reset this back so that it doesn't leak out impacting other blocks
16839 fgRemoveRestOfBlock = false;
16842 /*****************************************************************************
16844 * Morph the blocks of the method.
16845 * Returns true if the basic block list is modified.
16846 * This function should be called just once.
16849 void Compiler::fgMorphBlocks()
16854 printf("\n*************** In fgMorphBlocks()\n");
    /* Since fgMorphTree can be called after various optimizations to re-arrange
     * the nodes, we need a global flag to signal whether we are in the one-pass
     * global morphing phase */
16862 fgGlobalMorph = true;
16864 #if LOCAL_ASSERTION_PROP
16866 // Local assertion prop is enabled if we are optimized
16868 optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
16870 if (optLocalAssertionProp)
16873 // Initialize for local assertion prop
16875 optAssertionInit(true);
16877 #elif ASSERTION_PROP
    // If LOCAL_ASSERTION_PROP is not set but we have global assertion prop,
    // then local assertion prop is always off.
16883 optLocalAssertionProp = false;
16887 /*-------------------------------------------------------------------------
16888 * Process all basic blocks in the function
16891 BasicBlock* block = fgFirstBB;
16892 noway_assert(block);
16895 compCurStmtNum = 0;
    do
    {
#ifdef LEGACY_BACKEND
        bool mult = false;
#endif // LEGACY_BACKEND

        bool lnot = false;

        bool loadw = false;
16913 printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
16917 #if LOCAL_ASSERTION_PROP
16918 if (optLocalAssertionProp)
16921 // Clear out any currently recorded assertion candidates
16922 // before processing each basic block,
16923 // also we must handle QMARK-COLON specially
16925 optAssertionReset(0);
16929 /* Process all statement trees in the basic block */
16931 #ifndef LEGACY_BACKEND
        fgMorphStmts(block, &lnot, &loadw);
#else // LEGACY_BACKEND
        fgMorphStmts(block, &mult, &lnot, &loadw);
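
        // This LEGACY_BACKEND-only pass folds adjacent constant increments or
        // decrements of the same local, e.g. (illustrative):
        //     V01 += 3;   // GT_ASG_ADD
        //     V01 += 5;
        // becomes a single "V01 += 8;", provided the combined constant does not
        // introduce an overflow that the original checked operators would not have.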
16936 if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
16938 for (GenTree* tree = block->bbTreeList; tree; tree = tree->gtNext)
16940 assert(tree->gtOper == GT_STMT);
16941 GenTree* last = tree->gtStmt.gtStmtExpr;
                if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
                {
                    GenTree* temp;
                    GenTree* next;

                    GenTree* dst1 = last->gtOp.gtOp1;
                    GenTree* src1 = last->gtOp.gtOp2;
                    if (!last->IsCnsIntOrI())
                        goto NOT_CAFFE;
                    if (dst1->gtOper != GT_LCL_VAR)
                        goto NOT_CAFFE;
                    if (!src1->IsCnsIntOrI())
                        goto NOT_CAFFE;
16970 /* Look at the next statement */
                    temp = tree->gtNext;
                    if (!temp)
                        goto NOT_CAFFE;
16978 noway_assert(temp->gtOper == GT_STMT);
16979 next = temp->gtStmt.gtStmtExpr;
                    if (next->gtOper != last->gtOper)
                        goto NOT_CAFFE;
                    if (next->gtType != last->gtType)
                        goto NOT_CAFFE;

                    GenTree* dst2;
                    GenTree* src2;
                    dst2 = next->gtOp.gtOp1;
                    src2 = next->gtOp.gtOp2;

                    if (dst2->gtOper != GT_LCL_VAR)
                        goto NOT_CAFFE;
                    if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
                        goto NOT_CAFFE;
                    if (!src2->IsCnsIntOrI())
                        goto NOT_CAFFE;
                    if (last->gtOverflow() != next->gtOverflow())
                        goto NOT_CAFFE;
17012 const ssize_t i1 = src1->gtIntCon.gtIconVal;
17013 const ssize_t i2 = src2->gtIntCon.gtIconVal;
17014 const ssize_t itemp = i1 + i2;
17016 /* if the operators are checking for overflow, check for overflow of the operands */
                    if (next->gtOverflow())
                    {
                        if (next->TypeGet() == TYP_LONG)
                        {
                            if (next->gtFlags & GTF_UNSIGNED)
                            {
                                ClrSafeInt<UINT64> si1(i1);
                                if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
                                    goto NOT_CAFFE;
                            }
                            else
                            {
                                ClrSafeInt<INT64> si1(i1);
                                if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
                                    goto NOT_CAFFE;
                            }
                        }
                        else if (next->gtFlags & GTF_UNSIGNED)
                        {
                            ClrSafeInt<UINT32> si1(i1);
                            if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
                                goto NOT_CAFFE;
                        }
                        else
                        {
                            ClrSafeInt<INT32> si1(i1);
                            if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
                                goto NOT_CAFFE;
                        }
                    }
17057 /* Fold the two increments/decrements into one */
17059 src1->gtIntCon.gtIconVal = itemp;
17060 #ifdef _TARGET_64BIT_
17061 if (src1->gtType == TYP_INT)
17063 src1->AsIntCon()->TruncateOrSignExtend32();
17065 #endif //_TARGET_64BIT_
17067 /* Remove the second statement completely */
                    noway_assert(tree->gtNext == temp);
                    noway_assert(temp->gtPrev == tree);

                    if (temp->gtNext)
                    {
                        noway_assert(temp->gtNext->gtPrev == temp);

                        temp->gtNext->gtPrev = tree;
                        tree->gtNext         = temp->gtNext;
                    }
                    else
                    {
                        tree->gtNext = nullptr;

                        noway_assert(block->bbTreeList->gtPrev == temp);

                        block->bbTreeList->gtPrev = tree;
                    }
                }

            NOT_CAFFE:;
            }
        }
17094 #endif // LEGACY_BACKEND
17096 /* Are we using a single return block? */
17098 if (block->bbJumpKind == BBJ_RETURN)
17100 if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
17103 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
17104 // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
17105 // Such blocks do materialize as part of in-lining.
17107 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
17108 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
                // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
                // is BAD_VAR_NUM.
                //
                // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
17114 GenTree* last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
17115 GenTree* ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
17117 if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0))
17119 // This return was generated during epilog merging, so leave it alone
17123 /* We'll jump to the genReturnBB */
17124 CLANG_FORMAT_COMMENT_ANCHOR;
17126 #if !defined(_TARGET_X86_)
17127 if (info.compFlags & CORINFO_FLG_SYNCH)
17129 fgConvertSyncReturnToLeave(block);
17132 #endif // !_TARGET_X86_
17134 block->bbJumpKind = BBJ_ALWAYS;
17135 block->bbJumpDest = genReturnBB;
17138 if (genReturnLocal != BAD_VAR_NUM)
                    // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
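                    //
                    // Illustratively, a block ending in
                    //     GT_RETURN(expr)
                    // becomes
                    //     GT_ASG(GT_LCL_VAR<genReturnLocal>, expr)
                    // followed by a jump to genReturnBB, which performs the single return.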
17142 // Method must be returning a value other than TYP_VOID.
17143 noway_assert(compMethodHasRetVal());
17145 // This block must be ending with a GT_RETURN
17146 noway_assert(last != nullptr);
17147 noway_assert(last->gtOper == GT_STMT);
17148 noway_assert(last->gtNext == nullptr);
17149 noway_assert(ret != nullptr);
                    // GT_RETURN must have a non-null operand, as the method is returning the value assigned to
                    // genReturnLocal.
17153 noway_assert(ret->OperGet() == GT_RETURN);
17154 noway_assert(ret->gtGetOp1() != nullptr);
17156 GenTree* tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
17158 last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
17160 // make sure that copy-prop ignores this assignment.
17161 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
17163 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
17165 // This block ends with a GT_RETURN
17166 noway_assert(last != nullptr);
17167 noway_assert(last->gtOper == GT_STMT);
17168 noway_assert(last->gtNext == nullptr);
17170 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn
17172 noway_assert(ret->TypeGet() == TYP_VOID);
17173 noway_assert(ret->gtGetOp1() == nullptr);
17175 fgRemoveStmt(block, last);
17180 printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
17181 fgTableDispBasicBlock(block);
        block = block->bbNext;
    } while (block);
17190 /* We are done with the global morphing phase */
17192 fgGlobalMorph = false;
17197 fgDispBasicBlocks(true);
17202 //------------------------------------------------------------------------
17203 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
//    fgPtrArgCntMax records the maximum number of pushed arguments.
//    Depending on this maximum number of pushed arguments we may need to use
//    an EBP frame or be partially interruptible.
17209 // This functionality has been factored out of fgSetOptions() because
17210 // the Rationalizer can create new calls.
17213 // This must be called before isFramePointerRequired() is called, because it is a
17214 // phased variable (can only be written before it has been read).
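//
// For example (illustrative): if fgPtrArgCntMax cannot be encoded for a fully
// interruptible method, we fall back to partially interruptible code; and if
// it also exceeds what an ESP-based encoding can describe, we force an EBP frame.
//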
17216 void Compiler::fgCheckArgCnt()
17218 if (!compCanEncodePtrArgCntMax())
17223 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
17224 "interruptible\n");
17227 genInterruptible = false;
17229 if (fgPtrArgCntMax >= sizeof(unsigned))
17234 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
17237 codeGen->setFramePointerRequired(true);
17241 /*****************************************************************************
17243 * Make some decisions about the kind of code to generate.
17246 void Compiler::fgSetOptions()
17249 /* Should we force fully interruptible code ? */
17250 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
17252 noway_assert(!codeGen->isGCTypeFixed());
17253 genInterruptible = true;
17257 if (opts.compDbgCode)
17259 assert(!codeGen->isGCTypeFixed());
17260 genInterruptible = true; // debugging is easier this way ...
17263 /* Assume we won't need an explicit stack frame if this is allowed */
17265 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
17266 // the callee-saved registers.
17267 noway_assert(!compTailCallUsed || !compLocallocUsed);
17269 if (compLocallocUsed)
17271 codeGen->setFramePointerRequired(true);
17274 #ifdef _TARGET_X86_
17276 if (compTailCallUsed)
17277 codeGen->setFramePointerRequired(true);
17279 #endif // _TARGET_X86_
17281 if (!opts.genFPopt)
17283 codeGen->setFramePointerRequired(true);
17286 // Assert that the EH table has been initialized by now. Note that
17287 // compHndBBtabAllocCount never decreases; it is a high-water mark
17288 // of table allocation. In contrast, compHndBBtabCount does shrink
17289 // if we delete a dead EH region, and if it shrinks to zero, the
17290 // table pointer compHndBBtab is unreliable.
17291 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
17293 #ifdef _TARGET_X86_
17295 // Note: this case, and the !X86 case below, should both use the
17296 // !X86 path. This would require a few more changes for X86 to use
17297 // compHndBBtabCount (the current number of EH clauses) instead of
17298 // info.compXcptnsCount (the number of EH clauses in IL), such as
17299 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
17300 // an EH clause that we delete as statically dead code before we
17301 // get here, leaving no EH clauses left, and thus no requirement
17302 // to use a frame pointer because of EH. But until all the code uses
17303 // the same test, leave info.compXcptnsCount here.
17304 if (info.compXcptnsCount > 0)
17306 codeGen->setFramePointerRequiredEH(true);
17309 #else // !_TARGET_X86_
17311 if (compHndBBtabCount > 0)
17313 codeGen->setFramePointerRequiredEH(true);
17316 #endif // _TARGET_X86_
17318 #ifdef UNIX_X86_ABI
17319 if (info.compXcptnsCount > 0)
17321 assert(!codeGen->isGCTypeFixed());
17322 // Enforce fully interruptible codegen for funclet unwinding
17323 genInterruptible = true;
17325 #endif // UNIX_X86_ABI
17329 if (info.compCallUnmanaged)
17331 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
17334 if (info.compPublishStubParam)
17336 codeGen->setFramePointerRequiredGCInfo(true);
17339 if (opts.compNeedSecurityCheck)
17341 codeGen->setFramePointerRequiredGCInfo(true);
17343 #ifndef JIT32_GCENCODER
17345 // The decoder only reports objects in frames with exceptions if the frame
17346 // is fully interruptible.
17347 // Even if there is no catch or other way to resume execution in this frame
17348 // the VM requires the security object to remain alive until later, so
17349 // Frames with security objects must be fully interruptible.
17350 genInterruptible = true;
17352 #endif // JIT32_GCENCODER
17355 if (compIsProfilerHookNeeded())
17357 codeGen->setFramePointerRequired(true);
17360 if (info.compIsVarArgs)
17362 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
17363 codeGen->setFramePointerRequiredGCInfo(true);
17366 if (lvaReportParamTypeArg())
17368 codeGen->setFramePointerRequiredGCInfo(true);
17371 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
17374 /*****************************************************************************/
17376 GenTree* Compiler::fgInitThisClass()
17378 noway_assert(!compIsForInlining());
17380 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
17382 if (!kind.needsRuntimeLookup)
17384 return fgGetSharedCCtor(info.compClassHnd);
17388 #ifdef FEATURE_READYTORUN_COMPILER
17389 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
17390 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
17392 CORINFO_RESOLVED_TOKEN resolvedToken;
17393 memset(&resolvedToken, 0, sizeof(resolvedToken));
17395 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
17396 // This covers the case of a generic method on a non-generic type.
17397 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
17399 resolvedToken.hClass = info.compClassHnd;
17400 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
17403 // We need a runtime lookup.
17404 GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
17406 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
17407 // base of the class that owns the method being compiled". If we're in this method, it means we're not
17408 // inlining and there's no ambiguity.
17409 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
17410 gtNewArgList(ctxTree), &kind);
    // Collectible types require that for shared generic code, if we use the generic context parameter,
    // we report it. (This is a conservative approach; we could detect some cases, particularly when the
    // context parameter is "this", where we don't need the eager reporting logic.)
17417 lvaGenericsContextUseCount++;
17419 switch (kind.runtimeLookupKind)
17421 case CORINFO_LOOKUP_THISOBJ:
            // This code takes a this pointer; but we need to pass the static method desc to get the right point in
            // the hierarchy.
17425 GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
17426 // Vtable pointer of this object
17427 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
17428 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
17429 GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
17431 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewArgList(vtTree, methodHnd));
17434 case CORINFO_LOOKUP_CLASSPARAM:
17436 GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
17437 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewArgList(vtTree));
17440 case CORINFO_LOOKUP_METHODPARAM:
17442 GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
17443 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID,
17444 gtNewArgList(gtNewIconNode(0), methHndTree));
        default:
            noway_assert(!"Unknown LOOKUP_KIND");
    }
17454 /*****************************************************************************
17456 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
17457 * except for the allowed ? 1 : 0; pattern.
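 *
 *  E.g. the only allowed (LEGACY_BACKEND) form is, illustratively:
 *      (x < y) ? 1 : 0
 *  i.e. a GT_QMARK whose colon arms are the integral constants 0 and 1.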
17459 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data)
17461 if ((*tree)->OperGet() == GT_QMARK)
17463 fgCheckQmarkAllowedForm(*tree);
17465 return WALK_CONTINUE;
17468 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
17470 assert(tree->OperGet() == GT_QMARK);
17471 #ifndef LEGACY_BACKEND
17472 assert(!"Qmarks beyond morph disallowed.");
17473 #else // LEGACY_BACKEND
17474 GenTree* colon = tree->gtOp.gtOp2;
17476 assert(colon->gtOp.gtOp1->IsIntegralConst(0));
17477 assert(colon->gtOp.gtOp2->IsIntegralConst(1));
17478 #endif // LEGACY_BACKEND
17481 /*****************************************************************************
17483 * Verify that the importer has created GT_QMARK nodes in a way we can
17484 * process them. The following is allowed:
17486 * 1. A top level qmark. Top level qmark is of the form:
17487 * a) (bool) ? (void) : (void) OR
17488 * b) V0N = (bool) ? (type) : (type)
17490 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
 * of either op1 of colon or op2 of colon but not a child of any other
 * operator.
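 *
 * For example (illustrative):
 *     V05 = (V01 < 10) ? ((V02 == 0) ? 1 : 2) : 3   // OK: qmark nested under a colon arm
 *     V05 = ((V01 < 10) ? 1 : 2) + 3                // Not OK: qmark under a GT_ADD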
17494 void Compiler::fgPreExpandQmarkChecks(GenTree* expr)
17496 GenTree* topQmark = fgGetTopLevelQmark(expr);
17498 // If the top level Qmark is null, then scan the tree to make sure
17499 // there are no qmarks within it.
17500 if (topQmark == nullptr)
17502 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
        // We could probably expand the cond node also, but we don't think the extra effort is necessary,
        // so let's just assert that the cond node of a top level qmark doesn't have further top level qmarks.
17508 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
17510 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
17511 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
17516 /*****************************************************************************
17518 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
17519 * node is not present. If the top level GT_QMARK node is assigned to a
17520 * GT_LCL_VAR, then return the lcl node in ppDst.
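 *
 *  E.g. (illustrative): for "V02 = (V00 > 0) ? V01 : V03", the GT_QMARK under
 *  the GT_ASG is returned and *ppDst is set to the GT_LCL_VAR node for V02.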
17523 GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */)
    if (ppDst != nullptr)
    {
        *ppDst = nullptr;
    }
17530 GenTree* topQmark = nullptr;
    if (expr->gtOper == GT_QMARK)
    {
        topQmark = expr;
    }
17535 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17537 topQmark = expr->gtOp.gtOp2;
17538 if (ppDst != nullptr)
            *ppDst = expr->gtOp.gtOp1;
        }
    }

    return topQmark;
}
17546 /*********************************************************************************
17548 * For a castclass helper call,
17549 * Importer creates the following tree:
17550 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
17552 * This method splits the qmark expression created by the importer into the
17553 * following blocks: (block, asg, cond1, cond2, helper, remainder)
 *  Notice that op1 is the result for both of the conditions. So we coalesce these
 *  assignments into a single block instead of two blocks resulting in a nested diamond.
 *                       +---------->-----------+
 *                       |          |           |
 *                       v          v           v
 *
 *  block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
17563 * We expect to achieve the following codegen:
 *     mov      rsi, rdx                           tmp = op1                  // asgBlock
 *     test     rsi, rsi                           goto skip if tmp == null ? // cond1Block
 *     je       SKIP
 *     mov      rcx, 0x76543210                    cns = op2                  // cond2Block
 *     cmp      qword ptr [rsi], rcx               goto skip if *tmp == op2
 *     je       SKIP
 *     call     CORINFO_HELP_CHKCASTCLASS_SPECIAL  tmp = helper(cns, tmp)     // helperBlock
 *     mov      rsi, rax
 *
 *  SKIP:                                                                     // remainderBlock
 *     tmp has the result.
 *
 */
17576 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTree* stmt)
17581 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
17582 fgDispBasicBlocks(block, block, true);
17586 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17588 GenTree* dst = nullptr;
17589 GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
17590 noway_assert(dst != nullptr);
17592 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
17594 // Get cond, true, false exprs for the qmark.
17595 GenTree* condExpr = qmark->gtGetOp1();
17596 GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
17597 GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
17599 // Get cond, true, false exprs for the nested qmark.
17600 GenTree* nestedQmark = falseExpr;
17601 GenTree* cond2Expr;
17602 GenTree* true2Expr;
17603 GenTree* false2Expr;
17605 if (nestedQmark->gtOper == GT_QMARK)
17607 cond2Expr = nestedQmark->gtGetOp1();
17608 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
17609 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
17611 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
17612 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
        // This is a rare case that arises when we are doing minopts and encounter isinst of null.
        // gtFoldExpr was still able to optimize away part of the tree (but not all).
        // That means it does not match our pattern.
17620 // Rather than write code to handle this case, just fake up some nodes to make it match the common
17621 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
17622 // entire subtree we expected to be the nested question op.
17624 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
17625 true2Expr = nestedQmark;
17626 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
17628 assert(false2Expr->OperGet() == trueExpr->OperGet());
17630 // Clear flags as they are now going to be part of JTRUE.
17631 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
17632 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
17634 // Create the chain of blocks. See method header comment.
17635 // The order of blocks after this is the following:
17636 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
17638 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this
17640 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
17641 // remainderBlock will still be GC safe.
17642 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
17643 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
17644 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
17646 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
17647 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
17648 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
17649 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
17651 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
17653 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
17654 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
17655 if ((block->bbFlags & BBF_INTERNAL) == 0)
17657 helperBlock->bbFlags &= ~BBF_INTERNAL;
17658 cond2Block->bbFlags &= ~BBF_INTERNAL;
17659 cond1Block->bbFlags &= ~BBF_INTERNAL;
17660 asgBlock->bbFlags &= ~BBF_INTERNAL;
17661 helperBlock->bbFlags |= BBF_IMPORTED;
17662 cond2Block->bbFlags |= BBF_IMPORTED;
17663 cond1Block->bbFlags |= BBF_IMPORTED;
17664 asgBlock->bbFlags |= BBF_IMPORTED;
17667 // Chain the flow correctly.
17668 fgAddRefPred(asgBlock, block);
17669 fgAddRefPred(cond1Block, asgBlock);
17670 fgAddRefPred(cond2Block, cond1Block);
17671 fgAddRefPred(helperBlock, cond2Block);
17672 fgAddRefPred(remainderBlock, helperBlock);
17673 fgAddRefPred(remainderBlock, cond1Block);
17674 fgAddRefPred(remainderBlock, cond2Block);
17676 cond1Block->bbJumpDest = remainderBlock;
17677 cond2Block->bbJumpDest = remainderBlock;
17679 // Set the weights; some are guesses.
17680 asgBlock->inheritWeight(block);
17681 cond1Block->inheritWeight(block);
17682 cond2Block->inheritWeightPercentage(cond1Block, 50);
17683 helperBlock->inheritWeightPercentage(cond2Block, 50);
17685 // Append cond1 as JTRUE to cond1Block
17686 GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
17687 GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17688 fgInsertStmtAtEnd(cond1Block, jmpStmt);
17690 // Append cond2 as JTRUE to cond2Block
17691 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
17692 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17693 fgInsertStmtAtEnd(cond2Block, jmpStmt);
17695 // AsgBlock should get tmp = op1 assignment.
17696 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
17697 GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17698 fgInsertStmtAtEnd(asgBlock, trueStmt);
    // Since we are adding the helper call in the JTRUE false path, reverse cond2 and add the helper.
17701 gtReverseCond(cond2Expr);
17702 GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
17703 GenTree* helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
17704 fgInsertStmtAtEnd(helperBlock, helperStmt);
17706 // Finally remove the nested qmark stmt.
17707 fgRemoveStmt(block, stmt);
17712 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
17713 fgDispBasicBlocks(block, remainderBlock, true);
17718 /*****************************************************************************
 *  Expand a statement with a top level qmark node. There are three cases, based
 *  on whether the qmark has both "true" and "false" arms, or just one of them.
 *
 *     S0;
 *     C ? T : F;
 *     S1;
 *
 *     Generates ===>
 *
 *                       bbj_always
 *                       +---->------+
 *                 false |           |
 *     S0 -->-- ~C -->-- T   F -->-- S1
 *              |            |
 *              +--->--------+
 *              bbj_cond(true)
 *
 *     -----------------------------------------
 *
 *     S0;
 *     C ? T : NOP;
 *     S1;
 *
 *     Generates ===>
 *
 *                 false
 *     S0 -->-- ~C -->-- T -->-- S1
 *              |                |
 *              +-->-------------+
 *              bbj_cond(true)
 *
 *     -----------------------------------------
 *
 *     S0;
 *     C ? NOP : F;
 *     S1;
 *
 *     Generates ===>
 *
 *                false
 *     S0 -->-- C -->-- F -->-- S1
 *              |              |
 *              +-->-----------+
 *              bbj_cond(true)
 *
 *  If the qmark assigns to a variable, then create tmps for "then"
 *  and "else" results and assign the temp to the variable as a writeback step.
 */
17768 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTree* stmt)
17770 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17772 // Retrieve the Qmark node to be expanded.
17773 GenTree* dst = nullptr;
17774 GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
    if (qmark == nullptr)
    {
        return;
    }
    if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
    {
        fgExpandQmarkForCastInstOf(block, stmt);
        return;
    }
17789 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
17790 fgDispBasicBlocks(block, block, true);
17794 // Retrieve the operands.
17795 GenTree* condExpr = qmark->gtGetOp1();
17796 GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
17797 GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
17799 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
17800 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
17802 assert(!varTypeIsFloating(condExpr->TypeGet()));
17804 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
17805 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
17806 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
17808 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
17809 // block ... condBlock ... elseBlock ... remainderBlock
17811 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this
17813 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
17814 // remainderBlock will still be GC safe.
17815 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
17816 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
17817 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
17819 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
17820 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
17822 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
17823 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
17824 if ((block->bbFlags & BBF_INTERNAL) == 0)
17826 condBlock->bbFlags &= ~BBF_INTERNAL;
17827 elseBlock->bbFlags &= ~BBF_INTERNAL;
17828 condBlock->bbFlags |= BBF_IMPORTED;
17829 elseBlock->bbFlags |= BBF_IMPORTED;
17832 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
17834 condBlock->inheritWeight(block);
17836 fgAddRefPred(condBlock, block);
17837 fgAddRefPred(elseBlock, condBlock);
17838 fgAddRefPred(remainderBlock, elseBlock);
17840 BasicBlock* thenBlock = nullptr;
17841 if (hasTrueExpr && hasFalseExpr)
17846 // S0 -->-- ~C -->-- T F -->-- S1
17851 gtReverseCond(condExpr);
17852 condBlock->bbJumpDest = elseBlock;
17854 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
17855 thenBlock->bbJumpDest = remainderBlock;
17856 if ((block->bbFlags & BBF_INTERNAL) == 0)
17858 thenBlock->bbFlags &= ~BBF_INTERNAL;
17859 thenBlock->bbFlags |= BBF_IMPORTED;
17862 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
17864 fgAddRefPred(thenBlock, condBlock);
17865 fgAddRefPred(remainderBlock, thenBlock);
17867 thenBlock->inheritWeightPercentage(condBlock, 50);
17868 elseBlock->inheritWeightPercentage(condBlock, 50);
17870 else if (hasTrueExpr)
17873 // S0 -->-- ~C -->-- T -->-- S1
17875 // +-->-------------+
17878 gtReverseCond(condExpr);
17879 condBlock->bbJumpDest = remainderBlock;
17880 fgAddRefPred(remainderBlock, condBlock);
17881 // Since we have no false expr, use the one we'd already created.
17882 thenBlock = elseBlock;
17883 elseBlock = nullptr;
17885 thenBlock->inheritWeightPercentage(condBlock, 50);
17887 else if (hasFalseExpr)
17890 // S0 -->-- C -->-- F -->-- S1
17892 // +-->------------+
17895 condBlock->bbJumpDest = remainderBlock;
17896 fgAddRefPred(remainderBlock, condBlock);
17898 elseBlock->inheritWeightPercentage(condBlock, 50);
17901 GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
17902 GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17903 fgInsertStmtAtEnd(condBlock, jmpStmt);
17905 // Remove the original qmark statement.
17906 fgRemoveStmt(block, stmt);
    // Since we have top level qmarks, we either have a dst for it, in which case
    // we need to create tmps for the true and false exprs, or else just don't bother assigning.
17911 unsigned lclNum = BAD_VAR_NUM;
17912 if (dst != nullptr)
17914 assert(dst->gtOper == GT_LCL_VAR);
17915 lclNum = dst->gtLclVar.gtLclNum;
    else
    {
        assert(qmark->TypeGet() == TYP_VOID);
    }
    // Assign the trueExpr into the dst or tmp, insert in thenBlock.
    if (hasTrueExpr)
    {
        if (dst != nullptr)
17926 trueExpr = gtNewTempAssign(lclNum, trueExpr);
17928 GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17929 fgInsertStmtAtEnd(thenBlock, trueStmt);
    // Assign the falseExpr into the dst or tmp, insert in elseBlock
    if (hasFalseExpr)
    {
        if (dst != nullptr)
17937 falseExpr = gtNewTempAssign(lclNum, falseExpr);
17939 GenTree* falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
17940 fgInsertStmtAtEnd(elseBlock, falseStmt);
17946 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
17947 fgDispBasicBlocks(block, remainderBlock, true);
17952 /*****************************************************************************
17954 * Expand GT_QMARK nodes from the flow graph into basic blocks.
17958 void Compiler::fgExpandQmarkNodes()
    if (compQmarkUsed)
    {
        for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17964 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17966 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17968 fgPreExpandQmarkChecks(expr);
17970 fgExpandQmarkStmt(block, stmt);
17974 fgPostExpandQmarkChecks();
17977 compQmarkRationalized = true;
17981 /*****************************************************************************
17983 * Make sure we don't have any more GT_QMARK nodes.
17986 void Compiler::fgPostExpandQmarkChecks()
17988 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17990 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17992 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17993 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
17999 /*****************************************************************************
18001 * Transform all basic blocks for codegen.
18004 void Compiler::fgMorph()
18006 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
18008 fgOutgoingArgTemps = nullptr;
18013 printf("*************** In fgMorph()\n");
18017 fgDispBasicBlocks(true);
18021 // Insert call to class constructor as the first basic block if
18022 // we were asked to do so.
18023 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
18024 impTokenLookupContextHandle /* context */) &
18025 CORINFO_INITCLASS_USE_HELPER)
18027 fgEnsureFirstBBisScratch();
18028 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
18032 if (opts.compGcChecks)
18034 for (unsigned i = 0; i < info.compArgsCount; i++)
18036 if (lvaTable[i].TypeGet() == TYP_REF)
18038 // confirm that the argument is a GC pointer (for debugging (GC stress))
18039 GenTree* op = gtNewLclvNode(i, TYP_REF);
18040 GenTreeArgList* args = gtNewArgList(op);
18041 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args);
18043 fgEnsureFirstBBisScratch();
18044 fgInsertStmtAtEnd(fgFirstBB, op);
18049 if (opts.compStackCheckOnRet)
18051 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
18052 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
18055 if (opts.compStackCheckOnCall)
18057 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
18058 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
18062 /* Filter out unimported BBs */
18064 fgRemoveEmptyBlocks();
18067 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
18068 fgDebugCheckBBlist(false, false);
18071 EndPhase(PHASE_MORPH_INIT);
18076 JITDUMP("trees after inlining\n");
18077 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
18080 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
18082 EndPhase(PHASE_MORPH_INLINE);
18084 /* Add any internal blocks/trees we may need */
18089 fgMultipleNots = false;
18093 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
18094 fgDebugCheckBBlist(false, false);
18095 /* Inliner could clone some trees. */
18096 fgDebugCheckNodesUniqueness();
18099 fgRemoveEmptyTry();
18101 EndPhase(PHASE_EMPTY_TRY);
18103 fgRemoveEmptyFinally();
18105 EndPhase(PHASE_EMPTY_FINALLY);
18107 fgMergeFinallyChains();
18109 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
18113 EndPhase(PHASE_CLONE_FINALLY);
18115 fgUpdateFinallyTargetFlags();
18117 /* For x64 and ARM64 we need to mark irregular parameters */
18118 fgMarkImplicitByRefArgs();
18120 /* Promote struct locals if necessary */
18121 fgPromoteStructs();
/* Now is the time to figure out which locals are address-taken. */
18124 fgMarkAddressExposedLocals();
18126 EndPhase(PHASE_STR_ADRLCL);
18128 /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
18129 analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
18130 fgRetypeImplicitByRefArgs();
/* Now that address-taken locals and implicit byrefs are marked, we can safely apply stress. */
18135 fgStress64RsltMul();
18138 EndPhase(PHASE_MORPH_IMPBYREF);
18140 /* Morph the trees in all the blocks of the method */
18144 /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
18145 fgMarkDemotedImplicitByRefArgs();
18147 EndPhase(PHASE_MORPH_GLOBAL);
18150 JITDUMP("trees after fgMorphBlocks\n");
18151 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
18154 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
18155 if (fgNeedToAddFinallyTargetBits)
18157 // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back.
18158 fgAddFinallyTargetFlags();
18159 fgNeedToAddFinallyTargetBits = false;
18161 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
18163 /* Decide the kind of code we want to generate */
18167 fgExpandQmarkNodes();
18170 compCurBB = nullptr;
18174 /*****************************************************************************
18176 * Promoting struct locals
18178 void Compiler::fgPromoteStructs()
18183 printf("*************** In fgPromoteStructs()\n");
18187 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
18189 JITDUMP(" promotion opt flag not enabled\n");
18193 if (fgNoStructPromotion)
18195 JITDUMP(" promotion disabled by JitNoStructPromotion\n");
// The code in this #if has been useful in debugging struct promotion issues, by
// allowing selective enablement of the struct promotion optimization according to
// method hash.
18204 unsigned methHash = info.compMethodHash();
18205 char* lostr = getenv("structpromohashlo");
18206 unsigned methHashLo = 0;
18209 sscanf_s(lostr, "%x", &methHashLo);
18211 char* histr = getenv("structpromohashhi");
18212 unsigned methHashHi = UINT32_MAX;
18215 sscanf_s(histr, "%x", &methHashHi);
18217 if (methHash < methHashLo || methHash > methHashHi)
18223 printf("Promoting structs for method %s, hash = 0x%x.\n",
18224 info.compFullName, info.compMethodHash());
18225 printf(""); // in our logic this causes a flush
18230 if (info.compIsVarArgs)
18232 JITDUMP(" promotion disabled because of varargs\n");
18239 printf("\nlvaTable before fgPromoteStructs\n");
18244 // The lvaTable might grow as we grab temps. Make a local copy here.
18245 unsigned startLvaCount = lvaCount;
18248 // Loop through the original lvaTable. Looking for struct locals to be promoted.
18250 lvaStructPromotionInfo structPromotionInfo;
18251 bool tooManyLocals = false;
18253 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
18255 // Whether this var got promoted
18256 bool promotedVar = false;
18257 LclVarDsc* varDsc = &lvaTable[lclNum];
18259 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
18260 // its fields. Instead, we will attempt to enregister the entire struct.
18261 if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
18263 varDsc->lvRegStruct = true;
18265 // Don't promote if we have reached the tracking limit.
18266 else if (lvaHaveManyLocals())
// Print the message the first time we detect this condition.
18269 if (!tooManyLocals)
18271 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
18273 tooManyLocals = true;
18275 else if (varTypeIsStruct(varDsc))
18277 bool shouldPromote;
18279 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
18280 if (structPromotionInfo.canPromote)
18282 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
18286 shouldPromote = false;
18290 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
18291 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
18292 static int structPromoVarNum = 0;
18293 structPromoVarNum++;
// Guard against unset environment variables; atoi(nullptr) is undefined behavior.
const char* varNumLoStr = getenv("structpromovarnumlo");
const char* varNumHiStr = getenv("structpromovarnumhi");
if ((varNumLoStr != nullptr) && (varNumHiStr != nullptr) && (atoi(varNumLoStr) <= structPromoVarNum) &&
    (structPromoVarNum <= atoi(varNumHiStr)))
// Promote this struct local var.
18300 lvaPromoteStructVar(lclNum, &structPromotionInfo);
18301 promotedVar = true;
18303 #ifdef _TARGET_ARM_
18304 if (structPromotionInfo.requiresScratchVar)
18306 // Ensure that the scratch variable is allocated, in case we
18307 // pass a promoted struct as an argument.
18308 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
18310 lvaPromotedStructAssemblyScratchVar =
18311 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
18312 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
18315 #endif // _TARGET_ARM_
18319 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
18321 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
18322 // we will treat it as a reg struct.
18323 varDsc->lvRegStruct = true;
18330 printf("\nlvaTable after fgPromoteStructs\n");
18336 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTree* tree, fgWalkData* fgWalkPre)
18338 noway_assert(tree->OperGet() == GT_FIELD);
18340 GenTree* objRef = tree->gtField.gtFldObj;
18341 GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
18342 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
18344 /* Is this an instance data member? */
18346 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
18348 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
18349 LclVarDsc* varDsc = &lvaTable[lclNum];
18351 if (varTypeIsStruct(obj))
18353 if (varDsc->lvPromoted)
18356 unsigned fldOffset = tree->gtField.gtFldOffset;
18357 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
18358 noway_assert(fieldLclIndex != BAD_VAR_NUM);
18360 if (lvaIsImplicitByRefLocal(lclNum))
18362 // Keep track of the number of appearances of each promoted implicit
18363 // byref (here during struct promotion, which happens during address-exposed
18364 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
18365 // byref params when deciding if it's legal to elide certain copies of them.
18366 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
18367 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
18368 // chance, so have to check now.
18370 "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
18371 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
18372 varDsc->lvRefCnt++;
18375 tree->SetOper(GT_LCL_VAR);
18376 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
18377 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
18378 tree->gtFlags &= GTF_NODE_MASK;
18379 tree->gtFlags &= ~GTF_GLOB_REF;
18381 GenTree* parent = fgWalkPre->parentStack->Index(1);
18382 if (parent->gtOper == GT_ASG)
18384 if (parent->gtOp.gtOp1 == tree)
18386 tree->gtFlags |= GTF_VAR_DEF;
18387 tree->gtFlags |= GTF_DONT_CSE;
// Promotion of a struct containing struct fields where the field
// is a struct with a single pointer-sized scalar field: in this
// case struct promotion uses the type of the underlying scalar
// field as the type of the struct field instead of promoting
// recursively. This can lead to a block assignment whose RHS has
// been replaced with a scalar type. Mark the RHS value as
// DONT_CSE so that assertion prop will not do const propagation
// on it; if the RHS of a block assignment is a constant, it would
// incorrectly be interpreted as an init-block.
//
// TODO - This can also be avoided if we implement recursive struct
// promotion.
18402 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
18404 tree->gtFlags |= GTF_DONT_CSE;
18410 printf("Replacing the field in promoted struct with a local var:\n");
18411 fgWalkPre->printModified = true;
18414 return WALK_SKIP_SUBTREES;
18420 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
18421 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
18422 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
18423 // there is one extremely rare case where that won't be true. An enum type is a special value type
// that contains exactly one element of a primitive integer type (that, for CLS programs, is named
18425 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
18426 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
18427 // ldfld. For example:
18429 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
18431 // .field public specialname rtspecialname int16 value__
18432 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
18434 // .method public hidebysig static void Main() cil managed
18436 // .locals init (valuetype mynamespace.e_t V_0)
18439 // ldflda int16 mynamespace.e_t::value__
18443 // Normally, compilers will not generate the ldflda, since it is superfluous.
18445 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
18446 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
18447 // mismatch like this, don't do this morphing. The local var may end up getting marked as
18448 // address taken, and the appropriate SHORT load will be done from memory in that case.
18450 if (tree->TypeGet() == obj->TypeGet())
18452 if (lvaIsImplicitByRefLocal(lclNum))
18454 // Keep track of the number of appearances of each promoted implicit
18455 // byref (here during struct promotion, which happens during address-exposed
18456 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
18457 // byref params when deciding if it's legal to elide certain copies of them.
18458 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
18459 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
18460 // chance, so have to check now.
18461 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
18462 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
18463 varDsc->lvRefCnt++;
18466 tree->ChangeOper(GT_LCL_VAR);
18467 tree->gtLclVarCommon.SetLclNum(lclNum);
18468 tree->gtFlags &= GTF_NODE_MASK;
18470 GenTree* parent = fgWalkPre->parentStack->Index(1);
18471 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
18473 tree->gtFlags |= GTF_VAR_DEF;
18474 tree->gtFlags |= GTF_DONT_CSE;
18479 printf("Replacing the field in normed struct with the local var:\n");
18480 fgWalkPre->printModified = true;
18483 return WALK_SKIP_SUBTREES;
18488 return WALK_CONTINUE;
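// For illustration (hypothetical tree shapes and local numbers, not from the
// original source), fgMorphStructField rewrites a field access on a promoted
// struct local
//
//     GT_FIELD(f, GT_ADDR(GT_LCL_VAR V01))      // V01 is a promoted struct
//
// into a direct reference to the promoted field local
//
//     GT_LCL_VAR V03                            // V03 = promoted local for V01.f
//
// so later phases can track and enregister the field independently.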
18491 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTree* tree, fgWalkData* fgWalkPre)
18493 noway_assert(tree->OperGet() == GT_LCL_FLD);
18495 unsigned lclNum = tree->gtLclFld.gtLclNum;
18496 LclVarDsc* varDsc = &lvaTable[lclNum];
18498 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
18501 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
18502 unsigned fieldLclIndex = 0;
18503 LclVarDsc* fldVarDsc = nullptr;
18505 if (fldOffset != BAD_VAR_NUM)
18507 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
18508 noway_assert(fieldLclIndex != BAD_VAR_NUM);
18509 fldVarDsc = &lvaTable[fieldLclIndex];
18512 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
18513 #ifdef _TARGET_X86_
18514 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
18518 // There is an existing sub-field we can use.
18519 tree->gtLclFld.SetLclNum(fieldLclIndex);
18521 // The field must be an enregisterable type; otherwise it would not be a promoted field.
18522 // The tree type may not match, e.g. for return types that have been morphed, but both
18523 // must be enregisterable types.
18524 // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but
18525 // there may be places where that would violate existing assumptions.
18526 var_types treeType = tree->TypeGet();
18527 var_types fieldType = fldVarDsc->TypeGet();
18528 assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) &&
18529 (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType)));
18531 tree->ChangeOper(GT_LCL_VAR);
18532 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
18533 tree->gtType = fldVarDsc->TypeGet();
18537 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
18538 fgWalkPre->printModified = true;
18542 GenTree* parent = fgWalkPre->parentStack->Index(1);
18543 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
18545 tree->gtFlags |= GTF_VAR_DEF;
18546 tree->gtFlags |= GTF_DONT_CSE;
18551 // There is no existing field that has all the parts that we need
18552 // So we must ensure that the struct lives in memory.
18553 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
// We can't convert this local to a float, because it really does have its
// address taken, so we must keep its type.
varDsc->lvKeepType = 1;
18562 return WALK_SKIP_SUBTREES;
18565 return WALK_CONTINUE;
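// As a sketch (hypothetical local numbers, assuming a promoted struct V02 with
// an int field at offset 4 promoted to V05), fgMorphLocalField rewrites
//
//     GT_LCL_FLD int V02 [+4]   ==>   GT_LCL_VAR int V05
//
// when a promoted field local matches the offset and size; otherwise the
// struct is forced to stay in memory via lvaSetVarDoNotEnregister.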
18568 //------------------------------------------------------------------------
18569 // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
18570 // i.e. which the ABI requires to be passed by making a copy in the caller and
18571 // passing its address to the callee. Mark their `LclVarDsc`s such that
18572 // `lvaIsImplicitByRefLocal` will return true for them.
18574 void Compiler::fgMarkImplicitByRefArgs()
18576 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18580 printf("\n*************** In fgMarkImplicitByRefs()\n");
18584 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18586 LclVarDsc* varDsc = &lvaTable[lclNum];
18588 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
18592 if (varDsc->lvSize() > REGSIZE_BYTES)
18594 size = varDsc->lvSize();
18598 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
18599 size = info.compCompHnd->getClassSize(typeHnd);
18602 #if defined(_TARGET_AMD64_)
18603 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
18604 #elif defined(_TARGET_ARM64_)
18605 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
18608 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
// So I am now using it to indicate that this is one of the weird implicit
// byref locals.
// The address-taken cleanup will look for references to locals marked like
// this, and transform them appropriately.
18613 varDsc->lvIsTemp = 1;
18615 // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
18616 // appearance of implicit-by-ref param so that call arg morphing can do an
18617 // optimization for single-use implicit-by-ref params whose single use is as
18618 // an outgoing call argument.
18619 varDsc->lvRefCnt = 0;
18624 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
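// To illustrate the size checks above (an informal summary, not normative ABI
// documentation): on Windows AMD64 a struct argument is passed by reference
// when its size is not 1, 2, 4 or 8 bytes, e.g.
//
//     struct S3  { char a, b, c; };   // 3 bytes, not a power of two: implicit byref
//     struct S8  { long x; };         // 8 bytes: passed in a register
//     struct S16 { long x, y; };      // 16 bytes: implicit byref
//
// while on ARM64 structs larger than a pointer that are not multi-reg
// candidates are passed by reference.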
18627 //------------------------------------------------------------------------
18628 // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
18629 // struct to pointer). Also choose (based on address-exposed analysis)
18630 // which struct promotions of implicit byrefs to keep or discard.
18631 // For those which are kept, insert the appropriate initialization code.
18632 // For those which are to be discarded, annotate the promoted field locals
18633 // so that fgMorphImplicitByRefArgs will know to rewrite their appearances
18634 // using indirections off the pointer parameters.
18636 void Compiler::fgRetypeImplicitByRefArgs()
18638 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18642 printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
18646 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18648 LclVarDsc* varDsc = &lvaTable[lclNum];
18650 if (lvaIsImplicitByRefLocal(lclNum))
18654 if (varDsc->lvSize() > REGSIZE_BYTES)
18656 size = varDsc->lvSize();
18660 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
18661 size = info.compCompHnd->getClassSize(typeHnd);
18664 if (varDsc->lvPromoted)
18666 // This implicit-by-ref was promoted; create a new temp to represent the
18667 // promoted struct before rewriting this parameter as a pointer.
18668 unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
18669 lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
18670 // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
18671 varDsc = &lvaTable[lclNum];
18673 // Copy the struct promotion annotations to the new temp.
18674 LclVarDsc* newVarDsc = &lvaTable[newLclNum];
18675 newVarDsc->lvPromoted = true;
18676 newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
18677 newVarDsc->lvFieldCnt = varDsc->lvFieldCnt;
18678 newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
18679 newVarDsc->lvCustomLayout = varDsc->lvCustomLayout;
18681 newVarDsc->lvKeepType = true;
18684 // Propagate address-taken-ness and do-not-enregister-ness.
18685 newVarDsc->lvAddrExposed = varDsc->lvAddrExposed;
18686 newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
18688 newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr;
18689 newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr;
18690 newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
18691 newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
18692 newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
18695 // If the promotion is dependent, the promoted temp would just be committed
18696 // to memory anyway, so we'll rewrite its appearances to be indirections
18697 // through the pointer parameter, the same as we'd do for this
18698 // parameter if it weren't promoted at all (otherwise the initialization
18699 // of the new temp would just be a needless memcpy at method entry).
18700 bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
18701 (varDsc->lvRefCnt <= varDsc->lvFieldCnt);
18703 if (!undoPromotion)
18705 // Insert IR that initializes the temp from the parameter.
18706 // LHS is a simple reference to the temp.
18707 fgEnsureFirstBBisScratch();
18708 GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
18709 // RHS is an indirection (using GT_OBJ) off the parameter.
18710 GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF);
18711 GenTree* rhs = gtNewBlockVal(addr, (unsigned)size);
18712 GenTree* assign = gtNewAssignNode(lhs, rhs);
18713 fgInsertStmtAtBeg(fgFirstBB, assign);
18716 // Update the locals corresponding to the promoted fields.
18717 unsigned fieldLclStart = varDsc->lvFieldLclStart;
18718 unsigned fieldCount = varDsc->lvFieldCnt;
18719 unsigned fieldLclStop = fieldLclStart + fieldCount;
18721 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
18723 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
18727 // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
18728 // will know to rewrite appearances of this local.
18729 assert(fieldVarDsc->lvParentLcl == lclNum);
18733 // Set the new parent.
18734 fieldVarDsc->lvParentLcl = newLclNum;
// Clear the ref count field; it is used to communicate the number of references
18736 // to the implicit byref parameter when morphing calls that pass the implicit byref
18737 // out as an outgoing argument value, but that doesn't pertain to this field local
18738 // which is now a field of a non-arg local.
18739 fieldVarDsc->lvRefCnt = 0;
18742 fieldVarDsc->lvIsParam = false;
18743 // The fields shouldn't inherit any register preferences from
18744 // the parameter which is really a pointer to the struct.
18745 fieldVarDsc->lvIsRegArg = false;
18746 fieldVarDsc->lvIsMultiRegArg = false;
18747 fieldVarDsc->lvSetIsHfaRegArg(false);
18748 fieldVarDsc->lvArgReg = REG_NA;
18749 #if FEATURE_MULTIREG_ARGS
18750 fieldVarDsc->lvOtherArgReg = REG_NA;
18752 fieldVarDsc->lvPrefReg = 0;
18755 // Hijack lvFieldLclStart to record the new temp number.
18756 // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
18757 varDsc->lvFieldLclStart = newLclNum;
18758 // Go ahead and clear lvFieldCnt -- either we're promoting
18759 // a replacement temp or we're not promoting this arg, and
18760 // in either case the parameter is now a pointer that doesn't
18761 // have these fields.
18762 varDsc->lvFieldCnt = 0;
18764 // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
18765 // whether references to the struct should be rewritten as
18766 // indirections off the pointer (not promoted) or references
18767 // to the new struct local (promoted).
18768 varDsc->lvPromoted = !undoPromotion;
18772 // The "undo promotion" path above clears lvPromoted for args that struct
18773 // promotion wanted to promote but that aren't considered profitable to
18774 // rewrite. It hijacks lvFieldLclStart to communicate to
18775 // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
18776 // on such args for fgMorphImplicitByRefArgs to consult in the interim.
18777 // Here we have an arg that was simply never promoted, so make sure it doesn't
18778 // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
18779 // and fgMarkDemotedImplicitByRefArgs.
18780 assert(varDsc->lvFieldLclStart == 0);
18783 // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
18784 varDsc->lvType = TYP_BYREF;
// Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
// make sure that the following flag is not set, as it would force SSA to
// exclude tracking/enregistering these LclVars. (See fgExcludeFromSsa.)
18790 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
18792 // The struct parameter may have had its address taken, but the pointer parameter
18793 // cannot -- any uses of the struct parameter's address are uses of the pointer
18794 // parameter's value, and there's no way for the MSIL to reference the pointer
18795 // parameter's address. So clear the address-taken bit for the parameter.
18796 varDsc->lvAddrExposed = 0;
18797 varDsc->lvDoNotEnregister = 0;
18800 // This should not be converted to a double in stress mode,
18801 // because it is really a pointer
18802 varDsc->lvKeepType = 1;
18806 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
18812 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
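// A rough sketch of the retyping (hypothetical local numbers): for a kept
// promotion of implicit-byref parameter V00 of struct type S, we end up with
//
//     V00: TYP_BYREF parameter (points at the caller's copy of S)
//     V10: new TYP_STRUCT local, promoted, initialized at method entry by
//          ASG(LCL_VAR V10, OBJ(LCL_VAR V00))
//
// whereas for an undone promotion V00 simply becomes TYP_BYREF and all
// appearances are later rewritten as indirections off V00.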
18815 //------------------------------------------------------------------------
18816 // fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
18817 // asked to promote. Appearances of these have now been rewritten
18818 // (by fgMorphImplicitByRefArgs) using indirections from the pointer
18819 // parameter or references to the promotion temp, as appropriate.
18821 void Compiler::fgMarkDemotedImplicitByRefArgs()
18823 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18825 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18827 LclVarDsc* varDsc = &lvaTable[lclNum];
18829 if (lvaIsImplicitByRefLocal(lclNum))
18831 if (varDsc->lvPromoted)
18833 // The parameter is simply a pointer now, so clear lvPromoted. It was left set
18834 // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
18835 // appearances of this arg needed to be rewritten to a new promoted struct local.
18836 varDsc->lvPromoted = false;
18838 // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
18839 // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
18840 varDsc->lvFieldLclStart = 0;
18842 else if (varDsc->lvFieldLclStart != 0)
18844 // We created new temps to represent a promoted struct corresponding to this
18845 // parameter, but decided not to go through with the promotion and have
18846 // rewritten all uses as indirections off the pointer parameter.
18847 // We stashed the pointer to the new struct temp in lvFieldLclStart; make
18848 // note of that and clear the annotation.
18849 unsigned structLclNum = varDsc->lvFieldLclStart;
18850 varDsc->lvFieldLclStart = 0;
18852 // Clear the arg's ref count; this was set during address-taken analysis so that
18853 // call morphing could identify single-use implicit byrefs; we're done with
18854 // that, and want it to be in its default state of zero when we go to set
18855 // real ref counts for all variables.
18856 varDsc->lvRefCnt = 0;
18858 // The temp struct is now unused; set flags appropriately so that we
18859 // won't allocate space for it on the stack.
18860 LclVarDsc* structVarDsc = &lvaTable[structLclNum];
18861 structVarDsc->lvRefCnt = 0;
18862 structVarDsc->lvAddrExposed = false;
18864 structVarDsc->lvUnusedStruct = true;
18867 unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
18868 unsigned fieldCount = structVarDsc->lvFieldCnt;
18869 unsigned fieldLclStop = fieldLclStart + fieldCount;
18871 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
18873 // Fix the pointer to the parent local.
18874 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
18875 assert(fieldVarDsc->lvParentLcl == lclNum);
18876 fieldVarDsc->lvParentLcl = structLclNum;
18878 // The field local is now unused; set flags appropriately so that
18879 // we won't allocate stack space for it.
18880 fieldVarDsc->lvRefCnt = 0;
18881 fieldVarDsc->lvAddrExposed = false;
18887 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18890 /*****************************************************************************
 * Morph irregular parameters:
 * for x64 and ARM64 this means turning them into byrefs and adding extra indirections.
18895 bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree)
18897 #if (!defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) && !defined(_TARGET_ARM64_)
18901 #else // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18903 bool changed = false;
18905 // Implicit byref morphing needs to know if the reference to the parameter is a
18906 // child of GT_ADDR or not, so this method looks one level down and does the
18907 // rewrite whenever a child is a reference to an implicit byref parameter.
18908 if (tree->gtOper == GT_ADDR)
18910 if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
18912 GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true);
18913 changed = (morphedTree != nullptr);
18914 assert(!changed || (morphedTree == tree));
18919 for (GenTree** pTree : tree->UseEdges())
18921 GenTree* childTree = *pTree;
18922 if (childTree->gtOper == GT_LCL_VAR)
18924 GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false);
18925 if (newChildTree != nullptr)
18928 *pTree = newChildTree;
18935 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18938 GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr)
18940 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
18941 assert(isAddr == (tree->gtOper == GT_ADDR));
18943 GenTree* lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
18944 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
18945 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
18947 CORINFO_FIELD_HANDLE fieldHnd;
18948 unsigned fieldOffset = 0;
18949 var_types fieldRefType = TYP_UNKNOWN;
18951 if (lvaIsImplicitByRefLocal(lclNum))
18953 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
18954 // re-invoke the traversal to mark address-taken locals.
18955 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
18956 // If we do, leave it as-is.
18957 if (!varTypeIsStruct(lclVarTree))
18959 assert(lclVarTree->TypeGet() == TYP_BYREF);
18963 else if (lclVarDsc->lvPromoted)
18965 // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
18966 // arg. Rewrite this to refer to the new local.
18967 assert(lclVarDsc->lvFieldLclStart != 0);
18968 lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
18972 fieldHnd = nullptr;
18974 else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
18976 // This was a field reference to an implicit-by-reference struct parameter that was
18977 // dependently promoted; update it to a field reference off the pointer.
18978 // Grab the field handle from the struct field lclVar.
18979 fieldHnd = lclVarDsc->lvFieldHnd;
18980 fieldOffset = lclVarDsc->lvFldOffset;
18981 assert(fieldHnd != nullptr);
18982 // Update lclNum/lclVarDsc to refer to the parameter
18983 lclNum = lclVarDsc->lvParentLcl;
18984 lclVarDsc = &lvaTable[lclNum];
18985 fieldRefType = lclVarTree->TypeGet();
// We only need to transform the 'marked' implicit byref parameters.
18993 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
18994 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
18998 if (fieldHnd == nullptr)
19000 // change &X into just plain X
19001 tree->ReplaceWith(lclVarTree, this);
19002 tree->gtType = TYP_BYREF;
19006 // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
19007 // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
19008 lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
19009 lclVarTree->gtType = TYP_BYREF;
19010 tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
19016 printf("Replacing address of implicit by ref struct parameter with byref:\n");
19022 // Change X into OBJ(X) or FIELD(X, f)
19023 var_types structType = tree->gtType;
19024 tree->gtType = TYP_BYREF;
19028 tree->gtLclVarCommon.SetLclNum(lclNum);
19029 tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
19033 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
19036 if (structType == TYP_STRUCT)
19038 gtSetObjGcInfo(tree->AsObj());
19041 // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
19042 // we could remove TGTANYWHERE
19043 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
19048 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
19063 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
19064 enum AddrExposedContext
19066 AXC_None, // None of the below seen yet.
19067 AXC_Ind, // The address being computed is to be dereferenced.
19068 AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
19069 AXC_IndWide, // A block operation dereferenced an address referencing more bytes than the address
19070 // addresses -- if the address addresses a field of a struct local, we need to consider
19071 // the entire local address taken (not just the field).
19072 AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
19073 // on more bytes than the width of the storage location addressed. If this is a
19074 // field of a promoted struct local, declare the entire struct local address-taken.
AXC_IndAdd,   // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
19076 // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
19079 typedef ArrayStack<AddrExposedContext> AXCStack;
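// For illustration (informal, not from the original source): walking
// ASG(BLK(ADDR(LCL_VAR V02)), ...) where the BLK width exceeds V02's size,
// the BLK pushes AXC_IndWide, the ADDR underneath it then pushes
// AXC_AddrWide, and the LCL_VAR seen in an AXC_AddrWide context is marked
// address-exposed (along with its parent struct, if it is a promoted field).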
// We use pre- and post-order callbacks to simulate passing an argument through the recursion, via a stack.
19082 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTree** pTree, fgWalkData* fgWalkPre)
19084 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
19085 (void)axcStack->Pop();
19086 return WALK_CONTINUE;
19089 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTree** pTree, fgWalkData* fgWalkPre)
19091 GenTree* tree = *pTree;
19092 Compiler* comp = fgWalkPre->compiler;
19093 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
19094 AddrExposedContext axc = axcStack->Top();
19096 // In some situations, we have to figure out what the effective context is in which to
19097 // evaluate the current tree, depending on which argument position it is in its parent.
19104 GenTree* parent = fgWalkPre->parentStack->Index(1);
19105 assert(parent->OperGet() == GT_ADD);
19106 // Is one of the args a constant representing a field offset,
19107 // and is this the other? If so, Ind context.
19108 if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
19112 else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
19127 // Now recurse properly for the tree.
19128 switch (tree->gtOper)
19131 if (axc != AXC_Addr)
19133 axcStack->Push(AXC_Ind);
19137 axcStack->Push(AXC_None);
19139 return WALK_CONTINUE;
19143 if (axc == AXC_Addr)
19145 axcStack->Push(AXC_None);
19147 else if (tree->TypeGet() == TYP_STRUCT)
// The block operation will dereference its argument(s) -- usually. If the size of the initblk
// or copyblk exceeds the size of a storage location whose address is used as one of the
// arguments, then we have to consider that storage location (indeed, its underlying containing
// location) to be address taken. So get the width of the initblk or copyblk.
19154 GenTree* parent = fgWalkPre->parentStack->Index(1);
19155 GenTreeBlk* blk = tree->AsBlk();
19156 unsigned width = blk->gtBlkSize;
19157 noway_assert(width != 0);
19159 GenTree* addr = blk->Addr();
19160 if (addr->OperGet() == GT_ADDR)
19162 if (parent->gtOper == GT_ASG)
19164 if ((tree == parent->gtOp.gtOp1) &&
19165 ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
19172 assert(parent->gtOper == GT_CALL);
19175 axcStack->Push(axc);
19179 // This is like a regular GT_IND.
19180 axcStack->Push(AXC_Ind);
19182 return WALK_CONTINUE;
19185 // Assume maximal width.
19186 axcStack->Push(AXC_IndWide);
19187 return WALK_CONTINUE;
19190 case GT_FIELD_LIST:
19191 axcStack->Push(AXC_None);
19192 return WALK_CONTINUE;
19195 // Taking the address of an array element never takes the address of a local.
19196 axcStack->Push(AXC_None);
19197 return WALK_CONTINUE;
19200 #ifdef FEATURE_SIMD
19201 if (tree->gtOp.gtOp1->OperIsSIMDorSimdHWintrinsic())
19203 axcStack->Push(AXC_None);
19206 #endif // FEATURE_SIMD
19207 if (axc == AXC_Ind)
19209 axcStack->Push(AXC_None);
19211 else if (axc == AXC_IndWide)
19213 axcStack->Push(AXC_AddrWide);
19217 assert(axc == AXC_None);
19218 axcStack->Push(AXC_Addr);
19220 return WALK_CONTINUE;
19223 // First, handle a couple of special cases: field of promoted struct local, field
19224 // of "normed" struct.
19225 if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
19227 // It (may have) replaced the field with a local var or local field. If we're in an addr context,
19228 // label it addr-taken.
19229 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
19231 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19232 comp->lvaSetVarAddrExposed(lclNum);
19233 if (axc == AXC_AddrWide)
19235 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19236 if (varDsc->lvIsStructField)
19238 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
19242 // Push something to keep the PostCB, which will pop it, happy.
19243 axcStack->Push(AXC_None);
19244 return WALK_SKIP_SUBTREES;
19248 // GT_FIELD is an implicit deref.
19249 if (axc == AXC_Addr)
19251 axcStack->Push(AXC_None);
19253 else if (axc == AXC_AddrWide)
19255 axcStack->Push(AXC_IndWide);
19259 axcStack->Push(AXC_Ind);
19261 return WALK_CONTINUE;
19266 assert(axc != AXC_Addr);
19267 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19268 if (comp->lvaIsImplicitByRefLocal(lclNum))
19270 // Keep track of the number of appearances of each promoted implicit
19271 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
19272 // checks the ref counts for implicit byref params when deciding if it's legal
19273 // to elide certain copies of them.
19274 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19275 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField\n", varDsc->lvRefCnt,
19276 varDsc->lvRefCnt + 1, lclNum);
19278 varDsc->lvRefCnt++;
19280 // This recognizes certain forms, and does all the work. In that case, returns WALK_SKIP_SUBTREES,
19281 // else WALK_CONTINUE. We do the same here.
19282 fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
19283 if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
19285 comp->lvaSetVarAddrExposed(lclNum);
19286 if (axc == AXC_AddrWide)
19288 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19289 if (varDsc->lvIsStructField)
19291 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
19295 // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
19296 // what, but something to be popped by the post callback. If we're going
19297 // to analyze children, the LCL_FLD creates an Ind context, so use that.
19298 axcStack->Push(AXC_Ind);
19304 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19305 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19307 if (comp->lvaIsImplicitByRefLocal(lclNum))
19309 // Keep track of the number of appearances of each promoted implicit
19310 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
19311 // checks the ref counts for implicit byref params when deciding if it's legal
19312 // to elide certain copies of them.
19313 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField\n", varDsc->lvRefCnt,
19314 varDsc->lvRefCnt + 1, lclNum);
19316 varDsc->lvRefCnt++;
19319 if (axc == AXC_Addr || axc == AXC_AddrWide)
19321 comp->lvaSetVarAddrExposed(lclNum);
19322 if (axc == AXC_AddrWide)
19324 if (varDsc->lvIsStructField)
19326 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
// We may need to quirk the storage size for this LCL_VAR:
// some PInvoke signatures incorrectly specify a byref to an INT32
// when they actually write a SIZE_T or INT64.
19333 comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
19336 // Push something to keep the PostCB, which will pop it, happy.
19337 axcStack->Push(AXC_None);
19338 // The tree is a leaf.
19339 return WALK_SKIP_SUBTREES;
19343 assert(axc != AXC_Addr);
19344 // See below about treating pointer operations as wider indirection.
19345 if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
19347 axcStack->Push(AXC_IndWide);
19349 else if (axc == AXC_Ind)
19351 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
19352 // If it's an add of a constant and an address, and the constant represents a field,
19353 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
19354 axcStack->Push(AXC_IndAdd);
19358 axcStack->Push(axc);
19360 return WALK_CONTINUE;
19362 // !!! Treat Pointer Operations as Wider Indirection
19364 // If we are performing pointer operations, make sure we treat that as equivalent to a wider
19365 // indirection. This is because the pointers could be pointing to the address of struct fields
19366 // and could be used to perform operations on the whole struct or passed to another method.
19368 // When visiting a node in this pre-order walk, we do not know if we would in the future
19369 // encounter a GT_ADDR of a GT_FIELD below.
19371 // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
19372 // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
19373 // wider indirection context down the expr tree.
// For example, in unsafe code:
19377 // IL_000e 12 00 ldloca.s 0x0
19378 // IL_0010 7c 02 00 00 04 ldflda 0x4000002
19379 // IL_0015 12 00 ldloca.s 0x0
19380 // IL_0017 7c 01 00 00 04 ldflda 0x4000001
19383 // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then
19384 // consider GT_SUB to be equivalent of an AXC_IndWide.
19386 // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
19387 // them as AXC_IndWide.
19392 // Scan for byref args
19393 GenTreeCall* const call = tree->AsCall();
19394 for (GenTree* args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
19396 if (args->gtOp.gtOp1->gtType == TYP_BYREF)
19398 axcStack->Push(AXC_IndWide);
19399 return WALK_CONTINUE;
19428 if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
19429 (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
19431 axcStack->Push(AXC_IndWide);
19432 return WALK_CONTINUE;
19440 // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
// handle the "Ind" propagation explicitly above.
19442 if (axc == AXC_Addr || axc == AXC_AddrWide)
19444 axcStack->Push(axc);
19448 axcStack->Push(AXC_None);
19450 return WALK_CONTINUE;
19453 bool Compiler::fgFitsInOrNotLoc(GenTree* tree, unsigned width)
19455 if (tree->TypeGet() != TYP_STRUCT)
19457 return width <= genTypeSize(tree->TypeGet());
19459 else if (tree->OperGet() == GT_LCL_VAR)
19461 assert(tree->TypeGet() == TYP_STRUCT);
19462 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19463 return width <= lvaTable[lclNum].lvExactSize;
19465 else if (tree->OperGet() == GT_FIELD)
19467 CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
19468 return width <= info.compCompHnd->getClassSize(fldClass);
19470 else if (tree->OperGet() == GT_INDEX)
19472 return width <= tree->gtIndex.gtIndElemSize;
19480 void Compiler::fgAddFieldSeqForZeroOffset(GenTree* op1, FieldSeqNode* fieldSeq)
19482 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
19484 switch (op1->OperGet())
19487 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
19489 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
19490 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
19495 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
19497 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
19498 if (op1Fs != nullptr)
19500 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
19501 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
19504 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
19506 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
19507 if (op2Fs != nullptr)
19509 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
19510 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
19517 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
19518 if (op1Fs != nullptr)
19520 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
19521 op1->gtIntCon.gtFieldSeq = op1Fs;
19527 // Record in the general zero-offset map.
19528 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
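// For example (informal): given a C#-like access s.f where field f lives at
// offset 0, the address computation is just ADDR(s), with no GT_ADD of a
// constant to hang the field sequence on. fgAddFieldSeqForZeroOffset attaches
// the sequence to the constant or LCL_FLD when one exists, and otherwise
// records it in the zero-offset field map keyed by the address node.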
19533 /*****************************************************************************
19535 * Mark address-taken locals.
19538 void Compiler::fgMarkAddressExposedLocals()
19543 printf("\n*************** In fgMarkAddressExposedLocals()\n");
19547 BasicBlock* block = fgFirstBB;
19548 noway_assert(block);
19552 /* Make the current basic block address available globally */
19558 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
19560 // Call Compiler::fgMarkAddrTakenLocalsCB on each node
19561 AXCStack stk(this);
19562 stk.Push(AXC_None); // We start in neither an addr or ind context.
19563 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
19566 block = block->bbNext;
19571 // fgNodesMayInterfere:
19572 // return true if moving nodes relative to each other can change the result of a computation
// write: a node which writes
// read:  a node which reads
19578 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
19580 LclVarDsc* srcVar = nullptr;
19582 bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
19583 bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
19585 if (read->OperIsLocal())
19587 srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
19592 if (srcVar && srcVar->lvAddrExposed)
19596 else if (readIsIndir)
19602 else if (write->OperIsLocal())
19604 LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
19607 return dstVar->lvAddrExposed;
19609 else if (read->OperIsLocal())
19611 if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
19628 #ifdef LEGACY_BACKEND
19629 /** This predicate decides whether we will fold a tree with the structure:
19630 * x = x <op> y where x could be any arbitrary expression into
19633 * This modification is only performed when the target architecture supports
19634 * complex addressing modes. In the case of ARM for example, this transformation
19635 * yields no benefit.
 * In case this function decides we can proceed to fold into an assignment operator,
 * we need to inspect whether the operator is commutative, to tell fgMorph whether it needs to
 * reverse the tree: if we saw x = y <op> x, we want to fold that into
 * x <op>= y, which is only valid because the operator is commutative.
19642 bool Compiler::fgShouldCreateAssignOp(GenTree* tree, bool* bReverse)
19644 #if CPU_LOAD_STORE_ARCH
19645 /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
19648 GenTree* op1 = tree->gtOp.gtOp1;
19649 GenTree* op2 = tree->gtGetOp2();
19650 genTreeOps cmop = op2->OperGet();
19652 /* Is the destination identical to the first RHS sub-operand? */
19653 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
19656 Do not transform the following tree
19658 [0024CFA4] ----------- const int 1
19659 [0024CFDC] ----G------ | int
19660 [0024CF5C] ----------- lclVar ubyte V01 tmp0
19661 [0024D05C] -A--G------ = ubyte
[0024D014] D------N--- lclVar ubyte V01 tmp0

   to
19666 [0024CFA4] ----------- const int 1
19667 [0024D05C] -A--G------ |= ubyte
19668 [0024D014] U------N--- lclVar ubyte V01 tmp0
19670 , when V01 is a struct field local.
19673 if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
19675 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
19676 LclVarDsc* varDsc = lvaTable + lclNum;
19678 if (varDsc->lvIsStructField)
19687 else if (GenTree::OperIsCommutative(cmop))
19689 /* For commutative ops only, check for "a = x <op> a" */
19691 /* Should we be doing this at all? */
19692 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
19697 /* Can we swap the operands to cmop ... */
19698 if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
// Both sides must have side effects to prevent the swap.
19704 /* Is the destination identical to the second RHS sub-operand? */
19705 if (GenTree::Compare(op1, op2->gtOp.gtOp2))
19712 #endif // !CPU_LOAD_STORE_ARCH
19714 #endif // LEGACY_BACKEND
19716 #ifdef FEATURE_SIMD
19718 //-----------------------------------------------------------------------------------
19719 // fgMorphCombineSIMDFieldAssignments:
19720 // If the RHS of the input stmt is a read for simd vector X Field, then this function
19721 // will keep reading next few stmts based on the vector size(2, 3, 4).
19722 // If the next stmts LHS are located contiguous and RHS are also located
19723 // contiguous, then we replace those statements with a copyblk.
19726 // block - BasicBlock*. block which stmt belongs to
19727 // stmt - GenTreeStmt*. the stmt node we want to check
//    if this function successfully optimized the stmts, then return true; otherwise
//    return false.
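//
// For illustration (hypothetical C#-like source, assuming a Vector4 v and a
// float* p): four contiguous float assignments from the same vector, e.g.
//
//     p[0] = v.X; p[1] = v.Y; p[2] = v.Z; p[3] = v.W;
//
// are replaced below by a single 16-byte copyblk from v to &p[0].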
19733 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* stmt)
19736 noway_assert(stmt->gtOper == GT_STMT);
19737 GenTree* tree = stmt->gtStmt.gtStmtExpr;
19738 assert(tree->OperGet() == GT_ASG);
19740 GenTree* originalLHS = tree->gtOp.gtOp1;
19741 GenTree* prevLHS = tree->gtOp.gtOp1;
19742 GenTree* prevRHS = tree->gtOp.gtOp2;
19743 unsigned index = 0;
19744 var_types baseType = TYP_UNKNOWN;
19745 unsigned simdSize = 0;
19746 GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
19748 if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
19750 // if the RHS is not from a SIMD vector field X, then there is no need to check further.
19754 var_types simdType = getSIMDTypeForSize(simdSize);
19755 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
19756 int remainingAssignments = assignmentsCount;
19757 GenTree* curStmt = stmt->gtNext;
19758 GenTree* lastStmt = stmt;
19760 while (curStmt != nullptr && remainingAssignments > 0)
19762 GenTree* exp = curStmt->gtStmt.gtStmtExpr;
19763 if (exp->OperGet() != GT_ASG)
19767 GenTree* curLHS = exp->gtGetOp1();
19768 GenTree* curRHS = exp->gtGetOp2();
19770 if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
19775 remainingAssignments--;
19779 lastStmt = curStmt;
19780 curStmt = curStmt->gtNext;
19783 if (remainingAssignments > 0)
// If any assignments remain, then the statements do not assign to
// contiguous memory locations from the same vector, so we cannot
// combine them.
19793 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
19794 printf("From BB%02u, stmt", block->bbNum);
19796 printf(" to stmt");
19797 printTreeID(lastStmt);
19802 for (int i = 0; i < assignmentsCount; i++)
19804 fgRemoveStmt(block, stmt->gtNext);
19807 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
19808 if (simdStructNode->OperIsLocal())
19810 setLclRelatedToSIMDIntrinsic(simdStructNode);
19812 GenTree* copyBlkAddr = copyBlkDst;
19813 if (copyBlkAddr->gtOper == GT_LEA)
19815 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
19817 GenTreeLclVarCommon* localDst = nullptr;
19818 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
19820 setLclRelatedToSIMDIntrinsic(localDst);
19823 if (simdStructNode->TypeGet() == TYP_BYREF)
19825 assert(simdStructNode->OperIsLocal());
19826 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
19827 simdStructNode = gtNewIndir(simdType, simdStructNode);
19831 assert(varTypeIsSIMD(simdStructNode));
19837 printf("\nBB%02u stmt", block->bbNum);
19839 printf("(before)\n");
19844 // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
19845 GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
19846 blkNode->gtType = simdType;
19847 tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
19848 false, // not volatile
19849 true); // copyBlock
19851 stmt->gtStmt.gtStmtExpr = tree;
19853 // Since we generated a new address node which didn't exist before,
19854 // we should expose this address manually here.
19855 AXCStack stk(this);
19856 stk.Push(AXC_None);
19857 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
19862 printf("\nReplaced BB%02u stmt", block->bbNum);
19864 printf("(after)\n");
19871 #endif // FEATURE_SIMD
19873 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
19874 GenTreeStmt* SkipNopStmts(GenTreeStmt* stmt)
// Advance while the current statement's expression is a nothing (nop) node.
while ((stmt != nullptr) && stmt->gtStmtExpr->IsNothingNode())
19878 stmt = stmt->gtNextStmt;
19883 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
19885 //------------------------------------------------------------------------
19886 // fgCheckStmtAfterTailCall: check that statements after the tail call stmt
// candidate are in one of the expected forms, which are described below.
19890 // 'true' if stmts are in the expected form, else 'false'.
19892 bool Compiler::fgCheckStmtAfterTailCall()
19895 // For void calls, we would have created a GT_CALL in the stmt list.
19896 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
19897 // For calls returning structs, we would have a void call, followed by a void return.
19898 // For debuggable code, it would be an assignment of the call to a temp
// We want to get rid of any of these extra trees and just leave
// the call.
19901 GenTreeStmt* callStmt = fgMorphStmt;
19903 GenTreeStmt* nextMorphStmt = callStmt->gtNextStmt;
19905 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
19906 // Legacy Jit64 Compat:
19907 // There could be any number of GT_NOPs between tail call and GT_RETURN.
19908 // That is tail call pattern could be one of the following:
19909 // 1) tail.call, nop*, ret
19910 // 2) tail.call, nop*, pop, nop*, ret
19911 // 3) var=tail.call, nop*, ret(var)
19912 // 4) var=tail.call, nop*, pop, ret
19913 // 5) comma(tail.call, nop), nop*, ret
19915 // See impIsTailCallILPattern() for details on tail call IL patterns
19916 // that are supported.
19917 GenTree* callExpr = callStmt->gtStmtExpr;
19919 if (callExpr->gtOper != GT_RETURN)
19921 // First skip all GT_NOPs after the call
19922 nextMorphStmt = SkipNopStmts(nextMorphStmt);
19924 // Check to see if there is a pop.
19925 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
19926 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
19928 // Note that pop opcode may or may not result in a new stmt (for details see
19929 // impImportBlockCode()). Hence, it is not possible to assert about the IR
// form generated by pop, but the pop tree must be side-effect free so that we can
19931 // delete it safely.
19932 GenTreeStmt* popStmt = nextMorphStmt;
19934 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
19935 // the constituent nodes.
19936 GenTree* popExpr = popStmt->gtStmtExpr;
19937 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
19938 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
19940 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
19941 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
19943 noway_assert(isSideEffectFree);
19945 nextMorphStmt = popStmt->gtNextStmt;
19948 // Next skip any GT_NOP nodes after the pop
19949 nextMorphStmt = SkipNopStmts(nextMorphStmt);
19951 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
// Check that the remaining stmts in the block match one of the following patterns:
//  1) ret(void)
19955 // 2) ret(cast*(callResultLclVar))
19956 // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block
19957 if (nextMorphStmt != nullptr)
19959 GenTree* callExpr = callStmt->gtStmtExpr;
19960 if (callExpr->gtOper != GT_ASG)
19962 // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar),
19963 // where lclVar was return buffer in the call for structs or simd.
19964 GenTreeStmt* retStmt = nextMorphStmt;
19965 GenTree* retExpr = retStmt->gtStmtExpr;
19966 noway_assert(retExpr->gtOper == GT_RETURN);
19968 nextMorphStmt = retStmt->gtNextStmt;
19972 noway_assert(callExpr->gtGetOp1()->OperIsLocal());
19973 unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
19975 #if FEATURE_TAILCALL_OPT_SHARED_RETURN
19977 // We can have a move from the call result to an lvaInlineeReturnSpillTemp.
19978 // However, we can't check that this assignment was created there.
19979 if (nextMorphStmt->gtStmtExpr->gtOper == GT_ASG)
19981 GenTreeStmt* moveStmt = nextMorphStmt;
19982 GenTree* moveExpr = nextMorphStmt->gtStmtExpr;
19983 noway_assert(moveExpr->gtGetOp1()->OperIsLocal() && moveExpr->gtGetOp2()->OperIsLocal());
19985 unsigned srcLclNum = moveExpr->gtGetOp2()->AsLclVarCommon()->gtLclNum;
19986 noway_assert(srcLclNum == callResultLclNumber);
19987 unsigned dstLclNum = moveExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
19988 callResultLclNumber = dstLclNum;
19990 nextMorphStmt = moveStmt->gtNextStmt;
19992 if (nextMorphStmt != nullptr)
19995 GenTreeStmt* retStmt = nextMorphStmt;
19996 GenTree* retExpr = nextMorphStmt->gtStmtExpr;
19997 noway_assert(retExpr->gtOper == GT_RETURN);
19999 GenTree* treeWithLcl = retExpr->gtGetOp1();
20000 while (treeWithLcl->gtOper == GT_CAST)
20002 noway_assert(!treeWithLcl->gtOverflow());
20003 treeWithLcl = treeWithLcl->gtGetOp1();
20006 noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->gtLclNum);
20008 nextMorphStmt = retStmt->gtNextStmt;
20012 return nextMorphStmt == nullptr;