// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                               Morph                                       XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "allocacheck.h" // for alloca
// Convert the given node into a call to the specified helper passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for overflow exception.
// Returns the morphed tree.
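//
// For example (illustrative sketch, not an exhaustive list of callers): on
// targets with no direct double-to-ulong instruction, fgMorphCast hands the
// cast node to this routine as
//     fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
// and the GT_CAST node itself is rewritten into the helper call.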
GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper)
{
    GenTree* result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTree* oldTree = tree;

        tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

        if (tree != oldTree)
        {
            return fgMorphTree(tree);
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return fgMorphConst(tree);
        }

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}
/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */
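// Note (sketch of the behavior implemented below): the node is mutated in
// place via ChangeOper(GT_CALL, GenTree::PRESERVE_VN), so a caller that holds
// a pointer to 'tree' ends up holding the CT_HELPER call, value number intact.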
GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    tree->gtCall.gtCallType            = CT_HELPER;
    tree->gtCall.gtCallMethHnd         = eeFindHelper(helper);
    tree->gtCall.gtCallArgs            = args;
    tree->gtCall.gtCallObjp            = nullptr;
    tree->gtCall.gtCallLateArgs        = nullptr;
    tree->gtCall.fgArgInfo             = nullptr;
    tree->gtCall.gtRetClsHnd           = nullptr;
    tree->gtCall.gtCallMoreFlags       = 0;
    tree->gtCall.gtInlineCandidateInfo = nullptr;
    tree->gtCall.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

#if DEBUG
    // Helper calls are never candidates.
    tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr       = nullptr;
    tree->gtCall.gtEntryPoint.accessType = IAT_VALUE;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    callNode    = tree->AsCall();
        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
        retTypeDesc->InitializeLongReturnType(this);
        callNode->ClearOtherRegs();
    }
#endif // _TARGET_XXX_

    if (tree->OperMayThrow(this))
    {
        tree->gtFlags |= GTF_EXCEPT;
    }
    else
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }
    tree->gtFlags |= GTF_CALL;

    if (args)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}
/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
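// For example (illustrative): with the legacy backend on x86, materializing
//     bool b = (longVal == 0);
// cannot be done as straight-line code, so the relop is wrapped in a GT_QMARK
// that selects 1 or 0.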
bool Compiler::fgMorphRelopToQmark(GenTree* tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else  // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}
/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTree* Compiler::fgMorphCast(GenTree* tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE);

    /* The first sub-operand is the thing being cast */

    GenTree* oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);
    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all integral types;
        // everybody else can get straight there, except when using helpers.
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
            )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
        }
        // do we need to do it in two steps R -> I, '-> smallType
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < TARGET_POINTER_SIZE)
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we can not morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                    case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
#ifdef LEGACY_BACKEND
                        // the RyuJIT backend does not use the x87 FPU and therefore
                        // does not support folding the cast conv.i4(round.d(d))
                        if ((oper->gtOper == GT_INTRINSIC) &&
                            (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                        {
                            /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                            oper->gtType = dstType;
                            return fgMorphTree(oper);
                        }
                        // if SSE2 is not enabled, we need the helper
                        else
#endif // LEGACY_BACKEND
                            if (!opts.compCanUseSSE2)
                        {
                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                        }
#endif // _TARGET_X86_
                        goto OPTIMIZECAST;

                    case TYP_UINT:
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                        goto OPTIMIZECAST;
#else  // _TARGET_X86_
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_X86_

                    case TYP_LONG:
#ifdef _TARGET_AMD64_
                        // SSE2 has instructions to convert a float/double directly to a long
                        goto OPTIMIZECAST;
#else  // !_TARGET_AMD64_
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif // _TARGET_AMD64_

                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                switch (dstType)
                {
                    case TYP_INT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                    default:
                        noway_assert(!"Unexpected dstType");
                }
            }
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_
#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // converts long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);

            return fgMorphTree(tree);
        }
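        // e.g. (illustrative): CAST(float <- long) becomes
        //     CAST(float <- CALL CORINFO_HELP_LNG2DBL(oper))
        // where the remaining long-to-double part is handled just below.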
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_
#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversion as one-step operation
    // a) Long -> R4/8
    // b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using above.
    // U4 -> R4/8 = U4 -> Long -> R4/8
    // U8 -> R4   = U8 -> R8 -> R4
    else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_
#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif // !LEGACY_BACKEND
#endif // _TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information. We would like to just
        // change the type to int, however this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group, but is not turned non-gc by the code generator.
        // We fix this by copying the GC pointer to a non-gc pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs, etc...
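        // Resulting shape (illustrative sketch):
        //     COMMA(ASG(tmpN int, oper), CAST(dstType <- LCL_VAR tmpN))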
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        oper->gtType    = TYP_I_IMPL;
        GenTree* asg    = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

        return fgMorphTree(oper);
    }
    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
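    // For example (illustrative): CAST(int <- ADD(long, long)) can be rewritten
    // as ADD(CAST(int <- long), CAST(int <- long)), because truncation to 32
    // bits commutes with the operators listed below.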
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTree* andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2           = gtFoldExprConst(andOp2);
                oper->gtOp.gtOp2 = andOp2;
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }
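        // e.g. (illustrative): for CAST.ovf(uint <- AND(x, 0xFFL)) above, the
        // mask 0xFF shifted right by maxWidth == 32 bits is 0, so the overflow
        // check (and its exception edge) can be removed.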
        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depend
            // upon the lower 32 bits of the operands.
            //
            bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);

            // For long LSH cast to int, there is a discontinuity in behavior
            // when the shift amount is 32 or larger.
            //
            // CAST(INT, LSH(1LL, 31)) == LSH(1, 31)
            // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31)
            //
            // CAST(INT, LSH(1LL, 32)) == 0
            // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1
            //
            // So some extra validation is needed.
            //
            if (oper->OperIs(GT_LSH))
            {
                GenTree* shiftAmount = oper->gtOp.gtOp2;

                // Expose constant value for shift, if possible, to maximize the number
                // of cases we can handle.
                shiftAmount      = gtFoldExpr(shiftAmount);
                oper->gtOp.gtOp2 = shiftAmount;

#if DEBUG
                // We may remorph the shift amount tree again later, so clear any morphed flag.
                shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG

                if (shiftAmount->IsIntegralConst())
                {
                    const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue();

                    if (shiftAmountValue >= 64)
                    {
                        // Shift amount is large enough that result is undefined.
                        // Don't try to optimize.
                        assert(!canPushCast);
                    }
                    else if (shiftAmountValue >= 32)
                    {
                        // Result of the shift is zero.
                        DEBUG_DESTROY_NODE(tree);
                        GenTree* zero = gtNewZeroConNode(TYP_INT);
                        return fgMorphTree(zero);
                    }
                    else if (shiftAmountValue >= 0)
                    {
                        // Shift amount is small enough that we can push the cast through.
                        canPushCast = true;
                    }
                    else
                    {
                        // Shift amount is negative and so result is undefined.
                        // Don't try to optimize.
                        assert(!canPushCast);
                    }
                }
                else
                {
                    // Shift amount is unknown. We can't optimize this case.
                    assert(!canPushCast);
                }
            }

            if (canPushCast)
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType);
                }

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }
OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* Reset the assignment flag */
    tree->gtFlags &= ~GTF_ASG;

    /* unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        srcType = oper->TypeGet();

        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType))
            {
                if (varTypeIsSmall(srcType))
                {
                    // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the
                    // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType
                    // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is
                    // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion.
                    srcType = genActualType(srcType);
                }

                srcType = genUnsignedType(srcType);
            }

            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }
            bool     unsignedSrc = varTypeIsUnsigned(srcType);
            bool     unsignedDst = varTypeIsUnsigned(dstType);
            bool     signsDiffer = (unsignedSrc != unsignedDst);
            unsigned srcSize     = genTypeSize(srcType);

            // For same sized casts with
            //    the same signs or non-overflow cast we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                    else
                    {
                        goto REMOVE_CAST;
                    }
                }
            }
            else if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Widening casts from unsigned or to signed can never overflow

                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                    if (!(oper->gtFlags & GTF_EXCEPT))
                    {
                        tree->gtFlags &= ~GTF_EXCEPT;
                    }
                }
            }
            else // if (srcSize > dstSize)
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE
                // And do not narrow if the oper is marked as a CSE either
                //
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }
        switch (oper->gtOper)
        {
            /* If the operand is a constant, we'll fold it */
            case GT_CNS_INT:
            case GT_CNS_LNG:
            case GT_CNS_DBL:
            case GT_CNS_STR:
            {
                GenTree* oldTree = tree;

                tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

                // Did we get a comma throw as a result of gtFoldExprConst?
                if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
                {
                    noway_assert(fgIsCommaThrow(tree));
                    tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                    fgMorphTreeDone(tree);
                    return tree;
                }
                else if (tree->gtOper != GT_CAST)
                {
                    return tree;
                }

                noway_assert(tree->gtCast.CastOp() == oper); // unchanged
            }
            break;

            case GT_CAST:
                /* Check for two consecutive casts into the same dstType */
                if (!tree->gtOverflow())
                {
                    var_types dstType2 = oper->CastToType();
                    if (dstType == dstType2)
                    {
                        goto REMOVE_CAST;
                    }
                }
                break;
#ifdef LEGACY_BACKEND
            case GT_MOD:
                /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
                   so that the code generator will know not to convert the result
                   of the idiv to a regpair */
                if (dstType == TYP_INT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }
                break;

            case GT_UMOD:
                if (dstType == TYP_UINT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }
                break;
#endif // LEGACY_BACKEND
            case GT_COMMA:
                // Check for cast of a GT_COMMA with a throw overflow
                // Bug 110829: Since this optimization will bash the types
                // neither oper nor commaOp2 can be CSE candidates
                if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
                {
                    GenTree* commaOp2 = oper->gtOp.gtOp2;

                    if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
                    {
                        // need type of oper to be same as tree
                        if (tree->gtType == TYP_LONG)
                        {
                            commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                            commaOp2->gtIntConCommon.SetLngValue(0);
                            /* Change the types of oper and commaOp2 to TYP_LONG */
                            oper->gtType = commaOp2->gtType = TYP_LONG;
                        }
                        else if (varTypeIsFloating(tree->gtType))
                        {
                            commaOp2->ChangeOperConst(GT_CNS_DBL);
                            commaOp2->gtDblCon.gtDconVal = 0.0;
                            // Change the types of oper and commaOp2
                            // X87 promotes everything to TYP_DOUBLE
                            // But others are a little more precise
                            const var_types newTyp
#if FEATURE_X87_DOUBLES
                                = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                                = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                            oper->gtType = commaOp2->gtType = newTyp;
                        }
                        else
                        {
                            commaOp2->ChangeOperConst(GT_CNS_INT);
                            commaOp2->gtIntCon.gtIconVal = 0;
                            /* Change the types of oper and commaOp2 to TYP_INT */
                            oper->gtType = commaOp2->gtType = TYP_INT;
                        }
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }

                    /* Return the GT_COMMA node as the new tree */
                    return oper;
                }
                break;

            default:
                break;
        } /* end switch (oper->gtOper) */
    }

    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:
    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif
/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object
 */
850 assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
852 CORINFO_EE_INFO* pInfo = eeGetEEInfo();
855 // Perform the unwrap:
857 // This requires two extra indirections.
858 // We mark these indirections as 'invariant' and
859 // the CSE logic will hoist them when appropriate.
861 // Note that each dereference is a GC pointer
863 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
865 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
866 objRef->gtFlags |= GTF_IND_INVARIANT;
868 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
870 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
871 objRef->gtFlags |= GTF_IND_INVARIANT;
873 // objRef now hold the 'real this' reference (i.e. the unwrapped proxy)
/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *        due to graph altering modifications such as copy / constant propagation
 */
unsigned UpdateGT_LISTFlags(GenTree* tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}
#ifdef DEBUG
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (tmpNum != (unsigned)-1)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif // DEBUG
fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    compiler    = comp;
    callTree    = call;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    argTableSize = numArgs; // the allocated table size

    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;
    argsSorted   = false;

    if (argTableSize == 0)
    {
        argTable = nullptr;
    }
    else
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
    }
}
/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = nullptr;
    if (argTableSize > 0)
    {
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);
    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first argument,
    // so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace it.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);
        newArgObjp.Current() = newCall->gtCallObjp;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }
    GenTree*        newCurr;
    GenTree*        oldCurr;
    GenTreeArgList* newParent   = nullptr;
    GenTreeArgList* oldParent   = nullptr;
    fgArgTabEntry** oldArgTable = oldArgInfo->argTable;
    bool            scanRegArgs = false;

    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntry* oldArgTabEntry = nullptr;
        fgArgTabEntry* newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTree* fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                //  to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }
        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }
    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntry* oldArgTabEntry = nullptr;
            fgArgTabEntry* newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTree* fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}
void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}
fgArgTabEntry* fgArgInfo::AddRegArg(
    unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(curArgTabEntry);
    return curArgTabEntry;
}
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntry* fgArgInfo::AddRegArg(unsigned                                                         argNum,
                                    GenTree*                                                         node,
                                    GenTree*                                                         parent,
                                    regNumber                                                        regNum,
                                    unsigned                                                         numRegs,
                                    unsigned                                                         alignment,
                                    const bool                                                       isStruct,
                                    const regNumber                                                  otherRegNum,
                                    const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(curArgTabEntry != nullptr);

    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag; at creation time the state is correct,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg

    if (isStruct && structDescPtr != nullptr)
    {
        curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
    }

    return curArgTabEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
                                    GenTree* node,
                                    GenTree* parent,
                                    unsigned numSlots,
                                    unsigned alignment FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
    // PlaceHolder node (in case of needed late argument, for example.)
    // This requires the use of an extra flag; at creation time the state is correct,
    // and this assert enforces that.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned)-1;
    curArgTabEntry->tmpNum        = (unsigned)-1;
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}
void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}
fgArgTabEntry* fgArgInfo::RemorphRegArg(
    unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntry* curArgTabEntry = nullptr;
    unsigned       regArgInx      = 0;
    unsigned       inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        bool     isRegArg;
        GenTree* argx;
        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }
    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
    {
        regNum = curArgTabEntry->regNum;
    }

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->regNum == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);

    if (curArgTabEntry->node != node)
    {
        GenTree* argx     = nullptr;
        unsigned regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
            if (regIndex == regArgInx)
            {
                break;
            }
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}
void fgArgInfo::RemorphStkArg(unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment)
{
    fgArgTabEntry* curArgTabEntry = nullptr;
    bool           isRegArg       = false;
    unsigned       regArgInx      = 0;
    GenTree*       argx;
    unsigned       inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->slotNum == nextSlotNum);
    assert(curArgTabEntry->numSlots == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);
    assert(parent->OperIsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTree* argx     = nullptr;
            unsigned regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                {
                    break;
                }
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif

    nextSlotNum += numSlots;
}
void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntry* curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    if (argsComplete)
    {
        assert(curArgTabEntry->isSplit == true);
        assert(curArgTabEntry->numRegs == numRegs);
        assert(curArgTabEntry->numSlots == numSlots);
        assert(hasStackArgs == true);
    }
    else
    {
        curArgTabEntry->isSplit  = true;
        curArgTabEntry->numRegs  = numRegs;
        curArgTabEntry->numSlots = numSlots;
        hasStackArgs             = true;
    }
    nextSlotNum += numSlots;
}
void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTree* newNode)
{
    fgArgTabEntry* curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}
void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntry* curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != nullptr);
        GenTree* argx = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
        else if (curArgTabEntry->isSplit)
        {
            hasStructRegArg = true;
            hasStackArgs    = true;
        }
#endif // _TARGET_ARM_ && !LEGACY_BACKEND
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument
           and every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists some assignment someplace
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        //
        if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS
        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
           has to be kept in the right order since we will move the call to the first position)

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address taken LclVars.
         */

        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
                else if (prevArgTabEntry->isSplit)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif // _TARGET_ARM_ && !LEGACY_BACKEND
#endif // FEATURE_FIXED_OUT_ARGS
            }
        }
#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_
        bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1);
#else
        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
#endif

        if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
#if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_)
                else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
                {
                    // SIMD types do not need the optimization below due to their sizes
                    if (argx->OperIs(GT_SIMD) || (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
                                                  argx->AsObj()->gtOp1->gtOp.gtOp1->OperIs(GT_SIMD)))
                    {
                        curArgTabEntry->needTmp = true;
                    }
                }
#endif
#ifndef _TARGET_ARM_
                // TODO-Arm: This optimization is not implemented for ARM32
                // so we skip this for ARM32 until it is ported to use RyuJIT backend
                //
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;
                        case 11:
                        case 13:
                        case 14:
                        case 15:
                            // Spill any GT_OBJ multireg structs that are difficult to extract
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
#endif // !_TARGET_ARM_
            }
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }
    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmark's globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS
    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTree* argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
                    // Thus we can not reorder the argument after any stack based argument
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    //  check for it explicitly.)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns WALK_ABORT if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
                }
            }
        }
    }

    argsComplete = true;
}
void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */
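    // For example (illustrative): for a call such as f(5, x, g(), h() + 1) the
    // sorted table is roughly [g(), h() + 1, x, 5]: the calls are evaluated
    // first and the constant last.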
    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    unsigned regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntry* curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTree* argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);
    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTree* argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }
    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }
    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTree* argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }
    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntry* expensiveArgTabEntry = nullptr;
        unsigned       expensiveArg         = UINT_MAX;
        unsigned       expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntry* curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTree* argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table

        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);
#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntry* curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}
#ifdef DEBUG
void fgArgInfo::Dump(Compiler* compiler)
{
    for (unsigned curInx = 0; curInx < ArgCount(); curInx++)
    {
        fgArgTabEntry* curArgEntry = ArgTable()[curInx];
        curArgEntry->Dump();
    }
}
#endif
//------------------------------------------------------------------------------
// fgMakeTmpArgNode : This function creates a tmp var only if needed.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    tmpVarNum  - the var num which we clone into the newly created temp var.
//
// Return Value:
//    the newly created temp var tree.
2166 GenTree* Compiler::fgMakeTmpArgNode(
2167 unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
2169 LclVarDsc* varDsc = &lvaTable[tmpVarNum];
2170 assert(varDsc->lvIsTemp);
2171 var_types type = varDsc->TypeGet();
2173 // Create a copy of the temp to go into the late argument list
2174 GenTree* arg = gtNewLclvNode(tmpVarNum, type);
2175 GenTree* addrNode = nullptr;
2177 if (varTypeIsStruct(type))
2180 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))
2182 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2184 arg->gtFlags |= GTF_DONT_CSE;
2186 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2187 // Can this type be passed in a single register?
2188 // If so, the following call will return the corresponding primitive type.
2189 // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
2191 bool passedInRegisters = false;
2192 structPassingKind kind;
2193 CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
2194 var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
2196 if (structBaseType != TYP_UNKNOWN)
2198 passedInRegisters = true;
2199 type = structBaseType;
2201 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2203 // If it is passed in registers, don't get the address of the var. Make it a
2204 // field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
2205 if (passedInRegisters)
2207 arg->ChangeOper(GT_LCL_FLD);
2212 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2213 // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
2214 var_types addrType = type;
2215 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
2216 var_types addrType = TYP_BYREF;
2217 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2218 arg = gtNewOperNode(GT_ADDR, addrType, arg);
2219 addrNode = arg;
2221 #if FEATURE_MULTIREG_ARGS
2222 #ifdef _TARGET_ARM64_
2223 assert(varTypeIsStruct(type));
2224 if (lvaIsMultiregStruct(varDsc))
2226 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
2227 // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
2228 // We will create a GT_OBJ for the argument below.
2229 // This will be passed by value in two registers.
2230 assert(addrNode != nullptr);
2232 // Create an Obj of the temp to use it as a call argument.
2233 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2235 // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
2236 // this is only to preserve former behavior (though some CSE'ing of struct
2237 // values can be pessimizing, so enabling this may require some additional tuning).
2238 arg->gtFlags |= GTF_DONT_CSE;
2240 #elif defined(_TARGET_ARM_)
2241 // Always create an Obj of the temp to use it as a call argument.
2242 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2243 arg->gtFlags |= GTF_DONT_CSE;
2244 #endif // _TARGET_ARM_
2245 #endif // FEATURE_MULTIREG_ARGS
2248 #else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))
2250 // On other targets, we pass the struct by value.
2251 assert(varTypeIsStruct(type));
2253 addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2255 // Get a new Obj node temp to use it as a call argument.
2256 // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
2257 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
2259 #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or (!LEGACY_BACKEND and _TARGET_ARM_))
2261 } // (varTypeIsStruct(type))
2263 if (addrNode != nullptr)
2265 assert(addrNode->gtOper == GT_ADDR);
2267 // This will prevent this LclVar from being optimized away
2268 lvaSetVarAddrExposed(tmpVarNum);
2270 // the child of a GT_ADDR is required to have this flag set
2271 addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
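// Editor's note: an illustrative summary of the shapes this function can produce for a
// struct temp V03 (hypothetical local number; the exact shape depends on target and ABI):
//   LCL_VAR V03                - temp passed by value in a single register
//   LCL_FLD V03                - struct temp retyped to a primitive register type
//   OBJ(ADDR(LCL_VAR V03))     - multireg / by-value struct; V03 becomes address-exposed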
2277 void fgArgInfo::EvalArgsToTemps()
2279 assert(argsSorted == true);
2281 unsigned regArgInx = 0;
2282 // Now go through the argument table and perform the necessary evaluation into temps
2283 GenTreeArgList* tmpRegArgNext = nullptr;
2284 for (unsigned curInx = 0; curInx < argCount; curInx++)
2286 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2288 GenTree* argx = curArgTabEntry->node;
2289 GenTree* setupArg = nullptr;
2290 GenTree* defArg;
2292 #if !FEATURE_FIXED_OUT_ARGS
2293 // Only ever set for FEATURE_FIXED_OUT_ARGS
2294 assert(curArgTabEntry->needPlace == false);
2296 // On x86 and other archs that use push instructions to pass arguments:
2297 // Only the register arguments need to be replaced with placeholder nodes.
2298 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2300 if (curArgTabEntry->regNum == REG_STK)
2301 continue;
2302 #endif // !FEATURE_FIXED_OUT_ARGS
2304 if (curArgTabEntry->needTmp)
2306 unsigned tmpVarNum;
2308 if (curArgTabEntry->isTmp == true)
2310 // Create a copy of the temp to go into the late argument list
2311 tmpVarNum = curArgTabEntry->tmpNum;
2312 defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
2313 argTable[curInx]->structDesc.passedInRegisters));
2315 // mark the original node as a late argument
2316 argx->gtFlags |= GTF_LATE_ARG;
2318 else
2320 // Create a temp assignment for the argument
2321 // Put the temp in the gtCallLateArgs list
2322 CLANG_FORMAT_COMMENT_ANCHOR;
2325 if (compiler->verbose)
2327 printf("Argument with 'side effect'...\n");
2328 compiler->gtDispTree(argx);
2332 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2333 noway_assert(argx->gtType != TYP_STRUCT);
2334 #endif // _TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING
2336 tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2337 if (argx->gtOper == GT_MKREFANY)
2339 // For GT_MKREFANY, typically the actual struct copying does
2340 // not have any side-effects and can be delayed. So instead
2341 // of using a temp for the whole struct, we can just use a temp
2342 // for the operand that has a side-effect.
2343 GenTree* operand;
2344 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2346 operand = argx->gtOp.gtOp1;
2348 // In the early argument evaluation, place an assignment to the temp
2349 // from the source operand of the mkrefany
2350 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2352 // Replace the operand for the mkrefany with the new temp.
2353 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2355 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2357 operand = argx->gtOp.gtOp2;
2359 // In the early argument evaluation, place an assignment to the temp
2360 // from the source operand of the mkrefany
2361 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2363 // Replace the operand for the mkrefany with the new temp.
2364 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
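// Editor's note: an illustrative shape for this transformation (tmp7 is a hypothetical
// temp number). Given an argument 'mkrefany(op1, op2)' where only op1 has side effects:
//   early (setupArg): ASG(LCL_VAR tmp7, op1)
//   late  (argx)    : mkrefany(LCL_VAR tmp7, op2)
// The side effect runs in argument order while the cheap struct copy is deferred.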
2368 if (setupArg != nullptr)
2370 // Now keep the mkrefany for the late argument list
2371 defArg = argx;
2373 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2374 defArg->gtFlags &= ~GTF_ALL_EFFECT;
2376 else // argx is not a GT_MKREFANY
2378 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2380 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2382 #ifndef LEGACY_BACKEND
2383 if (compiler->fgOrder == Compiler::FGOrderLinear)
2385 // We'll reference this temporary variable just once
2386 // when we perform the function call after
2387 // setting up this argument.
2388 varDsc->lvRefCnt = 1;
2390 #endif // !LEGACY_BACKEND
2392 var_types lclVarType = genActualType(argx->gtType);
2393 var_types scalarType = TYP_UNKNOWN;
2395 if (setupArg->OperIsCopyBlkOp())
2397 setupArg = compiler->fgMorphCopyBlock(setupArg);
2398 #if defined(_TARGET_ARM64_) || (!defined(LEGACY_BACKEND) && defined(_TARGET_ARM_))
2399 // This scalar LclVar widening step is only performed for ARM architectures.
2401 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2402 unsigned structSize = varDsc->lvExactSize;
2404 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
2405 #endif // _TARGET_ARM*_
2408 // scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8)
2409 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2411 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2412 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2416 // Create a copy of the temp to go to the late argument list
2417 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2420 curArgTabEntry->isTmp = true;
2421 curArgTabEntry->tmpNum = tmpVarNum;
2423 #ifdef _TARGET_ARM_
2424 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2425 // might have left holes in the used registers (see
2426 // fgAddSkippedRegsInPromotedStructArg).
2427 // Too bad we're not that smart for these intermediate temps...
2428 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2430 regNumber argReg = curArgTabEntry->regNum;
2431 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2432 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2434 argReg = genRegArgNext(argReg);
2435 allUsedRegs |= genRegMask(argReg);
2437 #ifdef LEGACY_BACKEND
2438 callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
2439 #endif // LEGACY_BACKEND
2441 #endif // _TARGET_ARM_
2444 /* mark the assignment as a late argument */
2445 setupArg->gtFlags |= GTF_LATE_ARG;
2448 if (compiler->verbose)
2450 printf("\n Evaluate to a temp:\n");
2451 compiler->gtDispTree(setupArg);
2456 else // curArgTabEntry->needTmp == false
2459 // Only register args are replaced with placeholder nodes
2460 // and the stack based arguments are evaluated and pushed in order.
2462 // On Arm/x64 - When needTmp is false and needPlace is false,
2463 // the non-register arguments are evaluated and stored in order.
2464 // When needPlace is true we have a nested call that comes after
2465 // this argument so we have to replace it in the gtCallArgs list
2466 // (the initial argument evaluation list) with a placeholder.
2468 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2470 continue;
2473 /* No temp needed - move the whole node to the gtCallLateArgs list */
2475 /* The argument is deferred and put in the late argument list */
2477 defArg = argx;
2479 // Create a placeholder node to put in its place in gtCallLateArgs.
2481 // For a struct type we also need to record the class handle of the arg.
2482 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2484 #if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2486 // All structs are either passed (and retyped) as integral types, OR they
2487 // are passed by reference.
2488 noway_assert(argx->gtType != TYP_STRUCT);
2490 #else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2492 if (varTypeIsStruct(defArg))
2494 // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
2495 GenTree* defArgTmp = defArg;
2497 // The GT_OBJ may be a child of a GT_COMMA.
2498 while (defArgTmp->gtOper == GT_COMMA)
2500 defArgTmp = defArgTmp->gtOp.gtOp2;
2502 assert(varTypeIsStruct(defArgTmp));
2504 // We handle two opcodes: GT_MKREFANY and GT_OBJ.
2505 if (defArgTmp->gtOper == GT_MKREFANY)
2507 clsHnd = compiler->impGetRefAnyClass();
2509 else if (defArgTmp->gtOper == GT_OBJ)
2511 clsHnd = defArgTmp->AsObj()->gtClass;
2513 else
2515 BADCODE("Unhandled struct argument tree in fgMorphArgs");
2519 #endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
2521 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2523 /* mark the placeholder node as a late argument */
2524 setupArg->gtFlags |= GTF_LATE_ARG;
2527 if (compiler->verbose)
2529 if (curArgTabEntry->regNum == REG_STK)
2531 printf("Deferred stack argument :\n");
2535 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2538 compiler->gtDispTree(argx);
2539 printf("Replaced with placeholder node:\n");
2540 compiler->gtDispTree(setupArg);
2545 if (setupArg != nullptr)
2547 if (curArgTabEntry->parent)
2549 GenTree* parent = curArgTabEntry->parent;
2550 /* a normal argument from the list */
2551 noway_assert(parent->OperIsList());
2552 noway_assert(parent->gtOp.gtOp1 == argx);
2554 parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT);
2556 parent->gtOp.gtOp1 = setupArg;
2558 else // no parent list node: this was the gtCallObjp ('this') argument
2560 /* must be the gtCallObjp */
2561 noway_assert(callTree->gtCall.gtCallObjp == argx);
2563 callTree->gtCall.gtCallObjp = setupArg;
2567 /* deferred arg goes into the late argument list */
2569 if (tmpRegArgNext == nullptr)
2571 tmpRegArgNext = compiler->gtNewArgList(defArg);
2572 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2574 else
2576 noway_assert(tmpRegArgNext->OperIsList());
2577 noway_assert(tmpRegArgNext->Current());
2578 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2580 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2581 tmpRegArgNext = tmpRegArgNext->Rest();
2584 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2586 curArgTabEntry->node = defArg;
2587 curArgTabEntry->lateArgInx = regArgInx++;
2591 if (compiler->verbose)
2593 printf("\nShuffled argument table: ");
2594 for (unsigned curInx = 0; curInx < argCount; curInx++)
2596 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2598 if (curArgTabEntry->regNum != REG_STK)
2600 printf("%s ", getRegName(curArgTabEntry->regNum));
2608 // Get the late arg for arg at position argIndex.
2609 // argIndex - 0-based position to get late arg for.
2610 // Caller must ensure this position has a late arg.
2611 GenTree* fgArgInfo::GetLateArg(unsigned argIndex)
2613 for (unsigned j = 0; j < this->ArgCount(); j++)
2615 if (this->ArgTable()[j]->argNum == argIndex)
2617 return this->ArgTable()[j]->node;
2620 // Caller must ensure late arg exists.
2621 unreached();
2624 void fgArgInfo::RecordStkLevel(unsigned stkLvl)
2626 assert(!IsUninitialized(stkLvl));
2627 this->stkLevel = stkLvl;
2630 unsigned fgArgInfo::RetrieveStkLevel()
2632 assert(!IsUninitialized(stkLevel));
2633 return stkLevel;
2636 // Return a conservative estimate of the stack size in bytes.
2637 // It will be used only on the intercepted-for-host code path to copy the arguments.
2638 int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2640 int numArgs = 0;
2642 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2644 numArgs++;
2646 int numStkArgs;
2648 if (numArgs > MAX_REG_ARG)
2650 numStkArgs = numArgs - MAX_REG_ARG;
2652 else
2654 numStkArgs = 0;
2657 return numStkArgs * REGSIZE_BYTES;
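// Editor's note: a worked example with illustrative values. On a target where
// MAX_REG_ARG == 4 and REGSIZE_BYTES == 8, a call with 6 arguments is estimated at
// (6 - 4) * 8 = 16 bytes; with 4 or fewer arguments the estimate is 0, never negative.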
2660 //------------------------------------------------------------------------------
2661 // fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
2662 //                  otherwise insert a comma form temp.
2664 // Arguments:
2665 //    pOp - a pointer to the child node we will be replacing with the comma expression that
2666 //          evaluates *pOp to a temp and returns the result
2668 // Return Value:
2669 //    A fresh GT_LCL_VAR node referencing the temp which has not been used
2671 // Assumption:
2672 //    The result tree MUST be added to the tree structure since the ref counts are
2673 //    already incremented.
2675 GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2677 GenTree* tree = *pOp;
2678 if (tree->IsLocal())
2680 auto result = gtClone(tree);
2681 if (lvaLocalVarRefCounted)
2683 lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2686 return result;
2689 GenTree* result = fgInsertCommaFormTemp(pOp);
2691 // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
2692 // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
2693 // be added by the caller.
2694 if (lvaLocalVarRefCounted)
2696 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2697 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2698 lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
2701 return result;
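// Editor's note: an illustrative (hypothetical) caller of fgMakeMultiUse. Given a
// GenTree** 'use' pointing at a subtree whose value is needed twice:
//   GenTree* second = fgMakeMultiUse(use);
// '*use' still evaluates the value (possibly via a comma temp) and 'second' is a fresh
// GT_LCL_VAR for that value which the caller must link into the tree.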
2705 //------------------------------------------------------------------------------
2706 // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2707 // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2709 // Arguments:
2710 //    ppTree - a pointer to the child node we will be replacing with the comma expression that
2711 //             evaluates ppTree to a temp and returns the result
2713 //    structType - value type handle if the temp created is of TYP_STRUCT.
2715 // Return Value:
2716 //    A fresh GT_LCL_VAR node referencing the temp which has not been used
2719 GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2721 GenTree* subTree = *ppTree;
2723 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2725 if (varTypeIsStruct(subTree))
2727 assert(structType != nullptr);
2728 lvaSetStruct(lclNum, structType, false);
2731 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2732 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2733 // setting type of lcl vars created.
2734 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2736 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2738 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2740 *ppTree = comma;
2742 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
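// Editor's note: the resulting shape, sketched. If *ppTree was T before the call, then
// afterwards:
//   *ppTree == COMMA(ASG(LCL_VAR lclN, T), LCL_VAR lclN)
// and the node returned above is a second LCL_VAR lclN for use at another tree position.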
2745 //------------------------------------------------------------------------
2746 // fgMorphArgs: Walk and transform (morph) the arguments of a call
2748 // Arguments:
2749 //    callNode - the call for which we are doing the argument morphing
2751 // Return Value:
2752 //    Like most morph methods, this method returns the morphed node,
2753 //    though in this case there are currently no scenarios where the
2754 //    node itself is re-created.
2756 // Notes:
2757 //    This method is even less idempotent than most morph methods.
2758 // That is, it makes changes that should not be redone. It uses the existence
2759 // of gtCallLateArgs (the late arguments list) to determine if it has
2760 // already done that work.
2762 // The first time it is called (i.e. during global morphing), this method
2763 // computes the "late arguments". This is when it determines which arguments
2764 // need to be evaluated to temps prior to the main argument setup, and which
2765 // can be directly evaluated into the argument location. It also creates a
2766 // second argument list (gtCallLateArgs) that does the final placement of the
2767 // arguments, e.g. into registers or onto the stack.
2769 // The "non-late arguments", aka the gtCallArgs, perform the in-order
2770 // evaluation of the arguments that might have side-effects, such as embedded
2771 // assignments, calls or possible throws. In these cases, the argument and all
2772 // earlier arguments must be evaluated to temps.
2774 // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
2775 // if we have any nested calls, we need to defer the copying of the argument
2776 // into the fixed argument area until after the call. If the argument did not
2777 // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
2778 // replaced in the "early" arg list (gtCallArgs) with a placeholder node.
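// Editor's note: an illustrative example (hypothetical call) of the early/late split on a
// FEATURE_FIXED_OUT_ARGS target. For 'f(a0, g(x))', the store of a0 into the fixed
// out-arg area must wait until after g(x) runs, so after fgMorphArgs:
//   gtCallArgs     : PLACEHOLDER, g(x)   <- in-order evaluation and side effects
//   gtCallLateArgs : a0                  <- final placement into the out-arg area
// (g(x) itself may additionally be evaluated to a temp.)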
2780 #ifdef _PREFAST_
2781 #pragma warning(push)
2782 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2783 #endif
2784 GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
2789 unsigned flagsSummary = 0;
2790 unsigned genPtrArgCntSav = fgPtrArgCntCur;
2792 unsigned argIndex = 0;
2794 unsigned intArgRegNum = 0;
2795 unsigned fltArgRegNum = 0;
2797 #ifdef _TARGET_ARM_
2798 regMaskTP argSkippedRegMask = RBM_NONE;
2799 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2800 #endif // _TARGET_ARM_
2802 #if defined(_TARGET_X86_)
2803 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2804 #else // !_TARGET_X86_
2805 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2806 #endif // !_TARGET_X86_
2808 unsigned argSlots = 0;
2809 unsigned nonRegPassedStructSlots = 0;
2810 bool reMorphing = call->AreArgsComplete();
2811 bool callHasRetBuffArg = call->HasRetBufArg();
2813 #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
2814 bool callIsVararg = call->IsVarargs();
2815 #endif // !_TARGET_X86_
2817 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2818 // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2819 // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
2820 // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
2821 bool hasStackArgCopy = false;
2822 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2824 #ifndef LEGACY_BACKEND
2825 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2826 // following the normal calling convention or in the normal argument registers. We either mark existing
2827 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2828 // non-standard arguments into the argument list, below.
2829 class NonStandardArgs
2831 struct NonStandardArg
2833 regNumber reg; // The register to be assigned to this non-standard argument.
2834 GenTree* node; // The tree node representing this non-standard argument.
2835 // Note that this must be updated if the tree node changes due to morphing!
2838 ArrayStack<NonStandardArg> args;
2841 NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
2845 //-----------------------------------------------------------------------------
2846 // Add: add a non-standard argument to the table of non-standard arguments
2849 // node - a GenTree node that has a non-standard argument.
2850 // reg - the register to assign to this node.
2855 void Add(GenTree* node, regNumber reg)
2857 NonStandardArg nsa = {reg, node};
2858 args.Push(nsa);
2861 //-----------------------------------------------------------------------------
2862 // Find: Look for a GenTree* in the set of non-standard args.
2865 // node - a GenTree node to look for
2868 // The index of the non-standard argument (a non-negative, unique, stable number).
2869 // If the node is not a non-standard argument, return -1.
2871 int Find(GenTree* node)
2873 for (int i = 0; i < args.Height(); i++)
2875 if (node == args.Index(i).node)
2877 return i;
2881 return -1; // not found
2883 //-----------------------------------------------------------------------------
2884 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2885 // set the register to use for the node.
2888 // node - a GenTree node to look for
2889 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2890 // 'node' is found in the non-standard argument set.
2893 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2894 // non-standard register to use.
2895 // 'false' otherwise (in this case, *pReg is unmodified).
2897 bool FindReg(GenTree* node, regNumber* pReg)
2899 for (int i = 0; i < args.Height(); i++)
2901 NonStandardArg& nsa = args.IndexRef(i);
2902 if (node == nsa.node)
2904 *pReg = nsa.reg;
2905 return true;
2909 return false;
2911 //-----------------------------------------------------------------------------
2912 // Replace: Replace the non-standard argument node at a given index. This is done when
2913 // the original node was replaced via morphing, but we need to continue to assign a
2914 // particular non-standard arg to it.
2917 // index - the index of the non-standard arg. It must exist.
2918 // node - the new GenTree node.
2923 void Replace(int index, GenTree* node)
2925 args.IndexRef(index).node = node;
2928 } nonStandardArgs(this);
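// Editor's note: an illustrative usage sketch for NonStandardArgs (REG_XYZ stands in for
// whatever fixed register a given convention requires; it is not a real define):
//   nonStandardArgs.Add(argNode, REG_XYZ);      // pin argNode to a fixed register
//   int i = nonStandardArgs.Find(argNode);      // -1 when argNode is a standard arg
//   if ((i != -1) && (morphed != argNode))
//       nonStandardArgs.Replace(i, morphed);    // keep the table in sync after morphing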
2929 #endif // !LEGACY_BACKEND
2931 // Count of args. On first morph, this is counted before we've filled in the arg table.
2932 // On remorph, we grab it from the arg table.
2933 unsigned numArgs = 0;
2935 // Process the late arguments (which were determined by a previous caller).
2936 // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
2937 // may need to refer to it.
2940 // We need to reMorph the gtCallLateArgs early since that is what triggers
2941 // the expression folding and we need to have the final folded gtCallLateArgs
2942 // available when we call RemorphRegArg so that we correctly update the fgArgInfo
2943 // with the folded tree that represents the final optimized argument nodes.
2945 // However if a range-check needs to be generated for any of these late
2946 // arguments we also need to "know" what the stack depth will be when we generate
2947 // code to branch to the throw range check failure block as that is part of the
2948 // GC information contract for that block.
2950 // Since the late arguments are evaluated last we have pushed all of the
2951 // other arguments on the stack before we evaluate these late arguments,
2952 // so we record the stack depth on the first morph call when reMorphing
2953 // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
2955 if (call->gtCallLateArgs != nullptr)
2957 unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
2958 fgPtrArgCntCur += callStkLevel;
2959 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
2960 flagsSummary |= call->gtCallLateArgs->gtFlags;
2961 fgPtrArgCntCur -= callStkLevel;
2963 assert(call->fgArgInfo != nullptr);
2964 call->fgArgInfo->RemorphReset();
2966 numArgs = call->fgArgInfo->ArgCount();
2970 // First we need to count the args
2971 if (call->gtCallObjp)
2973 numArgs++;
2975 for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2977 numArgs++;
2980 // Insert or mark non-standard args. These are either outside the normal calling convention, or
2981 // arguments registers that don't follow the normal progression of argument registers in the calling
2982 // convention (such as for the ARM64 fixed return buffer argument x8).
2984 // *********** NOTE *************
2985 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2986 // in the implementation of fast tail call.
2987 // *********** END NOTE *********
2988 CLANG_FORMAT_COMMENT_ANCHOR;
2990 #if !defined(LEGACY_BACKEND)
2991 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2992 // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
2993 // Set the argument registers correctly here.
2994 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2996 GenTreeArgList* args = call->gtCallArgs;
2997 GenTree* arg1 = args->Current();
2998 assert(arg1 != nullptr);
2999 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
3001 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3002 #if defined(_TARGET_ARM_)
3003 // A non-standard calling convention, implemented via the secure delegate invoke mechanism, is used on ARM
3004 // only; despite the name, it is not used for secure delegates. It is used for VSD delegate calls where the
3005 // VSD custom calling convention ABI requires passing R4, a callee-saved register, with a special value.
3006 // Since R4 is a callee-saved register, its value needs to be preserved. Thus, the VM uses a secure delegate
3007 // IL stub, which preserves R4 and also sets up R4 correctly for the VSD call. The VM is simply reusing an
3008 // existing mechanism (secure delegate IL stub) to achieve its goal for delegate VSD call. See
3009 // COMDelegate::NeedsWrapperDelegate() in the VM for details.
3009 else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)
3011 GenTree* arg = call->gtCallObjp;
3012 if (arg->OperIsLocal())
3014 arg = gtClone(arg, true);
3016 else
3018 GenTree* tmp = fgInsertCommaFormTemp(&arg);
3019 call->gtCallObjp = arg;
3020 call->gtFlags |= GTF_ASG;
3021 arg = tmp;
3023 noway_assert(arg != nullptr);
3025 GenTree* newArg = new (this, GT_ADDR)
3026 GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell);
3028 // Append newArg as the last arg
3029 GenTreeArgList** insertionPoint = &call->gtCallArgs;
3030 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
3031 { // intentionally empty: just walk to the end of the argument list
3032 }
3033 *insertionPoint = gtNewListNode(newArg, nullptr);
3036 nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg());
3038 #endif // defined(_TARGET_ARM_)
3039 #if defined(_TARGET_X86_)
3040 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
3041 // hi part to be in EDX. This sets the argument registers up correctly.
3042 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
3043 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
3045 GenTreeArgList* args = call->gtCallArgs;
3046 GenTree* arg1 = args->Current();
3047 assert(arg1 != nullptr);
3048 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
3050 args = args->Rest();
3051 GenTree* arg2 = args->Current();
3052 assert(arg2 != nullptr);
3053 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
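// Editor's note: an illustrative example. For a 64-bit shift 'x << n' morphed into a
// CORINFO_HELP_LLSH call, the first list node (the lo half of x) is pinned to
// REG_LNGARG_LO (EAX) and the second (the hi half) to REG_LNGARG_HI (EDX); the shift
// count is passed per the helper's own convention.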
3055 #else // !_TARGET_X86_
3056 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
3057 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
3058 // convention for x86/SSE.
3060 // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
3062 if (hasFixedRetBuffReg() && call->HasRetBufArg())
3064 args = call->gtCallArgs;
3065 assert(args != nullptr);
3066 assert(args->OperIsList());
3068 argx = call->gtCallArgs->Current();
3070 // We don't increment numArgs here, since we already counted this argument above.
3072 nonStandardArgs.Add(argx, theFixedRetBuffReg());
3075 // We are allowed to have a Fixed Return Buffer argument combined
3076 // with any of the remaining non-standard arguments
3078 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
3080 assert(!call->gtCallCookie);
3081 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
3082 // It will be used only on the intercepted-for-host code path to copy the arguments.
3084 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
3085 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
3088 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
3090 else if (call->IsVirtualStub())
3092 if (!call->IsTailCallViaHelper())
3094 GenTree* stubAddrArg = fgGetStubAddrArg(call);
3095 // And push the stub address onto the list of arguments
3096 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
3099 nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum);
3103 // If it is a VSD call getting dispatched via tail call helper,
3104 // fgMorphTailCall() would materialize stub addr as an additional
3105 // parameter added to the original arg list and hence no need to
3106 // add as a non-standard arg.
3110 #endif // !_TARGET_X86_
3111 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
3113 assert(!call->IsUnmanaged());
3115 GenTree* arg = call->gtCallCookie;
3116 noway_assert(arg != nullptr);
3117 call->gtCallCookie = nullptr;
3119 #if defined(_TARGET_X86_)
3120 // x86 passes the cookie on the stack as the final argument to the call.
3121 GenTreeArgList** insertionPoint = &call->gtCallArgs;
3122 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
3123 { // intentionally empty: just walk to the end of the argument list
3124 }
3125 *insertionPoint = gtNewListNode(arg, nullptr);
3126 #else // !defined(_TARGET_X86_)
3127 // All other architectures pass the cookie in a register.
3128 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
3129 #endif // defined(_TARGET_X86_)
3131 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
3134 // put destination into R10/EAX
3135 arg = gtClone(call->gtCallAddr, true);
3136 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
3139 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
3141 // finally change this call to a helper call
3142 call->gtCallType = CT_HELPER;
3143 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
3145 #if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_)
3146 // For arm, we dispatch code same as VSD using virtualStubParamInfo->GetReg()
3147 // for indirection cell address, which ZapIndirectHelperThunk expects.
3148 if (call->IsR2RRelativeIndir())
3150 assert(call->gtEntryPoint.addr != nullptr);
3152 size_t addrValue = (size_t)call->gtEntryPoint.addr;
3153 GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR);
3154 indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM;
3156 // Push the stub address onto the list of arguments.
3157 call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs);
3160 nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum);
3163 #endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_
3164 #endif // !LEGACY_BACKEND
3166 // Allocate the fgArgInfo for the call node.
3168 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
3171 /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
3172 * During the first call to fgMorphArgs we also record the
3173 * information about late arguments we have in 'fgArgInfo'.
3174 * This information is used later to construct the gtCallLateArgs */
3176 /* Process the 'this' argument value, if present */
3178 argx = call->gtCallObjp;
3180 if (argx)
3182 argx = fgMorphTree(argx);
3183 call->gtCallObjp = argx;
3184 flagsSummary |= argx->gtFlags;
3186 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
3188 assert(argIndex == 0);
3190 /* We must fill in or update the argInfo table */
3192 if (reMorphing)
3194 /* this is a register argument - possibly update it in the table */
3195 call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
3197 else // first morph: add the entry
3199 assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
3201 /* this is a register argument - put it in the table */
3202 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
3203 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3204 ,
3205 false, REG_STK, nullptr
3206 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3207 );
3209 // this can't be a struct.
3210 assert(argx->gtType != TYP_STRUCT);
3212 /* Increment the argument register count and argument index */
3213 if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
3215 intArgRegNum++;
3216 #ifdef WINDOWS_AMD64_ABI
3217 // Whenever we pass an integer register argument
3218 // we skip the corresponding floating point register argument
3219 fltArgRegNum++;
3220 #endif // WINDOWS_AMD64_ABI
3224 noway_assert(!"the 'this' pointer can not be a floating point type");
3229 #ifdef _TARGET_X86_
3231 // Compute the maximum number of arguments that can be passed in registers.
3232 // For X86 we handle the varargs and unmanaged calling conventions
3234 if (call->gtFlags & GTF_CALL_POP_ARGS)
3236 noway_assert(intArgRegNum < MAX_REG_ARG);
3237 // No more register arguments for varargs (CALL_POP_ARGS)
3238 maxRegArgs = intArgRegNum;
3240 // Add in the ret buff arg
3241 if (callHasRetBuffArg)
3243 maxRegArgs++;
3245 if (call->IsUnmanaged())
3247 noway_assert(intArgRegNum == 0);
3249 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
3251 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
3252 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
3253 call->gtCallArgs->gtOp.gtOp1->gtOper ==
3254 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
3256 maxRegArgs = 1;
3258 else
3260 maxRegArgs = 0;
3262 // Add in the ret buff arg
3263 if (callHasRetBuffArg)
3264 maxRegArgs++;
3266 #endif // _TARGET_X86_
3268 /* Morph the user arguments */
3269 CLANG_FORMAT_COMMENT_ANCHOR;
3271 #if defined(_TARGET_ARM_)
3273 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3274 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3275 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3276 // appear in a lower-numbered register than floating point argument N. That is, argument
3277 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3278 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3279 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3280 // only back-fill single registers, since there is no way with these types to create
3281 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3282 // available (with 16 FP argument registers). Consider this code:
3284 // struct HFA { float x, y, z; }; // a three element HFA
3285 // void bar(float a1, // passed in f0
3286 // double a2, // passed in f2/f3; skip f1 for alignment
3287 // HFA a3, // passed in f4/f5/f6
3288 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3289 // HFA a5, // passed in f10/f11/f12
3290 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
3292 // float a7, // passed in f1 (back-filled)
3293 // float a8, // passed in f7 (back-filled)
3294 // float a9, // passed in f13 (back-filled)
3295 // float a10) // passed on the stack in [OutArg+0]
3297 // Note that if we ever support FP types with larger alignment requirements, then there could
3298 // be more than single register back-fills.
3300 // Once we assign a floating-point argument to the stack, they all must be on the stack.
3301 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3302 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3303 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3304 // and prevent any additional floating-point arguments from going in registers.
3306 bool anyFloatStackArgs = false;
3308 #endif // _TARGET_ARM_
3310 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3311 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3312 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3314 bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
3315 // hasMultiregStructArgs is true if there are any structs that are eligible for passing
3316 // in registers; this is true even if it is not actually passed in registers (i.e. because
3317 // previous arguments have used up available argument registers).
3318 bool hasMultiregStructArgs = false;
3319 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3321 GenTree** parentArgx = &args->gtOp.gtOp1;
3323 #if FEATURE_MULTIREG_ARGS
3324 if (!hasStructArgument)
3326 hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
3328 #endif // FEATURE_MULTIREG_ARGS
3330 #ifndef LEGACY_BACKEND
3331 // Record the index of any nonStandard arg that we may be processing here, as we are
3332 // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
3333 GenTree* orig_argx = *parentArgx;
3334 int nonStandard_index = nonStandardArgs.Find(orig_argx);
3335 #endif // !LEGACY_BACKEND
3337 argx = fgMorphTree(*parentArgx);
3338 *parentArgx = argx;
3340 assert(args->OperIsList());
3341 assert(argx == args->Current());
3343 #ifndef LEGACY_BACKEND
3344 if ((nonStandard_index != -1) && (argx != orig_argx))
3346 // We need to update the node field for this nonStandard arg here
3347 // as it was changed by the call to fgMorphTree
3348 nonStandardArgs.Replace(nonStandard_index, argx);
3350 #endif // !LEGACY_BACKEND
3352 /* Change the node to TYP_I_IMPL so we don't report GC info
3353 * NOTE: We deferred this from the importer because of the inliner */
3355 if (argx->IsVarAddr())
3357 argx->gtType = TYP_I_IMPL;
3360 bool passUsingFloatRegs;
3361 unsigned argAlign = 1;
3362 // Setup any HFA information about 'argx'
3363 var_types hfaType = GetHfaType(argx);
3364 bool isHfaArg = varTypeIsFloating(hfaType);
3365 unsigned hfaSlots = 0;
3367 if (isHfaArg)
3369 hfaSlots = GetHfaCount(argx);
3371 // If we have an HFA struct, it's possible we transition from a method that originally
3372 // only had integer types to now start having FP types. We have to communicate this
3373 // through this flag since LSRA later on will use this flag to determine whether
3374 // or not to track the FP register set.
3376 compFloatingPointUsed = true;
3380 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3381 bool isRegArg = false;
3382 bool isNonStandard = false;
3383 regNumber nonStdRegNum = REG_NA;
3385 fgArgTabEntry* argEntry = nullptr;
3387 if (reMorphing)
3389 argEntry = gtArgEntryByArgNum(call, argIndex);
3392 #ifdef _TARGET_ARM_
3394 bool passUsingIntRegs;
3396 if (reMorphing)
3397 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3398 passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
3400 else
3402 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3403 passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3406 GenTree* curArg = argx;
3407 // If late args have already been computed, use the node in the argument table.
3408 if (argEntry != NULL && argEntry->isTmp)
3410 curArg = argEntry->node;
3413 if (reMorphing)
3415 argAlign = argEntry->alignment;
3417 else
3419 // We don't use the "size" return value from InferOpSizeAlign().
3420 codeGen->InferOpSizeAlign(curArg, &argAlign);
3422 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3423 argAlign /= TARGET_POINTER_SIZE;
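// Editor's note: a worked example with illustrative values. On ARM32 with
// TARGET_POINTER_SIZE == 4, a TYP_DOUBLE argument reports 8-byte alignment, so
// roundUp(8, 4) / 4 == 2: the argument must start at an even register/slot number,
// which the argAlign == 2 handling below enforces by skipping one reg/slot if needed.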
3426 if (argAlign == 2)
3428 if (passUsingFloatRegs)
3430 if (fltArgRegNum % 2 == 1)
3432 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3433 fltArgRegNum++;
3436 else if (passUsingIntRegs)
3438 if (intArgRegNum % 2 == 1)
3440 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3441 intArgRegNum++;
3445 if (argSlots % 2 == 1)
3447 argSlots++;
3451 #elif defined(_TARGET_ARM64_)
3453 if (reMorphing)
3455 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3457 else
3459 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3462 #elif defined(_TARGET_AMD64_)
3464 if (reMorphing)
3465 passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
3467 else
3469 passUsingFloatRegs = varTypeIsFloating(argx);
3471 #elif defined(_TARGET_X86_)
3473 passUsingFloatRegs = false;
3476 #error Unsupported or unset target architecture
3479 bool isBackFilled = false;
3480 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3481 var_types structBaseType = TYP_STRUCT;
3482 unsigned structSize = 0;
3484 bool isStructArg = varTypeIsStruct(argx);
3488 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3489 // Get the struct description for the already completed struct argument.
3490 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, argx);
3491 assert(fgEntryPtr != nullptr);
3493 // As described in a few other places, this can happen when the argx was morphed
3494 // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
3495 // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3496 // was a struct and of the struct classification.
3497 isStructArg = fgEntryPtr->isStruct;
3501 structDesc.CopyFrom(fgEntryPtr->structDesc);
3503 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3505 assert(argEntry != nullptr);
3506 if (argEntry->IsBackFilled())
3509 size = argEntry->numRegs;
3510 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
3512 isBackFilled = true;
3514 else if (argEntry->regNum == REG_STK)
3517 assert(argEntry->numRegs == 0);
3518 size = argEntry->numSlots;
3523 assert(argEntry->numRegs > 0);
3524 size = argEntry->numRegs + argEntry->numSlots;
3527 // This size has now been computed
3530 isNonStandard = argEntry->isNonStandard;
3535 // Figure out the size of the argument. This is either in number of registers, or number of
3536 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
3537 // the stack.
3539 if (argx->IsArgPlaceHolderNode() || (!isStructArg))
3541 #if defined(_TARGET_AMD64_)
3542 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3543 if (!isStructArg)
3545 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3547 else
3549 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3550 TARGET_POINTER_SIZE)) /
3551 TARGET_POINTER_SIZE;
3552 eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
3553 if (size > 1)
3555 hasMultiregStructArgs = true;
3558 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3559 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3560 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3561 #elif defined(_TARGET_ARM64_)
3562 if (isStructArg)
3564 if (isHfaArg)
3566 size = GetHfaCount(argx);
3567 // HFA structs are passed by value in multiple registers
3568 hasMultiregStructArgs = true;
3570 else
3572 // Structs are either passed in 1 or 2 (64-bit) slots
3573 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3574 TARGET_POINTER_SIZE)) /
3575 TARGET_POINTER_SIZE;
3577 if (size == 2)
3579 // Structs that are the size of 2 pointers are passed by value in multiple registers,
3580 // if sufficient registers are available.
3581 hasMultiregStructArgs = true;
3583 else if (size > 2)
3585 size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
3586 // reference (to a copy)
3589 // Note that there are some additional rules for multireg structs.
3590 // (i.e they cannot be split between registers and the stack)
3592 else // not a struct arg
3594 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3596 #elif defined(_TARGET_ARM_)
3597 if (isStructArg)
3599 size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
3600 TARGET_POINTER_SIZE)) /
3601 TARGET_POINTER_SIZE;
3602 if (isHfaArg || size > 1)
3604 hasMultiregStructArgs = true;
3607 else // not a struct arg
3610 // long/double type argument(s) will be changed to a GT_FIELD_LIST in the Lowering phase
3611 size = genTypeStSz(argx->gtType);
3613 #elif defined(_TARGET_X86_)
3614 size = genTypeStSz(argx->gtType);
3616 #error Unsupported or unset target architecture
3617 #endif // _TARGET_XXX_
3621 // We handle two opcodes: GT_MKREFANY and GT_OBJ
3622 if (argx->gtOper == GT_MKREFANY)
3624 if (varTypeIsStruct(argx))
3626 isStructArg = true;
3628 #ifdef _TARGET_AMD64_
3629 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3630 if (varTypeIsStruct(argx))
3632 size = info.compCompHnd->getClassSize(impGetRefAnyClass());
3633 unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
3634 size = roundupSize / TARGET_POINTER_SIZE;
3635 eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
3638 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3639 else
3640 size = 1;
3642 #else // !_TARGET_AMD64_
3643 size = 2;
3644 #endif // _TARGET_AMD64_
3646 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
3648 GenTree* argObj = argx;
3649 GenTree** parentOfArgObj = parentArgx;
3651 assert(args->OperIsList());
3652 assert(argx == args->Current());
3654 /* The GT_OBJ may be a child of a GT_COMMA */
3655 while (argObj->gtOper == GT_COMMA)
3657 parentOfArgObj = &argObj->gtOp.gtOp2;
3658 argObj = argObj->gtOp.gtOp2;
3661 // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
3662 if (argObj->gtOper != GT_OBJ)
3664 BADCODE("illegal argument tree in fgMorphArgs");
3667 CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
3668 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3669 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3670 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3672 unsigned originalSize = info.compCompHnd->getClassSize(objClass);
3673 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3674 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3676 structSize = originalSize;
3678 structPassingKind howToPassStruct;
3679 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
3681 #ifdef _TARGET_ARM64_
3682 if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
3683 !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
3685 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3687 // For ARM64, we pass structs that are 3, 5, 6, or 7 bytes in size;
3688 // we can read 4 or 8 bytes from the LclVar to pass this arg.
3689 originalSize = genTypeSize(structBaseType);
3692 #endif // _TARGET_ARM64_
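// Editor's note: an illustrative example of the case above. A hypothetical 6-byte struct
// 'struct S { int a; short b; };' has originalSize == 6, which is not a power of two.
// When classified as SPK_PrimitiveType with a LclVar source, we can simply load 8 bytes
// and pass the value in one register; the callee ignores the padding bytes.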
3694 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3695 // On System V OS-es a struct is never passed by reference.
3696 // It is either passed by value on the stack or in registers.
3697 bool passStructInRegisters = false;
3698 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3699 bool passStructByRef = false;
3700 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
3702 // The following if-then-else needs to be carefully refactored.
3703 // Basically the else portion wants to turn a struct load (a GT_OBJ)
3704 // into a GT_IND of the appropriate size.
3705 // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
3706 // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
3707 // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
3708 // It also can't do this if we have a HFA arg,
3709 // unless we have a 1-elem HFA in which case we want to do the optimization.
3710 CLANG_FORMAT_COMMENT_ANCHOR;
3712 #ifndef _TARGET_X86_
3713 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3714 // Check for struct argument with size 1, 2, 4 or 8 bytes
3715 // As we can optimize these by turning them into a GT_IND of the correct type
3717 // Check for cases that we cannot optimize:
3718 CLANG_FORMAT_COMMENT_ANCHOR;
3719 #ifdef _TARGET_ARM_
3720 if (((originalSize > TARGET_POINTER_SIZE) && // it is struct that is larger than a pointer
3721 howToPassStruct != SPK_PrimitiveType) || // it is struct that is not one double HFA
3722 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3723 (isHfaArg && (howToPassStruct != SPK_PrimitiveType))) // it is an HFA that is not a one-element HFA
3724 #else // !_TARGET_ARM_
3725 if ((originalSize > TARGET_POINTER_SIZE) || // it is struct that is larger than a pointer
3726 !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
3727 (isHfaArg && (hfaSlots != 1))) // it is an HFA with more than one element
3728 #endif // !_TARGET_ARM_
3729 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3731 // Normalize 'size' to the number of pointer sized items
3732 // 'size' is the number of register slots that we will use to pass the argument
3733 size = roundupSize / TARGET_POINTER_SIZE;
3734 #if defined(_TARGET_AMD64_)
3735 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3736 size = 1; // This must be copied to a temp and passed by address
3737 passStructByRef = true;
3738 copyBlkClass = objClass;
3739 #else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3740 if (!structDesc.passedInRegisters)
3742 GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
3743 bool needCpyBlk = false;
3744 if (lclVar != nullptr)
3746 // If the struct is promoted to registers, it has to be materialized
3747 // on stack. We may want to support promoted structures in
3748 // codegenning putarg_stk instead of creating a copy here.
3749 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3750 needCpyBlk = varDsc->lvPromoted;
3754 // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3755 // sets structDesc.passedInRegisters to be false.
3757 // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3758 // by rationalizer. For now we will let the SIMD struct arg be copied to
3759 // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
3762 // | \--* addr byref
3763 // | | /--* lclVar simd16 V05 loc4
3764 // | \--* simd simd16 int -
3765 // | \--* lclVar simd16 V08 tmp1
3767 // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3768 // so that we don't need to generate a copy here.
3769 GenTree* addr = argObj->gtOp.gtOp1;
3770 if (addr->OperGet() == GT_ADDR)
3772 GenTree* addrChild = addr->gtOp.gtOp1;
3773 if (addrChild->OperIsSIMDorSimdHWintrinsic())
3779 passStructInRegisters = false;
3780 if (needCpyBlk)
3782 copyBlkClass = objClass;
3784 else
3786 copyBlkClass = NO_CLASS_HANDLE;
3791 // The objClass is used to materialize the struct on stack.
3792 // For SystemV, the code below generates copies for struct arguments classified
3793 // as register argument.
3794 // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3795 // can be passed on registers or can be copied directly to outgoing area.
3796 passStructInRegisters = true;
3797 copyBlkClass = objClass;
3800 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3801 #elif defined(_TARGET_ARM64_)
3802 if ((size > 2) && !isHfaArg)
3804 size = 1; // This must be copied to a temp and passed by address
3805 passStructByRef = true;
3806 copyBlkClass = objClass;
3809 #elif defined(_TARGET_ARM_)
3811 // If we're passing a promoted struct local var,
3812 // we may need to skip some registers due to alignment; record those.
3813 GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
3814 if (lclVar != nullptr)
3816 LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3817 if (varDsc->lvPromoted)
3819 assert(argObj->OperGet() == GT_OBJ);
3820 if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
3822 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3824 #if !defined(LEGACY_BACKEND)
3825 copyBlkClass = objClass;
3827 #endif // !LEGACY_BACKEND
3830 #if !defined(LEGACY_BACKEND)
3831 if (structSize < TARGET_POINTER_SIZE)
3833 copyBlkClass = objClass;
3835 #endif // !LEGACY_BACKEND
3836 #endif // _TARGET_ARM_
3838 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3839 // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3840 // generated for structs of size 1, 2, 4, or 8 bytes.
3841 else // We have a struct argument with size 1, 2, 4 or 8 bytes
3843 // change our GT_OBJ into a GT_IND of the correct type.
3844 // We've already ensured above that size is a power of 2, and less than or equal to the
3845 // pointer size.
3847 assert(howToPassStruct == SPK_PrimitiveType);
3849 // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
3850 // their underlying primitive type.
3851 if (isHfaArg)
3853 #ifdef _TARGET_ARM_
3854 // If we reach here with an HFA arg it has to be a one element HFA
3855 // If HFA type is double and it has one element, hfaSlots is 2
3856 assert(hfaSlots == 1 || (hfaSlots == 2 && hfaType == TYP_DOUBLE));
3857 #else // !_TARGET_ARM_
3858 // If we reach here with an HFA arg it has to be a one element HFA
3859 assert(hfaSlots == 1);
3860 #endif // !_TARGET_ARM_
3861 structBaseType = hfaType; // change the indirection type to a floating point type
3864 noway_assert(structBaseType != TYP_UNKNOWN);
3866 argObj->ChangeOper(GT_IND);
3868 // Now see if we can fold *(&X) into X
3869 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3871 GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3873 // Keep the DONT_CSE flag in sync
3874 // (as the addr always marks it for its op1)
3875 temp->gtFlags &= ~GTF_DONT_CSE;
3876 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3877 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3878 DEBUG_DESTROY_NODE(argObj); // GT_IND
3880 argObj = temp;
3881 *parentOfArgObj = temp;
3883 // If the OBJ had been the top level node, we've now changed argx.
3884 if (parentOfArgObj == parentArgx)
3886 argx = temp;
3889 if (argObj->gtOper == GT_LCL_VAR)
3891 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3892 LclVarDsc* varDsc = &lvaTable[lclNum];
3894 if (varDsc->lvPromoted)
3896 if (varDsc->lvFieldCnt == 1)
3898 // get the first and only promoted field
3899 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
3900 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
3902 // we will use the first and only promoted field
3903 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
3905 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
3906 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
3908 // Just use the existing field's type
3909 argObj->gtType = fieldVarDsc->TypeGet();
3913 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
3915 argObj->ChangeOper(GT_LCL_FLD);
3916 argObj->gtType = structBaseType;
3918 assert(varTypeCanReg(argObj->TypeGet()));
3919 assert(copyBlkClass == NO_CLASS_HANDLE);
3921 else
3923 // use GT_LCL_FLD to swizzle the single field struct to a new type
3924 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
3925 argObj->ChangeOper(GT_LCL_FLD);
3926 argObj->gtType = structBaseType;
3929 else // more than one promoted field: re-assemble it below
3931 // The struct fits into a single register, but it has been promoted into its
3932 // constituent fields, and so we have to re-assemble it
3933 copyBlkClass = objClass;
3934 #ifdef _TARGET_ARM_
3935 // Alignment constraints may cause us not to use (to "skip") some argument
3936 // registers. Add those, if any, to the skipped (int) arg reg mask.
3937 fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
3938 #endif // _TARGET_ARM_
3941 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
3943 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
3944 argObj->ChangeOper(GT_LCL_FLD);
3945 argObj->gtType = structBaseType;
3950 // Not a GT_LCL_VAR, so we can just change the type on the node
3951 argObj->gtType = structBaseType;
3953 assert(varTypeCanReg(argObj->TypeGet()) ||
3954 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
3964 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3966 #endif // not _TARGET_X86_
3967 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
3968 if (varTypeIsStruct(structBaseType) &&
3969 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3970 !passStructInRegisters
3971 #else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3972 !passStructByRef
3973 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
3974 )
3976 if (isHfaArg && passUsingFloatRegs)
3978 size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
3982 // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
3983 // we must copyblk to a temp before doing the obj to avoid
3984 // the obj reading memory past the end of the valuetype
3985 CLANG_FORMAT_COMMENT_ANCHOR;
3987 if (roundupSize > originalSize)
3989 copyBlkClass = objClass;
3991 // There are a few special cases where we can omit using a CopyBlk
3992 // where we normally would need to use one.
3994 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
3996 copyBlkClass = NO_CLASS_HANDLE;
4000 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
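// For illustration: a 12-byte struct on a 64-bit target gives
// roundupSize == 16 and size == 16 / TARGET_POINTER_SIZE == 2 slots; since
// roundupSize (16) > originalSize (12), copyBlkClass was set above unless
// the source is a LclVar address, whose padding makes the over-read safe.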
4005 #if defined(_TARGET_64BIT_)
4008 hasMultiregStructArgs = true;
4010 #elif defined(_TARGET_ARM_)
4013 if (size > genTypeStSz(hfaType))
4015 hasMultiregStructArgs = true;
4020 hasMultiregStructArgs = true;
4022 #endif // _TARGET_ARM_
4025 // The 'size' value must have been set by now (the original value of zero is an invalid value).
4029 // Figure out if the argument will be passed in a register.
4032 if (isRegParamType(genActualType(argx->TypeGet()))
4033 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4034 && (!isStructArg || structDesc.passedInRegisters)
4039 if (passUsingFloatRegs)
4041 // First, see if it can be back-filled
4042 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
4043 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
4044 (size == 1)) // The size to back-fill is one float register
4046 // Back-fill the register.
4047 isBackFilled = true;
4048 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
4049 fltArgSkippedRegMask &=
4050 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
4051 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
4052 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
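// Back-fill illustration (ARM, hypothetical signature f(float, double, float)):
// the first float takes s0, the double must start at an even register and so
// takes d1 (s2/s3), skipping s1; the second float then back-fills s1
// rather than going to the stack.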
4055 // Does the entire float, double, or HFA fit in the FP arg registers?
4056 // Check if the last register needed is still in the argument register range.
4057 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
4061 anyFloatStackArgs = true;
4066 isRegArg = intArgRegNum < MAX_REG_ARG;
4068 #elif defined(_TARGET_ARM64_)
4069 if (passUsingFloatRegs)
4071 // Check if the last register needed is still in the fp argument register range.
4072 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
4074 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
4075 if (isHfaArg && !isRegArg)
4077 // Recompute the 'size' so that it represents the number of stack slots rather than the number of registers.
4080 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
4081 size = roundupSize / TARGET_POINTER_SIZE;
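// E.g. an HFA of four floats (structSize == 16) that misses the FP registers:
// 'size' changes from 4 (one per float register) to
// 16 / TARGET_POINTER_SIZE == 2 stack slots on ARM64.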
4083 // We also must update fltArgRegNum so that we no longer try to
4084 // allocate any new floating point registers for args
4085 // This prevents us from backfilling a subsequent arg into d7
4087 fltArgRegNum = MAX_FLOAT_REG_ARG;
4092 // Check if the last register needed is still in the int argument register range.
4093 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
4095 // Did we run out of registers when we had a 16-byte struct (size == 2)?
4096 // (i.e. we only have one register remaining but we needed two registers to pass this arg)
4097 // This prevents us from backfilling a subsequent arg into x7
4099 if (!isRegArg && (size > 1))
4101 // We also must update intArgRegNum so that we no longer try to
4102 // allocate any new general purpose registers for args
4104 intArgRegNum = maxRegArgs;
4107 #else // not _TARGET_ARM_ or _TARGET_ARM64_
4109 #if defined(UNIX_AMD64_ABI)
4111 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4112 // Here a struct can be passed in registers following the classification of its members and size.
4113 // Now make sure there are actually enough registers to do so.
4116 unsigned int structFloatRegs = 0;
4117 unsigned int structIntRegs = 0;
4118 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4120 if (structDesc.IsIntegralSlot(i))
4124 else if (structDesc.IsSseSlot(i))
4130 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
4131 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
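// For illustration: struct { double d; long l; } classifies as one SSE
// eightbyte plus one integer eightbyte, so it stays in registers only if
// a free float register and a free integer register both remain.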
4134 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4136 if (passUsingFloatRegs)
4138 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
4142 isRegArg = intArgRegNum < MAX_REG_ARG;
4145 #else // !defined(UNIX_AMD64_ABI)
4146 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
4147 #endif // !defined(UNIX_AMD64_ABI)
4148 #endif // _TARGET_ARM_
4155 #ifndef LEGACY_BACKEND
4156 // If there are nonstandard args (outside the calling convention), they were inserted above
4157 // and noted in a table so we can recognize them here and build their argInfo.
4159 // They should not affect the placement of any other args or stack space required.
4160 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
4161 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
4164 isRegArg = (nonStdRegNum != REG_STK);
4166 #if defined(_TARGET_X86_)
4167 else if (call->IsTailCallViaHelper())
4169 // We have already (before calling fgMorphArgs()) appended the 4 special args
4170 // required by the x86 tailcall helper. These args are required to go on the
4171 // stack. Force them to the stack here.
4172 assert(numArgs >= 4);
4173 if (argIndex >= numArgs - 4)
4178 #endif // defined(_TARGET_X86_)
4179 #endif // !LEGACY_BACKEND
4180 } // end !reMorphing
4183 // Now we know whether the argument goes in registers and how big it is,
4184 // whether we just computed it or this is a re-morph call and we looked it up.
4186 CLANG_FORMAT_COMMENT_ANCHOR;
4189 // If we ever allocate a floating point argument to the stack, then all
4190 // subsequent HFA/float/double arguments go on the stack.
4191 if (!isRegArg && passUsingFloatRegs)
4193 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
4195 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
4199 // If we think we're going to split a struct between integer registers and the stack, check to
4200 // see if we've already assigned a floating-point arg to the stack.
4201 if (isRegArg && // We decided above to use a register for the argument
4202 !passUsingFloatRegs && // We're using integer registers
4203 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
4204 anyFloatStackArgs) // We've already used the stack for a floating-point argument
4206 isRegArg = false; // Change our mind; don't pass this struct partially in registers
4208 // Skip the rest of the integer argument registers
4209 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
4211 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
4215 #endif // _TARGET_ARM_
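// ARM illustration: with r0/r1 taken, a 16-byte struct (size == 4) would
// normally split across r2/r3 and the stack; if a float arg already lives
// on the stack we abandon the split, skip r2/r3, and pass the whole struct
// on the stack instead.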
4219 regNumber nextRegNum = REG_STK;
4221 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4222 regNumber nextOtherRegNum = REG_STK;
4223 unsigned int structFloatRegs = 0;
4224 unsigned int structIntRegs = 0;
4225 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4229 nextRegNum = nonStdRegNum;
4231 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4232 else if (isStructArg && structDesc.passedInRegisters)
4234 // It is a struct passed in registers. Assign the next available register.
4235 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
4236 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
4237 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
4239 if (structDesc.IsIntegralSlot(i))
4241 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
4244 else if (structDesc.IsSseSlot(i))
4246 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
4251 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4254 // fill in or update the argInfo table
4255 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
4256 : genMapIntRegArgNumToRegNum(intArgRegNum);
4259 #ifdef _TARGET_AMD64_
4260 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
4265 fgArgTabEntry* newArgEntry;
4268 // This is a register argument - possibly update it in the table
4269 newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
4273 // This is a register argument - put it in the table
4274 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
4275 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4277 isStructArg, nextOtherRegNum, &structDesc
4278 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4281 newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
4282 isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs
4283 newArgEntry->SetIsBackFilled(isBackFilled);
4284 newArgEntry->isNonStandard = isNonStandard;
4287 if (newArgEntry->isNonStandard)
4289 flagsSummary |= args->Current()->gtFlags;
4293 // Set up the next intArgRegNum and fltArgRegNum values.
4296 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4299 intArgRegNum += structIntRegs;
4300 fltArgRegNum += structFloatRegs;
4303 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
4305 if (passUsingFloatRegs)
4307 fltArgRegNum += size;
4309 #ifdef WINDOWS_AMD64_ABI
4310 // Whenever we pass an integer register argument
4311 // we skip the corresponding floating point register argument
4312 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
4313 #endif // WINDOWS_AMD64_ABI
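// E.g. on the Windows x64 ABI, for f(double, int) the double takes XMM0 and
// consumes the RCX slot, so the int goes in RDX: integer and float argument
// registers advance in lock step.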
4314 // There is no partial struct passing using float registers
4315 // on any supported architecture
4316 assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
4320 if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
4322 // we are setting up the fixed return buffer register argument
4323 // so don't increment intArgRegNum
4328 // Increment intArgRegNum by 'size' registers
4329 intArgRegNum += size;
4332 #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4333 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
4334 #endif // _TARGET_AMD64_
4336 if (intArgRegNum > MAX_REG_ARG)
4338 // This indicates a partial enregistration of a struct type
4339 assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
4340 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
4341 unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
4342 assert((unsigned char)numRegsPartial == numRegsPartial);
4343 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
4344 intArgRegNum = MAX_REG_ARG;
4345 fgPtrArgCntCur += size - numRegsPartial;
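// Split illustration (ARM, MAX_REG_ARG == 4): a 4-slot struct starting at
// r2 gives intArgRegNum == 6, so numRegsPartial = 4 - (6 - 4) = 2; r2/r3
// carry the first half and the remaining 2 slots go to the stack.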
4347 #endif // _TARGET_ARM_
4352 else // We have an argument that is not passed in a register
4354 fgPtrArgCntCur += size;
4356 // If the register arguments have not been determined then we must fill in the argInfo
4360 // This is a stack argument - possibly update it in the table
4361 call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
4365 // This is a stack argument - put it in the table
4366 call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
4367 argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
4371 if (copyBlkClass != NO_CLASS_HANDLE)
4373 noway_assert(!reMorphing);
4374 fgMakeOutgoingStructArgCopy(call, args, argIndex,
4375 copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
4377 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4378 hasStackArgCopy = true;
4382 #ifndef LEGACY_BACKEND
4383 if (argx->gtOper == GT_MKREFANY)
4385 // 'Lower' the MKREFANY tree and insert it.
4386 noway_assert(!reMorphing);
4390 // Build the mkrefany as a GT_FIELD_LIST
4391 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4392 GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
4393 (void)new (this, GT_FIELD_LIST)
4394 GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
4395 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4396 fp->node = fieldList;
4397 args->gtOp.gtOp1 = fieldList;
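// On x86 the TypedReference argument is now passed as the two-entry list
//     FIELD_LIST(dataPtr TYP_BYREF @0, FIELD_LIST(type TYP_I_IMPL @4, nullptr))
// in place of the original GT_MKREFANY node.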
4399 #else // !_TARGET_X86_
4402 // Here we don't need an unsafe value cls check, since the addr of the temp is used only in the mkrefany
4403 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4404 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4406 // Build the mkrefany as a comma node:
4407 // (tmp.ptr=argx),(tmp.type=handle)
4408 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
4409 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
4410 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4411 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4412 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4413 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4415 GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4416 GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4417 GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4419 // Change the expression to "(tmp=val)"
4420 args->gtOp.gtOp1 = asg;
4422 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4423 call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
4424 lvaSetVarAddrExposed(tmp);
4425 #endif // !_TARGET_X86_
4427 #endif // !LEGACY_BACKEND
4429 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
4432 GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
4433 if ((lclNode != nullptr) &&
4434 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4436 // Make a GT_FIELD_LIST of the field lclVars.
4437 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4438 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4439 GenTreeFieldList* fieldList = nullptr;
4440 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4441 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4443 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4444 if (fieldList == nullptr)
4446 lcl->SetLclNum(fieldLclNum);
4447 lcl->ChangeOper(GT_LCL_VAR);
4448 lcl->gtType = fieldVarDsc->lvType;
4449 fieldList = new (this, GT_FIELD_LIST)
4450 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4451 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4452 fp->node = fieldList;
4453 args->gtOp.gtOp1 = fieldList;
4457 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4458 fieldList = new (this, GT_FIELD_LIST)
4459 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4464 #endif // _TARGET_X86_ && !LEGACY_BACKEND
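// For illustration (hypothetical promoted local V03 with field locals V04/V05):
// the independently promoted x86 struct argument becomes
//     FIELD_LIST(LCL_VAR V04 @0, FIELD_LIST(LCL_VAR V05 @4, nullptr))
// so each field can be pushed separately.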
4466 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4467 if (isStructArg && !isRegArg)
4469 nonRegPassedStructSlots += size;
4472 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4476 flagsSummary |= args->Current()->gtFlags;
4477 } // end foreach argument loop
4481 call->fgArgInfo->ArgsComplete();
4483 #ifdef LEGACY_BACKEND
4484 call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
4485 #if defined(_TARGET_ARM_)
4486 call->gtCallRegUsedMask &= ~argSkippedRegMask;
4488 if (fltArgRegNum > 0)
4490 #if defined(_TARGET_ARM_)
4491 call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
4494 #endif // LEGACY_BACKEND
4497 if (call->gtCallArgs)
4499 UpdateGT_LISTFlags(call->gtCallArgs);
4502 /* Process the function address, if indirect call */
4504 if (call->gtCallType == CT_INDIRECT)
4506 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4509 call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
4511 if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
4516 /* Remember the maximum value we ever see */
4518 if (fgPtrArgCntMax < fgPtrArgCntCur)
4520 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
4521 fgPtrArgCntMax = fgPtrArgCntCur;
4524 assert(fgPtrArgCntCur >= genPtrArgCntSav);
4525 #if defined(UNIX_X86_ABI)
4526 call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
4527 #endif // UNIX_X86_ABI
4529 /* The call will pop all the arguments we pushed */
4531 fgPtrArgCntCur = genPtrArgCntSav;
4533 #if FEATURE_FIXED_OUT_ARGS
4535 // Record the outgoing argument size. If the call is a fast tail
4536 // call, it will set up its arguments in the incoming arg area instead
4537 // of the out-going arg area, so we don't need to track the
4538 // outgoing arg size.
4539 if (!call->IsFastTailCall())
4541 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4543 #if defined(UNIX_AMD64_ABI)
4544 opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
4546 // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
4548 // First slots go in registers only, no stack needed.
4549 // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
4550 // and ignores floating point args (it is overly conservative in that case).
4551 preallocatedArgCount = nonRegPassedStructSlots;
4552 if (argSlots > MAX_REG_ARG)
4554 preallocatedArgCount += argSlots - MAX_REG_ARG;
4556 #endif // UNIX_AMD64_ABI
4558 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4559 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
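// For illustration: six outgoing stack slots give
// outgoingArgSpaceSize = 6 * REGSIZE_BYTES = 48 bytes on a 64-bit target,
// while a call with no stack args still reserves MIN_ARG_AREA_FOR_CALL
// (e.g. the 32-byte callee shadow space on Windows x64).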
4564 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4565 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4569 #endif // FEATURE_FIXED_OUT_ARGS
4571 // Clear the ASG and EXCEPT (if possible) flags on the call node
4572 call->gtFlags &= ~GTF_ASG;
4573 if (!call->OperMayThrow(this))
4575 call->gtFlags &= ~GTF_EXCEPT;
4578 // Union in the side effect flags from the call's operands
4579 call->gtFlags |= flagsSummary & GTF_ALL_EFFECT;
4581 // If the register arguments have already been determined
4582 // or we have no register arguments then we don't need to
4583 // call SortArgs() and EvalArgsToTemps()
4585 // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4586 // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4587 // is added to make sure that EvalArgsToTemps() gets called.
4588 if (!reMorphing && (call->fgArgInfo->HasRegArgs()
4589 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4591 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4594 // This is the first time that we morph this call AND it has register arguments.
4595 // Follow into the code below and do the 'defer or eval to temp' analysis.
4597 call->fgArgInfo->SortArgs();
4599 call->fgArgInfo->EvalArgsToTemps();
4601 // We may have updated the arguments
4602 if (call->gtCallArgs)
4604 UpdateGT_LISTFlags(call->gtCallArgs);
4608 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4610 // Rewrite the struct args to be passed by value on stack or in registers.
4611 fgMorphSystemVStructArgs(call, hasStructArgument);
4613 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
4615 #ifndef LEGACY_BACKEND
4616 // In the future we can migrate UNIX_AMD64 to use this
4617 // method instead of fgMorphSystemVStructArgs
4619 // We only require morphing of structs that may be passed in multiple registers
4620 // for the RyuJIT backend.
4621 if (hasMultiregStructArgs)
4623 fgMorphMultiregStructArgs(call);
4625 #endif // LEGACY_BACKEND
4627 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4632 call->fgArgInfo->Dump(this);
4638 #pragma warning(pop)
4641 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4642 // fgMorphSystemVStructArgs:
4643 // Rewrite the struct args to be passed by value on stack or in registers.
4646 // call: The call whose arguments need to be morphed.
4647 // hasStructArgument: Whether this call has struct arguments.
4649 void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
4651 unsigned flagsSummary = 0;
4655 if (hasStructArgument)
4657 fgArgInfo* allArgInfo = call->fgArgInfo;
4659 for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4661 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4662 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
4663 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4664 // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
4665 // otherwise it points to the arg node in the late args list.
4666 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4667 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4668 assert(fgEntryPtr != nullptr);
4669 GenTree* argx = fgEntryPtr->node;
4670 GenTree* lateList = nullptr;
4671 GenTree* lateNode = nullptr;
4675 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4677 assert(list->OperIsList());
4679 GenTree* argNode = list->Current();
4680 if (argx == argNode)
4687 assert(lateList != nullptr && lateNode != nullptr);
4689 GenTree* arg = argx;
4690 bool argListCreated = false;
4692 var_types type = arg->TypeGet();
4694 if (varTypeIsStruct(type))
4696 var_types originalType = type;
4697 // If we have already processed the arg...
4698 if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
4703 // If it is already an OBJ, it is set up properly already.
4704 if (arg->OperGet() == GT_OBJ)
4706 assert(!fgEntryPtr->structDesc.passedInRegisters);
4710 assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
4711 (arg->OperGet() == GT_ADDR &&
4712 (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
4714 GenTreeLclVarCommon* lclCommon =
4715 arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
4716 if (fgEntryPtr->structDesc.passedInRegisters)
4718 if (fgEntryPtr->structDesc.eightByteCount == 1)
4720 // Change the type; the code below will change the LclVar to a LCL_FLD.
4721 type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4722 fgEntryPtr->structDesc.eightByteSizes[0]);
4724 else if (fgEntryPtr->structDesc.eightByteCount == 2)
4726 // Create LCL_FLD for each eightbyte.
4727 argListCreated = true;
4730 arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
4732 GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
4733 fgEntryPtr->structDesc.eightByteSizes[0]);
4734 GenTreeFieldList* fieldList =
4735 new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
4736 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4739 // Second eightbyte.
4740 GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
4741 GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
4742 .eightByteClassifications[1],
4743 fgEntryPtr->structDesc.eightByteSizes[1]),
4744 lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
4746 fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
4747 fieldList->gtType = originalType; // Preserve the type. It is a special case.
4748 newLclField->gtFieldSeq = FieldSeqStore::NotAField();
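// For illustration (hypothetical local V02 typed as struct { long l; double d; }):
// the two-eightbyte case produces
//     FIELD_LIST(LCL_FLD long V02 [+0], FIELD_LIST(LCL_FLD double V02 [+8], nullptr))
// with each LCL_FLD typed from its SysV eightbyte classification.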
4752 assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
4757 // If we didn't change the type of the struct, it means its
4758 // classification doesn't allow it to be passed directly in a
4759 // register, so we need to pass a pointer to the destination
4760 // where we copied the struct to.
4761 if (!argListCreated)
4763 if (fgEntryPtr->structDesc.passedInRegisters)
4769 // Make sure this is an addr node.
4770 if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
4772 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4775 assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
4777 // Create an Obj of the temp to use it as a call argument.
4778 arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
4785 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4786 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4787 assert(fgEntryPtr != nullptr);
4788 GenTree* argx = fgEntryPtr->node;
4789 GenTree* lateList = nullptr;
4790 GenTree* lateNode = nullptr;
4793 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4795 assert(list->OperIsList());
4797 GenTree* argNode = list->Current();
4798 if (argx == argNode)
4805 assert(lateList != nullptr && lateNode != nullptr);
4808 fgEntryPtr->node = arg;
4811 lateList->gtOp.gtOp1 = arg;
4815 args->gtOp.gtOp1 = arg;
4822 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4824 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4826 //-----------------------------------------------------------------------------
4827 // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4828 // call fgMorphMultiregStructArg on each of them.
4831 // call: a GenTreeCall node that has one or more TYP_STRUCT arguments
4834 // We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types.
4835 // It will ensure that the struct arguments are in the correct form.
4836 // If this method fails to find any TYP_STRUCT arguments it will assert.
4838 void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4840 bool foundStructArg = false;
4841 unsigned initialFlags = call->gtFlags;
4842 unsigned flagsSummary = 0;
4843 fgArgInfo* allArgInfo = call->fgArgInfo;
4845 // Currently ARM64/ARM use this method to morph the MultiReg struct args;
4846 // in the future AMD64_UNIX will also use this method.
4847 CLANG_FORMAT_COMMENT_ANCHOR;
4850 assert(!"Logic error: no MultiregStructArgs for X86");
4852 #ifdef _TARGET_AMD64_
4853 #if defined(UNIX_AMD64_ABI)
4854 NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
4855 #else // WINDOWS_AMD64_ABI
4856 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4857 #endif // !UNIX_AMD64_ABI
4860 for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4862 // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
4863 // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
4864 // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
4865 // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
4866 // otherwise it points to the arg node in the late args list.
4867 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4868 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4869 assert(fgEntryPtr != nullptr);
4870 GenTree* argx = fgEntryPtr->node;
4871 GenTree* lateList = nullptr;
4872 GenTree* lateNode = nullptr;
4876 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4878 assert(list->OperIsList());
4880 GenTree* argNode = list->Current();
4881 if (argx == argNode)
4888 assert(lateList != nullptr && lateNode != nullptr);
4891 GenTree* arg = argx;
4893 if (varTypeIsStruct(arg->TypeGet()))
4895 foundStructArg = true;
4897 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4899 // Did we replace 'argx' with a new tree?
4902 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4904 // link the new arg node into either the late arg list or the gtCallArgs list
4907 lateList->gtOp.gtOp1 = arg;
4911 args->gtOp.gtOp1 = arg;
4917 // We should only call this method when we actually have one or more multireg struct args
4918 assert(foundStructArg);
4921 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4924 //-----------------------------------------------------------------------------
4925 // fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list,
4926 // morph the argument as needed to be passed correctly.
4929 // arg - A GenTree node containing a TYP_STRUCT arg
4930 // fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4933 // The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT.
4934 // If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the
4935 // stack are marked as doNotEnregister, and then we return.
4937 // If it is passed by register, we mutate the argument into the GT_FIELD_LIST form
4938 // which is only used for struct arguments.
4940 // If arg is a LclVar we check if it is struct promoted and has the right number of fields,
4941 // and if they are at the appropriate offsets we will use the struct promoted fields
4942 // in the GT_FIELD_LIST nodes that we create.
4943 // If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
4944 // we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
4945 // this also forces the struct to be stack allocated into the local frame.
4946 // For the GT_OBJ case we will clone the address expression and generate two (or more) indirect loads.
4948 // Currently the implementation handles ARM64/ARM and will NYI for other architectures.
4950 GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr)
4952 assert(varTypeIsStruct(arg->TypeGet()));
4954 #ifndef _TARGET_ARMARCH_
4955 NYI("fgMorphMultiregStructArg requires implementation for this target");
4959 if ((fgEntryPtr->isSplit && fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) ||
4960 (!fgEntryPtr->isSplit && fgEntryPtr->regNum == REG_STK))
4962 GenTreeLclVarCommon* lcl = nullptr;
4964 // If it is already an OBJ, it is set up properly already.
4965 if (arg->OperGet() == GT_OBJ)
4967 if (arg->gtGetOp1()->OperIs(GT_ADDR) && arg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
4969 lcl = arg->gtGetOp1()->gtGetOp1()->AsLclVarCommon();
4974 assert(arg->OperGet() == GT_LCL_VAR);
4976 // We need to construct a `GT_OBJ` node for the argument,
4977 // so we need to get the address of the lclVar.
4978 lcl = arg->AsLclVarCommon();
4980 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4982 // Create an Obj of the temp to use it as a call argument.
4983 arg = gtNewObjNode(lvaGetStruct(lcl->gtLclNum), arg);
4987 // Its fields will need to be accessed by address.
4988 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUG_ARG(DNER_IsStructArg));
4995 #if FEATURE_MULTIREG_ARGS
4996 // Examine 'arg' and set up argValue, objClass and structSize
4998 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
4999 GenTree* argValue = arg; // normally argValue will be arg, but see right below
5000 unsigned structSize = 0;
5002 if (arg->OperGet() == GT_OBJ)
5004 GenTreeObj* argObj = arg->AsObj();
5005 objClass = argObj->gtClass;
5006 structSize = info.compCompHnd->getClassSize(objClass);
5008 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR.
5009 GenTree* op1 = argObj->gtOp1;
5010 if (op1->OperGet() == GT_ADDR)
5012 GenTree* underlyingTree = op1->gtOp.gtOp1;
5014 // Only update to the same type.
5015 if ((underlyingTree->TypeGet() == argValue->TypeGet()) &&
5016 (objClass == gtGetStructHandleIfPresent(underlyingTree)))
5018 argValue = underlyingTree;
5022 else if (arg->OperGet() == GT_LCL_VAR)
5024 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
5025 unsigned varNum = varNode->gtLclNum;
5026 assert(varNum < lvaCount);
5027 LclVarDsc* varDsc = &lvaTable[varNum];
5029 objClass = lvaGetStruct(varNum);
5030 structSize = varDsc->lvExactSize;
5032 noway_assert(objClass != nullptr);
5034 var_types hfaType = TYP_UNDEF;
5035 var_types elemType = TYP_UNDEF;
5036 unsigned elemCount = 0;
5037 unsigned elemSize = 0;
5038 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
5040 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
5041 if (varTypeIsFloating(hfaType))
5044 elemSize = genTypeSize(elemType);
5045 elemCount = structSize / elemSize;
5046 assert(elemSize * elemCount == structSize);
5047 for (unsigned inx = 0; inx < elemCount; inx++)
5049 type[inx] = elemType;
5054 #ifdef _TARGET_ARM64_
5055 assert(structSize <= 2 * TARGET_POINTER_SIZE);
5056 #elif defined(_TARGET_ARM_)
5057 assert(structSize <= 4 * TARGET_POINTER_SIZE);
5060 #ifdef _TARGET_ARM64_
5061 BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
5062 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
5064 type[0] = getJitGCType(gcPtrs[0]);
5065 type[1] = getJitGCType(gcPtrs[1]);
5066 #elif defined(_TARGET_ARM_)
5067 BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
5068 elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
5069 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
5070 for (unsigned inx = 0; inx < elemCount; inx++)
5072 type[inx] = getJitGCType(gcPtrs[inx]);
5074 #endif // _TARGET_ARM_
5076 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5078 elemSize = TARGET_POINTER_SIZE;
5079 // We can safely widen this to aligned bytes since we are loading from
5080 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
5081 // lives in the stack frame or will be a promoted field.
5083 structSize = elemCount * TARGET_POINTER_SIZE;
5085 else // we must have a GT_OBJ
5087 assert(argValue->OperGet() == GT_OBJ);
5089 // We need to load the struct from an arbitrary address
5090 // and we can't read past the end of the structSize
5091 // We adjust the last load type here
5093 unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
5094 unsigned lastElem = elemCount - 1;
5095 if (remainingBytes != 0)
5097 switch (remainingBytes)
5100 type[lastElem] = TYP_BYTE;
5103 type[lastElem] = TYP_SHORT;
5105 #ifdef _TARGET_ARM64_
5107 type[lastElem] = TYP_INT;
5109 #endif // _TARGET_ARM64_
5111 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
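// E.g. a 12-byte struct read through a GT_OBJ on ARM64: elemCount == 2 and
// remainingBytes == 4, so type[1] becomes TYP_INT and the final load reads
// only four bytes.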
5117 // We should still have a TYP_STRUCT
5118 assert(varTypeIsStruct(argValue->TypeGet()));
5120 GenTreeFieldList* newArg = nullptr;
5122 // Are we passing a struct LclVar?
5124 if (argValue->OperGet() == GT_LCL_VAR)
5126 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5127 unsigned varNum = varNode->gtLclNum;
5128 assert(varNum < lvaCount);
5129 LclVarDsc* varDsc = &lvaTable[varNum];
5131 // At this point any TYP_STRUCT LclVar must be an aligned struct
5132 // or an HFA struct, both of which are passed by value.
5134 assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
5136 varDsc->lvIsMultiRegArg = true;
5141 JITDUMP("Multireg struct argument V%02u : ", varNum);
5146 // This local variable must match the layout of the 'objClass' type exactly
5147 if (varDsc->lvIsHfa())
5149 // We have a HFA struct
5150 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
5151 noway_assert(elemSize == genTypeSize(elemType));
5152 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
5153 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
5155 for (unsigned inx = 0; (inx < elemCount); inx++)
5157 noway_assert(type[inx] == elemType);
5162 #ifdef _TARGET_ARM64_
5163 // We must have a 16-byte struct (non-HFA)
5164 noway_assert(elemCount == 2);
5165 #elif defined(_TARGET_ARM_)
5166 noway_assert(elemCount <= 4);
5169 for (unsigned inx = 0; inx < elemCount; inx++)
5171 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
5173 // We set up the type[inx] value above using the GC info from 'objClass'.
5174 // This GT_LCL_VAR must have the same GC layout info
5176 if (currentGcLayoutType != TYPE_GC_NONE)
5178 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
5182 // We may have used a small type when we set up the type[inx] values above.
5183 // We can safely widen this to TYP_I_IMPL.
5184 type[inx] = TYP_I_IMPL;
5189 #ifdef _TARGET_ARM64_
5190 // Is this LclVar a promoted struct with exactly 2 fields?
5191 // TODO-ARM64-CQ: Support struct promoted HFA types here
5192 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
5194 // See if we have two promoted fields that start at offsets 0 and 8.
5195 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
5196 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
5198 // Did we find the promoted fields at the necessary offsets?
5199 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
5201 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
5202 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
5204 var_types loType = loVarDsc->lvType;
5205 var_types hiType = hiVarDsc->lvType;
5207 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
5209 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
5210 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
5212 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5215 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5220 // We can use the struct promoted fields as the two arguments
5222 GenTree* loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
5223 GenTree* hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
5225 // Create a new tree for 'arg'
5226 // replace the existing LDOBJ(ADDR(LCLVAR))
5227 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
5229 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
5230 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
5237 // We will create a list of GT_LCL_FLD nodes to pass this struct
5239 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5241 #elif defined(_TARGET_ARM_)
5242 // Is this LclVar a promoted struct with exactly the same size?
5243 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
5245 // See if we have promoted fields.
5246 unsigned varNums[4];
5247 bool hasBadVarNum = false;
5248 for (unsigned inx = 0; inx < elemCount; inx++)
5250 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
5251 if (varNums[inx] == BAD_VAR_NUM)
5253 hasBadVarNum = true;
5258 // Did we find the promoted fields at the necessary offsets?
5261 LclVarDsc* varDscs[4];
5262 var_types varType[4];
5263 bool varIsFloat = false;
5265 for (unsigned inx = 0; inx < elemCount; inx++)
5267 varDscs[inx] = &lvaTable[varNums[inx]];
5268 varType[inx] = varDscs[inx]->lvType;
5269 if (varTypeIsFloating(varType[inx]))
5271 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
5273 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
5275 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
5278 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
5287 unsigned offset = 0;
5288 GenTreeFieldList* listEntry = nullptr;
5289 // We can use the struct promoted fields as arguments
5290 for (unsigned inx = 0; inx < elemCount; inx++)
5292 GenTree* lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
5293 // Create a new tree for 'arg'
5294 // replace the existing LDOBJ(ADDR(LCLVAR))
5295 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
5296 if (newArg == nullptr)
5300 offset += TARGET_POINTER_SIZE;
5308 // We will create a list of GT_LCL_FLD nodes to pass this struct
5310 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5312 #endif // _TARGET_ARM_
5315 // If we didn't set newArg to a new list node tree
5317 if (newArg == nullptr)
5319 if (fgEntryPtr->regNum == REG_STK)
5321 // We leave this stack passed argument alone
5325 // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
5326 // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
5328 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
5330 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
5331 unsigned varNum = varNode->gtLclNum;
5332 assert(varNum < lvaCount);
5333 LclVarDsc* varDsc = &lvaTable[varNum];
5335 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
5336 unsigned lastOffset = baseOffset + (elemCount * elemSize);
5338 // The allocated size of our LocalVar must be at least as big as lastOffset
5339 assert(varDsc->lvSize() >= lastOffset);
5341 if (varDsc->lvStructGcCount > 0)
5343 // alignment of the baseOffset is required
5344 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
5345 noway_assert(elemSize == TARGET_POINTER_SIZE);
5346 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
5347 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
5348 for (unsigned inx = 0; (inx < elemCount); inx++)
5350 // The GC information must match what we setup using 'objClass'
5351 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
5354 else // this varDsc contains no GC pointers
5356 for (unsigned inx = 0; inx < elemCount; inx++)
5358 // The GC information must match what we setup using 'objClass'
5359 noway_assert(!varTypeIsGC(type[inx]));
5364 // We create a list of GT_LCL_FLD nodes to pass this struct
5366 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
5368 // Create a new tree for 'arg'
5369 // replace the existing LDOBJ(ADDR(LCLVAR))
5370 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
5372 unsigned offset = baseOffset;
5373 GenTreeFieldList* listEntry = nullptr;
5374 for (unsigned inx = 0; inx < elemCount; inx++)
5376 elemSize = genTypeSize(type[inx]);
5377 GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
5378 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
5379 if (newArg == nullptr)
5386 // Are we passing a GT_OBJ struct?
5388 else if (argValue->OperGet() == GT_OBJ)
5390 GenTreeObj* argObj = argValue->AsObj();
5391 GenTree* baseAddr = argObj->gtOp1;
5392 var_types addrType = baseAddr->TypeGet();
5394 if (baseAddr->OperGet() == GT_ADDR)
5396 GenTree* addrTaken = baseAddr->gtOp.gtOp1;
5397 if (addrTaken->IsLocal())
5399 GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon();
5400 unsigned varNum = varNode->gtLclNum;
5401 // We access a non-struct type (for example, long) as a struct type.
5402 // Make sure lclVar lives on stack to make sure its fields are accessible by address.
5403 lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
5407 // Create a new tree for 'arg'
5408 // replace the existing LDOBJ(EXPR)
5409 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
5412 unsigned offset = 0;
5413 GenTreeFieldList* listEntry = nullptr;
5414 for (unsigned inx = 0; inx < elemCount; inx++)
5416 elemSize = genTypeSize(type[inx]);
5417 GenTree* curAddr = baseAddr;
5420 GenTree* baseAddrDup = gtCloneExpr(baseAddr);
5421 noway_assert(baseAddrDup != nullptr);
5422 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
5428 GenTree* curItem = gtNewIndir(type[inx], curAddr);
5430 // For safety all GT_IND nodes should have at least GTF_GLOB_REF set.
5431 curItem->gtFlags |= GTF_GLOB_REF;
5433 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
5434 if (newArg == nullptr)
5444 // If we reach here we should have set newArg to something
5445 if (newArg == nullptr)
5447 gtDispTree(argValue);
5448 assert(!"Missing case in fgMorphMultiregStructArg");
5452 noway_assert(newArg != nullptr);
5453 noway_assert(newArg->OperIsFieldList());
5455 // We need to propagate any GTF_ALL_EFFECT flags from the end of the list back to the beginning.
5456 // This is verified in fgDebugCheckFlags().
5458 ArrayStack<GenTree*> stack(this);
5460 for (tree = newArg; (tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsFieldList(); tree = tree->gtGetOp2())
5465 unsigned propFlags = (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
5466 tree->gtFlags |= propFlags;
5468 while (stack.Height() > 0)
5471 propFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
5472 propFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
5473 tree->gtFlags |= propFlags;
5479 printf("fgMorphMultiregStructArg created tree:\n");
5484 arg = newArg; // consider calling fgMorphTree(newArg);
5486 #endif // FEATURE_MULTIREG_ARGS
5491 // Make a copy of a struct variable if necessary, to pass to a callee.
5492 // The resulting tree that computes the outgoing arg is stored back into args->gtOp.gtOp1.
5493 void Compiler::fgMakeOutgoingStructArgCopy(
5497 CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
5498 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
5500 GenTree* argx = args->Current();
5501 noway_assert(argx->gtOper != GT_MKREFANY);
5502 // See if we need to insert a copy at all
5503 // Case 1: we don't need a copy if it is the last use of a local. We can't determine that all of the time,
5504 // but if there is only one use and no loops, the use must be last.
5505 GenTreeLclVarCommon* lcl = nullptr;
5506 if (argx->OperIsLocal())
5508 lcl = argx->AsLclVarCommon();
5510 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5512 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5516 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5517 if (lvaIsImplicitByRefLocal(varNum))
5519 LclVarDsc* varDsc = &lvaTable[varNum];
5520 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
5521 // on the caller's frame. If an argument lives on the caller caller's frame, it may get
5522 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5523 // struct parameters if they are passed as arguments to a tail call.
5524 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
5526 varDsc->lvRefCnt = 0;
5527 args->gtOp.gtOp1 = lcl;
5528 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
5531 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5537 if (fgOutgoingArgTemps == nullptr)
5539 fgOutgoingArgTemps = hashBv::Create(this);
5545 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5546 // We do not reuse within a statement.
5547 if (!opts.MinOpts())
5550 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5552 LclVarDsc* varDsc = &lvaTable[lclNum];
5553 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5554 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5556 tmp = (unsigned)lclNum;
5558 JITDUMP("reusing outgoing struct arg");
5565 // Create the CopyBlk tree and insert it.
5569 // Here we don't need an unsafe value cls check, since the addr of this temp is used only in copyblk.
5570 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5571 lvaSetStruct(tmp, copyBlkClass, false);
5572 fgOutgoingArgTemps->setBit(tmp);
5575 fgCurrentlyInUseArgTemps->setBit(tmp);
5577 // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
5578 // allocated on the stack and their address to be passed.
5579 if (lclVarIsSIMDType(tmp))
5581 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5584 // Create a reference to the temp
5585 GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5586 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5588 // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
5589 // to ref counting of the lclVars.
5590 lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
5592 if (argx->gtOper == GT_OBJ)
5594 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5595 argx->SetIndirExceptionFlags(this);
5599 argx->gtFlags |= GTF_DONT_CSE;
5602 // Copy the valuetype to the temp
5603 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5604 GenTree* copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5605 copyBlk = fgMorphCopyBlock(copyBlk);
5607 #if FEATURE_FIXED_OUT_ARGS
5609 // Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
5610 // On Unix, create a LCL_FLD for structs passed in more than one register; see fgMakeTmpArgNode.
5611 GenTree* arg = copyBlk;
5613 #else // FEATURE_FIXED_OUT_ARGS
5615 // Structs are always on the stack, and thus never need temps,
5616 // so we have to put the copy and temp all into one expression.
5617 GenTree* arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
5619 // Change the expression to "(tmp=val),tmp"
5620 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5622 #endif // FEATURE_FIXED_OUT_ARGS
5624 args->gtOp.gtOp1 = arg;
5625 call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
5631 // See declaration for specification comment.
5632 void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5633 unsigned firstArgRegNum,
5634 regMaskTP* pArgSkippedRegMask)
5636 assert(varDsc->lvPromoted);
5637 // There's no way to do these calculations without breaking abstraction and assuming that
5638 // integer register arguments are consecutive ints. They are on ARM.
5640 // To start, figure out what register contains the last byte of the first argument.
5641 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5642 unsigned lastFldRegOfLastByte =
5643 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5646 // Now we're keeping track of the register that the last field ended in; see what registers
5647 // subsequent fields start in, and whether any are skipped.
5648 // (We assume here the invariant that the fields are sorted in offset order.)
5649 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5651 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5652 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5653 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5654 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
5655 // This loop should enumerate the offsets of any registers skipped.
5656 // Find which reg contains the last byte,
5657 // and start at the first register after that; if that isn't the first reg of the current field, registers were skipped.
5658 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5659 skippedRegOffsets++)
5661 // If the register number would not be an arg reg, we're done.
5662 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5664 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5666 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
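// Worked example (hypothetical promoted struct, firstArgRegNum == 0):
// an int field at offset 0 ends in r0, and the next field at offset 8
// starts in r2, so the inner loop above adds r1 to *pArgSkippedRegMask.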
5670 #endif // _TARGET_ARM_
5672 //****************************************************************************
5673 // fgFixupStructReturn:
5674 // The companion to impFixupCallStructReturn. Now that the importer is done,
5675 // change the gtType to the precomputed native return type.
5676 // Requires that callNode currently has a struct type.
5678 void Compiler::fgFixupStructReturn(GenTree* callNode)
5680 assert(varTypeIsStruct(callNode));
5682 GenTreeCall* call = callNode->AsCall();
5683 bool callHasRetBuffArg = call->HasRetBufArg();
5684 bool isHelperCall = call->IsHelperCall();
5686 // Decide on the proper return type for this call that currently returns a struct
5688 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5689 Compiler::structPassingKind howToReturnStruct;
5690 var_types returnType;
5692 // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
5693 // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
5695 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5696 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5697 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5701 assert(!callHasRetBuffArg);
5702 assert(retClsHnd == NO_CLASS_HANDLE);
5704 // Now that we are past the importer, re-type this node
5705 howToReturnStruct = SPK_PrimitiveType;
5706 returnType = (var_types)call->gtReturnType;
5710 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5713 if (howToReturnStruct == SPK_ByReference)
5715 assert(returnType == TYP_UNKNOWN);
5716 assert(callHasRetBuffArg);
5720 assert(returnType != TYP_UNKNOWN);
5722 if (!varTypeIsStruct(returnType))
5724 // Widen the primitive type if necessary
5725 returnType = genActualType(returnType);
5727 call->gtType = returnType;
5730 #if FEATURE_MULTIREG_RET
5731 // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
5732 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5733 #else // !FEATURE_MULTIREG_RET
5734 // No more struct returns
5735 assert(call->TypeGet() != TYP_STRUCT);
5738 #if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5739 // If it was a struct return, it has been transformed into a call
5740 // with a return buffer (that returns TYP_VOID) or into a return
5741 // of a primitive/enregisterable type
5742 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5746 /*****************************************************************************
5748 * A little helper used to rearrange nested commutative operations. The
5749 * effect is that nested associative, commutative operations are transformed
5750 * into a 'left-deep' tree, i.e. into something like this:
5752 * (((a op b) op c) op d) op...
5757 void Compiler::fgMoveOpsLeft(GenTree* tree)
5765 op1 = tree->gtOp.gtOp1;
5766 op2 = tree->gtOp.gtOp2;
5767 oper = tree->OperGet();
5769 noway_assert(GenTree::OperIsCommutative(oper));
5770 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5771 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5772 noway_assert(oper == op2->gtOper);
5774 // Commutativity doesn't hold if overflow checks are needed
5776 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5781 if (gtIsActiveCSE_Candidate(op2))
5783 // If we have marked op2 as a CSE candidate,
5784 // we can't perform a commutative reordering
5785 // because any value numbers that we computed for op2
5786 // will be incorrect after performing a commutative reordering
5791 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5796 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5797 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5802 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5804 // We could deal with this, but we were always broken and just hit the assert
5805 // below regarding flags, which means it's not frequent, so we will just bail out.
5810 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5812 GenTree* ad1 = op2->gtOp.gtOp1;
5813 GenTree* ad2 = op2->gtOp.gtOp2;
5815 // Compiler::optOptimizeBools() can create GT_OR of two GC pointers yielding a GT_INT.
5816 // We cannot reorder such GT_OR trees.
5818 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5823 #if FEATURE_PREVENT_BAD_BYREFS
5825 // Don't split up a byref calculation and create a new byref. E.g.,
5826 // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int).
5827 // Doing this transformation could create a situation where the first
5828 // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that
5829 // no longer points within the ref object. If a GC happens, the byref won't
5830 // get updated. This can happen, for instance, if one of the int components
5831 // is negative. It also requires the address generation to be in a fully-interruptible code region.
5834 if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL)
5836 assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD));
5840 #endif // FEATURE_PREVENT_BAD_BYREFS
5842 /* Change "(x op (y op z))" to "(x op y) op z" */
5843 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5845 GenTree* new_op1 = op2;
5847 new_op1->gtOp.gtOp1 = op1;
5848 new_op1->gtOp.gtOp2 = ad1;
5850 /* Change the flags. */
5852 // Make sure we aren't throwing away any flags
5853 noway_assert((new_op1->gtFlags &
5854 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5855 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5856 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5859 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5860 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5862 /* Retype new_op1 if it has become (or is no longer) a GC ptr. */
5864 if (varTypeIsGC(op1->TypeGet()))
5866 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5867 oper == GT_ADD) || // byref(ref + (int+int))
5868 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5869 oper == GT_OR)); // int(gcref | int(gcref|intval))
5871 new_op1->gtType = tree->gtType;
5873 else if (varTypeIsGC(ad2->TypeGet()))
5875 // Neither ad1 nor op1 is GC, so new_op1 isn't either.
5876 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5877 new_op1->gtType = TYP_I_IMPL;
// If new_op1 is a new expression, assign it a new unique value number.
5881 // vnStore is null before the ValueNumber phase has run
5882 if (vnStore != nullptr)
5884 // We can only keep the old value number on new_op1 if both op1 and ad2
5885 // have the same non-NoVN value numbers. Since op is commutative, comparing
5886 // only ad2 and op1 is enough.
5887 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5888 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5889 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5891 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5895 tree->gtOp.gtOp1 = new_op1;
5896 tree->gtOp.gtOp2 = ad2;
5898 /* If 'new_op1' is now the same nested op, process it recursively */
5900 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5902 fgMoveOpsLeft(new_op1);
5905 /* If 'ad2' is now the same nested op, process it
5906 * Instead of recursion, we set up op1 and op2 for the next loop.
5911 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5918 /*****************************************************************************/
5920 void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay)
5922 if (tree->OperIsBoundsCheck())
5924 GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk();
5925 BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay, &boundsChk->gtStkDepth);
5926 if (failBlock != nullptr)
5928 boundsChk->gtIndRngFailBB = gtNewCodeRef(failBlock);
5931 else if (tree->OperIs(GT_INDEX_ADDR))
5933 GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
5934 BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay, &indexAddr->gtStkDepth);
5935 if (failBlock != nullptr)
5937 indexAddr->gtIndRngFailBB = gtNewCodeRef(failBlock);
5942 noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX));
5943 fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay, nullptr);
5947 BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay, unsigned* stkDepth)
5953 #if !FEATURE_FIXED_OUT_ARGS
5954 // we need to initialize this field
5955 if (fgGlobalMorph && (stkDepth != nullptr))
5957 *stkDepth = fgPtrArgCntCur;
5959 #endif // !FEATURE_FIXED_OUT_ARGS
5962 if (!opts.compDbgCode)
5964 if (delay || compIsForInlining())
5966 #if !FEATURE_FIXED_OUT_ARGS
5967 // We delay this until after loop-oriented range check analysis. For now we merely store the current stack
5968 // level in the tree node.
5969 if (stkDepth != nullptr)
5971 *stkDepth = fgPtrArgCntCur;
5973 #endif // !FEATURE_FIXED_OUT_ARGS
5977 #if !FEATURE_FIXED_OUT_ARGS
5978 // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
5979 noway_assert(fgGlobalMorph || (stkDepth != nullptr));
5980 const unsigned theStkDepth = fgGlobalMorph ? fgPtrArgCntCur : *stkDepth;
#else // FEATURE_FIXED_OUT_ARGS
// only x86 pushes args
const unsigned theStkDepth = 0;
#endif // FEATURE_FIXED_OUT_ARGS
5986 // Create/find the appropriate "range-fail" label
5987 return fgRngChkTarget(compCurBB, theStkDepth, kind);
5994 /*****************************************************************************
5996 * Expand a GT_INDEX node and fully morph the child operands
* The original GT_INDEX node is bashed into the GT_IND node that accesses
5999 * the array element. We expand the GT_INDEX node into a larger tree that
6000 * evaluates the array base and index. The simplest expansion is a GT_COMMA
6001 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
6002 * For complex array or index expressions one or more GT_COMMA assignments
6003 * are inserted so that we only evaluate the array or index expressions once.
6005 * The fully expanded tree is then morphed. This causes gtFoldExpr to
* perform local constant prop and reorder the constants in the tree and
* fold them.
6009 * We then parse the resulting array element expression in order to locate
6010 * and label the constants and variables that occur in the tree.
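*
* As a rough sketch (the exact shape varies by target and flags), a simple
* "a[i]" expands to something like:
*
*     COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
*           IND(ADD(ADD(a, elemOffs), MUL(i, elemSize))))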
6013 const int MAX_ARR_COMPLEXITY = 4;
6014 const int MAX_INDEX_COMPLEXITY = 4;
6016 GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
6018 noway_assert(tree->gtOper == GT_INDEX);
6019 GenTreeIndex* asIndex = tree->AsIndex();
6021 var_types elemTyp = tree->TypeGet();
6022 unsigned elemSize = tree->gtIndex.gtIndElemSize;
6023 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
6025 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
6028 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= maxSIMDStructBytes())
6030 // If this is a SIMD type, this is the point at which we lose the type information,
6031 // so we need to set the correct type on the GT_IND.
6032 // (We don't care about the base type here, so we only check, but don't retain, the return value).
6033 unsigned simdElemSize = 0;
6034 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
6036 assert(simdElemSize == elemSize);
6037 elemTyp = getSIMDTypeForSize(elemSize);
6038 // This is the new type of the node.
6039 tree->gtType = elemTyp;
6040 // Now set elemStructType to null so that we don't confuse value numbering.
6041 elemStructType = nullptr;
6044 #endif // FEATURE_SIMD
// Set up the array length's offset into lenOffs
// and the first element's offset into elemOffs
ssize_t lenOffs;
ssize_t elemOffs;
6050 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
6052 lenOffs = offsetof(CORINFO_String, stringLen);
6053 elemOffs = offsetof(CORINFO_String, chars);
6054 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
6056 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
6058 lenOffs = offsetof(CORINFO_RefArray, length);
6059 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
6061 else // We have a standard array
6063 lenOffs = offsetof(CORINFO_Array, length);
6064 elemOffs = offsetof(CORINFO_Array, u1Elems);
6067 #ifndef LEGACY_BACKEND
6068 // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts
6069 // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
6070 // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
// performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
// minopts).
6074 // When we *are* optimizing, we fully expand GT_INDEX to:
// 1. Evaluate the array address expression and store the result in a temp if the expression is complex or
//    side-effecting.
// 2. Evaluate the array index expression and store the result in a temp if the expression is complex or
//    side-effecting.
6079 // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array))
6080 // 4. Compute the address of the element that will be accessed:
6081 // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize))
6082 // 5. Dereference the address with a GT_IND.
6084 // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows
6085 // for more straightforward bounds-check removal, CSE, etc.
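//
// As a rough sketch, for "a[i]" the minopts form is IND(INDEX_ADDR(a, i)), while the
// optimizing expansion follows the five steps above, i.e. approximately
// COMMA(ARR_BOUNDS_CHK(i, ARR_LENGTH(a)), IND(ADD(ADD(a, elemOffs), MUL(i, elemSize)))).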
6088 GenTree* const array = fgMorphTree(asIndex->Arr());
6089 GenTree* const index = fgMorphTree(asIndex->Index());
6091 GenTreeIndexAddr* const indexAddr =
6092 new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize,
6093 static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs));
6094 indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT;
6096 // Mark the indirection node as needing a range check if necessary.
6097 // Note this will always be true unless JitSkipArrayBoundCheck() is used
6098 if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0)
6100 fgSetRngChkTarget(indexAddr);
6103 // Change `tree` into an indirection and return.
6104 tree->ChangeOper(GT_IND);
6105 GenTreeIndir* const indir = tree->AsIndir();
6106 indir->Addr() = indexAddr;
6107 indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT);
6110 indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
6115 #endif // LEGACY_BACKEND
6117 GenTree* arrRef = asIndex->Arr();
6118 GenTree* index = asIndex->Index();
6120 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
6121 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
6123 GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
6124 GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
6125 GenTree* bndsChk = nullptr;
6127 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
6130 GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
6131 GenTree* index2 = nullptr;
6133 // If the arrRef expression involves an assignment, a call or reads from global memory,
6134 // then we *must* allocate a temporary in which to "localize" those values,
// to ensure that the same values are used in the bounds check and the actual
// dereference.
6137 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
6138 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
6139 // complexity is not exposed. (Without that condition there are cases of local struct
6140 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
6141 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
6143 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
6144 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
6146 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
6147 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
6148 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
6149 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
6153 arrRef2 = gtCloneExpr(arrRef);
6154 noway_assert(arrRef2 != nullptr);
6157 // If the index expression involves an assignment, a call or reads from global memory,
6158 // we *must* allocate a temporary in which to "localize" those values,
// to ensure that the same values are used in the bounds check and the actual
// dereference.
6161 // Also we allocate the temporary when the index is sufficiently complex/expensive.
6163 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
(index->OperGet() == GT_FIELD))
6166 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
6167 indexDefn = gtNewTempAssign(indexTmpNum, index);
6168 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
6169 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
6173 index2 = gtCloneExpr(index);
6174 noway_assert(index2 != nullptr);
6177 // Next introduce a GT_ARR_BOUNDS_CHECK node
6178 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
6180 #ifdef _TARGET_64BIT_
6181 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
6182 // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
// the comparison will have to be widened to 64 bits.
6184 if (index->TypeGet() == TYP_I_IMPL)
6186 bndsChkType = TYP_I_IMPL;
6188 #endif // _TARGET_64BIT_
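// E.g. if 'index' is TYP_I_IMPL on a 64-bit target, the bounds check below becomes
// (roughly) ARR_BOUNDS_CHECK(index, CAST<long>(ARR_LENGTH(arrRef))), with the length
// widened to match the 64-bit index.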
6190 GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs);
6192 if (bndsChkType != TYP_INT)
6194 arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType);
6197 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
6198 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
6200 bndsChk = arrBndsChk;
6202 // Make sure to increment ref-counts if already ref-counted.
6203 if (lvaLocalVarRefCounted)
6205 lvaRecursiveIncRefCounts(index);
6206 lvaRecursiveIncRefCounts(arrRef);
// Now we'll switch to using the second copies for arrRef and index
// to compute the address expression
arrRef = arrRef2;
index  = index2;
// Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
GenTree* addr;
6220 #ifdef _TARGET_64BIT_
6221 // Widen 'index' on 64-bit targets
6222 if (index->TypeGet() != TYP_I_IMPL)
6224 if (index->OperGet() == GT_CNS_INT)
6226 index->gtType = TYP_I_IMPL;
6230 index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL);
6233 #endif // _TARGET_64BIT_
6235 /* Scale the index value if necessary */
6238 GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL);
6240 // Fix 392756 WP7 Crossgen
6242 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
6243 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
6244 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
6246 size->gtFlags |= GTF_DONT_CSE;
6248 /* Multiply by the array element size */
6249 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
6256 #if FEATURE_PREVENT_BAD_BYREFS
6258 // Be careful to only create the byref pointer when the full index expression is added to the array reference.
6259 // We don't want to create a partial byref address expression that doesn't include the full index offset:
6260 // a byref must point within the containing object. It is dangerous (especially when optimizations come into
6261 // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that
6262 // the partial byref will not point within the object, and thus not get updated correctly during a GC.
6263 // This is mostly a risk in fully-interruptible code regions.
6265 /* Add the first element's offset */
6267 GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
6269 addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
6271 /* Add the object ref to the element's offset */
6273 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
6275 #else // !FEATURE_PREVENT_BAD_BYREFS
6277 /* Add the object ref to the element's offset */
6279 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
6281 /* Add the first element's offset */
6283 GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
6285 addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
6287 #endif // !FEATURE_PREVENT_BAD_BYREFS
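// To illustrate the two shapes above (a sketch): with FEATURE_PREVENT_BAD_BYREFS the address is
//     ADD(byref: arrRef, int: ADD(MUL(index, elemSize), elemOffs))
// so the byref is created in a single step, whereas the alternative form
//     ADD(byref: ADD(byref: arrRef, MUL(index, elemSize)), elemOffs)
// creates an intermediate byref that may not point within the array object.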
6289 #if SMALL_TREE_NODES
6290 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
// Change the original GT_INDEX node into a GT_IND node
6294 tree->SetOper(GT_IND);
6296 // If the index node is a floating-point type, notify the compiler
6297 // we'll potentially use floating point registers at the time of codegen.
6298 if (varTypeIsFloating(tree->gtType))
6300 this->compFloatingPointUsed = true;
6303 // We've now consumed the GTF_INX_RNGCHK, and the node
6304 // is no longer a GT_INDEX node.
6305 tree->gtFlags &= ~GTF_INX_RNGCHK;
6307 tree->gtOp.gtOp1 = addr;
6309 // This is an array index expression.
6310 tree->gtFlags |= GTF_IND_ARR_INDEX;
6312 /* An indirection will cause a GPF if the address is null */
6313 tree->gtFlags |= GTF_EXCEPT;
6317 tree->gtFlags |= GTF_DONT_CSE;
6320 // Store information about it.
6321 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
6323 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
6325 GenTree* indTree = tree;
6327 // Did we create a bndsChk tree?
6330 // Use a GT_COMMA node to prepend the array bound check
6332 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
6334 /* Mark the indirection node as needing a range check */
6335 fgSetRngChkTarget(bndsChk);
6338 if (indexDefn != nullptr)
6340 // Use a GT_COMMA node to prepend the index assignment
6342 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
6344 if (arrRefDefn != nullptr)
// Use a GT_COMMA node to prepend the arrRef assignment
6348 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
6351 // Currently we morph the tree to perform some folding operations prior
// to attaching fieldSeq info and labeling constant array index contributions
tree = fgMorphTree(tree);
6356 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
6357 // constant array index contributions, but the morphing operation may have changed
6358 // the 'tree' into something that now unconditionally throws an exception.
6360 // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified
6361 // or it could be left unchanged. If it is unchanged then we should not return,
6362 // instead we should proceed to attaching fieldSeq info, etc...
6364 GenTree* arrElem = tree->gtEffectiveVal();
6366 if (fgIsCommaThrow(tree))
6368 if ((arrElem != indTree) || // A new tree node may have been created
6369 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
6371 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
6375 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
6377 addr = arrElem->gtOp.gtOp1;
6379 assert(addr->TypeGet() == TYP_BYREF);
6381 GenTree* cnsOff = nullptr;
6382 if (addr->OperGet() == GT_ADD)
6385 #if FEATURE_PREVENT_BAD_BYREFS
6387 assert(addr->TypeGet() == TYP_BYREF);
6388 assert(addr->gtOp.gtOp1->TypeGet() == TYP_REF);
6390 addr = addr->gtOp.gtOp2;
6392 // Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
if (addr->gtOper == GT_CNS_INT)
{
    cnsOff = addr;
    addr   = nullptr;
}
else
6401 if ((addr->OperGet() == GT_ADD) && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT))
6403 cnsOff = addr->gtOp.gtOp2;
6404 addr = addr->gtOp.gtOp1;
6407 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6408 addr->LabelIndex(this);
6411 #else // !FEATURE_PREVENT_BAD_BYREFS
6413 if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
6415 cnsOff = addr->gtOp.gtOp2;
6416 addr = addr->gtOp.gtOp1;
6419 while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
6421 assert(addr->TypeGet() == TYP_BYREF);
6422 GenTree* index = addr->gtOp.gtOp2;
6424 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
6425 index->LabelIndex(this);
6427 addr = addr->gtOp.gtOp1;
6429 assert(addr->TypeGet() == TYP_REF);
6431 #endif // !FEATURE_PREVENT_BAD_BYREFS
else if (addr->OperGet() == GT_CNS_INT)
{
    cnsOff = addr;
}
6438 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
6440 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
6442 // Assign it the [#FirstElem] field sequence
6444 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
6446 else // We have folded the first element's offset with the index expression
6448 // Build the [#ConstantIndex, #FirstElem] field sequence
6450 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
6451 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
6453 if (cnsOff == nullptr) // It must have folded into a zero offset
6455 // Record in the general zero-offset map.
6456 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6460 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
6468 /*****************************************************************************
6470 * Wrap fixed stack arguments for varargs functions to go through varargs
6471 * cookie to access them, except for the cookie itself.
6473 * Non-x86 platforms are allowed to access all arguments directly
6474 * so we don't need this code.
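*
* A sketch of the resulting tree for a scalar fixed stack argument:
*
*     IND(varType, SUB(LCL_VAR<lvaVarargsBaseOfStkArgs>, argOffset))
*
* where argOffset is derived from the local's stack offset as computed below.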
6477 GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
6479 /* For the fixed stack arguments of a varargs function, we need to go
through the varargs cookie to access them, except for the
cookie itself */
6483 LclVarDsc* varDsc = &lvaTable[lclNum];
6485 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
6487 // Create a node representing the local pointing to the base of the args
GenTree* ptrArg =
    gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
                  gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES +
                                lclOffs));
6493 // Access the argument through the local
6495 if (varTypeIsStruct(varType))
6497 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
6501 tree = gtNewOperNode(GT_IND, varType, ptrArg);
6503 tree->gtFlags |= GTF_IND_TGTANYWHERE;
6505 if (varDsc->lvAddrExposed)
6507 tree->gtFlags |= GTF_GLOB_REF;
6510 return fgMorphTree(tree);
6517 /*****************************************************************************
6519 * Transform the given GT_LCL_VAR tree for code generation.
6522 GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph)
6524 assert(tree->gtOper == GT_LCL_VAR);
6526 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
6527 var_types varType = lvaGetRealType(lclNum);
6528 LclVarDsc* varDsc = &lvaTable[lclNum];
6530 if (varDsc->lvAddrExposed)
6532 tree->gtFlags |= GTF_GLOB_REF;
6536 if (info.compIsVarArgs)
6538 GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6539 if (newTree != nullptr)
6541 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6543 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6548 #endif // _TARGET_X86_
6550 /* If not during the global morphing phase bail */
6552 if (!fgGlobalMorph && !forceRemorph)
6557 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6559 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6561 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6563 #if LOCAL_ASSERTION_PROP
6564 /* Assertion prop can tell us to omit adding a cast here */
6565 if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
/* Small-typed arguments and aliased locals are normalized on load.
   Other small-typed locals are normalized on store.
   Locals are also normalized on load under the debugger, since the
   debugger could write to the variable directly.
   If this is one of the former, insert a narrowing cast on the load.
   ie. Convert: var-short --> cast-short(var-int) */
6576 tree->gtType = TYP_INT;
6577 fgMorphTreeDone(tree);
6578 tree = gtNewCastNode(TYP_INT, tree, false, varType);
6579 fgMorphTreeDone(tree);
6586 /*****************************************************************************
6587 Grab a temp for big offset morphing.
This method grabs a new temp if no temp of this "type" has been created yet;
otherwise it returns the cached one.
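For example, the first request for a TYP_BYREF temp grabs a fresh local and caches
its number in fgBigOffsetMorphingTemps[TYP_BYREF]; later TYP_BYREF requests reuse
that same local.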
6591 unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6593 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6595 if (lclNum == BAD_VAR_NUM)
6597 // We haven't created a temp for this kind of type. Create one now.
6598 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6599 fgBigOffsetMorphingTemps[type] = lclNum;
6603 // We better get the right type.
6604 noway_assert(lvaTable[lclNum].TypeGet() == type);
6607 noway_assert(lclNum != BAD_VAR_NUM);
6611 /*****************************************************************************
6613 * Transform the given GT_FIELD tree for code generation.
6616 GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
6618 assert(tree->gtOper == GT_FIELD);
6620 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6621 unsigned fldOffset = tree->gtField.gtFldOffset;
6622 GenTree* objRef = tree->gtField.gtFldObj;
6623 bool fieldMayOverlap = false;
6624 bool objIsLocal = false;
6626 if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6628 // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
// If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6630 // simd field rewrites are sensitive to.
6631 fgMorphImplicitByRefArgs(objRef);
6634 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6635 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6637 if (tree->gtField.gtFldMayOverlap)
6639 fieldMayOverlap = true;
6640 // Reset the flag because we may reuse the node.
6641 tree->gtField.gtFldMayOverlap = false;
// If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6648 GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6649 if (newTree != tree)
6651 newTree = fgMorphSmpOp(newTree);
6655 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6657 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6660 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6665 /* Is this an instance data member? */
6670 objIsLocal = objRef->IsLocal();
6672 if (tree->gtFlags & GTF_IND_TLS_REF)
NO_WAY("instance field cannot be a TLS ref.");
6677 /* We'll create the expression "*(objRef + mem_offs)" */
6679 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6681 // An optimization for Contextful classes:
6682 // we unwrap the proxy when we have a 'this reference'
6683 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6685 objRef = fgUnwrapProxy(objRef);
Now we have a tree like this:

    tree:   GT_FIELD
              |
            tree->gtField.gtFldObj

We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

    tree:   GT_IND/GT_OBJ
              |
    addr:   GT_ADD
             /  \
        objRef   fldOffset (when fldOffset != 0)

or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

    tree:   GT_IND/GT_OBJ
              |
    comma2: GT_COMMA
             /           \
    comma:  GT_COMMA      addr: "+" (i.e. GT_ADD)
             /    \              /     \
    asg: GT_ASG   ind: GT_IND  tmpLcl   fldOffset
          /  \          |
     tmpLcl  objRef   tmpLcl
6756 var_types objRefType = objRef->TypeGet();
6758 GenTree* comma = nullptr;
6760 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6761 // and thus is equivalent to a MACK_Ind with zero offset.
MorphAddrContext defMAC(MACK_Ind);
if (mac == nullptr)
{
    mac = &defMAC;
}
6768 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6769 // This means that we insert an explicit null check whenever we create byref by adding a
6770 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6771 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6772 // small offsets); in this plan, we would transfer some null-checking responsibility to
// callees of methods taking byref parameters. They would have to add explicit null checks
6774 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6775 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6776 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6777 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6778 // This is left here to point out how to implement it.
6779 CLANG_FORMAT_COMMENT_ANCHOR;
6781 #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
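// For example (a sketch): taking the address of obj.field with a large field offset creates
// the byref "obj + fldOffset" without immediately dereferencing it, so under the conservative
// scheme an explicit null check of obj is inserted first.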
6783 bool addExplicitNullCheck = false;
6785 // Implicit byref locals are never null.
6786 if (!((objRef->gtOper == GT_LCL_VAR) && lvaIsImplicitByRefLocal(objRef->gtLclVarCommon.gtLclNum)))
6788 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6789 // whose address is being taken is either a local or static variable, whose address is necessarily
6790 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6791 if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind))
6793 if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset))
6795 addExplicitNullCheck = true;
6799 // In R2R mode the field offset for some fields may change when the code
6800 // is loaded. So we can't rely on a zero offset here to suppress the null check.
6802 // See GitHub issue #16454.
6803 bool fieldHasChangeableOffset = false;
6805 #ifdef FEATURE_READYTORUN_COMPILER
6806 fieldHasChangeableOffset = (tree->gtField.gtFieldLookup.addr != nullptr);
6809 #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6810 addExplicitNullCheck = (mac->m_kind == MACK_Addr) &&
6811 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset);
#else
addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
                        ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset));
#endif
6820 if (addExplicitNullCheck)
6825 printf("Before explicit null check morphing:\n");
6831 // Create the "comma" subtree
6833 GenTree* asg = nullptr;
6838 if (objRef->gtOper != GT_LCL_VAR)
6840 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6842 // Create the "asg" node
6843 asg = gtNewTempAssign(lclNum, objRef);
6847 lclNum = objRef->gtLclVarCommon.gtLclNum;
6850 // Create the "nullchk" node.
// Make it TYP_BYTE so we only dereference it for 1 byte.
6852 GenTree* lclVar = gtNewLclvNode(lclNum, objRefType);
6853 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6855 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6857 // An indirection will cause a GPF if the address is null.
6858 nullchk->gtFlags |= GTF_EXCEPT;
6860 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6861 optMethodFlags |= OMF_HAS_NULLCHECK;
6865 // Create the "comma" node.
6866 comma = gtNewOperNode(GT_COMMA,
6867 TYP_VOID, // We don't want to return anything from this "comma" node.
6868 // Set the type to TYP_VOID, so we can select "cmp" instruction
// instead of "mov" instruction later on.
asg, nullchk);
6877 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6879 else if (fldOffset == 0)
6881 // Generate the "addr" node.
6883 FieldSeqNode* fieldSeq =
6884 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6885 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6892 #ifdef FEATURE_READYTORUN_COMPILER
6893 if (tree->gtField.gtFieldLookup.addr != nullptr)
6895 GenTree* offsetNode = nullptr;
6896 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6898 offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->gtField.gtFieldLookup.addr,
6899 GTF_ICON_FIELD_HDL, false);
6903 noway_assert(!"unexpected accessType for R2R field access");
6906 var_types addType = (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF;
6907 addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode);
6912 // Generate the "addr" node.
6913 /* Add the member offset to the object's address */
6914 FieldSeqNode* fieldSeq =
6915 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6916 addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6917 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6920 // Now let's set the "tree" as a GT_IND tree.
6922 tree->SetOper(GT_IND);
6923 tree->gtOp.gtOp1 = addr;
6925 tree->gtFlags &= (~GTF_EXCEPT | addr->gtFlags);
6926 tree->SetIndirExceptionFlags(this);
6928 if (addExplicitNullCheck)
6931 // Create "comma2" node and link it to "tree".
6934 comma2 = gtNewOperNode(GT_COMMA,
addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
comma, addr);
6937 tree->gtOp.gtOp1 = comma2;
6943 if (addExplicitNullCheck)
6945 printf("After adding explicit null check:\n");
6951 else /* This is a static data member */
6953 if (tree->gtFlags & GTF_IND_TLS_REF)
6955 // Thread Local Storage static field reference
6957 // Field ref is a TLS 'Thread-Local-Storage' reference
// Build this tree (shown as a nested sketch; # denotes the original node):
//
//   IND(*) #
//     ADD(I_IMPL)
//       IND(I_IMPL) == [Base of this DLL's TLS]
//         ADD(I_IMPL)
//           IND(I_IMPL)
//             CNS(TLS_HDL, 0x2C)
//           CNS(IdValue*4)  or  MUL(IND(CNS(pIdAddr)), CNS(4))
//       CNS(fldOffset)
6981 void** pIdAddr = nullptr;
6982 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
// If we can access the TLS DLL index ID value directly,
6986 // then pIdAddr will be NULL and
6987 // IdValue will be the actual TLS DLL index ID
6989 GenTree* dllRef = nullptr;
6990 if (pIdAddr == nullptr)
6994 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6999 dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_STATIC_HDL, true);
7001 // Next we multiply by 4
7002 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
7005 #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
7007 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
7009 GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
7011 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
7012 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
7014 tree->gtFlags &= ~GTF_FLD_INITCLASS;
7015 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
7018 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
7020 if (dllRef != nullptr)
7022 /* Add the dllRef */
7023 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
7026 /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
7027 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
7031 FieldSeqNode* fieldSeq =
7032 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7033 GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
7035 /* Add the TLS static field offset to the address */
7037 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
7040 // Final indirect to get to actual value of TLS static field
7042 tree->SetOper(GT_IND);
7043 tree->gtOp.gtOp1 = tlsRef;
7045 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
7049 // Normal static field reference
// If we can access the static's address directly,
7053 // then pFldAddr will be NULL and
7054 // fldAddr will be the actual address of the static field
7056 void** pFldAddr = nullptr;
7057 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
7059 if (pFldAddr == nullptr)
7061 #ifdef _TARGET_64BIT_
7062 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
// The address is not directly addressable, so force it into a
// constant so that we handle it properly.
7067 GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
7068 addr->gtType = TYP_I_IMPL;
7069 FieldSeqNode* fieldSeq =
7070 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7071 addr->gtIntCon.gtFieldSeq = fieldSeq;
7072 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
7073 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
7075 tree->gtFlags &= ~GTF_FLD_INITCLASS;
7076 addr->gtFlags |= GTF_ICON_INITCLASS;
7079 tree->SetOper(GT_IND);
7080 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
7081 // We must clear it when we transform the node.
7082 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
7083 // that the logic above does its own checking to determine whether a nullcheck is needed.
7084 tree->gtFlags &= ~GTF_IND_ARR_LEN;
7085 tree->gtOp.gtOp1 = addr;
7087 return fgMorphSmpOp(tree);
7090 #endif // _TARGET_64BIT_
7092 // Only volatile or classinit could be set, and they map over
7093 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
7094 static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
7095 static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
7096 tree->SetOper(GT_CLS_VAR);
7097 tree->gtClsVar.gtClsVarHnd = symHnd;
7098 FieldSeqNode* fieldSeq =
7099 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7100 tree->gtClsVar.gtFieldSeq = fieldSeq;
7107 GenTree* addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
7109 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
7110 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
7112 tree->gtFlags &= ~GTF_FLD_INITCLASS;
7113 addr->gtFlags |= GTF_ICON_INITCLASS;
7116 // There are two cases here, either the static is RVA based,
7117 // in which case the type of the FIELD node is not a GC type
7118 // and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is
7119 // a GC type and the handle to it is a TYP_BYREF in the GC heap
7120 // because handles to statics now go into the large object heap
7122 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
7123 GenTree* op1 = gtNewOperNode(GT_IND, handleTyp, addr);
7124 op1->gtFlags |= GTF_IND_INVARIANT;
7126 tree->SetOper(GT_IND);
7127 tree->gtOp.gtOp1 = op1;
7131 noway_assert(tree->gtOper == GT_IND);
7132 // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
7133 // We must clear it when we transform the node.
7134 // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
7135 // that the logic above does its own checking to determine whether a nullcheck is needed.
7136 tree->gtFlags &= ~GTF_IND_ARR_LEN;
7138 GenTree* res = fgMorphSmpOp(tree);
7140 // If we have a struct type, this node would previously have been under a GT_ADDR,
7141 // and therefore would have been marked GTF_DONT_CSE.
7142 // TODO-1stClassStructs: revisit this.
7143 if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
7145 res->gtFlags |= GTF_DONT_CSE;
7148 if (fldOffset == 0 && res->OperGet() == GT_IND)
7150 GenTree* addr = res->gtOp.gtOp1;
7151 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
7152 FieldSeqNode* fieldSeq =
7153 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
7154 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
7160 //------------------------------------------------------------------------------
7161 // fgMorphCallInline: attempt to inline a call
7164 // call - call expression to inline, inline candidate
7165 // inlineResult - result tracking and reporting
7168 // Attempts to inline the call.
7170 // If successful, callee's IR is inserted in place of the call, and
7171 // is marked with an InlineContext.
// If unsuccessful, the transformations done in anticipation of a
// possible inline are undone, and the candidate flag on the call
// is cleared.
7177 void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
// The call must be a candidate for inlining.
7180 assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
7182 // Attempt the inline
7183 fgMorphCallInlineHelper(call, inlineResult);
7185 // We should have made up our minds one way or another....
7186 assert(inlineResult->IsDecided());
7188 // If we failed to inline, we have a bit of work to do to cleanup
7189 if (inlineResult->IsFailure())
7194 // Before we do any cleanup, create a failing InlineContext to
7195 // capture details of the inlining attempt.
7196 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
7200 // It was an inline candidate, but we haven't expanded it.
7201 if (call->gtCall.gtReturnType != TYP_VOID)
7203 // Detach the GT_CALL tree from the original statement by
7204 // hanging a "nothing" node to it. Later the "nothing" node will be removed
7205 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
7207 noway_assert(fgMorphStmt->gtStmtExpr == call);
7208 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
7211 // Clear the Inline Candidate flag so we can ensure later we tried
7212 // inlining all candidates.
7214 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
7218 /*****************************************************************************
7219 * Helper to attempt to inline a call
7220 * Sets success/failure in inline result
7221 * If success, modifies current method's IR with inlinee's IR
7222 * If failed, undoes any speculative modifications to current method
7225 void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
7227 // Don't expect any surprises here.
7228 assert(result->IsCandidate());
7230 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
7232 // For now, attributing this to call site, though it's really
7233 // more of a budget issue (lvaCount currently includes all
7234 // caller and prospective callee locals). We still might be
7235 // able to inline other callees into this caller, or inline
7236 // this callee in other callers.
7237 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
7241 if (call->IsVirtual())
7243 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
7247 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
7248 // and recursive tail calls as inline candidates.
7249 noway_assert(!call->IsTailPrefixedCall());
7250 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
7252 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
7253 Although we have checked this in impCanInline, it is possible that later IL instructions
7254 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
7257 if (opts.compNeedSecurityCheck)
7259 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
7264 // Calling inlinee's compiler to inline the method.
7267 unsigned startVars = lvaCount;
7272 printf("Expanding INLINE_CANDIDATE in statement ");
7273 printTreeID(fgMorphStmt);
7274 printf(" in BB%02u:\n", compCurBB->bbNum);
7275 gtDispTree(fgMorphStmt);
7276 if (call->IsImplicitTailCall())
7278 printf("Note: candidate is implicit tail call\n");
7283 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
7286 // Invoke the compiler to inline the call.
7289 fgInvokeInlineeCompiler(call, result);
7291 if (result->IsFailure())
7293 // Undo some changes made in anticipation of inlining...
7295 // Zero out the used locals
7296 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
7297 for (unsigned i = startVars; i < lvaCount; i++)
7299 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
7302 lvaCount = startVars;
7307 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
7317 // printf("After inlining lvaCount=%d.\n", lvaCount);
7322 //------------------------------------------------------------------------
7323 // fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp.
7326 // callee - The callee to check
7329 // Returns true or false based on whether the callee can be fastTailCalled
7332 // This function is target specific and each target will make the fastTailCall
7333 // decision differently. See the notes below.
7337 // A fast tail call can be made whenever the number of callee arguments
// is less than or equal to the number of caller arguments, or we have four
7339 // or fewer callee arguments. This is because, on Windows AMD64, each
7340 // argument uses exactly one register or one 8-byte stack slot. Thus, we only
7341 // need to count arguments, and not be concerned with the size of each
7342 // incoming or outgoing argument.
7344 // Can fast tail call examples (amd64 Windows):
7346 // -- Callee will have all register arguments --
7347 // caller(int, int, int, int)
7348 // callee(int, int, float, int)
7350 // -- Callee requires stack space that is equal to the caller --
7351 // caller(struct, struct, struct, struct, struct, struct)
7352 // callee(int, int, int, int, int, int)
7354 // -- Callee requires stack space that is less than the caller --
7355 // caller(struct, double, struct, float, struct, struct)
7356 // callee(int, int, int, int, int)
7358 // -- Callee will have all register arguments --
// caller(int)
// callee(int, int, int, int)
7362 // Cannot fast tail call examples (amd64 Windows):
7364 // -- Callee requires stack space that is larger than the caller --
7365 // caller(struct, double, struct, float, struct, struct)
7366 // callee(int, int, int, int, int, double, double, double)
7368 // Unix Amd64 && Arm64:
7369 // A fastTailCall decision can be made whenever the callee's stack space is
7370 // less than or equal to the caller's stack space. There are many permutations
7371 // of when the caller and callee have different stack sizes if there are
7372 // structs being passed to either the caller or callee.
7375 // 1) If the callee has structs which cannot be enregistered it will be
7376 // reported as cannot fast tail call. This is an implementation limitation
7377 // where the callee only is checked for non enregisterable structs. This is
7378 // tracked with https://github.com/dotnet/coreclr/issues/12644.
// 2) If the caller or callee has stack arguments and the callee has more
//    arguments than the caller, it will be reported as cannot fast tail call.
7382 // This is due to a bug in LowerFastTailCall which assumes that
7383 // nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This
7384 // is tracked with https://github.com/dotnet/coreclr/issues/12468.
7386 // 3) If the callee has a 9 to 16 byte struct argument and the callee has
7387 // stack arguments, the decision will be to not fast tail call. This is
// because before fgMorphArgs is done, it is unknown whether the struct
// will be placed on the stack or enregistered. Therefore, the conservative
// decision of do not fast tail call is taken. This limitation should be
7391 // removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
7393 // 4) Arm64 Only, if there are HFA arguments and the callee has stack
7394 // arguments, the decision will be reported as cannot fast tail call.
// This is because before fgMorphArgs is done, it is unknown whether the struct
// will be placed on the stack or enregistered. Therefore, the conservative
7397 // decision of do not fast tail call is taken.
7399 // Can fast tail call examples (amd64 Unix):
7401 // -- Callee will have all register arguments --
7402 // caller(int, int, int, int)
7403 // callee(int, int, float, int)
7405 // -- Callee requires stack space that is equal to the caller --
// caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack
//    space
7408 // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
7410 // -- Callee requires stack space that is less than the caller --
// caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) -- 6 int register arguments, 32 byte stack
//    space
7413 // callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space
7415 // -- Callee will have all register arguments --
// caller(int)
// callee(int, int, int, int)
7419 // Cannot fast tail call examples (amd64 Unix):
7421 // -- Callee requires stack space that is larger than the caller --
7422 // caller(float, float, float, float, float, float, float, float) -- 8 float register arguments
7423 // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
7425 // -- Callee has structs which cannot be enregistered (Implementation Limitation) --
7426 // caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register
7427 // arguments, 24 byte stack space
7428 // callee({ double, double, double }) -- 24 bytes stack space
7430 // -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) --
7431 // caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space
7432 // callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space
7434 // -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) --
7435 // caller({ double, double, double, double, double, double }) // 48 byte stack
7436 // callee(int, int) -- 2 int registers
7438 bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
7440 #if FEATURE_FASTTAILCALL
7441 // To reach here means that the return types of the caller and callee are tail call compatible.
7442 // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
7444 // In an implicit tail call case callSig may not be available but it is guaranteed to be available
7445 // for explicit tail call cases. The reason implicit tail case callSig may not be available is that
// a call node might be marked as an inline candidate and could fail to be inlined, in which case
// fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
// currently does not copy/set callSig.
7449 CLANG_FORMAT_COMMENT_ANCHOR;
7452 if (callee->IsTailPrefixedCall())
7454 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
7455 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
7459 auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) {
7461 if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
7463 if (callee->gtCallType != CT_INDIRECT)
7465 const char* methodName;
7467 methodName = eeGetMethodFullName(callee->gtCallMethHnd);
7469 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
7470 info.compFullName, methodName);
printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
       "Decision: ",
       info.compFullName);
7479 if (callerStackSize != -1)
7481 printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n", msg, callerStackSize, calleeStackSize);
7485 printf("%s\n\n", msg);
7490 JITDUMP("[Fast tailcall decision]: %s\n", msg);
7498 // Note on vararg methods:
// If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
7500 // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
7501 // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
7502 // out-going area required for callee is bounded by caller's fixed argument space.
// Note that the callee being a vararg method is not a problem since we can account for the params being passed.
7505 unsigned nCallerArgs = info.compArgsCount;
7507 size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount;
7508 size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount;
7510 // Count the callee args including implicit and hidden.
7511 // Note that GenericContext and VarargCookie are added by importer while
7512 // importing the call to gtCallArgs list along with explicit user args.
7513 size_t calleeArgRegCount = 0;
7514 size_t calleeFloatArgRegCount = 0;
7516 if (callee->gtCallObjp) // thisPtr
7518 ++calleeArgRegCount;
7521 if (callee->HasRetBufArg()) // RetBuf
7523 // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs.
7525 // If callee has RetBuf param, caller too must have it.
7526 // Otherwise go the slow route.
7527 if (info.compRetBuffArg == BAD_VAR_NUM)
7529 reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0);
// Count user args while tracking whether any of them is a multi-byte param
7535 // that cannot be passed in a register. Note that we don't need to count
7536 // non-standard and secret params passed in registers (e.g. R10, R11) since
7537 // these won't contribute to out-going arg size.
7538 bool hasMultiByteStackArgs = false;
7539 bool hasTwoSlotSizedStruct = false;
7540 bool hasHfaArg = false;
7541 size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have.
7542 size_t calleeStackSize = 0;
7543 for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
7546 assert(args->OperIsList());
7547 GenTree* argx = args->gtOp.gtOp1;
7549 if (varTypeIsStruct(argx))
7551 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
7552 while (argx->gtOper == GT_COMMA)
7554 argx = argx->gtOp.gtOp2;
7557 // Get the size of the struct and see if it is register passable.
7558 CORINFO_CLASS_HANDLE objClass = nullptr;
7560 if (argx->OperGet() == GT_OBJ)
7562 objClass = argx->AsObj()->gtClass;
7564 else if (argx->IsLocal())
7566 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
7568 if (objClass != nullptr)
7570 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
7572 // hasMultiByteStackArgs will determine if the struct can be passed
7573 // in registers. If it cannot we will break the loop and not
7574 // fastTailCall. This is an implementation limitation
7575 // where the callee only is checked for non enregisterable structs.
7576 // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
7577 unsigned typeSize = 0;
7578 hasMultiByteStackArgs = hasMultiByteStackArgs ||
7579 !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
7581 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
7582 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
7584 assert(objClass != nullptr);
7585 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
7587 if (structDesc.passedInRegisters)
7589 if (structDesc.eightByteCount == 2)
7591 hasTwoSlotSizedStruct = true;
7594 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
7596 if (structDesc.IsIntegralSlot(i))
7598 ++calleeArgRegCount;
7600 else if (structDesc.IsSseSlot(i))
7602 ++calleeFloatArgRegCount;
7606 assert(false && "Invalid eightbyte classification type.");
7613 calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE);
7616 #elif defined(_TARGET_ARM64_) // ARM64
7617 var_types hfaType = GetHfaType(argx);
7618 bool isHfaArg = varTypeIsFloating(hfaType);
7625 calleeFloatArgRegCount += GetHfaCount(argx);
7629 // Structs are either passed in 1 or 2 (64-bit) slots
7630 size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE);
size_t size = roundupSize / TARGET_POINTER_SIZE;
7640 hasTwoSlotSizedStruct = true;
7643 calleeArgRegCount += size;
7646 #elif defined(WINDOWS_AMD64_ABI)
7648 ++calleeArgRegCount;
7650 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
7653 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7655 #endif //_TARGET_AMD64_ || _TARGET_ARM64_
7659 hasMultiByteStackArgs = true;
7664 varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount;
7667 // We can break early on multiByte cases.
7668 if (hasMultiByteStackArgs)
7674 const unsigned maxRegArgs = MAX_REG_ARG;
// If we reach here, it means the callee has only argument types that can be passed in
7677 // a register and if passed on stack will occupy exactly one stack slot in out-going arg area.
7678 // If we are passing args on stack for the callee and it has more args passed on stack than
7679 // the caller, then fast tail call cannot be performed.
7681 // Note that the GC'ness of on stack args need not match since the arg setup area is marked
7682 // as non-interruptible for fast tail calls.
7684 #ifdef WINDOWS_AMD64_ABI
7685 assert(calleeStackSize == 0);
7686 size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs)
? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs
: 0;
7689 calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE;
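// Illustrative arithmetic: a callee with 6 integer args on Windows x64 (maxRegArgs == 4)
// gives calleeStackSlots == 6 - 4 == 2, i.e. calleeStackSize == 2 * TARGET_POINTER_SIZE == 16 bytes.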
7690 size_t callerStackSize = info.compArgStackSize;
7692 bool hasStackArgs = false;
7694 if (callerStackSize > 0 || calleeStackSize > 0)
7696 hasStackArgs = true;
// Go the slow route if it has multi-byte params. This is an implementation
// limitation; see https://github.com/dotnet/coreclr/issues/12644.
7701 if (hasMultiByteStackArgs)
7703 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7707 // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then
// make sure the callee's incoming argument count is less than the caller's
7709 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
                           calleeStackSize);
7716 #elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
7718 // For *nix Amd64 and Arm64 check to see if all arguments for the callee
// and caller are passed in registers. If not, ensure that the outgoing argument stack size
7720 // requirement for the callee is less than or equal to the caller's entire stack frame usage.
7722 // Also, in the case that we have to pass arguments on the stack make sure
7723 // that we are not dealing with structs that are >8 bytes.
7725 bool hasStackArgs = false;
7726 size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG;
7728 size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0;
7729 size_t calleeFloatStackArgCount =
7730 calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0;
7732 size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount;
7733 size_t callerStackSize = info.compArgStackSize;
7734 calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE;
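// Worked example (a sketch, not from the original sources): on SysV AMD64 the
// integer and floating-point register pools are separate (maxRegArgs == 6,
// maxFloatRegArgs == 8), so a callee taking 8 integer args and 2 float args has
// calleeIntStackArgCount = 8 - 6 = 2 and calleeFloatStackArgCount = 0, adding
// 2 * TARGET_POINTER_SIZE = 16 bytes to calleeStackSize.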
if (callerStackSize > 0 || calleeStackSize > 0)
hasStackArgs = true;
// Go the slow route if it has multi-byte params. This is an implementation
// limitation; see https://github.com/dotnet/coreclr/issues/12644.
if (hasMultiByteStackArgs)
reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
return false;
// Callee has a >8 and <=16 byte struct and arguments that have to go on the stack. Do not fastTailCall.
if (calleeStackSize > 0 && hasTwoSlotSizedStruct)
reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct",
callerStackSize, calleeStackSize);
return false;
// Callee has an HFA struct and arguments that have to go on the stack. Do not fastTailCall.
if (calleeStackSize > 0 && hasHfaArg)
reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize,
calleeStackSize);
return false;
// LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is
// not true in many cases on x64 Linux; remove this pessimization when
// LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468
// for more information.
if (hasStackArgs && (nCalleeArgs > nCallerArgs))
reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
calleeStackSize);
return false;
if (calleeStackSize > callerStackSize)
reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize,
calleeStackSize);
return false;
#else
NYI("fastTailCall not supported on this Architecture.");
#endif // WINDOWS_AMD64_ABI
reportFastTailCallDecision("Will fastTailCall", callerStackSize, calleeStackSize);
return true;
#else // FEATURE_FASTTAILCALL
return false;
#endif
/*****************************************************************************
 *
 *  Transform the given GT_CALL tree for tail call code generation.
 */
void Compiler::fgMorphTailCall(GenTreeCall* call)
JITDUMP("fgMorphTailCall (before):\n");
#if defined(_TARGET_ARM_)
// For the helper-assisted tail calls, we need to push all the arguments
// into a single list, and then add a few extra at the beginning.
// Check for PInvoke call types that we don't handle in codegen yet.
assert(!call->IsUnmanaged());
assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
// First move the this pointer (if any) onto the regular arg list
GenTree* thisPtr = NULL;
if (call->gtCallObjp)
GenTree* objp = call->gtCallObjp;
call->gtCallObjp = NULL;
if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
thisPtr = gtClone(objp, true);
var_types vt = objp->TypeGet();
if (thisPtr == NULL)
// Too complex, so use a temp
unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
GenTree* asg = gtNewTempAssign(lclNum, objp);
if (!call->IsVirtualVtable())
// Add an indirection to get the nullcheck
GenTree* tmp = gtNewLclvNode(lclNum, vt);
GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
thisPtr = gtNewLclvNode(lclNum, vt);
else if (!call->IsVirtualVtable())
GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
thisPtr = gtClone(thisPtr, true);
call->gtFlags &= ~GTF_CALL_NULLCHECK;
call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
// Add the extra VSD parameter if needed
CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
if (call->IsVirtualStub())
flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
#ifdef LEGACY_BACKEND
GenTree* arg;
if (call->gtCallType == CT_INDIRECT)
arg = gtClone(call->gtCallAddr, true);
noway_assert(arg != nullptr);
else
noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
ssize_t addr = ssize_t(call->gtStubCallStubAddr);
arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
// Change the call type, so we can add the extra indirection here, rather than in codegen
call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
call->gtStubCallStubAddr = NULL;
call->gtCallType = CT_INDIRECT;
arg->gtRegNum = virtualStubParamInfo->GetReg();
// Add the extra indirection to generate the real target
call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
call->gtFlags |= GTF_EXCEPT;
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
#else // !LEGACY_BACKEND
GenTree* stubAddrArg = fgGetStubAddrArg(call);
// And push the stub address onto the list of arguments
call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
#endif // !LEGACY_BACKEND
else if (call->IsVirtualVtable())
// TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
noway_assert(thisPtr != NULL);
GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
GenTree* vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
vtbl->gtFlags |= GTF_EXCEPT;
unsigned vtabOffsOfIndirection;
unsigned vtabOffsAfterIndirection;
bool isRelative;
info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
&isRelative);
/* Get the appropriate vtable chunk */
if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
GenTree* indOffTree = nullptr;
if (isRelative)
indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
nullptr DEBUGARG("virtual table call"));
vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
if (isRelative)
vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
/* Now the appropriate vtable slot */
add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
// Switch this to a plain indirect call
call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
assert(!call->IsVirtual());
call->gtCallType = CT_INDIRECT;
call->gtCallAddr = vtbl;
call->gtCallCookie = NULL;
call->gtFlags |= GTF_EXCEPT;
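// At this point the virtual call has been rewritten as an indirect call whose
// address expression walks the vtable. In the non-relative case the shape is
// roughly (a sketch, not from the original sources):
//   gtCallAddr = IND(ADD(IND(ADD(IND(ADD(this, VPTR_OFFS)),
//                                vtabOffsOfIndirection)),
//                        vtabOffsAfterIndirection))
// i.e. load the method table from "this", load the proper vtable chunk, and
// finally load the target method address from its slot.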
// Now inject a placeholder for the real call target that codegen will generate
#ifdef LEGACY_BACKEND
GenTree* arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
#else // !LEGACY_BACKEND
GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
#endif // !LEGACY_BACKEND
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
// Lastly inject the pointer for the copy routine
noway_assert(call->callSig != NULL);
void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
// It is now a varargs tail call
#ifdef LEGACY_BACKEND
call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
#else // !LEGACY_BACKEND
call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
#endif // !LEGACY_BACKEND
call->gtFlags &= ~GTF_CALL_POP_ARGS;
#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
// x86 classic codegen doesn't require any morphing
// For the helper-assisted tail calls, we need to push all the arguments
// into a single list, and then add a few extra at the beginning or end.
//
// For AMD64, the tailcall helper (JIT_TailCall) is defined as:
//
//      JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
//
// We need to add "copyRoutine" and "callTarget" extra params at the beginning.
// But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
// for callTarget here which will be replaced later with callTarget in tail call lowering.
//
// For x86, the tailcall helper is defined as:
//
//      JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
//      callTarget)
//
// Note that the special arguments are on the stack, whereas the function arguments follow
// the normal convention: there might be register arguments in ECX and EDX. The stack will
// look like (highest address at the top):
//      first normal stack argument
//      ...
//      last normal stack argument
//      numberOfOldStackArgs
//      numberOfNewStackArgs
//      flags
//      callTarget
//
// Each special arg is 4 bytes.
//
// 'flags' is a bitmask where:
//      1 == restore callee-saved registers (EDI, ESI, EBX). The JIT always saves all
//      callee-saved registers for tailcall functions. Note that the helper assumes
//      that the callee-saved registers live immediately below EBP, and must have been
//      pushed in this order: EDI, ESI, EBX.
//      2 == call target is a virtual stub dispatch.
//
// The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
// on the custom calling convention.
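// Worked example (a sketch, not from the original sources): if the caller
// received 2 stack argument words and the callee needs 3, the helper is invoked
// with numberOfOldStackArgsWords == 2, numberOfNewStackArgsWords == 3, and
// flags == 1 (restore callee-saved registers, not a stub dispatch). The helper
// then slides the 3 new argument words into the caller's frame before jumping
// to callTarget.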
// Check for PInvoke call types that we don't handle in codegen yet.
assert(!call->IsUnmanaged());
assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
// Don't support tail calling helper methods
assert(call->gtCallType != CT_HELPER);
// We come this route only for tail prefixed calls that cannot be dispatched as
// fast tail calls
assert(!call->IsImplicitTailCall());
assert(!fgCanFastTailCall(call));
// First move the 'this' pointer (if any) onto the regular arg list. We do this because
// we are going to prepend special arguments onto the argument list (for non-x86 platforms),
// and thus shift the position where the 'this' pointer will be passed to a later argument slot. In
// addition, for all platforms, we are going to change the call into a helper call. Our code
// generation code for handling calls to helpers does not handle 'this' pointers. So, when we
// do this transformation, we must explicitly create a null 'this' pointer check, if required,
// since special 'this' pointer handling will no longer kick in.
//
// Some call types, such as virtual vtable calls, require creating a call address expression
// that involves the "this" pointer. Lowering will sometimes create an embedded statement
// to create a temporary that is assigned to the "this" pointer expression, and then use
// that temp to create the call address expression. This temp creation embedded statement
// will occur immediately before the "this" pointer argument, and then will be used for both
// the "this" pointer argument as well as the call address expression. In the normal ordering,
// the embedded statement establishing the "this" pointer temp will execute before both uses
// of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
// normal call argument list, and insert a placeholder which will hold the call address
// expression. For non-x86, things are ok, because the order of execution of these is not
// altered. However, for x86, the call address expression is inserted as the *last* argument
// in the argument list, *after* the "this" pointer. It will be put on the stack, and be
// evaluated first. To ensure we don't end up with out-of-order temp definition and use,
// for those cases where call lowering creates an embedded form temp of "this", we will
// create a temp here, early, that will later get morphed correctly.
if (call->gtCallObjp)
GenTree* thisPtr = nullptr;
GenTree* objp = call->gtCallObjp;
call->gtCallObjp = nullptr;
#ifdef _TARGET_X86_
if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
GenTree* asg = gtNewTempAssign(lclNum, objp);
// COMMA(tmp = "this", tmp)
var_types vt = objp->TypeGet();
GenTree* tmp = gtNewLclvNode(lclNum, vt);
thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
#endif // _TARGET_X86_
#if defined(_TARGET_X86_)
// When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
// calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
// ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
if (call->NeedsNullCheck() || call->IsVirtualStub())
#else
if (call->NeedsNullCheck())
#endif // defined(_TARGET_X86_)
// Clone "this" if "this" has no side effects.
if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
thisPtr = gtClone(objp, true);
var_types vt = objp->TypeGet();
if (thisPtr == nullptr)
// Create a temp if either "this" has side effects or "this" is too complex to clone.
unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
GenTree* asg = gtNewTempAssign(lclNum, objp);
// COMMA(tmp = "this", deref(tmp))
GenTree* tmp = gtNewLclvNode(lclNum, vt);
GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
// COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
else
// thisPtr = COMMA(deref("this"), "this")
GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
call->gtFlags &= ~GTF_CALL_NULLCHECK;
else
thisPtr = objp;
// During rationalization tmp="this" and the null check will
// materialize as embedded stmts in the right execution order.
assert(thisPtr != nullptr);
call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
#if defined(_TARGET_AMD64_)
// Add the extra VSD parameter to arg list in case of VSD calls.
// Tail call arg copying thunk will move this extra VSD parameter
// to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
// in Stublinkerx86.cpp for more details.
CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
if (call->IsVirtualStub())
flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
GenTree* stubAddrArg = fgGetStubAddrArg(call);
// And push the stub address onto the list of arguments
call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
// Now inject a placeholder for the real call target that Lower phase will generate.
GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
// Inject the pointer for the copy routine to be used for struct copying
noway_assert(call->callSig != nullptr);
void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
#else // !_TARGET_AMD64_
// Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
// append to the list.
GenTreeArgList** ppArg = &call->gtCallArgs;
for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
ppArg = (GenTreeArgList**)&args->gtOp2;
assert(ppArg != nullptr);
assert(*ppArg == nullptr);
unsigned nOldStkArgsWords =
(compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
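// Worked example (a sketch, not from the original sources): for an x86 method
// with three int args, two of which arrive in ECX and EDX, compArgSize is 12,
// rsCalleeRegArgCount is 2, and REGSIZE_BYTES is 4, so
// nOldStkArgsWords = (12 - 2 * 4) / 4 = 1, i.e. one incoming stack argument
// word is reported to the helper.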
GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
*ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
// Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
// The constant will be replaced.
GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
*ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
// Inject a placeholder for the flags.
// The constant will be replaced.
GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
*ppArg = gtNewListNode(arg1, nullptr);
ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
// Inject a placeholder for the real call target that the Lowering phase will generate.
// The constant will be replaced.
GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
*ppArg = gtNewListNode(arg0, nullptr);
#endif // !_TARGET_AMD64_
// It is now a varargs tail call dispatched via helper.
call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
call->gtFlags &= ~GTF_CALL_POP_ARGS;
#endif // _TARGET_*
JITDUMP("fgMorphTailCall (after):\n");
//------------------------------------------------------------------------
// fgGetStubAddrArg: Return the virtual stub address for the given call.
//
// Notes:
//    The JIT must place the address of the stub used to load the call target,
//    the "stub indirection cell", in a special call argument with a special register.
//
// Arguments:
//    call - a call that needs virtual stub dispatching.
//
// Return Value:
//    addr tree with set register requirements.
//
GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call)
assert(call->IsVirtualStub());
GenTree* stubAddrArg;
if (call->gtCallType == CT_INDIRECT)
stubAddrArg = gtClone(call->gtCallAddr, true);
else
assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
ssize_t addr = ssize_t(call->gtStubCallStubAddr);
stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
assert(stubAddrArg != nullptr);
stubAddrArg->gtRegNum = virtualStubParamInfo->GetReg();
return stubAddrArg;
//------------------------------------------------------------------------------
// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
//
// Arguments:
//    block - basic block ending with a recursive fast tail call
//    recursiveTailCall - recursive tail call to transform
//
// Notes:
//    The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
//
void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
assert(recursiveTailCall->IsTailCallConvertibleToLoop());
GenTree* last = block->lastStmt();
assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
// Transform recursive tail call into a loop.
GenTree* earlyArgInsertionPoint = last;
IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
// Hoist arg setup statement for the 'this' argument.
GenTree* thisArg = recursiveTailCall->gtCallObjp;
if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
GenTree* thisArgStmt = gtNewStmt(thisArg, callILOffset);
fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
// All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
// then the temps need to be assigned to the method parameters. This is done so that the caller
// parameters are not re-assigned before call arguments depending on them are evaluated.
// tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
// where the next temp or parameter assignment should be inserted.
// In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
// while the second call argument (const 1) doesn't.
// Basic block before tail recursion elimination:
// ***** BB04, stmt 1 (top level)
// [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
// [000033] --C - G------ - \--* call void RecursiveMethod
// [000030] ------------ | / --* const int - 1
// [000031] ------------arg0 in rcx + --* +int
// [000029] ------------ | \--* lclVar int V00 arg1
// [000032] ------------arg1 in rdx \--* const int 1
//
// Basic block after tail recursion elimination:
// ***** BB04, stmt 1 (top level)
// [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
// [000030] ------------ | / --* const int - 1
// [000031] ------------ | / --* +int
// [000029] ------------ | | \--* lclVar int V00 arg1
// [000050] - A---------- \--* = int
// [000049] D------N---- \--* lclVar int V02 tmp0
//
// ***** BB04, stmt 2 (top level)
// [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
// [000052] ------------ | / --* lclVar int V02 tmp0
// [000054] - A---------- \--* = int
// [000053] D------N---- \--* lclVar int V00 arg0
//
// ***** BB04, stmt 3 (top level)
// [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
// [000032] ------------ | / --* const int 1
// [000057] - A---------- \--* = int
// [000056] D------N---- \--* lclVar int V01 arg1
GenTree* tmpAssignmentInsertionPoint = last;
GenTree* paramAssignmentInsertionPoint = last;
// Process early args. They may contain both setup statements for late args and actual args.
// Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
// below has the correct second argument.
int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
(earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
GenTree* earlyArg = earlyArgs->Current();
if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
// This is a setup node so we need to hoist it.
GenTree* earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
else
// This is an actual argument that needs to be assigned to the corresponding caller parameter.
fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
GenTree* paramAssignStmt =
fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
// All temp assignments will happen before the first param assignment.
tmpAssignmentInsertionPoint = paramAssignStmt;
// Process late args.
int lateArgIndex = 0;
for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
(lateArgIndex++, lateArgs = lateArgs->Rest()))
// A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
GenTree* lateArg = lateArgs->Current();
fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
GenTree* paramAssignStmt =
fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
// All temp assignments will happen before the first param assignment.
tmpAssignmentInsertionPoint = paramAssignStmt;
// If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
// compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
// block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
var_types thisType = lvaTable[info.compThisArg].TypeGet();
GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType);
GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
GenTree* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
// If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog
// but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization
// for all non-parameter IL locals as well as temp structs with GC fields.
// Liveness phase will remove unnecessary initializations.
if (info.compInitMem)
unsigned varNum;
LclVarDsc* varDsc;
for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
if (!varDsc->lvIsParam)
var_types lclType = varDsc->TypeGet();
bool isUserLocal = (varNum < info.compLocalsCount);
bool structWithGCFields = ((lclType == TYP_STRUCT) && (varDsc->lvStructGcCount > 0));
if (isUserLocal || structWithGCFields)
GenTree* lcl = gtNewLclvNode(varNum, lclType);
GenTree* init = nullptr;
if (lclType == TYP_STRUCT)
const bool isVolatile = false;
const bool isCopyBlock = false;
init = gtNewBlkOpNode(lcl, gtNewIconNode(0), varDsc->lvSize(), isVolatile, isCopyBlock);
init = fgMorphInitBlock(init);
else
GenTree* zero = gtNewZeroConNode(genActualType(lclType));
init = gtNewAssignNode(lcl, zero);
GenTree* initStmt = gtNewStmt(init, callILOffset);
fgInsertStmtBefore(block, last, initStmt);
// Remove the call.
fgRemoveStmt(block, last);
// Set the loop edge. Ensure we have a scratch block and then target the
// next block. Loop detection needs to see a pred out of the loop, so
// mark the scratch block BBF_DONT_REMOVE to prevent empty block removal.
fgEnsureFirstBBisScratch();
fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
block->bbJumpKind = BBJ_ALWAYS;
block->bbJumpDest = fgFirstBB->bbNext;
fgAddRefPred(block->bbJumpDest, block);
block->bbFlags &= ~BBF_HAS_JMP;
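// The resulting flowgraph looks roughly like this (a sketch, not from the
// original sources):
//
//   fgFirstBB (scratch, BBF_DONT_REMOVE)  // keeps a predecessor outside the loop
//      |
//      v
//   fgFirstBB->bbNext                     // loop head; the parameter assignments
//      |  ...                             // above feed into it
//      v
//   block (BBJ_ALWAYS) ------------------> back to fgFirstBB->bbNext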
//------------------------------------------------------------------------------
// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
//
// Arguments:
//    arg - argument to assign
//    argTabEntry - argument table entry corresponding to arg
//    block - basic block the call is in
//    callILOffset - IL offset of the call
//    tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
//    paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
//
// Return Value:
//    parameter assignment statement if one was inserted; nullptr otherwise.
//
GenTree* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg,
fgArgTabEntry* argTabEntry,
BasicBlock* block,
IL_OFFSETX callILOffset,
GenTree* tmpAssignmentInsertionPoint,
GenTree* paramAssignmentInsertionPoint)
// Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
// some argument trees may reference parameters directly.
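// For example (a sketch, not from the original sources): a recursive tail call
// F(b, a) inside F(a, b) must not be lowered to "a = b; b = a;", since the
// second assignment would read the already-overwritten "a". Routing the
// arguments through temps ("t1 = b; t2 = a; a = t1; b = t2;") preserves the
// original values.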
GenTree* argInTemp = nullptr;
unsigned originalArgNum = argTabEntry->argNum;
bool needToAssignParameter = true;
// TODO-CQ: enable calls with struct arguments passed in registers.
noway_assert(!varTypeIsStruct(arg->TypeGet()));
if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
// The argument is already assigned to a temp or is a const.
argInTemp = arg;
else if (arg->OperGet() == GT_LCL_VAR)
unsigned lclNum = arg->AsLclVar()->gtLclNum;
LclVarDsc* varDsc = &lvaTable[lclNum];
if (!varDsc->lvIsParam)
// The argument is a non-parameter local so it doesn't need to be assigned to a temp.
argInTemp = arg;
else if (lclNum == originalArgNum)
// The argument is the same parameter local that we were about to assign so
// we can skip the assignment.
needToAssignParameter = false;
// TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
// any caller parameters. Some common cases are handled above but we may be able to eliminate
// more temp assignments.
GenTree* paramAssignStmt = nullptr;
if (needToAssignParameter)
if (argInTemp == nullptr)
// The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
// TODO: we can avoid a temp assignment if we can prove that the argument tree
// doesn't involve any caller parameters.
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
GenTree* tempSrc = arg;
GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
GenTree* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
// Now assign the temp to the parameter.
LclVarDsc* paramDsc = lvaTable + originalArgNum;
assert(paramDsc->lvIsParam);
GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
return paramAssignStmt;
/*****************************************************************************
 *
 *  Transform the given GT_CALL tree for code generation.
 */
GenTree* Compiler::fgMorphCall(GenTreeCall* call)
if (varTypeIsStruct(call))
fgFixupStructReturn(call);
if (call->CanTailCall())
// It should either be an explicit (i.e. tail prefixed) or an implicit tail call
assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
// It cannot be an inline candidate
assert(!call->IsInlineCandidate());
const char* szFailReason = nullptr;
bool hasStructParam = false;
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
szFailReason = "Might turn into an intrinsic";
if (opts.compNeedSecurityCheck)
szFailReason = "Needs security check";
else if (compLocallocUsed || compLocallocOptimized)
szFailReason = "Localloc used";
#ifdef _TARGET_AMD64_
// Needed for Jit64 compat.
// In future, enabling tail calls from methods that need GS cookie check
// would require codegen side work to emit GS cookie check before a tail
// call.
else if (getNeedsGSSecurityCookie())
szFailReason = "GS Security cookie check";
// DDB 99324: Just disable tailcall under compGcChecks stress mode.
else if (opts.compGcChecks)
szFailReason = "GcChecks";
#if FEATURE_TAILCALL_OPT
// We are still not sure whether it can be a tail call, because when converting
// a call to an implicit tail call we must check that there are no locals with
// their address taken. If there are, we have to assume that the address
// has been leaked and the current stack frame must live until after the final
// call.
//
// Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
// that lvHasLdAddrOp is much more conservative; we cannot just base the check on
// lvAddrExposed alone, since it is not guaranteed to be set on all VarDscs
// during the morph stage. The reason for also checking lvAddrExposed is that in case
// of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
// The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
// never to be incorrect.
//
// TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
// address is taken. Such a flag could be set whenever lvHasLdAddrOp or lvAddrExposed
// is set. This avoids the need for iterating through all lcl vars of the current
// method. Right now throughout the code base we are not consistently using the 'set'
// method to set lvHasLdAddrOp and lvAddrExposed flags.
bool hasAddrExposedVars = false;
bool hasStructPromotedParam = false;
bool hasPinnedVars = false;
for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
// If the method is marked as an explicit tail call we will skip the
// following three hazard checks.
// We still must check for any struct parameters and set 'hasStructParam'
// so that we won't transform the recursive tail call into a loop.
if (call->IsImplicitTailCall())
if (varDsc->lvHasLdAddrOp)
hasAddrExposedVars = true;
if (varDsc->lvAddrExposed)
if (lvaIsImplicitByRefLocal(varNum))
// The address of the implicit-byref is a non-address use of the pointer parameter.
else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
// The address of the implicit-byref's field is likewise a non-address use of the pointer
// parameter.
else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
// This temp was used for struct promotion bookkeeping. It will not be used, and will have
// its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
assert(fgGlobalMorph);
else
hasAddrExposedVars = true;
if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
hasStructPromotedParam = true;
if (varDsc->lvPinned)
// A tail call removes the method from the stack, which means the pinning
// goes away for the callee. We can't allow that.
hasPinnedVars = true;
if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
hasStructParam = true;
// This prevents transforming a recursive tail call into a loop
// but doesn't prevent tail call optimization, so we need to
// look at the rest of the parameters.
if (hasAddrExposedVars)
szFailReason = "Local address taken";
if (hasStructPromotedParam)
szFailReason = "Has Struct Promoted Param";
if (hasPinnedVars)
szFailReason = "Has Pinned Vars";
#endif // FEATURE_TAILCALL_OPT
var_types callType = call->TypeGet();
// We have to ensure that we pass the incoming retValBuf as the
// outgoing one. Using a temp will not do, as this function will
// not regain control to do the copy.
if (info.compRetBuffArg != BAD_VAR_NUM)
noway_assert(callType == TYP_VOID);
GenTree* retValBuf = call->gtCallArgs->gtOp.gtOp1;
if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
szFailReason = "Need to copy return buffer";
// If this is an opportunistic tail call and cannot be dispatched as
// a fast tail call, go the non-tail call route. This is done for perf
// reasons.
//
// Avoid the cost of determining whether it can be dispatched as a fast tail
// call if we already know that the tail call cannot be honored for other
// reasons.
bool canFastTailCall = false;
if (szFailReason == nullptr)
canFastTailCall = fgCanFastTailCall(call);
if (!canFastTailCall)
// Implicit or opportunistic tail calls are always dispatched via fast tail call
// mechanism and never via tail call helper for perf.
if (call->IsImplicitTailCall())
szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
#ifndef LEGACY_BACKEND
else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
// If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
// dispatched as a fast tail call.
//
// Methods with non-standard args will have indirection cell or cookie param passed
// in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
// tail calling the target method and hence ".tail" prefix on such calls needs to be
// ignored.
//
// Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
// extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
// This is done by adding stubAddr as an additional arg before the original list of
// args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
// in Stublinkerx86.cpp.
szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
"called via helper";
#ifdef _TARGET_ARM64_
else
// NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
// So, bail out if we can't make a fast tail call.
szFailReason = "Non-qualified fast tail call";
#endif
#endif // LEGACY_BACKEND
// Clear these flags before calling fgMorphCall() to avoid recursion.
bool isTailPrefixed = call->IsTailPrefixedCall();
call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
#if FEATURE_TAILCALL_OPT
call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
#endif
#ifdef FEATURE_PAL
if (!canFastTailCall && szFailReason == nullptr)
szFailReason = "Non fast tail calls disabled for PAL based systems.";
#endif // FEATURE_PAL
if (szFailReason == nullptr)
if (!fgCheckStmtAfterTailCall())
szFailReason = "Unexpected statements after the tail call";
if (szFailReason != nullptr)
printf("\nRejecting tail call late for call ");
printf(": %s\n", szFailReason);
// for non user funcs, we have no handles to report
info.compCompHnd->reportTailCallDecision(nullptr,
(call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
isTailPrefixed, TAILCALL_FAIL, szFailReason);
#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
// We enable shared-ret tail call optimization for recursive calls even if
// FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
if (gtIsRecursiveCall(call))
#endif
// Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
// but if the call falls through to a ret, and we are doing a tailcall, change it here.
if (compCurBB->bbJumpKind != BBJ_RETURN)
compCurBB->bbJumpKind = BBJ_RETURN;
// Set this flag before calling fgMorphCall() to prevent inlining this call.
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
bool fastTailCallToLoop = false;
#if FEATURE_TAILCALL_OPT
// TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
// or return type is a struct that can be passed in a register.
//
// TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
// hidden generic context param or through keep alive thisptr), then transforming a recursive
// call to such a method requires that the generic context stored on the stack slot be updated. Right now,
// fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
// a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
// generic type parameters of both caller and callee generic method are the same.
if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
!lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam &&
!varTypeIsStruct(call->TypeGet()) && ((info.compClassAttr & CORINFO_FLG_MARSHAL_BYREF) == 0))
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
fastTailCallToLoop = true;
#endif
// Do some target-specific transformations (before we process the args, etc.)
// This is needed only for tail prefixed calls that cannot be dispatched as
// fast tail calls.
if (!canFastTailCall)
fgMorphTailCall(call);
// Implementation note: If we optimize tailcall to do a direct jump
// to the target function (after stomping on the return address, etc),
// without using CORINFO_HELP_TAILCALL, we have to make certain that
// we don't starve the hijacking logic (by stomping on the hijacked
// return address etc).
// At this point, we are committed to do the tailcall.
compTailCallUsed = true;
CorInfoTailCall tailCallResult;
if (fastTailCallToLoop)
tailCallResult = TAILCALL_RECURSIVE;
else if (canFastTailCall)
tailCallResult = TAILCALL_OPTIMIZED;
else
tailCallResult = TAILCALL_HELPER;
// for non user funcs, we have no handles to report
info.compCompHnd->reportTailCallDecision(nullptr,
(call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
isTailPrefixed, tailCallResult, nullptr);
// As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
// to avoid doing any extra work for the return value.
call->gtType = TYP_VOID;
printf("\nGTF_CALL_M_TAILCALL bit set for call ");
if (fastTailCallToLoop)
printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
GenTree* stmtExpr = fgMorphStmt->gtStmtExpr;
// Tail call needs to be in one of the following IR forms
// Either a call stmt or
// GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
// var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
// GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
// In the above, GT_CASTs may be nested.
genTreeOps stmtOper = stmtExpr->gtOper;
if (stmtOper == GT_CALL)
assert(stmtExpr == call);
else
assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
GenTree* treeWithCall;
if (stmtOper == GT_RETURN)
treeWithCall = stmtExpr->gtGetOp1();
else if (stmtOper == GT_COMMA)
// Second operation must be nop.
assert(stmtExpr->gtGetOp2()->IsNothingNode());
treeWithCall = stmtExpr->gtGetOp1();
else
treeWithCall = stmtExpr->gtGetOp2();
while (treeWithCall->gtOper == GT_CAST)
assert(!treeWithCall->gtOverflow());
treeWithCall = treeWithCall->gtGetOp1();
assert(treeWithCall == call);
GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
// Remove all stmts after the call.
while (nextMorphStmt != nullptr)
GenTreeStmt* stmtToRemove = nextMorphStmt;
nextMorphStmt = stmtToRemove->gtNextStmt;
fgRemoveStmt(compCurBB, stmtToRemove);
fgMorphStmt->gtStmtExpr = call;
// Tail call via helper: The VM can't use return address hijacking if we're
// not going to return and the helper doesn't have enough info to safely poll,
// so we poll before the tail call, if the block isn't already safe. Since
// tail call via helper is a slow mechanism it doesn't matter whether we emit a
// GC poll. This is done to be in parity with Jit64. Also this avoids GC info
// size increase if almost all methods are expected to be tail calls (e.g. F#).
//
// Note that we can avoid emitting GC-poll if we know that the current BB is
// dominated by a GC-SafePoint block. But we don't have dominator info at this
// point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
// here and remove it in lowering if the block is dominated by a GC-SafePoint. For
// now it is not clear whether optimizing slow tail calls is worth the effort. As a
// low cost check, we check whether the first and current basic blocks are
// GC-safe points.
//
// Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
// is going to mark the method as fully interruptible if the block containing this tail
// call is reachable without executing any call.
if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
!fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
// We didn't insert a poll block, so we need to morph the call now
// (Normally it will get morphed when we get to the split poll block)
GenTree* temp = fgMorphCall(call);
noway_assert(temp == call);
// Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
// the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
//
// Fast tail call: in case of fast tail calls, we need a jmp epilog and
// hence mark it as BBJ_RETURN with BBF_JMP flag set.
noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
if (canFastTailCall)
compCurBB->bbFlags |= BBF_HAS_JMP;
else
compCurBB->bbJumpKind = BBJ_THROW;
// For non-void calls, we return a placeholder which will be
// used by the parent GT_RETURN node of this call.
GenTree* result = call;
if (callType != TYP_VOID && info.compRetType != TYP_VOID)
#ifdef FEATURE_HFA
// Return a dummy node, as the return is already removed.
if (callType == TYP_STRUCT)
// This is a HFA, use float 0.
callType = TYP_FLOAT;
#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Return a dummy node, as the return is already removed.
if (varTypeIsStruct(callType))
// This is a register-returned struct. Return a 0.
// The actual return registers are hacked in lower and the register allocator.
callType = TYP_INT;
#endif
#ifdef FEATURE_SIMD
// Return a dummy node, as the return is already removed.
if (varTypeIsSIMD(callType))
callType = TYP_DOUBLE;
#endif // FEATURE_SIMD
result = gtNewZeroConNode(genActualType(callType));
result = fgMorphTree(result);
return result;
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
(call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
#ifdef FEATURE_READYTORUN_COMPILER
|| call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
#endif
) &&
(call == fgMorphStmt->gtStmtExpr))
// This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
// Transform it into a null check.
GenTree* thisPtr = call->gtCallArgs->gtOp.gtOp1;
GenTree* nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
nullCheck->gtFlags |= GTF_EXCEPT;
return fgMorphTree(nullCheck);
noway_assert(call->gtOper == GT_CALL);
// Only count calls once (only in the global morph phase)
if (fgGlobalMorph)
if (call->gtCallType == CT_INDIRECT)
optIndirectCallCount++;
else if (call->gtCallType == CT_USER_FUNC)
if (call->IsVirtual())
optIndirectCallCount++;
// Couldn't inline - remember that this BB contains method calls
// If this is a 'regular' call, mark the basic block as
// having a call (for computing full interruptibility).
CLANG_FORMAT_COMMENT_ANCHOR;
if (IsGcSafePoint(call))
compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
// Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
//
// We need to do these before the arguments are morphed
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
// See if this is foldable
GenTree* optTree = gtFoldExprCall(call);
// If we optimized, morph the result
if (optTree != call)
return fgMorphTree(optTree);
// Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
// copy-back).
unsigned retValTmpNum = BAD_VAR_NUM;
CORINFO_CLASS_HANDLE structHnd = nullptr;
if (call->HasRetBufArg() &&
call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
// We're enforcing the invariant that return buffer pointers (at least for
// struct return types containing GC pointers) are never pointers into the heap.
// The large majority of cases are address of local variables, which are OK.
// Otherwise, allocate a local of the given struct type, pass its address,
// then assign from that into the proper destination. (We don't need to do this
// if we're passing the caller's ret buff arg to the callee, since the caller's caller
// will maintain the same invariant.)
GenTree* dest = call->gtCallArgs->gtOp.gtOp1;
assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
// We'll exempt helper calls from this, assuming that the helper implementation
// follows the old convention, and does whatever barrier is required.
if (call->gtCallType != CT_HELPER)
structHnd = call->gtRetClsHnd;
if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
!((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
dest->gtLclVar.gtLclNum == info.compRetBuffArg))
origDest = dest;
retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
lvaSetStruct(retValTmpNum, structHnd, true);
dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
call->gtCallArgs->gtOp.gtOp1 = dest;
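// After this transformation the call site looks roughly like (a sketch, not
// from the original sources):
//   CALL(ADDR(tmp), otherArgs...)   // tmp is the new stack-allocated local
// and, further below, a copy from tmp back to the original destination is
// appended as COMMA(call, copyBlk), so the callee only ever writes through a
// pointer into the stack while the caller still sees the result in origDest.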
/* Process the "normal" argument list */
call = fgMorphArgs(call);
noway_assert(call->gtOper == GT_CALL);
// Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
// This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
GenTree* value = gtArgEntryByArgNum(call, 2)->node;
if (value->IsIntegralConst(0))
assert(value->OperGet() == GT_CNS_INT);
GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
GenTree* index = gtArgEntryByArgNum(call, 1)->node;
// Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
// the spill trees as well if necessary.
GenTreeOp* argSetup = nullptr;
for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
GenTree* const arg = earlyArgs->Current();
if (arg->OperGet() != GT_ASG)
continue;
assert(arg != arr);
assert(arg != index);
arg->gtFlags &= ~GTF_LATE_ARG;
GenTree* op1 = argSetup;
if (op1 == nullptr)
op1 = gtNewNothingNode();
op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
(*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
return WALK_CONTINUE;
};
fgWalkTreePost(&arr, resetMorphedFlag);
fgWalkTreePost(&index, resetMorphedFlag);
fgWalkTreePost(&value, resetMorphedFlag);
GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
arrStore->gtFlags |= GTF_ASG;
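// The helper call has now been replaced by a plain array store, roughly (a
// sketch, not from the original sources):
//   CALL CORINFO_HELP_ARRADDR_ST(arr, index, null)
// becomes
//   COMMA(argSetup, ASG(INDEX(arr, index), null))
// (without the COMMA when no args were spilled). No covariance helper is
// needed because storing a null reference can never violate the array's
// element type.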
9163 GenTree* result = fgMorphTree(arrStore);
9164 if (argSetup != nullptr)
9166 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
9168 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9176 // Optimize get_ManagedThreadId(get_CurrentThread)
9177 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
9178 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
9180 noway_assert(origDest == nullptr);
9181 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
9183 GenTree* innerCall = call->gtCallLateArgs->gtOp.gtOp1;
9185 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
9186 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
9187 CORINFO_INTRINSIC_GetCurrentManagedThread)
9189 // substitute expression with call to helper
9190 GenTree* newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT);
9191 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
9192 return fgMorphTree(newCall);
9196 if (origDest != nullptr)
9198 GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
9199 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
9200 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
9201 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
9203 if (origDest->OperGet() == GT_ASG)
9205 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
9207 GenTree* var = origDest->gtOp.gtOp1;
9208 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
9209 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
9212 GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
9213 copyBlk = fgMorphTree(copyBlk);
9214 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
9216 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9221 if (call->IsNoReturn())
9224 // If we know that the call does not return then we can set fgRemoveRestOfBlock
9225 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
9226 // As a result the compiler won't need to preserve live registers across the call.
9228 // This isn't need for tail calls as there shouldn't be any code after the call anyway.
9229 // Besides, the tail call code is part of the epilog and converting the block to
9230 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
9231 // only for BBJ_RETURN blocks.
9233 // Currently this doesn't work for non-void callees. Some of the code that handles
9234 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
9235 // do not have this flag by default. We could add the flag here but the proper solution
9236 // would be to replace the return expression with a local var node during inlining
9237 // so the rest of the call tree stays in a separate statement. That statement can then
9238 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
9241 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
9243 fgRemoveRestOfBlock = true;
9250 /*****************************************************************************
9252 * Transform the given GTK_CONST tree for code generation.
9255 GenTree* Compiler::fgMorphConst(GenTree* tree)
9257 assert(tree->OperKind() & GTK_CONST);
9259 /* Clear any exception flags or other unnecessary flags
9260 * that may have been set before folding this node to a constant */
9262 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
9264 if (tree->OperGet() != GT_CNS_STR)
9269 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
9270 // guarantee slow performance for that block. Instead cache the return value
9271 // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
9273 if (compCurBB->bbJumpKind == BBJ_THROW)
9275 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
9276 if (helper != CORINFO_HELP_UNDEF)
9278 // For un-important blocks, we want to construct the string lazily
9280 GenTreeArgList* args;
9281 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
9283 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
9287 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
9288 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
9291 tree = gtNewHelperCallNode(helper, TYP_REF, args);
9292 return fgMorphTree(tree);
9296 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
9299 InfoAccessType iat =
9300 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
9302 tree = gtNewStringLiteralNode(iat, pValue);
9304 return fgMorphTree(tree);
/*****************************************************************************
 *
 *  Transform the given GTK_LEAF tree for code generation.
 */

GenTree* Compiler::fgMorphLeaf(GenTree* tree)
{
    assert(tree->OperKind() & GTK_LEAF);

    if (tree->gtOper == GT_LCL_VAR)
    {
        const bool forceRemorph = false;
        return fgMorphLocalVar(tree, forceRemorph);
    }
#ifdef _TARGET_X86_
    else if (tree->gtOper == GT_LCL_FLD)
    {
        if (info.compIsVarArgs)
        {
            GenTree* newTree =
                fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
            if (newTree != nullptr)
            {
                if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
                {
                    fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
                }
                return newTree;
            }
        }
    }
#endif // _TARGET_X86_
    else if (tree->gtOper == GT_FTN_ADDR)
    {
        CORINFO_CONST_LOOKUP addrInfo;

#ifdef FEATURE_READYTORUN_COMPILER
        if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
        {
            addrInfo = tree->gtFptrVal.gtEntryPoint;
        }
        else
#endif
        {
            info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
        }

        // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
        //
        tree->SetOper(GT_CNS_INT);
        tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
        tree->gtFlags |= GTF_ICON_FTN_ADDR;

        switch (addrInfo.accessType)
        {
            case IAT_PPVALUE:
                tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
                tree->gtFlags |= GTF_IND_INVARIANT;

                __fallthrough;

            case IAT_PVALUE:
                tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
                break;

            case IAT_VALUE:
                tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
                break;

            default:
                noway_assert(!"Unknown addrInfo.accessType");
        }

        return fgMorphTree(tree);
    }

    return tree;
}
void Compiler::fgAssignSetVarDef(GenTree* tree)
{
    GenTreeLclVarCommon* lclVarCmnTree;
    bool                 isEntire = false;
    if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
    {
        if (isEntire)
        {
            lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
        }
        else
        {
            // We consider partial definitions to be modeled as uses followed by definitions.
            // This captures the idea that preceding defs are not necessarily made redundant
            // by this definition.
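            // For example (illustrative), a GT_LCL_FLD store that writes only the first
            // 4 bytes of an 8-byte struct local is a partial definition: the remaining
            // bytes keep their prior value, so the store is both a use and a def.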
            lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
        }
    }
}
//------------------------------------------------------------------------
// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
//
// Arguments:
//    tree - The block assignment to be possibly morphed
//
// Return Value:
//    The modified tree if successful, nullptr otherwise.
//
// Assumptions:
//    'tree' must be a block assignment.
//
// Notes:
//    If successful, this method always returns the incoming tree, modifying only
//    its type and operands.
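//
//    For example (illustrative shape only), an 8-byte struct copy on a 64-bit target:
//
//        ASG struct (copy)
//          BLK(8) [dest addr]
//          BLK(8) [src addr]
//
//    can be retyped as the scalar assignment
//
//        ASG long
//          IND long [dest addr]
//          IND long [src addr]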
GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
{
    // This must be a block assignment.
    noway_assert(tree->OperIsBlkOp());
    var_types asgType = tree->TypeGet();

    GenTree*   asg         = tree;
    GenTree*   dest        = asg->gtGetOp1();
    GenTree*   src         = asg->gtGetOp2();
    unsigned   destVarNum  = BAD_VAR_NUM;
    LclVarDsc* destVarDsc  = nullptr;
    GenTree*   lclVarTree  = nullptr;
    bool       isCopyBlock = asg->OperIsCopyBlkOp();
    bool       isInitBlock = !isCopyBlock;

    unsigned             size;
    CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
#ifdef FEATURE_SIMD
    // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic)
    // The SIMD type in question could be Vector2f which is 8-bytes in size.
    // The below check is to make sure that we don't turn that copyblk
    // into an assignment, since rationalizer logic will transform the
    // copyblk appropriately. Otherwise, the transformation made in this
    // routine will prevent rationalizer logic and we might end up with
    // GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post rationalization, leading to a noway assert.
    //
    // TODO-1stClassStructs: This is here to preserve old behavior.
    // It should be eliminated.
    if (src->OperIsSIMDorSimdHWintrinsic())
    {
        return nullptr;
    }
#endif // FEATURE_SIMD

    if (dest->gtEffectiveVal()->OperIsBlk())
    {
        GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
        size               = lhsBlk->Size();
        if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
        {
            destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
            destVarDsc = &(lvaTable[destVarNum]);
        }
        if (lhsBlk->OperGet() == GT_OBJ)
        {
            clsHnd = lhsBlk->AsObj()->gtClass;
        }
    }
    else
    {
        // Is this an enregisterable struct that is already a simple assignment?
        // This can happen if we are re-morphing.
        if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
        {
            return tree;
        }
        noway_assert(dest->OperIsLocal());
        lclVarTree = dest;
        destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
        destVarDsc = &(lvaTable[destVarNum]);
        if (isCopyBlock)
        {
            clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
            size   = info.compCompHnd->getClassSize(clsHnd);
        }
        else
        {
            size = destVarDsc->lvExactSize;
        }
    }

    //
    //  See if we can do a simple transformation:
    //
    //          GT_ASG <TYP_size>
    //          /   \.
    //      GT_IND GT_IND or CNS_INT
    //         |      |
    //       [dest] [src]
    //

    if (size == REGSIZE_BYTES)
    {
        if (clsHnd == NO_CLASS_HANDLE)
        {
            // A register-sized cpblk can be treated as an integer assignment.
            asgType = TYP_I_IMPL;
        }
        else
        {
            BYTE gcPtr;
            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
            asgType = getJitGCType(gcPtr);
        }
    }
    else
    {
        switch (size)
        {
            case 1:
                asgType = TYP_BYTE;
                break;
            case 2:
                asgType = TYP_SHORT;
                break;
#ifdef _TARGET_64BIT_
            case 4:
                asgType = TYP_INT;
                break;
#endif // _TARGET_64BIT_
        }
    }

    // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
    if (!varTypeIsStruct(asgType))
    {
        // For initBlk, a non constant source is not going to allow us to fiddle
        // with the bits to create a single assignment.
        noway_assert(size <= REGSIZE_BYTES);

        if (isInitBlock && !src->IsConstInitVal())
        {
            return nullptr;
        }

        if (destVarDsc != nullptr)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about dest
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destVarNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            // A previous incarnation of this code also required the local not to be
            // address-exposed(=taken). That seems orthogonal to the decision of whether
            // to do field-wise assignments: being address-exposed will cause it to be
            // "dependently" promoted, so it will be in the right memory location. One possible
            // further reason for avoiding field-wise stores is that the struct might have alignment-induced
            // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
            // concern, then we could compromise, and say that being address-exposed, combined with fields
            // that do not completely cover the memory of the struct, prevents field-wise assignments.
            // The same situation exists for the "src" decision.
            if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
            {
                // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
                return nullptr;
            }
            else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
            {
                // Use the dest local var directly, as well as its type.
                dest    = lclVarTree;
                asgType = destVarDsc->lvType;

                // If the block operation had been a write to a local var of a small int type,
                // of the exact size of the small int type, and the var is NormalizeOnStore,
                // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
                // have done that normalization. If we're now making it into an assignment,
                // the NormalizeOnStore will work, and it can be a full def.
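                // For example (illustrative), a 2-byte block store to a TYP_SHORT
                // NormalizeOnStore local becomes a simple short-typed assignment,
                // which re-normalizes (sign/zero extends) on store and therefore
                // counts as a full definition.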
                if (destVarDsc->lvNormalizeOnStore())
                {
                    dest->gtFlags &= (~GTF_VAR_USEASG);
                }
            }
            else
            {
                // Could be a non-promoted struct, or a floating point type local, or
                // an int subject to a partial write. Don't enregister.
                lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));

                // Mark the local var tree as a definition point of the local.
                lclVarTree->gtFlags |= GTF_VAR_DEF;
                if (size < destVarDsc->lvExactSize)
                { // If it's not a full-width assignment....
                    lclVarTree->gtFlags |= GTF_VAR_USEASG;
                }

                if (dest == lclVarTree)
                {
                    dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
                }
            }
        }

        // Check to ensure we don't have a reducible *(& ... )
        if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
        {
            // If dest is an Indir or Block, and it has a child that is a Addr node
            //
            GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR

            // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'?
            //
            GenTree*  destOp     = addrNode->gtGetOp1();
            var_types destOpType = destOp->TypeGet();

            // We can if we have a primitive integer type and the sizes are exactly the same.
            //
            if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType))))
            {
                dest    = destOp;
                asgType = destOpType;
            }
        }

        if (dest->gtEffectiveVal()->OperIsIndir())
        {
            // If we have no information about the destination, we have to assume it could
            // live anywhere (not just in the GC heap).
            // Mark the GT_IND node so that we use the correct write barrier helper in case
            // the field is a GC ref.

            if (!fgIsIndirOfAddrOfLocal(dest))
            {
                dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
                tree->gtFlags |= GTF_GLOB_REF;
            }

            dest->gtFlags &= (~GTF_EXCEPT | dest->AsIndir()->Addr()->gtFlags);
            dest->SetIndirExceptionFlags(this);
            tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT);
        }

        LclVarDsc* srcVarDsc = nullptr;
        if (isCopyBlock)
        {
            if (src->OperGet() == GT_LCL_VAR)
            {
                lclVarTree = src;
                srcVarDsc  = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
            }
            else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
            {
                srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
            }
            if (srcVarDsc != nullptr)
            {
                if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
                {
                    // Let fgMorphCopyBlock handle it.
                    return nullptr;
                }
                else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
                         size == genTypeSize(genActualType(lclVarTree->TypeGet())))
                {
                    // Use the src local var directly.
                    src = lclVarTree;
                }
                else
                {
#ifndef LEGACY_BACKEND
                    // The source argument of the copyblk can potentially
                    // be accessed only through indir(addr(lclVar))
                    // or indir(lclVarAddr) in rational form and liveness
                    // won't account for these uses. That said,
                    // we have to mark this local as address exposed so
                    // we don't delete it as a dead store later on.
                    unsigned lclVarNum                = lclVarTree->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
#else  // LEGACY_BACKEND
                    lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
#endif // LEGACY_BACKEND
                }

                if (src == lclVarTree)
                {
                    GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
                    src              = gtNewOperNode(GT_IND, asgType, srcAddr);
                }
                else
                {
                    assert(src->OperIsIndir());
                }
            }

            if (src->OperIsIndir())
            {
                if (!fgIsIndirOfAddrOfLocal(src))
                {
                    // If we have no information about the src, we have to assume it could
                    // live anywhere (not just in the GC heap).
                    // Mark the GT_IND node so that we use the correct write barrier helper in case
                    // the field is a GC ref.
                    src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
                }

                src->gtFlags &= (~GTF_EXCEPT | src->AsIndir()->Addr()->gtFlags);
                src->SetIndirExceptionFlags(this);
            }
        }
        else // InitBlk
        {
#ifdef FEATURE_SIMD
            if (varTypeIsSIMD(asgType))
            {
                assert(!isCopyBlock); // Else we would have returned the tree above.
                noway_assert(src->IsIntegralConst(0));
                noway_assert(destVarDsc != nullptr);

                src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
                tree->gtOp.gtOp2 = src;
                return tree;
            }
            else
#endif
            {
                if (src->OperIsInitVal())
                {
                    src = src->gtGetOp1();
                }
                assert(src->IsCnsIntOrI());
                // This will mutate the integer constant, in place, to be the correct
                // value for the type we are using in the assignment.
                src->AsIntCon()->FixupInitBlkValue(asgType);
            }
        }

        // Ensure that the dest is setup appropriately.
        if (dest->gtEffectiveVal()->OperIsIndir())
        {
            dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
        }

        // Ensure that the rhs is setup appropriately.
        if (isCopyBlock)
        {
            src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
        }

        // Set the lhs and rhs on the assignment.
        if (dest != tree->gtOp.gtOp1)
        {
            asg->gtOp.gtOp1 = dest;
        }
        if (src != asg->gtOp.gtOp2)
        {
            asg->gtOp.gtOp2 = src;
        }

        asg->ChangeType(asgType);
        dest->gtFlags |= GTF_DONT_CSE;
        asg->gtFlags &= ~GTF_EXCEPT;
        asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
        // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
        asg->gtFlags &= ~GTF_REVERSE_OPS;

#ifdef DEBUG
        if (verbose)
        {
            printf("fgMorphOneAsgBlock (after):\n");
            gtDispTree(tree);
        }
#endif
        return tree;
    }

    return nullptr;
}
//------------------------------------------------------------------------
// fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
//
// Arguments:
//    tree - a tree node with a gtOper of GT_INITBLK
//           the child nodes for tree have already been Morphed
//
// Return Value:
//    We can return the original GT_INITBLK unmodified (least desirable, but always correct)
//    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
//    If we have performed struct promotion of the Dest() then we will try to
//    perform a field by field assignment for each of the promoted struct fields
//
// Notes:
//    If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
//    If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
//    can not use a field by field assignment and must leave the original GT_INITBLK unmodified.
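//
//    For example (illustrative shape only), zero-initializing a promoted struct local V01
//    with two int fields F0 and F1:
//
//        ASG(BLK(ADDR(V01), 8), 0)
//
//    is morphed into the field by field form
//
//        COMMA(ASG(V01.F0, 0), ASG(V01.F1, 0))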
GenTree* Compiler::fgMorphInitBlock(GenTree* tree)
{
    // We must have the GT_ASG form of InitBlkOp.
    noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
#ifdef DEBUG
    bool morphed = false;
#endif // DEBUG

    GenTree* asg      = tree;
    GenTree* src      = tree->gtGetOp2();
    GenTree* origDest = tree->gtGetOp1();

    GenTree* dest = fgMorphBlkNode(origDest, true);
    if (dest != origDest)
    {
        tree->gtOp.gtOp1 = dest;
    }
    tree->gtType = dest->TypeGet();
    // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
    // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
    if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
    {
        src->gtType = TYP_INT;
    }
    JITDUMP("\nfgMorphInitBlock:");

    GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        GenTree*             destAddr          = nullptr;
        GenTree*             initVal           = src->OperIsInitVal() ? src->gtGetOp1() : src;
        GenTree*             blockSize         = nullptr;
        unsigned             blockWidth        = 0;
        FieldSeqNode*        destFldSeq        = nullptr;
        LclVarDsc*           destLclVar        = nullptr;
        bool                 destDoFldAsg      = false;
        unsigned             destLclNum        = BAD_VAR_NUM;
        bool                 blockWidthIsConst = false;
        GenTreeLclVarCommon* lclVarTree        = nullptr;
        if (dest->IsLocal())
        {
            lclVarTree = dest->AsLclVarCommon();
        }
        else
        {
            if (dest->OperIsBlk())
            {
                destAddr   = dest->AsBlk()->Addr();
                blockWidth = dest->AsBlk()->gtBlkSize;
            }
            else
            {
                assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
                destAddr   = dest->gtGetOp1();
                blockWidth = genTypeSize(dest->TypeGet());
            }
        }
        if (lclVarTree != nullptr)
        {
            destLclNum        = lclVarTree->gtLclNum;
            destLclVar        = &lvaTable[destLclNum];
            blockWidth        = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
            blockWidthIsConst = true;
        }
        else
        {
            if (dest->gtOper == GT_DYN_BLK)
            {
                // The size must be an integer type
                blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
                assert(varTypeIsIntegral(blockSize->gtType));
            }
            else
            {
                assert(blockWidth != 0);
                blockWidthIsConst = true;
            }

            if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
            {
                destLclNum = lclVarTree->gtLclNum;
                destLclVar = &lvaTable[destLclNum];
            }
        }
        if (destLclNum != BAD_VAR_NUM)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                assert(initVal->OperGet() == GT_CNS_INT);
                noway_assert(varTypeIsStruct(destLclVar));
                noway_assert(!opts.MinOpts());
                if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
                {
                    JITDUMP(" dest is address exposed");
                }
                else
                {
                    if (blockWidth == destLclVar->lvExactSize)
                    {
                        JITDUMP(" (destDoFldAsg=true)");
                        // We may decide later that a copyblk is required when this struct has holes
                        destDoFldAsg = true;
                    }
                    else
                    {
                        JITDUMP(" with mismatched size");
                    }
                }
            }
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains holes");
            destDoFldAsg = false;
        }

        JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");

        // If we're doing an InitBlock and we've transformed the dest to a non-Blk
        // we need to change it back.
        if (!destDoFldAsg && !dest->OperIsBlk())
        {
            noway_assert(blockWidth != 0);
            tree->gtOp.gtOp1 = origDest;
            tree->gtType     = origDest->gtType;
        }

        if (!destDoFldAsg && (destLclVar != nullptr))
        {
            // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
            }
        }

        // Mark the dest struct as DoNotEnreg
        // when they are LclVar structs and we are using a CopyBlock
        // or the struct is not promoted
        //
        if (!destDoFldAsg)
        {
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            dest             = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
            tree->gtOp.gtOp1 = dest;
            tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
        }
        else
        {
            // The initVal must be a constant of TYP_INT
            noway_assert(initVal->OperGet() == GT_CNS_INT);
            noway_assert(genActualType(initVal->gtType) == TYP_INT);

            // The dest must be of a struct type.
            noway_assert(varTypeIsStruct(destLclVar));

            //
            // Now, convert InitBlock to individual assignments
            //

            tree = nullptr;
            INDEBUG(morphed = true);

            GenTree* srcCopy;
            unsigned fieldLclNum;
            unsigned fieldCnt = destLclVar->lvFieldCnt;

            for (unsigned i = 0; i < fieldCnt; ++i)
            {
                fieldLclNum = destLclVar->lvFieldLclStart + i;
                dest        = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
                dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));

                srcCopy = gtCloneExpr(initVal);
                noway_assert(srcCopy != nullptr);

                // need type of oper to be same as tree
                if (dest->gtType == TYP_LONG)
                {
                    srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
                    // copy and extend the value
                    srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
                    /* Change the types of srcCopy to TYP_LONG */
                    srcCopy->gtType = TYP_LONG;
                }
                else if (varTypeIsFloating(dest->gtType))
                {
                    srcCopy->ChangeOperConst(GT_CNS_DBL);
                    // setup the bit pattern
                    memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
                           sizeof(srcCopy->gtDblCon.gtDconVal));
                    /* Change the types of srcCopy to TYP_DOUBLE */
                    srcCopy->gtType = TYP_DOUBLE;
                }
                else
                {
                    noway_assert(srcCopy->gtOper == GT_CNS_INT);
                    noway_assert(srcCopy->TypeGet() == TYP_INT);
                    // setup the bit pattern
                    memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
                           sizeof(srcCopy->gtIntCon.gtIconVal));
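                    // Note (illustrative): the (int) cast plus memset replicate the low byte
                    // of the init value into every byte of the constant, which matches initblk
                    // semantics (e.g. a fill byte of 0x01 yields 0x01010101 for a TYP_INT field).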
                }

                srcCopy->gtType = dest->TypeGet();

                asg = gtNewAssignNode(dest, srcCopy);

#if LOCAL_ASSERTION_PROP
                if (optLocalAssertionProp)
                {
                    optAssertionGen(asg);
                }
#endif // LOCAL_ASSERTION_PROP

                if (tree)
                {
                    tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
                }
                else
                {
                    tree = asg;
                }
            }
        }
    }

#ifdef DEBUG
    if (morphed)
    {
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;

        if (verbose)
        {
            printf("fgMorphInitBlock (after):\n");
            gtDispTree(tree);
        }
    }
#endif // DEBUG

    return tree;
}
//------------------------------------------------------------------------
// fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
//
// Arguments:
//    tree - the node to be modified.
//    type - the type of indirection to change it to.
//
// Return Value:
//    Returns the node, modified in place.
//
// Notes:
//    This doesn't really warrant a separate method, but is here to abstract
//    the fact that these nodes can be modified in-place.

GenTree* Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
{
    tree->SetOper(GT_IND);
    tree->gtType = type;
    return tree;
}
//------------------------------------------------------------------------
// fgMorphGetStructAddr: Gets the address of a struct object
//
// Arguments:
//    pTree    - the parent's pointer to the struct object node
//    clsHnd   - the class handle for the struct type
//    isRValue - true if this is a source (not dest)
//
// Return Value:
//    Returns the address of the struct value, possibly modifying the existing tree to
//    sink the address below any comma nodes (this is to canonicalize for value numbering).
//    If this is a source, it will morph it to an GT_IND before taking its address,
//    since it may not be remorphed (and we don't want blk nodes as rvalues).

GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
{
    GenTree* addr;
    GenTree* tree = *pTree;
    // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
    // need to hang onto that for the purposes of value numbering.
    if (tree->OperIsIndir())
    {
        if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
        {
            addr = tree->gtOp.gtOp1;
        }
        else
        {
            if (isRValue && tree->OperIsBlk())
            {
                tree->ChangeOper(GT_IND);
            }
            addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
        }
    }
    else if (tree->gtOper == GT_COMMA)
    {
        // If this is a comma, we're going to "sink" the GT_ADDR below it.
        (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
        tree->gtType = TYP_BYREF;
        addr         = tree;
    }
    else
    {
        switch (tree->gtOper)
        {
            case GT_LCL_FLD:
            case GT_LCL_VAR:
            case GT_INDEX:
            case GT_FIELD:
            case GT_ARR_ELEM:
                addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
                break;

            case GT_INDEX_ADDR:
                addr = tree;
                break;

            default:
            {
                // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
                // not going to use "temp"
                GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
                addr          = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
                break;
            }
        }
    }
    *pTree = addr;
    return addr;
}
//------------------------------------------------------------------------
// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
//
// Arguments:
//    tree   - The struct type node
//    isDest - True if this is the destination of the assignment
//
// Return Value:
//    Returns the possibly-morphed node. The caller is responsible for updating
//    the parent of this node.

GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest)
{
    GenTree* handleTree = nullptr;
    GenTree* addr       = nullptr;
    if (tree->OperIs(GT_COMMA))
    {
        // In order to CSE and value number array index expressions and bounds checks,
        // the commas in which they are contained need to match.
        // The pattern is that the COMMA should be the address expression.
        // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
        // TODO-1stClassStructs: Consider whether this can be improved.
        // Also consider whether some of this can be included in gtNewBlockVal (though note
        // that doing so may cause us to query the type system before we otherwise would).
        //
        // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct
        // after:  [3] comma byref  <- [2] comma byref  <- [4] addr byref <- [1] LCL_VAR struct

        GenTree* effectiveVal = tree->gtEffectiveVal();

        GenTreePtrStack commas(this);
        for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2())
        {
            commas.Push(comma);
        }

        GenTree* lastComma = commas.Top();
        noway_assert(lastComma->gtGetOp2() == effectiveVal);
        GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
#ifdef DEBUG
        effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
        lastComma->gtOp.gtOp2 = effectiveValAddr;

        while (commas.Height() > 0)
        {
            GenTree* comma = commas.Pop();
            comma->gtType  = TYP_BYREF;
            gtUpdateNodeSideEffects(comma);
        }

        handleTree = effectiveVal;
        addr       = tree;
    }
    else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR))
    {
        handleTree = tree;
        addr       = tree->AsIndir()->Addr();
    }

    if (addr != nullptr)
    {
        var_types structType = handleTree->TypeGet();
        if (structType == TYP_STRUCT)
        {
            CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree);
            if (structHnd == NO_CLASS_HANDLE)
            {
                tree = gtNewOperNode(GT_IND, structType, addr);
            }
            else
            {
                tree = gtNewObjNode(structHnd, addr);
                if (tree->OperGet() == GT_OBJ)
                {
                    gtSetObjGcInfo(tree->AsObj());
                }
            }
        }
        else
        {
            tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
        }

        gtUpdateNodeSideEffects(tree);
#ifdef DEBUG
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
    }

    if (!tree->OperIsBlk())
    {
        return tree;
    }
    GenTreeBlk* blkNode = tree->AsBlk();
    if (blkNode->OperGet() == GT_DYN_BLK)
    {
        if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
        {
            unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
            // A GT_BLK with size of zero is not supported,
            // so if we encounter such a thing we just leave it as a GT_DYN_BLK
            if (size != 0)
            {
                blkNode->AsDynBlk()->gtDynamicSize = nullptr;
                blkNode->ChangeOper(GT_BLK);
                blkNode->gtBlkSize = size;
            }
            else
            {
                return tree;
            }
        }
        else
        {
            return tree;
        }
    }
    if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
        (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
    {
        GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
        if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
        {
            lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
        }
    }
    return tree;
}
//------------------------------------------------------------------------
// fgMorphBlockOperand: Canonicalize an operand of a block assignment
//
// Arguments:
//    tree       - The block operand
//    asgType    - The type of the assignment
//    blockWidth - The size of the block
//    isDest     - true iff this is the destination of the assignment
//
// Return Value:
//    Returns the morphed block operand
//
// Notes:
//    This does the following:
//    - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
//    - Ensures that any COMMAs are above ADDR nodes.
//    Although 'tree' WAS an operand of a block assignment, the assignment
//    may have been retyped to be a scalar assignment.
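//
//    For example (illustrative shape only), an operand of the form
//    ADDR(COMMA(sideEffect, lclVar)) is canonicalized as
//    COMMA(sideEffect, ADDR(lclVar)), so that the address expression itself
//    can be CSEd and value numbered consistently.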
GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
{
    GenTree* effectiveVal = tree->gtEffectiveVal();

    if (!varTypeIsStruct(asgType))
    {
        if (effectiveVal->OperIsIndir())
        {
            GenTree* addr = effectiveVal->AsIndir()->Addr();
            if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
            {
                effectiveVal = addr->gtGetOp1();
            }
            else if (effectiveVal->OperIsBlk())
            {
                effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
            }
            else
            {
                effectiveVal->gtType = asgType;
            }
        }
        else if (effectiveVal->TypeGet() != asgType)
        {
            GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
            effectiveVal  = gtNewIndir(asgType, addr);
        }
    }
    else
    {
        GenTreeIndir*        indirTree        = nullptr;
        GenTreeLclVarCommon* lclNode          = nullptr;
        bool                 needsIndirection = true;

        if (effectiveVal->OperIsIndir())
        {
            indirTree     = effectiveVal->AsIndir();
            GenTree* addr = effectiveVal->AsIndir()->Addr();
            if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
            {
                lclNode = addr->gtGetOp1()->AsLclVarCommon();
            }
        }
        else if (effectiveVal->OperGet() == GT_LCL_VAR)
        {
            lclNode = effectiveVal->AsLclVarCommon();
        }
#ifdef FEATURE_SIMD
        if (varTypeIsSIMD(asgType))
        {
            if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
                (indirTree->Addr()->gtGetOp1()->OperIsSIMDorSimdHWintrinsic()))
            {
                needsIndirection = false;
                effectiveVal     = indirTree->Addr()->gtGetOp1();
            }
            if (effectiveVal->OperIsSIMDorSimdHWintrinsic())
            {
                needsIndirection = false;
            }
        }
#endif // FEATURE_SIMD
        if (lclNode != nullptr)
        {
            LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
            if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType))
            {
#ifndef LEGACY_BACKEND
                if (effectiveVal != lclNode)
                {
                    JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->gtLclNum);
                    effectiveVal = lclNode;
                }
                needsIndirection = false;
#endif // !LEGACY_BACKEND
            }
            else
            {
                // This may be a lclVar that was determined to be address-exposed.
                effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
            }
        }
        if (needsIndirection)
        {
            if (indirTree != nullptr)
            {
                // We should never find a struct indirection on the lhs of an assignment.
                assert(!isDest || indirTree->OperIsBlk());
                if (!isDest && indirTree->OperIsBlk())
                {
                    (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
                }
            }
            else
            {
                GenTree* newTree;
                GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
                if (asgType == TYP_STRUCT)
                {
                    CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
                    if (clsHnd == NO_CLASS_HANDLE)
                    {
                        newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
                    }
                    else
                    {
                        newTree = gtNewObjNode(clsHnd, addr);
                        if (isDest && (newTree->OperGet() == GT_OBJ))
                        {
                            gtSetObjGcInfo(newTree->AsObj());
                        }
                        if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
                        {
                            // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
                            // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
                            // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
                            // separately now to avoid excess diffs.
                            newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
                        }
                    }
                }
                else
                {
                    newTree = gtNewIndir(asgType, addr);
                }
                effectiveVal = newTree;
            }
        }
    }
    tree = effectiveVal;
    return tree;
}
//------------------------------------------------------------------------
// fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
//
// Arguments:
//    dest - the GT_OBJ or GT_STORE_OBJ
//
// Assumptions:
//    The destination must be known (by the caller) to be on the stack.
//
// Notes:
//    If we have a CopyObj with a dest on the stack, and its size is small enough
//    to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
//    GC Unsafe CopyBlk that is non-interruptible.
//    This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
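//
//    For example (illustrative), a 32-byte stack-destined CopyObj can be emitted as
//    unrolled moves; since the GC pointers in the destination are then not updated
//    atomically with respect to the GC, the copy sequence must be non-interruptible.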
void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
{
#if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) && !defined(LEGACY_BACKEND)
    assert(dest->gtGcPtrCount != 0);
    unsigned blockWidth = dest->AsBlk()->gtBlkSize;
#ifdef DEBUG
    bool     destOnStack = false;
    GenTree* destAddr    = dest->Addr();
    assert(destAddr->IsLocalAddrExpr() != nullptr);
#endif
    if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
    {
        genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
        dest->SetOper(newOper);
        dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
    }
#endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) && !defined(LEGACY_BACKEND)
}
//------------------------------------------------------------------------
// fgMorphCopyBlock: Perform the Morphing of block copy
//
// Arguments:
//    tree - a block copy (i.e. an assignment with a block op on the lhs).
//
// Return Value:
//    We can return the original block copy unmodified (least desirable, but always correct)
//    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
//    If we have performed struct promotion of the Source() or the Dest() then we will try to
//    perform a field by field assignment for each of the promoted struct fields.
//
// Assumptions:
//    The child nodes for tree have already been Morphed.
//
// Notes:
//    If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
//    When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes
//    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
//    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
//    can not use a field by field assignment and must leave the original block copy unmodified.
GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
{
    noway_assert(tree->OperIsCopyBlkOp());

    JITDUMP("\nfgMorphCopyBlock:");

    bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;

    GenTree* asg  = tree;
    GenTree* rhs  = asg->gtGetOp2();
    GenTree* dest = asg->gtGetOp1();

#if FEATURE_MULTIREG_RET
    // If this is a multi-reg return, we will not do any morphing of this node.
    if (rhs->IsMultiRegCall())
    {
        assert(dest->OperGet() == GT_LCL_VAR);
        JITDUMP(" not morphing a multireg call return\n");
        return tree;
    }
#endif // FEATURE_MULTIREG_RET

    // If we have an array index on the lhs, we need to create an obj node.

    dest = fgMorphBlkNode(dest, true);
    if (dest != asg->gtGetOp1())
    {
        asg->gtOp.gtOp1 = dest;
        if (dest->IsLocal())
        {
            dest->gtFlags |= GTF_VAR_DEF;
        }
    }
    asg->gtType = dest->TypeGet();
    rhs         = fgMorphBlkNode(rhs, false);

    asg->gtOp.gtOp2 = rhs;

    GenTree* oldTree    = tree;
    GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);

    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        unsigned             blockWidth;
        bool                 blockWidthIsConst = false;
        GenTreeLclVarCommon* lclVarTree        = nullptr;
        GenTreeLclVarCommon* srcLclVarTree     = nullptr;
        unsigned             destLclNum        = BAD_VAR_NUM;
        LclVarDsc*           destLclVar        = nullptr;
        FieldSeqNode*        destFldSeq        = nullptr;
        bool                 destDoFldAsg      = false;
        GenTree*             destAddr          = nullptr;
        GenTree*             srcAddr           = nullptr;
        bool                 destOnStack       = false;
        bool                 hasGCPtrs         = false;

        JITDUMP("block assignment to morph:\n");

        if (dest->IsLocal())
        {
            blockWidthIsConst = true;
            destOnStack       = true;
            if (dest->gtOper == GT_LCL_VAR)
            {
                lclVarTree = dest->AsLclVarCommon();
                destLclNum = lclVarTree->gtLclNum;
                destLclVar = &lvaTable[destLclNum];
                if (destLclVar->lvType == TYP_STRUCT)
                {
                    // It would be nice if lvExactSize always corresponded to the size of the struct,
                    // but it doesn't always for the temps that the importer creates when it spills side
                    // effects.
                    // TODO-Cleanup: Determine when this happens, and whether it can be changed.
                    blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
                }
                else
                {
                    blockWidth = genTypeSize(destLclVar->lvType);
                }
                hasGCPtrs = destLclVar->lvStructGcCount != 0;
            }
            else
            {
                assert(dest->TypeGet() != TYP_STRUCT);
                assert(dest->gtOper == GT_LCL_FLD);
                blockWidth = genTypeSize(dest->TypeGet());
                destAddr   = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
                destFldSeq = dest->AsLclFld()->gtFieldSeq;
            }
        }
        else
        {
            GenTree* effectiveDest = dest->gtEffectiveVal();
            if (effectiveDest->OperGet() == GT_IND)
            {
                assert(dest->TypeGet() != TYP_STRUCT);
                blockWidth        = genTypeSize(effectiveDest->TypeGet());
                blockWidthIsConst = true;
                if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
                {
                    destAddr = dest->gtGetOp1();
                }
            }
            else
            {
                assert(effectiveDest->OperIsBlk());
                GenTreeBlk* blk = effectiveDest->AsBlk();

                blockWidth        = blk->gtBlkSize;
                blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
                if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
                {
                    destAddr = blk->Addr();
                }
            }
            if (destAddr != nullptr)
            {
                noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
                if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
                {
                    destOnStack = true;
                    destLclNum  = lclVarTree->gtLclNum;
                    destLclVar  = &lvaTable[destLclNum];
                }
            }
        }

        if (destLclVar != nullptr)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(varTypeIsStruct(destLclVar));
                noway_assert(!opts.MinOpts());

                if (blockWidth == destLclVar->lvExactSize)
                {
                    JITDUMP(" (destDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    destDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched dest size");
                }
            }
        }
        FieldSeqNode* srcFldSeq   = nullptr;
        unsigned      srcLclNum   = BAD_VAR_NUM;
        LclVarDsc*    srcLclVar   = nullptr;
        bool          srcDoFldAsg = false;

        if (rhs->IsLocal())
        {
            srcLclVarTree = rhs->AsLclVarCommon();
            srcLclNum     = srcLclVarTree->gtLclNum;
            if (rhs->OperGet() == GT_LCL_FLD)
            {
                srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
            }
        }
        else if (rhs->OperIsIndir())
        {
            if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
            {
                srcLclNum = srcLclVarTree->gtLclNum;
            }
            else
            {
                srcAddr = rhs->gtOp.gtOp1;
            }
        }

        if (srcLclNum != BAD_VAR_NUM)
        {
            srcLclVar = &lvaTable[srcLclNum];

            if (srcLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(varTypeIsStruct(srcLclVar));
                noway_assert(!opts.MinOpts());

                if (blockWidth == srcLclVar->lvExactSize)
                {
                    JITDUMP(" (srcDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    srcDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched src size");
                }
            }
        }

        // Check to see if we are doing a copy to/from the same local block.
        // If so, morph it to a nop.
        if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) &&
            destFldSeq != FieldSeqStore::NotAField())
        {
            JITDUMP("Self-copy; replaced with a NOP.\n");
            GenTree* nop = gtNewNothingNode();
            INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
            return nop;
        }

        // Check to see if we are required to do a copy block because the struct contains holes
        // and either the src or dest is externally visible
        //
        bool requiresCopyBlock   = false;
        bool srcSingleLclVarAsg  = false;
        bool destSingleLclVarAsg = false;

        // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
        if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
        {
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the src?
        if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
        {
            JITDUMP(" src contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

#if defined(_TARGET_ARM_)
        if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
        {
            JITDUMP(" rhs is unaligned");
            requiresCopyBlock = true;
        }

        if (asg->gtFlags & GTF_BLK_UNALIGNED)
        {
            JITDUMP(" asg is unaligned");
            requiresCopyBlock = true;
        }
#endif // _TARGET_ARM_

        if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
        {
            requiresCopyBlock = true;
        }

        // Can't use field by field assignment if the src is a call.
        if (rhs->OperGet() == GT_CALL)
        {
            JITDUMP(" src is a call");
            requiresCopyBlock = true;
        }
        // If we passed the above checks, then we will check these two
        if (!requiresCopyBlock)
        {
            // Are both dest and src promoted structs?
            if (destDoFldAsg && srcDoFldAsg)
            {
                // Both structs should be of the same type, or each have a single field of the same type.
                // If not we will use a copy block.
                if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
                    lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
                {
                    unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
                    unsigned srcFieldNum  = lvaTable[srcLclNum].lvFieldLclStart;
                    if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
                        (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
                    {
                        requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
                        JITDUMP(" with mismatched types");
                    }
                }
            }
            // Are neither dest or src promoted structs?
            else if (!destDoFldAsg && !srcDoFldAsg)
            {
                requiresCopyBlock = true; // Leave as a CopyBlock
                JITDUMP(" with no promoted structs");
            }
            else if (destDoFldAsg)
            {
                // Match the following kinds of trees:
                //  fgMorphTree BB01, stmt 9 (before)
                //   [000052] ------------        const     int    8
                //   [000053] -A--G-------     copyBlk   void
                //   [000051] ------------           addr      byref
                //   [000050] ------------              lclVar    long   V07 loc5
                //   [000054] --------R---        <list>    void
                //   [000049] ------------           addr      byref
                //   [000048] ------------              lclVar    struct(P) V06 loc4
                //                                              long   V06.h (offs=0x00) -> V17 tmp9
                //  Yields this transformation
                //  fgMorphCopyBlock (after):
                //   [000050] ------------        lclVar    long   V07 loc5
                //   [000085] -A----------     =         long
                //   [000083] D------N----        lclVar    long   V17 tmp9
                //
                if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
                    (blockWidth == genTypeSize(srcLclVar->TypeGet())))
                {
                    // Reject the following tree:
                    //  - seen on x86chk    jit\jit64\hfa\main\hfa_sf3E_r.exe
                    //
                    //  fgMorphTree BB01, stmt 6 (before)
                    //   [000038] -------------        const     int    4
                    //   [000039] -A--G--------     copyBlk   void
                    //   [000037] -------------           addr      byref
                    //   [000036] -------------              lclVar    int    V05 loc3
                    //   [000040] --------R----        <list>    void
                    //   [000035] -------------           addr      byref
                    //   [000034] -------------              lclVar    struct(P) V04 loc2
                    //                                              float  V04.f1 (offs=0x00) -> V13 tmp6
                    //  As this would transform into
                    //    float V13 = int V05
                    //
                    unsigned  fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
                    var_types destType    = lvaTable[fieldLclNum].TypeGet();
                    if (srcLclVar->TypeGet() == destType)
                    {
                        srcSingleLclVarAsg = true;
                    }
                }
            }
            else
            {
                assert(srcDoFldAsg);
                // Check for the symmetric case (which happens for the _pointer field of promoted spans):
                //
                //               [000240] -----+------             /--*  lclVar    struct(P) V18 tmp9
                //                                                  /--*    byref  V18._value (offs=0x00) -> V30 tmp21
                //               [000245] -A------R---             *  =         struct (copy)
                //               [000244] -----+------             \--*  obj(8)    struct
                //               [000243] -----+------                \--*  addr      byref
                //               [000242] D----+-N----                   \--*  lclVar    byref  V28 tmp19
                //
                if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
                    (blockWidth == genTypeSize(destLclVar->TypeGet())))
                {
                    // Check for type agreement
                    unsigned  fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
                    var_types srcType     = lvaTable[fieldLclNum].TypeGet();
                    if (destLclVar->TypeGet() == srcType)
                    {
                        destSingleLclVarAsg = true;
                    }
                }
            }
        }

        // If we require a copy block, then set both of the field assign bools to false.
        if (requiresCopyBlock)
        {
            // If a copy block is required then we won't do field by field assignments
            destDoFldAsg = false;
            srcDoFldAsg  = false;
        }

        JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");

        // Mark the dest/src structs as DoNotEnreg
        // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
        // or the struct is not promoted
        //
        if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
        {
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
            }
        }

        if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
        {
            if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet()))
            {
                lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
            }
        }
        if (requiresCopyBlock)
        {
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            var_types asgType = dest->TypeGet();
            dest              = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
            asg->gtOp.gtOp1   = dest;
            asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);

            // Note that the unrolling of CopyBlk is only implemented on some platforms.
            // Currently that includes x64 and ARM but not x86: the code generation for this
            // construct requires the ability to mark certain regions of the generated code
            // as non-interruptible, and the GC encoding for the latter platform does not
            // have this capability.

            // If we have a CopyObj with a dest on the stack
            // we will convert it into an GC Unsafe CopyBlk that is non-interruptible
            // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
            // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
            //
            if (destOnStack && (dest->OperGet() == GT_OBJ))
            {
                fgMorphUnsafeBlk(dest->AsObj());
            }

            // Eliminate the "OBJ or BLK" node on the rhs.
            rhs             = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
            asg->gtOp.gtOp2 = rhs;

#ifdef LEGACY_BACKEND
            if (!rhs->OperIsIndir())
            {
                noway_assert(rhs->gtOper == GT_LCL_VAR);
                GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
                rhs              = gtNewIndir(TYP_STRUCT, rhsAddr);
            }
#endif // LEGACY_BACKEND

            // Formerly, liveness did not consider copyblk arguments of simple types as being
            // a use or def, so these variables were marked as address-exposed.
            // TODO-1stClassStructs: This should no longer be needed.
            if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
            {
                JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
                lvaTable[srcLclNum].lvAddrExposed = true;
            }

            if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
            {
                JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
                lvaTable[destLclNum].lvAddrExposed = true;
            }

            goto _Done;
        }
        //
        // Otherwise we convert this CopyBlock into individual field by field assignments
        //
        tree = nullptr;

        GenTree* src;
        GenTree* addrSpill            = nullptr;
        unsigned addrSpillTemp        = BAD_VAR_NUM;
        bool     addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame

        unsigned fieldCnt = DUMMY_INIT(0);

        if (destDoFldAsg && srcDoFldAsg)
        {
            // To do fieldwise assignments for both sides, they'd better be the same struct type!
            // All of these conditions were checked above...
            assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
            assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);

            fieldCnt = destLclVar->lvFieldCnt;
            goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field by
                                // field assignments.
        }
        else if (destDoFldAsg)
        {
            fieldCnt = destLclVar->lvFieldCnt;
            rhs      = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
            if (srcAddr == nullptr)
            {
                srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
            }
        }
        else
        {
            assert(srcDoFldAsg);
            fieldCnt = srcLclVar->lvFieldCnt;
            dest     = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
            if (dest->OperIsBlk())
            {
                (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
            }
            destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
        }

        if (destDoFldAsg)
        {
            noway_assert(!srcDoFldAsg);
            if (gtClone(srcAddr))
            {
                // srcAddr is simple expression. No need to spill.
                noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // srcAddr is complex expression. Clone and spill it (unless the destination is
                // a struct local that only has one field, in which case we'd only use the
                // address value once...)
                if (destLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
                    noway_assert(addrSpill != nullptr);
                }
            }
        }

        if (srcDoFldAsg)
        {
            noway_assert(!destDoFldAsg);

            // If we're doing field-wise stores, to an address within a local, and we copy
            // the address into "addrSpill", do *not* declare the original local var node in the
            // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
            // field-wise assignments as an "indirect" assignment to the local.
            // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
            // we clone it.)
            if (lclVarTree != nullptr)
            {
                lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
            }

            if (gtClone(destAddr))
            {
                // destAddr is simple expression. No need to spill
                noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // destAddr is complex expression. Clone and spill it (unless
                // the source is a struct local that only has one field, in which case we'd only
                // use the address value once...)
                if (srcLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
                    noway_assert(addrSpill != nullptr);
                }
            }
        }

        // TODO-CQ: this should be based on a more general
        // "BaseAddress" method, that handles fields of structs, before or after
        // morphing.
        if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
        {
            if (addrSpill->gtOp.gtOp1->IsLocal())
            {
                // We will *not* consider this to define the local, but rather have each individual field assign
                // be a definition.
                addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
                assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
                       PROMOTION_TYPE_INDEPENDENT);
                addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
                                             // local stack frame
            }
        }

        if (addrSpill != nullptr)
        {
            // Spill the (complex) address to a BYREF temp.
            // Note, at most one address may need to be spilled.
            addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));

            lvaTable[addrSpillTemp].lvType = TYP_BYREF;

            if (addrSpillIsStackDest)
            {
                lvaTable[addrSpillTemp].lvStackByref = true;
            }

            tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);

            // If we are assigning the address of a LclVar here
            // liveness does not account for this kind of address taken use.
            //
            // We have to mark this local as address exposed so
            // that we don't delete the definition for this LclVar
            // as a dead store later on.
            //
            if (addrSpill->OperGet() == GT_ADDR)
            {
                GenTree* addrOp = addrSpill->gtOp.gtOp1;
                if (addrOp->IsLocal())
                {
                    unsigned lclVarNum                = addrOp->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
                }
            }
        }
    _AssignFields:

        for (unsigned i = 0; i < fieldCnt; ++i)
        {
            FieldSeqNode* curFieldSeq = nullptr;
            if (destDoFldAsg)
            {
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
                dest                 = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
                if (destAddr != nullptr)
                {
                    noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
                    dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
                }
                else
                {
                    noway_assert(lclVarTree != nullptr);
                    dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
                }
                // Don't CSE the lhs of an assignment.
                dest->gtFlags |= GTF_DONT_CSE;
            }
            else
            {
                noway_assert(srcDoFldAsg);
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;

                if (destSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(destLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        dest = gtCloneExpr(destAddr);
                        noway_assert(dest != nullptr);

                        // Is the address of a local?
                        GenTreeLclVarCommon* lclVarTree = nullptr;
                        bool                 isEntire   = false;
                        bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
                        if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
                        {
                            lclVarTree->gtFlags |= GTF_VAR_DEF;
                            if (!isEntire)
                            {
                                lclVarTree->gtFlags |= GTF_VAR_USEASG;
                            }
                        }
                    }

                    GenTree* fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
                    // Have to set the field sequence -- which means we need the field handle.
                    CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd =
                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq                          = GetFieldSeqStore()->CreateSingleton(fieldHnd);
                    fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;

                    dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);

                    dest = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), dest);

                    // !!! The destination could be on stack. !!!
                    // This flag will let us choose the correct write barrier.
                    dest->gtFlags |= GTF_IND_TGTANYWHERE;
                }
            }

            if (srcDoFldAsg)
            {
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
                src                  = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(srcLclVarTree != nullptr);
                src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
                // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
                // but they are when they are under a GT_ADDR.
                src->gtFlags |= GTF_DONT_CSE;
            }
            else
            {
                noway_assert(destDoFldAsg);
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;

                if (srcSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(srcLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        src = gtCloneExpr(srcAddr);
                        noway_assert(src != nullptr);
                    }

                    CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd =
                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);

                    src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
                                        new (this, GT_CNS_INT)
                                            GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));

                    src = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), src);
                }
            }

            noway_assert(dest->TypeGet() == src->TypeGet());

            asg = gtNewAssignNode(dest, src);

            // If we spilled the address, and we didn't do individual field assignments to promoted fields,
            // and it was of a local, record the assignment as an indirect update of a local.
            if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
            {
                curFieldSeq   = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
                bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
                IndirectAssignmentAnnotation* pIndirAnnot =
                    new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
                GetIndirAssignMap()->Set(asg, pIndirAnnot);
            }

#if LOCAL_ASSERTION_PROP
            if (optLocalAssertionProp)
            {
                optAssertionGen(asg);
            }
#endif // LOCAL_ASSERTION_PROP

            if (tree)
            {
                tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
            }
            else
            {
                tree = asg;
            }
        }
    }

_Done:

    if (isLateArg)
    {
        tree->gtFlags |= GTF_LATE_ARG;
    }

#ifdef DEBUG
    if (tree != oldTree)
    {
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
    }

    if (verbose)
    {
        printf("\nfgMorphCopyBlock (after):\n");
        gtDispTree(tree);
    }
#endif

    return tree;
}
// insert conversions and normalize to make tree amenable to register
// FP architectures
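//
// For example (illustrative), comparing a TYP_FLOAT lclVar against a TYP_DOUBLE
// constant widens the float operand:
//
//      GT_LT
//        GT_CAST double <- float
//        GT_CNS_DBL double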
GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
{
    if (tree->OperIsArithmetic())
    {
        if (varTypeIsFloating(tree))
        {
            GenTree* op1 = tree->gtOp.gtOp1;
            GenTree* op2 = tree->gtGetOp2();

            assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet()));

            if (op1->TypeGet() != tree->TypeGet())
            {
                tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet());
            }
            if (op2->TypeGet() != tree->TypeGet())
            {
                tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet());
            }
        }
    }
    else if (tree->OperIsCompare())
    {
        GenTree* op1 = tree->gtOp.gtOp1;

        if (varTypeIsFloating(op1))
        {
            GenTree* op2 = tree->gtGetOp2();
            assert(varTypeIsFloating(op2));

            if (op1->TypeGet() != op2->TypeGet())
            {
                // both had better be floating, just one bigger than other
                if (op1->TypeGet() == TYP_FLOAT)
                {
                    assert(op2->TypeGet() == TYP_DOUBLE);
                    tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
                }
                else if (op2->TypeGet() == TYP_FLOAT)
                {
                    assert(op1->TypeGet() == TYP_DOUBLE);
                    tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
                }
            }
        }
    }

    return tree;
}
//--------------------------------------------------------------------------------------------------------------
// fgMorphRecognizeBoxNullable:
//   Recognize this pattern:
//
//   stmtExpr  void  (IL 0x000...  ???)
//       CNS_INT   ref    null
//       CALL help ref    HELPER.CORINFO_HELP_BOX_NULLABLE
//           CNS_INT(h)  long   0x7fed96836c8 class
//           FIELD struct value
//               LCL_VAR ref V00 this
//
//   which comes from this code:
//
//       return this.value == null;
//
//   and transform it into
//
//   stmtExpr  void  (IL 0x000...  ???)
//       CNS_INT   ref    null
//       FIELD struct value
//           LCL_VAR ref V00 this
//
//   compare - Compare tree to optimize.
//
//   A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found;
//   the original tree otherwise.
//
GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
GenTree* op1 = compare->gtOp.gtOp1;
GenTree* op2 = compare->gtOp.gtOp2;
GenTreeCall* opCall;
if (op1->IsCnsIntOrI() && op2->IsHelperCall())
opCall = op2->AsCall();
else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
opCall = op1->AsCall();
if (!opCns->IsIntegralConst(0))
if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
// Get the nullable struct argument
GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
// Check for cases that are unsafe to optimize and return the unchanged tree
if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0))
// Replace the box with an access of the nullable 'hasValue' field which is at the zero offset
GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg);
compare->gtOp.gtOp1 = newOp;
compare->gtOp.gtOp2 = newOp;
opCns->gtType = TYP_INT;
#ifdef FEATURE_SIMD
//--------------------------------------------------------------------------------------------------------------
// getSIMDStructFromField:
//   Check whether the field belongs to a SIMD struct. If it does, return the GenTree* for
//   the struct node, along with the base type, field index and SIMD size. If it does not, return nullptr.
//   Usually, if the tree node comes from a SIMD lclvar that is not used in any SIMD intrinsic, we
//   return nullptr, since in that case the SIMD struct should be treated as a regular struct.
//   However, if you want the SIMD struct node regardless, you can set ignoreUsedInSIMDIntrinsic
//   to true; the lvIsUsedInSIMDIntrinsic check is then skipped, and the SIMD struct node is
//   returned whenever the struct is a SIMD struct.
//
//   tree - GenTree*. This node is checked to see whether it is a field belonging to a SIMD
//          struct used in a SIMD intrinsic.
//   pBaseTypeOut - var_types pointer; if the tree node is the tree we want, *pBaseTypeOut is set
//          to the SIMD lclvar's base type.
//   indexOut - unsigned pointer; if the tree is used in a SIMD intrinsic, *indexOut is set
//          to the index number of this field.
//   simdSizeOut - unsigned pointer; if the tree is used in a SIMD intrinsic, *simdSizeOut is set
//          to the size of the SIMD struct this tree belongs to.
//   ignoreUsedInSIMDIntrinsic - bool. If true, skip the lvIsUsedInSIMDIntrinsic check.
//
//   A GenTree* pointing to the SIMD lclvar tree that the field belongs to. If the tree is not a
//   SIMD-intrinsic-related field, return nullptr.
//
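// For example (illustrative, assuming a System.Numerics.Vector4 local 'v'
// that is used in SIMD intrinsics): for a GT_FIELD reading v.Y, this returns
// the lclvar tree for 'v' with *pBaseTypeOut = TYP_FLOAT, *simdSizeOut = 16,
// and *indexOut = 1 (field offset 4 divided by genTypeSize(TYP_FLOAT)).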
GenTree* Compiler::getSIMDStructFromField(GenTree* tree,
var_types* pBaseTypeOut,
unsigned* indexOut,
unsigned* simdSizeOut,
bool ignoreUsedInSIMDIntrinsic /*false*/)
GenTree* ret = nullptr;
if (tree->OperGet() == GT_FIELD)
GenTree* objRef = tree->gtField.gtFldObj;
if (objRef != nullptr)
GenTree* obj = nullptr;
if (objRef->gtOper == GT_ADDR)
obj = objRef->gtOp.gtOp1;
else if (ignoreUsedInSIMDIntrinsic)
if (isSIMDTypeLocal(obj))
unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &lvaTable[lclNum];
if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
*simdSizeOut = varDsc->lvExactSize;
*pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
else if (obj->OperGet() == GT_SIMD)
GenTreeSIMD* simdNode = obj->AsSIMD();
*simdSizeOut = simdNode->gtSIMDSize;
*pBaseTypeOut = simdNode->gtSIMDBaseType;
#ifdef FEATURE_HW_INTRINSICS
else if (obj->OperIsSimdHWIntrinsic())
GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic();
*simdSizeOut = simdNode->gtSIMDSize;
*pBaseTypeOut = simdNode->gtSIMDBaseType;
#endif // FEATURE_HW_INTRINSICS
if (ret != nullptr)
unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
*indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
/*****************************************************************************
 * If a read operation tries to access a SIMD struct field, transform the
 * operation into the SIMD intrinsic SIMDIntrinsicGetItem and return the new tree.
 * Otherwise, return the old tree.
 * tree - GenTree*. If this pointer points to a SIMD struct field that is used in a
 * SIMD intrinsic, we will morph it into the SIMD intrinsic SIMDIntrinsicGetItem.
 * A GenTree* which points to the new tree. If the tree is not for a SIMD intrinsic,
 * return the old tree.
 */
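// For example (illustrative): reading v.Y from a Vector4 'v' becomes
// GT_SIMD(SIMDIntrinsicGetItem, v, 1) with baseType TYP_FLOAT and
// simdSize 16, instead of a field load from the struct.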
GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree)
unsigned index = 0;
var_types baseType = TYP_UNKNOWN;
unsigned simdSize = 0;
GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
if (simdStructNode != nullptr)
assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
GenTree* op2 = gtNewIconNode(index);
tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
/*****************************************************************************
 * Transform an assignment of a SIMD struct field into the SIMD intrinsic
 * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
 * then return the old tree.
 * tree - GenTree*. If this pointer points to a SIMD struct field that is used in a
 * SIMD intrinsic, we will morph it into a SIMD intrinsic set.
 * A GenTree* which points to the new tree. If the tree is not for a SIMD intrinsic,
 * return the old tree.
 */
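// For example (illustrative): "v.Y = x" for a Vector4 'v' becomes
// "v = GT_SIMD(SIMDIntrinsicSetY, v, x)", i.e. the whole vector is
// reassigned from a copy whose element 1 has been replaced.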
GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree)
assert(tree->OperGet() == GT_ASG);
GenTree* op1 = tree->gtGetOp1();
GenTree* op2 = tree->gtGetOp2();
unsigned index = 0;
var_types baseType = TYP_UNKNOWN;
unsigned simdSize = 0;
GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
if (simdOp1Struct != nullptr)
// Generate the simd set intrinsic
assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
simdIntrinsicID = SIMDIntrinsicSetX;
simdIntrinsicID = SIMDIntrinsicSetY;
simdIntrinsicID = SIMDIntrinsicSetZ;
simdIntrinsicID = SIMDIntrinsicSetW;
noway_assert(!"There is no set intrinsic for index bigger than 3");
GenTree* target = gtClone(simdOp1Struct);
assert(target != nullptr);
GenTree* simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
tree->gtOp.gtOp1 = target;
tree->gtOp.gtOp2 = simdTree;
tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // FEATURE_SIMD
/*****************************************************************************
 * Transform the given GTK_SMPOP tree for code generation.
 */
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
assert(tree->OperKind() & GTK_SMPOP);
/* The steps in this function are :
   o Perform required preorder processing
   o Process the first, then second operand, if any
   o Perform required postorder morphing
   o Perform optional postorder morphing if optimizing
*/
bool isQmarkColon = false;
#if LOCAL_ASSERTION_PROP
AssertionIndex origAssertionCount = DUMMY_INIT(0);
AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
AssertionIndex thenAssertionCount = DUMMY_INIT(0);
AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
tree = fgMorphForRegisterFP(tree);
genTreeOps oper = tree->OperGet();
var_types typ = tree->TypeGet();
GenTree* op1 = tree->gtOp.gtOp1;
GenTree* op2 = tree->gtGetOp2IfPresent();
/*-------------------------------------------------------------------------
 * First do any PRE-ORDER processing
 */
// Some arithmetic operators need to use a helper call to the EE
tree = fgDoNormalizeOnStore(tree);
/* fgDoNormalizeOnStore can change op2 */
noway_assert(op1 == tree->gtOp.gtOp1);
op2 = tree->gtOp.gtOp2;
#ifdef FEATURE_SIMD
// We should check whether op2 should be assigned to a SIMD field or not.
// If it is, we should translate the tree to a SIMD intrinsic.
assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
typ = tree->TypeGet();
op1 = tree->gtGetOp1();
op2 = tree->gtGetOp2();
assert((tree == newTree) && (tree->OperGet() == oper));
if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#ifdef LEGACY_BACKEND
// We can't CSE the LHS of an assignment. Only r-values can be CSEed.
// Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
// behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
// TODO-1stClassStructs: improve this.
if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
op1->gtFlags |= GTF_DONT_CSE;
/* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
op1->gtFlags |= GTF_DONT_CSE;
if (op1->OperKind() & GTK_RELOP)
noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
/* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
   not need to materialize the result as a 0 or 1. */
/* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
// Request that the codegen for op1 sets the condition flags
// when it generates the code for op1.
// Codegen for op1 must set the condition flags if
// this method returns true.
op1->gtRequestSetFlags();
GenTree* effOp1 = op1->gtEffectiveVal();
noway_assert((effOp1->gtOper == GT_CNS_INT) &&
(effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
#if LOCAL_ASSERTION_PROP
if (optLocalAssertionProp)
isQmarkColon = true;
return fgMorphArrayIndex(tree);
return fgMorphCast(tree);
#ifndef _TARGET_64BIT_
if (typ == TYP_LONG)
/* For (long)int1 * (long)int2, we don't actually do the
   casts, and just multiply the 32-bit values, which
   gives us the 64-bit result in edx:eax */
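// Illustrative shape (a sketch, not compiler output): for
// "(long)i1 * (long)i2" we keep
//
//     MUL(long) [GTF_MUL_64RSLT]
//       CAST(long <- int) i1
//       CAST(long <- int) i2
//
// and codegen emits a single 32x32->64 multiply instead of a helper call.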
if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) &&
!op1->gtOverflow() && !op2->gtOverflow())
// The casts have to be of the same signedness.
if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
// We see if we can force an int constant to change its signedness
if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
goto NO_MUL_64RSLT;
if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
constOp->gtFlags ^= GTF_UNSIGNED;
goto NO_MUL_64RSLT;
// The only combination that can overflow
if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
goto NO_MUL_64RSLT;
/* Remaining combinations can never overflow during long mul. */
tree->gtFlags &= ~GTF_OVERFLOW;
/* Do unsigned mul only if the casts were unsigned */
tree->gtFlags &= ~GTF_UNSIGNED;
tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
/* Since we are committing to GTF_MUL_64RSLT, we don't want
   the casts to be folded away. So morph the castees directly */
op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
// Propagate side effect flags up the tree
op1->gtFlags &= ~GTF_ALL_EFFECT;
op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
op2->gtFlags &= ~GTF_ALL_EFFECT;
op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
// If the GT_MUL can be altogether folded away, we should do that.
if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
opts.OptEnabled(CLFLG_CONSTANTFOLD))
tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
tree = gtFoldExprConst(tree);
noway_assert(tree->OperIsConst());
tree->gtFlags |= GTF_MUL_64RSLT;
// If op1 and op2 are unsigned casts, we need to do an unsigned mult
tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
// Insert GT_NOP nodes for the cast operands so that they do not get folded
// And propagate the new flags. We don't want to CSE the casts because
// codegen expects GTF_MUL_64RSLT muls to have a certain layout.
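// Sketch of the resulting shape (illustrative): each cast operand becomes
//
//     CAST(long <- int)
//       NOP(int)
//         <original operand>
//
// The GT_NOP acts as a fold barrier, so the casts that codegen
// pattern-matches for GTF_MUL_64RSLT stay in place.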
if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
op1->gtFlags &= ~GTF_ALL_EFFECT;
op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
op2->gtFlags &= ~GTF_ALL_EFFECT;
op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
op1->gtFlags |= GTF_DONT_CSE;
op2->gtFlags |= GTF_DONT_CSE;
tree->gtFlags &= ~GTF_ALL_EFFECT;
tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
goto DONE_MORPHING_CHILDREN;
else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
if (tree->gtOverflow())
helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
helper = CORINFO_HELP_LMUL;
goto USE_HELPER_FOR_ARITH;
/* We are seeing this node again. We have decided to use
   GTF_MUL_64RSLT, so leave it alone. */
assert(tree->gtIsValid64RsltMul());
#endif // !_TARGET_64BIT_
#ifndef _TARGET_64BIT_
if (typ == TYP_LONG)
helper = CORINFO_HELP_LDIV;
goto USE_HELPER_FOR_ARITH;
#if USE_HELPERS_FOR_INT_DIV
#if defined(LEGACY_BACKEND)
&& !fgIsSignedDivOptimizable(op2)
#endif // LEGACY_BACKEND
helper = CORINFO_HELP_DIV;
goto USE_HELPER_FOR_ARITH;
#endif // !_TARGET_64BIT_
#ifndef LEGACY_BACKEND
if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
op2 = gtFoldExprConst(op2);
#endif // !LEGACY_BACKEND
#ifndef _TARGET_64BIT_
if (typ == TYP_LONG)
helper = CORINFO_HELP_ULDIV;
goto USE_HELPER_FOR_ARITH;
#if USE_HELPERS_FOR_INT_DIV
#if defined(LEGACY_BACKEND)
&& !fgIsUnsignedDivOptimizable(op2)
#endif // LEGACY_BACKEND
helper = CORINFO_HELP_UDIV;
goto USE_HELPER_FOR_ARITH;
#endif // _TARGET_64BIT_
if (varTypeIsFloating(typ))
helper = CORINFO_HELP_DBLREM;
if (op1->TypeGet() == TYP_FLOAT)
if (op2->TypeGet() == TYP_FLOAT)
helper = CORINFO_HELP_FLTREM;
tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
else if (op2->TypeGet() == TYP_FLOAT)
tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
goto USE_HELPER_FOR_ARITH;
// Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
// A similar optimization for signed mod will not work for a negative perfectly divisible
// HI-word. To make it correct, we would need to divide without the sign and then flip the
// result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
goto ASSIGN_HELPER_FOR_MOD;
#ifdef _TARGET_ARMARCH_
// Note: for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
#else // _TARGET_XARCH
/* If this is an unsigned long mod with op2 which is a cast to long from a
   constant int, then don't morph to a call to the helper. This can be done
   faster inline using idiv.
*/
if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
(tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
op2->gtIntConCommon.LngValue() <= 0x3fffffff)
tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
noway_assert(op1->TypeGet() == TYP_LONG);
// Update flags for op1 morph
tree->gtFlags &= ~GTF_ALL_EFFECT;
tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
// If op1 is a constant, then do constant folding of the division operator
if (op1->gtOper == GT_CNS_NATIVELONG)
tree = gtFoldExpr(tree);
#endif // _TARGET_XARCH
ASSIGN_HELPER_FOR_MOD:
// For "val % 1", return 0 if op1 doesn't have any side effects
// and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
// because it may contain CSE expressions that we haven't yet examined.
if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
if (op2->IsIntegralConst(1))
GenTree* zeroNode = gtNewZeroConNode(typ);
zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
DEBUG_DESTROY_NODE(tree);
#ifndef _TARGET_64BIT_
if (typ == TYP_LONG)
helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
goto USE_HELPER_FOR_ARITH;
#if USE_HELPERS_FOR_INT_DIV
if (typ == TYP_INT)
if (oper == GT_UMOD
#if defined(LEGACY_BACKEND)
&& !fgIsUnsignedModOptimizable(op2)
#endif // LEGACY_BACKEND
helper = CORINFO_HELP_UMOD;
goto USE_HELPER_FOR_ARITH;
else if (oper == GT_MOD
#if defined(LEGACY_BACKEND)
&& !fgIsSignedModOptimizable(op2)
#endif // LEGACY_BACKEND
helper = CORINFO_HELP_MOD;
goto USE_HELPER_FOR_ARITH;
#endif // !_TARGET_64BIT_
#ifndef LEGACY_BACKEND
if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
op2 = gtFoldExprConst(op2);
#ifdef _TARGET_ARM64_
// For ARM64 we don't have a remainder instruction, so the architecture
// manual suggests the following transformation to
// generate code for such an operator:
//
//   a % b = a - (a / b) * b;
//
// We will use the suggested transform except in the special case
// when the modulo operation is unsigned and the divisor is an
// integer constant power of two. In this case, we will rely on lower
// to make the transform:
//
//   a % b = a & (b - 1);
//
// Note: We must always perform one or the other of these transforms.
// Therefore we must also detect the special cases where lower does not do the
// % to & transform. In our case there is currently only one extra condition:
//
//   * Dividend must not be constant. Lower disables this rare const % const case.
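// Sketch of the two shapes (illustrative): for "a % b" we either build
//
//     SUB(a, MUL(DIV(a, b), b))        // general case, morphed here
//
// or leave UMOD(a, b) in place for lowering to rewrite as
//
//     AND(a, b - 1)                    // unsigned, b a constant power of two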
// Do "a % b = a - (a / b) * b" morph if ...........................
bool doMorphModToSubMulDiv = (tree->OperGet() == GT_MOD) || // Modulo operation is signed
!op2->IsIntegralConst() ||                                  // Divisor is not an integer constant
!isPow2(op2->AsIntCon()->IconValue()) ||                    // Divisor is not a power of two
op1->IsCnsIntOrI();                                         // Dividend is constant
if (doMorphModToSubMulDiv)
assert(!optValnumCSE_phase);
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
#else // !_TARGET_ARM64_
// If b is not a power of 2 constant then lowering replaces a % b
// with a - (a / b) * b and applies magic division optimization to
// a / b. The code may already contain an a / b expression (e.g.
// x = a / 10; y = a % 10;) and then we end up with redundant code.
// If we convert % to / here we give CSE the opportunity to eliminate
// the redundant division. If there's no redundant division then
// nothing is lost; lowering would have done this transform anyway.
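// Worked example (illustrative): with "x = a / 10; y = a % 10;", morphing
// the mod here into "y = a - (a / 10) * 10" exposes a second "a / 10" that
// CSE can merge with the one feeding 'x', leaving a single division.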
if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
ssize_t divisorValue = op2->AsIntCon()->IconValue();
size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
: static_cast<size_t>(abs(divisorValue));
if (!isPow2(absDivisorValue))
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
#endif // !_TARGET_ARM64_
#endif // !LEGACY_BACKEND
USE_HELPER_FOR_ARITH:
/* We have to morph these arithmetic operations into helper calls
   before morphing the arguments (preorder), else the arguments
   won't get correct values of fgPtrArgCntCur.
   However, try to fold the tree first in case we end up with a
   simple node which won't need a helper call at all */
noway_assert(tree->OperIsBinary());
GenTree* oldTree = tree;
tree = gtFoldExpr(tree);
// Were we able to fold it ?
// Note that gtFoldExpr may return a non-leaf even if successful
// e.g. for something like "expr / 1" - see also bug #290853
if (tree->OperIsLeaf() || (oldTree != tree))
return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
// Did we fold it into a comma node with throw?
if (tree->gtOper == GT_COMMA)
noway_assert(fgIsCommaThrow(tree));
return fgMorphTree(tree);
return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
// normalize small integer return values
if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) &&
fgCastNeeded(op1, info.compRetType))
// Small-typed return values are normalized by the callee
op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType);
// Propagate GTF_COLON_COND
op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
tree->gtOp.gtOp1 = fgMorphCast(op1);
// Propagate side effect flags
tree->gtFlags &= ~GTF_ALL_EFFECT;
tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
GenTree* optimizedTree = gtFoldTypeCompare(tree);
if (optimizedTree != tree)
return fgMorphTree(optimizedTree);
// Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT.
if ((oper != GT_GT) || tree->IsUnsigned())
fgMorphRecognizeBoxNullable(tree);
op1 = tree->gtOp.gtOp1;
op2 = tree->gtGetOp2IfPresent();
case GT_RUNTIMELOOKUP:
return fgMorphTree(op1);
#ifdef _TARGET_ARM_
if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
switch (tree->TypeGet())
return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
#if !CPU_HAS_FP_SUPPORT
tree = fgMorphToEmulatedFP(tree);
/*-------------------------------------------------------------------------
 * Process the first operand, if any
 */
#if LOCAL_ASSERTION_PROP
// If we are entering the "then" part of a Qmark-Colon we must
// save the state of the current copy assignment table
// so that we can restore this state when entering the "else" part
noway_assert(optLocalAssertionProp);
if (optAssertionCount)
noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
origAssertionCount = optAssertionCount;
memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
origAssertionCount = 0;
origAssertionTab = nullptr;
#endif // LOCAL_ASSERTION_PROP
// We might need a new MorphAddressContext context. (These are used to convey
// parent context about how addresses being calculated will be used; see the
// specification comment for MorphAddrContext for full details.)
// Assume it's an Ind context to start.
MorphAddrContext subIndMac1(MACK_Ind);
MorphAddrContext* subMac1 = mac;
if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
switch (tree->gtOper)
if (subMac1 == nullptr)
subMac1 = &subIndMac1;
subMac1->m_kind = MACK_Addr;
// In a comma, the incoming context only applies to the rightmost arg of the
// comma list. The left arg (op1) gets a fresh context.
subMac1 = &subIndMac1;
// For additions, if we're in an IND context keep track of whether
// all offsets added to the address are constant, and their sum.
if (tree->gtOper == GT_ADD && subMac1 != nullptr)
assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
GenTree* otherOp = tree->gtOp.gtOp2;
// Is the other operand a constant?
if (otherOp->IsCnsIntOrI())
ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
totalOffset += otherOp->gtIntConCommon.IconValue();
if (totalOffset.IsOverflow())
// We will consider an offset so large as to overflow as "not a constant" --
// we will do a null check.
subMac1->m_allConstantOffsets = false;
subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
subMac1->m_allConstantOffsets = false;
// If gtOp1 is a GT_FIELD, we need to pass down the mac if
// its parent is GT_ADDR, since the address of the field
// is part of an ongoing address computation. Otherwise
// op1 represents the value of the field and so any address
// calculations it does are in a new context.
if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
// The impact of this field's value to any ongoing
// address computation is handled below when looking
tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
#if LOCAL_ASSERTION_PROP
// If we are exiting the "then" part of a Qmark-Colon we must
// save the state of the current copy assignment table
// so that we can merge this state with the "else" part exit
noway_assert(optLocalAssertionProp);
if (optAssertionCount)
noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
thenAssertionCount = optAssertionCount;
memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
thenAssertionCount = 0;
thenAssertionTab = nullptr;
#endif // LOCAL_ASSERTION_PROP
/* Morphing along with folding and inlining may have changed the
 * side effect flags, so we have to reset them
 * NOTE: Don't reset the exception flags on nodes that may throw */
assert(tree->gtOper != GT_CALL);
if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
tree->gtFlags &= ~GTF_CALL;
/* Propagate the new flags */
tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
// &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does.
// Similarly for clsVar.
if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
tree->gtFlags &= ~GTF_GLOB_REF;
/*-------------------------------------------------------------------------
 * Process the second operand, if any
 */
#if LOCAL_ASSERTION_PROP
// If we are entering the "else" part of a Qmark-Colon we must
// reset the state of the current copy assignment table
noway_assert(optLocalAssertionProp);
optAssertionReset(0);
if (origAssertionCount)
size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
optAssertionReset(origAssertionCount);
#endif // LOCAL_ASSERTION_PROP
// We might need a new MorphAddressContext context to use in evaluating op2.
// (These are used to convey parent context about how addresses being calculated
// will be used; see the specification comment for MorphAddrContext for full details.)
// Assume it's an Ind context to start.
switch (tree->gtOper)
if (mac != nullptr && mac->m_kind == MACK_Ind)
GenTree* otherOp = tree->gtOp.gtOp1;
// Is the other operand a constant?
if (otherOp->IsCnsIntOrI())
mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
mac->m_allConstantOffsets = false;
// If gtOp2 is a GT_FIELD, we must be taking its value,
// so it should evaluate its address in a new context.
if (op2->gtOper == GT_FIELD)
// The impact of this field's value to any ongoing
// address computation is handled above when looking
tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
/* Propagate the side effect flags from op2 */
tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
#if LOCAL_ASSERTION_PROP
// If we are exiting the "else" part of a Qmark-Colon we must
// merge the state of the current copy assignment table with
// that of the exit of the "then" part.
noway_assert(optLocalAssertionProp);
// If either exit table has zero entries then
// the merged table also has zero entries
if (optAssertionCount == 0 || thenAssertionCount == 0)
optAssertionReset(0);
size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
if ((optAssertionCount != thenAssertionCount) ||
(memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
// Yes they are different so we have to find the merged set
// Iterate over the copy asgn table removing any entries
// that do not have an exact match in the thenAssertionTab
AssertionIndex index = 1;
while (index <= optAssertionCount)
AssertionDsc* curAssertion = optGetAssertion(index);
for (unsigned j = 0; j < thenAssertionCount; j++)
AssertionDsc* thenAssertion = &thenAssertionTab[j];
// Do the left sides match?
if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
(curAssertion->assertionKind == thenAssertion->assertionKind))
// Do the right sides match?
if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
(curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
// If we fall out of the loop above then we didn't find
// any matching entry in the thenAssertionTab so it must
// have been killed on that path so we remove it here
// The data at optAssertionTabPrivate[i] is to be removed
CLANG_FORMAT_COMMENT_ANCHOR;
printf("The QMARK-COLON ");
printf(" removes assertion candidate #%d\n", index);
optAssertionRemove(index);
// The data at optAssertionTabPrivate[i] is to be kept
#endif // LOCAL_ASSERTION_PROP
DONE_MORPHING_CHILDREN:
if (tree->OperMayThrow(this))
// Mark the tree node as potentially throwing an exception
tree->gtFlags |= GTF_EXCEPT;
if (tree->OperIsIndirOrArrLength())
tree->gtFlags |= GTF_IND_NONFAULTING;
if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) &&
((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0)))
tree->gtFlags &= ~GTF_EXCEPT;
if (tree->OperRequiresAsgFlag())
tree->gtFlags |= GTF_ASG;
if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) &&
((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
tree->gtFlags &= ~GTF_ASG;
/*-------------------------------------------------------------------------
 * Now do POST-ORDER processing
 */
#if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
// Variable shifts of a long end up being helper calls, so mark the tree as such. This
// is potentially too conservative, since they'll get treated as having side effects.
// It is important to mark them as calls so if they are part of an argument list,
// they will get sorted and processed properly (for example, it is important to handle
// all nested calls before putting struct arguments in the argument registers). We
// could mark the trees just before argument processing, but it would require a full
// tree walk of the argument tree, so we just do it here, instead, even though we'll
// mark non-argument trees (that will still get converted to calls, anyway).
if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
tree->gtFlags |= GTF_CALL;
#endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet())))
// The tree is really not GC but was marked as such. Now that the
// children have been unmarked, unmark the tree too.
// Remember that GT_COMMA inherits its type only from op2
if (tree->gtOper == GT_COMMA)
tree->gtType = genActualType(op2->TypeGet());
tree->gtType = genActualType(op1->TypeGet());
GenTree* oldTree = tree;
GenTree* qmarkOp1 = nullptr;
GenTree* qmarkOp2 = nullptr;
if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
// Try to fold it, maybe we get lucky,
tree = gtFoldExpr(tree);
if (oldTree != tree)
/* if gtFoldExpr returned op1 or op2 then we are done */
if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
/* If we created a comma-throw tree then we need to morph op1 */
if (fgIsCommaThrow(tree))
tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
fgMorphTreeDone(tree);
else if (tree->OperKind() & GTK_CONST)
/* gtFoldExpr could have used setOper to change the oper */
oper = tree->OperGet();
typ = tree->TypeGet();
/* gtFoldExpr could have changed op1 and op2 */
op1 = tree->gtOp.gtOp1;
op2 = tree->gtGetOp2IfPresent();
// Do we have an integer compare operation?
if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
// Are we comparing against zero?
if (op2->IsIntegralConst(0))
// Request that the codegen for op1 sets the condition flags
// when it generates the code for op1.
// Codegen for op1 must set the condition flags if
// this method returns true.
op1->gtRequestSetFlags();
/*-------------------------------------------------------------------------
 * Perform the required oper-specific postorder morphing
 */
size_t ival1, ival2;
GenTree* lclVarTree;
FieldSeqNode* fieldSeq = nullptr;
lclVarTree = fgIsIndirOfAddrOfLocal(op1);
if (lclVarTree != nullptr)
lclVarTree->gtFlags |= GTF_VAR_DEF;
if (op1->gtEffectiveVal()->OperIsConst())
op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
tree->gtOp.gtOp1 = op1;
/* If we are storing a small type, we might be able to omit a cast */
if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
var_types castType = op2->CastToType();
// If we are performing a narrowing cast and
// castType is larger or the same as op1's type
// then we can discard the cast.
if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(op1->TypeGet())))
tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
/* We don't need to zero extend the setcc instruction */
op2->gtType = TYP_BYTE;
// If we introduced a CSE we may need to undo the optimization above
// (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
// When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
unsigned varNum = op1->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &lvaTable[varNum];
/* We again need to zero extend the setcc instruction */
op2->gtType = varDsc->TypeGet();
fgAssignSetVarDef(tree);
#ifdef LEGACY_BACKEND
/* We can't CSE the LHS of an assignment */
/* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
op1->gtFlags |= GTF_DONT_CSE;
/* Make sure we're allowed to do this */
if (optValnumCSE_phase)
// It is not safe to reorder/delete CSE's
/* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
op1 = tree->gtOp.gtOp1;
/* Since this can occur repeatedly we use a while loop */
while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
(op1->gtType == TYP_INT) && (op1->gtOverflow() == false))
/* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
ival2 = cns2->gtIntCon.gtIconVal;
if (op1->gtOper == GT_ADD)
cns2->gtIntCon.gtIconVal = ival2;
#ifdef _TARGET_64BIT_
// we need to properly re-sign-extend or truncate as needed.
cns2->AsIntCon()->TruncateOrSignExtend32();
#endif // _TARGET_64BIT_
op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
// Here we look for the following tree
ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
// cast to unsigned allows test for both 0 and 1
if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
ival2 = (size_t)cns2->gtIntConCommon.IconValue();
else // cast to UINT64 allows test for both 0 and 1
if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
ival2 = (size_t)cns2->gtIntConCommon.LngValue();
if (ival2 != INT_MAX)
// If we don't have a comma and relop, we can't do this optimization
if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
// Here we look for the following transformation
//
//   EQ/NE                Possible REVERSE(RELOP)
//     COMMA CNS 0/1  ->    COMMA   relop_op2
//       x RELOP              x     relop_op1
//         relop_op1 relop_op2
//
GenTree* comma = op1;
GenTree* relop = comma->gtOp.gtOp2;
GenTree* relop_op1 = relop->gtOp.gtOp1;
bool reverse = ((ival2 == 0) == (oper == GT_EQ));
gtReverseCond(relop);
relop->gtOp.gtOp1 = comma;
comma->gtOp.gtOp2 = relop_op1;
// Comma now has fewer nodes underneath it, so we need to regenerate its flags
comma->gtFlags &= ~GTF_ALL_EFFECT;
comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
if (op1->gtOper == GT_COMMA)
// Here we look for the following tree
// and when the LCL_VAR is a temp we can fold the tree:
//
//   COMMA CNS 0/1  ->  RELOP CNS 0/1
//
GenTree* asg = op1->gtOp.gtOp1;
GenTree* lcl = op1->gtOp.gtOp2;
/* Make sure that the left side of the comma is the assignment of the LCL_VAR */
if (asg->gtOper != GT_ASG)
/* The right side of the comma must be a LCL_VAR temp */
if (lcl->gtOper != GT_LCL_VAR)
unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
noway_assert(lclNum < lvaCount);
/* If the LCL_VAR is not a temp then bail, a temp has a single def */
if (!lvaTable[lclNum].lvIsTemp)
/* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
// Fix 383856 X86/ARM ILGEN
if (lclNumIsCSE(lclNum))
/* We also must be assigning the result of a RELOP */
if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
/* Both of the LCL_VAR must match */
if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
/* If right side of asg is not a RELOP then skip */
if (!asg->gtOp.gtOp2->OperIsCompare())
LclVarDsc* varDsc = lvaTable + lclNum;
/* Set op1 to the right side of asg, (i.e. the RELOP) */
op1 = asg->gtOp.gtOp2;
DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
DEBUG_DESTROY_NODE(lcl);
/* This local variable should never be used again */
// VSW 184221: Set lvRefCnt to zero to indicate that this local var
// is not used any more. (Keep the lvType as is.)
// Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
// and then emitter::emitEndCodeGen will assert in the following line:
//     noway_assert( dsc->lvTracked);
noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
             varDsc->lvRefCnt == 2    // Or, we assume this tmp should only be used here,
                                      // and it only shows up twice.
lvaTable[lclNum].lvRefCnt = 0;
lvaTable[lclNum].lvaResetSortAgainFlag(this);
if (op1->OperIsCompare())
// Here we look for the following tree
//
//   EQ/NE  ->  RELOP/!RELOP
//
// Note that we will remove/destroy the EQ/NE node and move
// the RELOP up into its location.
/* Here we reverse the RELOP if necessary */
bool reverse = ((ival2 == 0) == (oper == GT_EQ));
gtReverseCond(op1);
/* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
op1->gtType = tree->gtType;
noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
DEBUG_DESTROY_NODE(tree);
// Now we check for a compare with the result of an '&' operator
//
// Here we look for the following transformation:
//
//   AND     CNS 0/1  ->  AND   CNS 0
//     RSZ/RSH CNS 1        x   CNS (1 << y)
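// Concrete instance (illustrative): "((x >> 5) & 1) != 0" becomes
// "(x & 0x20) != 0" -- the shift amount is folded into the AND mask and
// the comparand becomes 0, so the shift disappears.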
if (op1->gtOper == GT_AND)
GenTree* andOp = op1;
GenTree* rshiftOp = andOp->gtOp.gtOp1;
if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
if (shiftAmount < 0)
if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
if (andOp->gtType == TYP_INT)
if (shiftAmount > 31)
UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
// Reverse the cond if necessary
gtReverseCond(tree);
cns2->gtIntCon.gtIconVal = 0;
oper = tree->gtOper;
else if (andOp->gtType == TYP_LONG)
if (shiftAmount > 63)
UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
// Reverse the cond if necessary
gtReverseCond(tree);
cns2->gtIntConCommon.SetLngValue(0);
oper = tree->gtOper;
andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
DEBUG_DESTROY_NODE(rshiftOp);
} // END if (ival2 != INT_MAX)
/* Now check for compares with small constant longs that can be cast to int */
if (!cns2->OperIsConst())
if (cns2->TypeGet() != TYP_LONG)
/* Is the constant 31 bits or smaller? */
if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
/* Is the first comparand mask operation of type long ? */
if (op1->gtOper != GT_AND)
/* Another interesting case: cast from int */
if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
!gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
!op1->gtOverflow())              // cannot be an overflow checking cast
/* Simply make this into an integer comparison */
tree->gtOp.gtOp1 = op1->gtCast.CastOp();
tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
/* Is the result of the mask effectively an INT ? */
andMask = op1->gtOp.gtOp2;
if (andMask->gtOper != GT_CNS_NATIVELONG)
if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
/* Now we know that we can cast gtOp.gtOp1 of AND to int */
op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, false, TYP_INT);
/* now replace the mask node (gtOp.gtOp2 of AND node) */
noway_assert(andMask == op1->gtOp.gtOp2);
ival1 = (int)andMask->gtIntConCommon.LngValue();
andMask->SetOper(GT_CNS_INT);
andMask->gtType = TYP_INT;
andMask->gtIntCon.gtIconVal = ival1;
/* now change the type of the AND node */
op1->gtType = TYP_INT;
/* finally we replace the comparand */
ival2 = (int)cns2->gtIntConCommon.LngValue();
cns2->SetOper(GT_CNS_INT);
cns2->gtType = TYP_INT;
noway_assert(cns2 == op2);
cns2->gtIntCon.gtIconVal = ival2;
if ((tree->gtFlags & GTF_UNSIGNED) == 0)
if (op2->gtOper == GT_CNS_INT)
/* Check for "expr relop 1" */
if (cns2->IsIntegralConst(1))
/* Check for "expr >= 1" */
/* Change to "expr > 0" */
/* Check for "expr < 1" */
else if (oper == GT_LT)
/* Change to "expr <= 0" */
/* Check for "expr relop -1" */
else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
/* Check for "expr <= -1" */
/* Change to "expr < 0" */
/* Check for "expr > -1" */
else if (oper == GT_GT)
/* Change to "expr >= 0" */
// IF we get here we should be changing 'oper'
assert(tree->OperGet() != oper);
// Keep the old ValueNumber for 'tree' as the new expr
// will still compute the same value as before
tree->SetOper(oper, GenTree::PRESERVE_VN);
cns2->gtIntCon.gtIconVal = 0;
// vnStore is null before the ValueNumber phase has run
if (vnStore != nullptr)
// Update the ValueNumber for 'cns2', as we just changed it to 0
fgValueNumberTreeConst(cns2);
op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
else // we have an unsigned comparison
if (op2->IsIntegralConst(0))
if ((oper == GT_GT) || (oper == GT_LE))
// IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
// recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
// if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
// and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
// occurs as a result of branch inversion.
oper = (oper == GT_LE) ? GT_EQ : GT_NE;
tree->SetOper(oper, GenTree::PRESERVE_VN);
tree->gtFlags &= ~GTF_UNSIGNED;
noway_assert(tree->OperKind() & GTK_RELOP);
#ifdef LEGACY_BACKEND
/* Check if the result of the comparison is used for a jump.
 * If not then only the int (i.e. 32 bit) case is handled in
 * the code generator through the (x86) "set" instructions.
 * For the rest of the cases, the simplest way is to
 * "simulate" the comparison with ?:
 *
 * On ARM, we previously used the IT instruction, but the IT instructions
 * have mostly been declared obsolete and off-limits, so all cases on ARM
 * get converted to ?: */
if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
/* We convert it to "(CMP_TRUE) ? (1):(0)" */
op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
op1->gtRequestSetFlags();
op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
op2 = fgMorphTree(op2);
tree = gtNewQmarkNode(TYP_INT, op1, op2);
fgMorphTreeDone(tree);
#endif // LEGACY_BACKEND
#ifdef LEGACY_BACKEND
/* If op1 is a comma throw node then we won't be keeping op2 */
if (fgIsCommaThrow(op1))
/* Get hold of the two branches */
noway_assert(op2->OperGet() == GT_COLON);
GenTree* thenNode = op2->AsColon()->ThenNode();
GenTree* elseNode = op2->AsColon()->ElseNode();
/* Try to hoist assignments out of qmark colon constructs,
   i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
GenTree* asg = thenNode;
GenTree* colon = op2;
colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
asg->gtOp.gtOp2 = tree;
// Asg will have all the flags that the QMARK had
asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
// Colon flag won't have the flags that x had.
colon->gtFlags &= ~GTF_ALL_EFFECT;
colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
DEBUG_DESTROY_NODE(elseNode);
/* If the 'else' branch is empty swap the two branches and reverse the condition */
if (elseNode->IsNothingNode())
/* This can only happen for VOID ?: */
noway_assert(op2->gtType == TYP_VOID);
/* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
if (thenNode->IsNothingNode())
// We may be able to throw away op1 (unless it has side-effects)
if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
/* Just return a Nop node */
/* Just return the relop, but clear the special flags. Note
   that we can't do that for longs and floats (see code under
   COMPARE label above) */
if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
GenTree* tmp = elseNode;
op2->AsColon()->ElseNode() = elseNode = thenNode;
op2->AsColon()->ThenNode() = thenNode = tmp;
gtReverseCond(op1);
#if !defined(_TARGET_ARM_)
// If we have (cond)?0:1, then we just return "cond" for TYP_INTs
//
// Don't do this optimization for ARM: we always require assignment
// to boolean to remain ?:, since we don't have any way to generate
// this with straight-line code, like x86 does using setcc (at least
// after the IT instruction is deprecated).
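// Illustrative example: a boolean materialization such as "x < y ? 1 : 0"
// collapses to the relop "x < y" itself; with the 0 and 1 swapped, the
// condition is first reversed (to "x >= y") before the relop is returned.
// Either way the QMARK and COLON nodes are destroyed.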
if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
ival1 = thenNode->gtIntCon.gtIconVal;
ival2 = elseNode->gtIntCon.gtIconVal;
// Is one constant 0 and the other 1?
if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
// If the constants are {1, 0}, reverse the condition
gtReverseCond(op1);
// Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
// needs to materialize the result as a 0 or 1.
noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
DEBUG_DESTROY_NODE(tree);
DEBUG_DESTROY_NODE(op2);
#endif // !_TARGET_ARM_
break; // end case GT_QMARK
#endif // LEGACY_BACKEND
#ifndef _TARGET_64BIT_
if (typ == TYP_LONG)
// This must be GTF_MUL_64RSLT
assert(tree->gtIsValid64RsltMul());
#endif // _TARGET_64BIT_
if (tree->gtOverflow())
// TODO #4104: there are a lot of other places where
// this condition is not checked before transformations.
/* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
if (op2->IsCnsIntOrI())
/* Negate the constant and change the node to be "+" */
op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
tree->ChangeOper(oper);
/* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
if (op1->IsCnsIntOrI())
noway_assert(varTypeIsIntOrI(tree));
tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
// node should be the same
// as the type of the tree, i.e. tree->gtType.
fgMorphTreeDone(op2);
tree->ChangeOper(oper);
/* No match - exit */
#ifdef _TARGET_ARM64_
if (!varTypeIsFloating(tree->gtType))
// Codegen for this instruction needs to be able to throw two exceptions:
fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
// Codegen for this instruction needs to be able to throw one exception:
fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
if (tree->gtOverflow())
tree->gtRequestSetFlags();
// Add the exception-throwing basic block to jump to on overflow
13501 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
13503 // We can't do any commutative morphing for overflow instructions
13514 /* Commute any non-REF constants to the right */
13517 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
13519 // TODO-Review: We used to assert here that
13520 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
13521 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
13522 // and would sometimes hit this assertion. This may indicate a missed "remorph".
13523 // Task is to re-enable this assertion and investigate.
13525 /* Swap the operands */
13526 tree->gtOp.gtOp1 = op2;
13527 tree->gtOp.gtOp2 = op1;
13530 op2 = tree->gtOp.gtOp2;
13533 /* See if we can fold GT_ADD nodes. */
13535 if (oper == GT_ADD)
13537 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
13539 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
13540 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
13541 !op1->gtOverflow() && !op2->gtOverflow())
13543 cns1 = op1->gtOp.gtOp2;
13544 cns2 = op2->gtOp.gtOp2;
13545 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
13546 #ifdef _TARGET_64BIT_
13547 if (cns1->TypeGet() == TYP_INT)
13549 // we need to properly re-sign-extend or truncate after adding two int constants above
13550 cns1->AsIntCon()->TruncateOrSignExtend32();
13552 #endif //_TARGET_64BIT_
13554 tree->gtOp.gtOp2 = cns1;
13555 DEBUG_DESTROY_NODE(cns2);
13557 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
13558 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
13559 DEBUG_DESTROY_NODE(op2);
13560 op2 = tree->gtOp.gtOp2;
13563 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
13565 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
13566 CLANG_FORMAT_COMMENT_ANCHOR;
13568 #if FEATURE_PREVENT_BAD_BYREFS
13570 if (op1->gtOper == GT_ADD && //
13571 !gtIsActiveCSE_Candidate(op1) && //
13572 !op1->gtOverflow() && //
13573 op1->gtOp.gtOp2->IsCnsIntOrI() && //
13574 (op1->gtOp.gtOp2->OperGet() == op2->OperGet()) && //
13575 (op1->gtOp.gtOp2->TypeGet() != TYP_REF) && // Don't fold REFs
13576 (op2->TypeGet() != TYP_REF)) // Don't fold REFs
13578 #else // !FEATURE_PREVENT_BAD_BYREFS
13580 if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
13581 !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
13583 #endif // !FEATURE_PREVENT_BAD_BYREFS
13586 cns1 = op1->gtOp.gtOp2;
13587 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
13588 op2->gtIntConCommon.IconValue());
13589 #ifdef _TARGET_64BIT_
13590 if (op2->TypeGet() == TYP_INT)
13592 // we need to properly re-sign-extend or truncate after adding two int constants above
13593 op2->AsIntCon()->TruncateOrSignExtend32();
13595 #endif //_TARGET_64BIT_
13597 if (cns1->OperGet() == GT_CNS_INT)
13599 op2->gtIntCon.gtFieldSeq =
13600 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
13602 DEBUG_DESTROY_NODE(cns1);
13604 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
13605 DEBUG_DESTROY_NODE(op1);
13606 op1 = tree->gtOp.gtOp1;
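// Illustrative note (not from the original source): e.g.
//     (x + 3) + 5   ==>   x + 8
// On 64-bit targets the merged TYP_INT constant is re-normalized via
// TruncateOrSignExtend32, as above.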
13611 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
13614 // If this addition is adding an offset to a null pointer,
13615 // avoid the work and yield the null pointer immediately.
13616 // Dereferencing the pointer in either case will have the same effect.
13619 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
13620 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
13622 op2->gtType = tree->gtType;
13623 DEBUG_DESTROY_NODE(op1);
13624 DEBUG_DESTROY_NODE(tree);
13628 // Remove the addition iff it won't change the tree type
13631 if (!gtIsActiveCSE_Candidate(op2) &&
13632 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
13634 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
13635 (op2->gtIntCon.gtFieldSeq != nullptr) &&
13636 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
13638 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
13641 DEBUG_DESTROY_NODE(op2);
13642 DEBUG_DESTROY_NODE(tree);
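// Illustrative note (not from the original source): e.g.
//     x + 0        ==>   x     (any zero-offset field sequence moves to x)
//     off + null   ==>   null  (when 'off' has no side effects; dereferencing
//                               faults either way)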
13649 /* See if we can fold GT_MUL by const nodes */
13650 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
13652 #ifndef _TARGET_64BIT_
13653 noway_assert(typ <= TYP_UINT);
13654 #endif // _TARGET_64BIT_
13655 noway_assert(!tree->gtOverflow());
13657 ssize_t mult = op2->gtIntConCommon.IconValue();
13658 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
13659 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
13661 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
13665 // We may be able to throw away op1 (unless it has side-effects)
13667 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
13669 DEBUG_DESTROY_NODE(op1);
13670 DEBUG_DESTROY_NODE(tree);
13671 return op2; // Just return the "0" node
13674 // We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
13677 tree->ChangeOper(GT_COMMA);
13681 size_t abs_mult = (mult >= 0) ? mult : -mult;
13682 size_t lowestBit = genFindLowestBit(abs_mult);
13683 bool changeToShift = false;
13685 // Is it a power of two (positive or negative)?
13686 if (abs_mult == lowestBit)
13688 // If negative, negate (min-int does not need negation)
13689 if (mult < 0 && mult != SSIZE_T_MIN)
13691 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
13692 fgMorphTreeDone(op1);
13695 // If "op2" is a constant array index, the other multiplicand must be a constant.
13696 // Transfer the annotation to the other one.
13697 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
13698 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
13700 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
13701 GenTree* otherOp = op1;
13702 if (otherOp->OperGet() == GT_NEG)
13704 otherOp = otherOp->gtOp.gtOp1;
13706 assert(otherOp->OperGet() == GT_CNS_INT);
13707 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
13708 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
13713 DEBUG_DESTROY_NODE(op2);
13714 DEBUG_DESTROY_NODE(tree);
13718 /* Change the multiplication into a shift by log2(val) bits */
13719 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
13720 changeToShift = true;
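// Illustrative note (not from the original source): e.g.
//     x * 8    ==>   x << 3
//     x * -8   ==>   (-x) << 3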
13723 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
13725 int shift = genLog2(lowestBit);
13726 ssize_t factor = abs_mult >> shift;
13728 if (factor == 3 || factor == 5 || factor == 9)
13730 // If negative, negate (min-int does not need negation)
13731 if (mult < 0 && mult != SSIZE_T_MIN)
13733 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
13734 fgMorphTreeDone(op1);
13737 GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
13738 if (op2IsConstIndex)
13740 factorIcon->AsIntCon()->gtFieldSeq =
13741 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
13744 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
13745 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
13746 fgMorphTreeDone(op1);
13748 op2->gtIntConCommon.SetIconValue(shift);
13749 changeToShift = true;
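// Illustrative note (not from the original source): e.g., when
// optAvoidIntMult() is in effect:
//     x * 10   ==>   (x * 5) << 1
//     x * 36   ==>   (x * 9) << 2
// The residual multiply by 3, 5 or 9 maps onto a scaled-index LEA.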
13752 #endif // LEA_AVAILABLE
13755 // vnStore is null before the ValueNumber phase has run
13756 if (vnStore != nullptr)
13758 // Update the ValueNumber for 'op2', as we just changed the constant
13759 fgValueNumberTreeConst(op2);
13762 // Keep the old ValueNumber for 'tree' as the new expr
13763 // will still compute the same value as before
13764 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
13766 goto DONE_MORPHING_CHILDREN;
13769 else if (fgOperIsBitwiseRotationRoot(oper))
13771 tree = fgRecognizeAndMorphBitwiseRotation(tree);
13773 // fgRecognizeAndMorphBitwiseRotation may return a new tree
13774 oper = tree->OperGet();
13775 typ = tree->TypeGet();
13776 op1 = tree->gtOp.gtOp1;
13777 op2 = tree->gtOp.gtOp2;
13782 #ifdef LEGACY_BACKEND
13788 /* Any constant cases should have been folded earlier */
13789 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13794 noway_assert(varTypeIsFloating(op1->TypeGet()));
13796 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
13800 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13801 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13802 // is a local or clsVar, even if it has been address-exposed.
13803 if (op1->OperGet() == GT_ADDR)
13805 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13811 // Can not remove a GT_IND if it is currently a CSE candidate.
13812 if (gtIsActiveCSE_Candidate(tree))
13817 bool foldAndReturnTemp;
13818 foldAndReturnTemp = false;
13822 /* Try to Fold *(&X) into X */
13823 if (op1->gtOper == GT_ADDR)
13825 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13826 if (gtIsActiveCSE_Candidate(op1))
13831 temp = op1->gtOp.gtOp1; // X
13833 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13834 // they are the *same* struct type. In fact, they almost certainly aren't. If the
13835 // address has an associated field sequence, that identifies this case; go through
13836 // the "lcl_fld" path rather than this one.
13837 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13838 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13840 foldAndReturnTemp = true;
13842 else if (temp->OperIsLocal())
13844 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
13845 LclVarDsc* varDsc = &lvaTable[lclNum];
13847 // We will try to optimize when we have a struct that was promoted with a zero lvFldOffset
13848 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13850 noway_assert(varTypeIsStruct(varDsc));
13852 // We will try to optimize when we have a single field struct that is being struct promoted
13853 if (varDsc->lvFieldCnt == 1)
13855 unsigned lclNumFld = varDsc->lvFieldLclStart;
13856 // just grab the promoted field
13857 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
13859 // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset is zero
13861 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13863 // We can just use the existing promoted field LclNum
13864 temp->gtLclVarCommon.SetLclNum(lclNumFld);
13865 temp->gtType = fieldVarDsc->TypeGet();
13867 foldAndReturnTemp = true;
13871 // If the type of the IND (typ) is a "small int", and the type of the local has the
13872 // same width, then we can reduce to just the local variable -- it will be
13873 // correctly normalized, and signed/unsigned differences won't matter.
13875 // The below transformation cannot be applied if the local var needs to be normalized on load.
13876 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13877 !lvaTable[lclNum].lvNormalizeOnLoad())
13879 tree->gtType = typ = temp->TypeGet();
13880 foldAndReturnTemp = true;
13884 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. nullptr)
13886 assert(fieldSeq == nullptr);
13887 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13888 assert(b || fieldSeq == nullptr);
13890 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13892 // Append the field sequence, change the type.
13893 temp->AsLclFld()->gtFieldSeq =
13894 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13895 temp->gtType = typ;
13897 foldAndReturnTemp = true;
13900 // Otherwise we will fold this into a GT_LCL_FLD below
13901 // where we check (temp != nullptr)
13903 else // !temp->OperIsLocal()
13905 // We don't try to fold away the GT_IND/GT_ADDR for this case
13909 else if (op1->OperGet() == GT_ADD)
13911 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
13913 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13914 (!(opts.MinOpts() || opts.compDbgCode)))
13916 // No overflow arithmetic with pointers
13917 noway_assert(!op1->gtOverflow());
13919 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13920 if (!temp->OperIsLocal())
13926 // Can not remove the GT_ADDR if it is currently a CSE candidate.
13927 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13932 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13933 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13935 // Does the address have an associated zero-offset field sequence?
13936 FieldSeqNode* addrFieldSeq = nullptr;
13937 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13939 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13942 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13944 noway_assert(!varTypeIsGC(temp->TypeGet()));
13945 foldAndReturnTemp = true;
13949 // The emitter can't handle large offsets
13950 if (ival1 != (unsigned short)ival1)
13955 // The emitter can get confused by invalid offsets
13956 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13961 #ifdef _TARGET_ARM_
13962 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13964 if (varTypeIsFloating(typ))
13966 if ((ival1 % emitTypeSize(typ)) != 0)
13968 tree->gtFlags |= GTF_IND_UNALIGNED;
13974 // Now we can fold this into a GT_LCL_FLD below
13975 // where we check (temp != nullptr)
13979 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13980 // - We may have a load of a local where the load has a different type than the local
13981 // - We may have a load of a local plus an offset
13983 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13984 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13985 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13986 // out-of-bounds w.r.t. the local).
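// Illustrative note (not from the original source): e.g., for a 4-byte local 'x',
//     IND(short, ADDR(x) + 2)   ==>   LCL_FLD short x [+2]
// whereas an offset of 3 would let the 2-byte load run past the end of the
// local and is rejected by the bounds check below.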
13987 if ((temp != nullptr) && !foldAndReturnTemp)
13989 assert(temp->OperIsLocal());
13991 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13992 LclVarDsc* const varDsc = &lvaTable[lclNum];
13994 const var_types tempTyp = temp->TypeGet();
13995 const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13996 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13998 // Make sure we do not enregister this lclVar.
13999 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
14001 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
14002 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
14003 // lclVar and must not extend beyond the end of the lclVar.
14004 if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
14006 // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival',
14007 // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'.
14008 // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
14010 if (temp->OperGet() == GT_LCL_FLD)
14012 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
14013 temp->AsLclFld()->gtFieldSeq =
14014 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
14018 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
14019 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
14020 if (fieldSeq != nullptr)
14021 { // If it does represent a field, note that.
14022 temp->AsLclFld()->gtFieldSeq = fieldSeq;
14025 temp->gtType = tree->gtType;
14026 foldAndReturnTemp = true;
14030 if (foldAndReturnTemp)
14032 assert(temp != nullptr);
14033 assert(temp->TypeGet() == typ);
14034 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
14036 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
14037 // 'temp' because a GT_ADDR always marks it for its operand.
14038 temp->gtFlags &= ~GTF_DONT_CSE;
14039 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
14041 if (op1->OperGet() == GT_ADD)
14043 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
14044 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
14046 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
14047 DEBUG_DESTROY_NODE(tree); // GT_IND
14049 // If the result of the fold is a local var, we may need to perform further adjustments e.g. for normalization.
14051 if (temp->OperIs(GT_LCL_VAR))
14054 // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
14055 // and the node in question must have this bit set (as it has already been morphed).
14056 temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
14058 const bool forceRemorph = true;
14059 temp = fgMorphLocalVar(temp, forceRemorph);
14061 // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
14062 // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function returns.
14064 temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14071 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
14072 // could result in an invalid value number for the newly generated GT_IND node.
14073 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
14075 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
14076 // TBD: this transformation is currently necessary for correctness -- it might
14077 // be good to analyze the failures that result if we don't do this, and fix them
14078 // in other ways. Ideally, this should be optional.
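// Illustrative note (not from the original source): e.g.
//     IND(COMMA(sideEffects, addr))   ==>   COMMA(sideEffects, IND(addr))
// The indirection sinks to the address it actually consumes.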
14079 GenTree* commaNode = op1;
14080 unsigned treeFlags = tree->gtFlags;
14081 commaNode->gtType = typ;
14082 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
14083 // dangerous, clear the GTF_REVERSE_OPS at least.
14086 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14088 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
14090 commaNode = commaNode->gtOp.gtOp2;
14091 commaNode->gtType = typ;
14092 commaNode->gtFlags =
14093 (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG); // Bashing the GT_COMMA flags here is
14094 // dangerous, clear the GTF_REVERSE_OPS at least.
14096 commaNode->gtFlags |=
14097 ((commaNode->gtOp.gtOp1->gtFlags & GTF_ASG) | (commaNode->gtOp.gtOp2->gtFlags & GTF_ASG));
14099 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14102 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
14106 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
14108 GetArrayInfoMap()->Remove(tree);
14111 GenTree* addr = commaNode->gtOp.gtOp2;
14112 op1 = gtNewIndir(typ, addr);
14113 // This is very conservative
14114 op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING;
14115 op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
14119 GetArrayInfoMap()->Set(op1, arrInfo);
14122 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14124 commaNode->gtOp.gtOp2 = op1;
14125 commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
14133 // Can not remove op1 if it is currently a CSE candidate.
14134 if (gtIsActiveCSE_Candidate(op1))
14139 if (op1->OperGet() == GT_IND)
14141 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
14143 // Can not remove a GT_ADDR if it is currently a CSE candidate.
14144 if (gtIsActiveCSE_Candidate(tree))
14149 // Perform the transform ADDR(IND(...)) == (...).
14150 GenTree* addr = op1->gtOp.gtOp1;
14152 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
14154 DEBUG_DESTROY_NODE(op1);
14155 DEBUG_DESTROY_NODE(tree);
14160 else if (op1->OperGet() == GT_OBJ)
14162 // Can not remove a GT_ADDR if it is currently a CSE candidate.
14163 if (gtIsActiveCSE_Candidate(tree))
14168 // Perform the transform ADDR(OBJ(...)) == (...).
14169 GenTree* addr = op1->AsObj()->Addr();
14171 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
14173 DEBUG_DESTROY_NODE(op1);
14174 DEBUG_DESTROY_NODE(tree);
14178 else if (op1->gtOper == GT_CAST)
14180 GenTree* casting = op1->gtCast.CastOp();
14181 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
14183 DEBUG_DESTROY_NODE(op1);
14184 tree->gtOp.gtOp1 = op1 = casting;
14187 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
14189 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
14190 // (Be sure to mark "z" as an l-value...)
14191 GenTree* commaNode = op1;
14192 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
14194 commaNode = commaNode->gtOp.gtOp2;
14196 // The top-level addr might be annotated with a zeroOffset field.
14197 FieldSeqNode* zeroFieldSeq = nullptr;
14198 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
14200 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
14202 // If the node we're about to put under a GT_ADDR is an indirection, it
14203 // doesn't need to be materialized, since we only want the addressing mode. Because
14204 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
14205 // as a side effect.
14206 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
14207 if (commaOp2->OperIsBlk())
14209 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
14211 if (commaOp2->gtOper == GT_IND)
14213 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
14214 commaOp2->gtFlags &= ~GTF_EXCEPT;
14215 commaOp2->gtFlags |= (commaOp2->gtOp.gtOp1->gtFlags & GTF_EXCEPT);
14218 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
14222 // Transfer the annotation to the new GT_ADDR node.
14223 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
14225 commaNode->gtOp.gtOp2 = op1;
14226 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
14227 // might give op1 a type different from byref (like, say, native int). So now go back and give
14228 // all the comma nodes the type of op1.
14229 // TODO: the comma flag update below is conservative and can be improved.
14230 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
14231 // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
14233 while (commaNode->gtOper == GT_COMMA)
14235 commaNode->gtType = op1->gtType;
14236 commaNode->gtFlags |= op1->gtFlags;
14238 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14240 commaNode = commaNode->gtOp.gtOp2;
14243 tree->gtFlags &= ~GTF_EXCEPT;
14245 // Propagate the new flags
14246 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_EXCEPT);
14247 tree->gtFlags |= (tree->gtOp.gtOp2->gtFlags & GTF_EXCEPT);
14252 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
14253 op1->gtFlags |= GTF_DONT_CSE;
14259 /* Mark the nodes that are conditionally executed */
14260 fgWalkTreePre(&tree, gtMarkColonCond);
14262 /* Since we're doing this postorder we clear this if it got set by a child */
14263 fgRemoveRestOfBlock = false;
14268 /* Special case: trees that don't produce a value */
14269 if (op2->OperIsAssignment() || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2))
14271 typ = tree->gtType = TYP_VOID;
14274 // If we are in the Valuenum CSE phase then don't morph away anything as these
14275 // nodes may have CSE defs/uses in them.
14277 if (!optValnumCSE_phase)
14279 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is all we need.
14282 GenTree* op1SideEffects = nullptr;
14283 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
14284 // hoisted expressions in loops.
14285 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
14286 if (op1SideEffects)
14288 // Replace the left hand side with the side effect list.
14289 tree->gtOp.gtOp1 = op1SideEffects;
14290 tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
14294 /* The left operand is worthless, throw it away */
14295 if (lvaLocalVarRefCounted)
14297 lvaRecursiveDecRefCounts(op1);
14299 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
14300 DEBUG_DESTROY_NODE(tree);
14301 DEBUG_DESTROY_NODE(op1);
14305 /* If the right operand is just a void nop node, throw it away */
14306 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
14308 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
14309 DEBUG_DESTROY_NODE(tree);
14310 DEBUG_DESTROY_NODE(op2);
14319 /* Special case if fgRemoveRestOfBlock is set to true */
14320 if (fgRemoveRestOfBlock)
14322 if (fgIsCommaThrow(op1, true))
14324 GenTree* throwNode = op1->gtOp.gtOp1;
14325 noway_assert(throwNode->gtType == TYP_VOID);
14330 noway_assert(op1->OperKind() & GTK_RELOP);
14331 noway_assert(op1->gtFlags & GTF_EXCEPT);
14333 // We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
14336 tree->ChangeOper(GT_COMMA);
14337 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
14339 // Additionally since we're eliminating the JTRUE
14340 // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
14341 // So we change it into a GT_COMMA as well.
14342 op1->ChangeOper(GT_COMMA);
14343 op1->gtType = op1->gtOp.gtOp1->gtType;
14352 assert(oper == tree->gtOper);
14354 // If we are in the Valuenum CSE phase then don't morph away anything as these
14355 // nodes may have CSE defs/uses in them.
14357 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
14359 /* Check for op1 as a GT_COMMA with an unconditional throw node */
14360 if (op1 && fgIsCommaThrow(op1, true))
14362 if ((op1->gtFlags & GTF_COLON_COND) == 0)
14364 /* We can safely throw out the rest of the statements */
14365 fgRemoveRestOfBlock = true;
14368 GenTree* throwNode = op1->gtOp.gtOp1;
14369 noway_assert(throwNode->gtType == TYP_VOID);
14371 if (oper == GT_COMMA)
14373 /* Both tree and op1 are GT_COMMA nodes */
14374 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
14375 tree->gtOp.gtOp1 = throwNode;
14377 // Possibly reset the assignment flag
14378 if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
14380 tree->gtFlags &= ~GTF_ASG;
14385 else if (oper != GT_NOP)
14387 if (genActualType(typ) == genActualType(op1->gtType))
14389 /* The types match, so return the comma throw node as the new tree */
14394 if (typ == TYP_VOID)
14396 // Return the throw node
14401 GenTree* commaOp2 = op1->gtOp.gtOp2;
14403 // need type of oper to be same as tree
14404 if (typ == TYP_LONG)
14406 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
14407 commaOp2->gtIntConCommon.SetLngValue(0);
14408 /* Change the types of oper and commaOp2 to TYP_LONG */
14409 op1->gtType = commaOp2->gtType = TYP_LONG;
14411 else if (varTypeIsFloating(typ))
14413 commaOp2->ChangeOperConst(GT_CNS_DBL);
14414 commaOp2->gtDblCon.gtDconVal = 0.0;
14415 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
14416 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
14420 commaOp2->ChangeOperConst(GT_CNS_INT);
14421 commaOp2->gtIntConCommon.SetIconValue(0);
14422 /* Change the types of oper and commaOp2 to TYP_INT */
14423 op1->gtType = commaOp2->gtType = TYP_INT;
14426 /* Return the GT_COMMA node as the new tree */
14433 /* Check for op2 as a GT_COMMA with an unconditional throw */
14435 if (op2 && fgIsCommaThrow(op2, true))
14437 if ((op2->gtFlags & GTF_COLON_COND) == 0)
14439 /* We can safely throw out the rest of the statements */
14440 fgRemoveRestOfBlock = true;
14443 // If op1 has no side-effects
14444 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
14446 // If tree is an asg node
14447 if (tree->OperIsAssignment())
14449 /* Return the throw node as the new tree */
14450 return op2->gtOp.gtOp1;
14453 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
14455 /* Return the throw node as the new tree */
14456 return op2->gtOp.gtOp1;
14459 // If tree is a comma node
14460 if (tree->OperGet() == GT_COMMA)
14462 /* Return the throw node as the new tree */
14463 return op2->gtOp.gtOp1;
14466 /* for the shift nodes the type of op2 can differ from the tree type */
14467 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
14469 noway_assert(GenTree::OperIsShiftOrRotate(oper));
14471 GenTree* commaOp2 = op2->gtOp.gtOp2;
14473 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
14474 commaOp2->gtIntConCommon.SetLngValue(0);
14476 /* Change the types of oper and commaOp2 to TYP_LONG */
14477 op2->gtType = commaOp2->gtType = TYP_LONG;
14480 if ((genActualType(typ) == TYP_INT) &&
14481 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
14483 // An example case is comparison (say GT_GT) of two longs or floating point values.
14485 GenTree* commaOp2 = op2->gtOp.gtOp2;
14487 commaOp2->ChangeOperConst(GT_CNS_INT);
14488 commaOp2->gtIntCon.gtIconVal = 0;
14489 /* Change the types of oper and commaOp2 to TYP_INT */
14490 op2->gtType = commaOp2->gtType = TYP_INT;
14493 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
14495 noway_assert(tree->OperGet() == GT_ADD);
14497 GenTree* commaOp2 = op2->gtOp.gtOp2;
14499 commaOp2->ChangeOperConst(GT_CNS_INT);
14500 commaOp2->gtIntCon.gtIconVal = 0;
14501 /* Change the types of oper and commaOp2 to TYP_BYREF */
14502 op2->gtType = commaOp2->gtType = TYP_BYREF;
14505 /* types should now match */
14506 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
14508 /* Return the GT_COMMA node as the new tree */
14514 /*-------------------------------------------------------------------------
14515 * Optional morphing is done if tree transformations are permitted
14518 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
14523 tree = fgMorphSmpOpOptional(tree->AsOp());
14528 #pragma warning(pop)
14531 GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
14533 genTreeOps oper = tree->gtOper;
14534 GenTree* op1 = tree->gtOp1;
14535 GenTree* op2 = tree->gtOp2;
14536 var_types typ = tree->TypeGet();
14538 if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
14540 /* Swap the operands so that the more expensive one is 'op1' */
14542 if (tree->gtFlags & GTF_REVERSE_OPS)
14550 tree->gtFlags &= ~GTF_REVERSE_OPS;
14553 if (oper == op2->gtOper)
14555 /* Reorder nested operators at the same precedence level to be
14556 left-recursive. For example, change "(a+(b+c))" to the
14557 equivalent expression "((a+b)+c)".
14560 /* Things are handled differently for floating-point operators */
14562 if (!varTypeIsFloating(tree->TypeGet()))
14564 fgMoveOpsLeft(tree);
14573 /* Change "((x+icon)+y)" to "((x+y)+icon)"
14574 Don't reorder floating-point operations */
14576 if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
14577 varTypeIsIntegralOrI(typ))
14579 GenTree* ad2 = op1->gtOp.gtOp2;
14581 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
14593 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
14594 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
14595 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same type as op2.
14598 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is necessary.
14601 if (varTypeIsGC(op2->TypeGet()))
14603 noway_assert(varTypeIsGC(typ));
14608 op1->gtOp.gtOp2 = op2;
14609 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
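// Illustrative note (not from the original source): e.g.
//     (x + 3) + y   ==>   (x + y) + 3
// moving the constant outward where later folds can reach it.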
14617 /*-------------------------------------------------------------------------
14618 * Perform optional oper-specific postorder morphing
14623 #ifdef LEGACY_BACKEND
14625 bool dstIsSafeLclVar;
14629 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
14631 if (tree->OperIsCopyBlkOp())
14633 return fgMorphCopyBlock(tree);
14637 return fgMorphInitBlock(tree);
14641 if (typ == TYP_LONG)
14646 /* Make sure we're allowed to do this */
14648 if (optValnumCSE_phase)
14650 // It is not safe to reorder/delete CSE's
14654 #ifdef LEGACY_BACKEND
14655 /* We'll convert "a = a <op> x" into "a <op>= x" */
14656 /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
14658 /* Are we assigning to a GT_LCL_VAR ? */
14660 dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
14662 /* If we have a GT_LCL_VAR, then is the address taken? */
14663 if (dstIsSafeLclVar)
14665 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
14666 LclVarDsc* varDsc = lvaTable + lclNum;
14668 noway_assert(lclNum < lvaCount);
14670 /* Is the address taken? */
14671 if (varDsc->lvAddrExposed)
14673 dstIsSafeLclVar = false;
14675 else if (op2->gtFlags & GTF_ASG)
14681 if (!dstIsSafeLclVar)
14682 #endif // LEGACY_BACKEND
14684 if (op2->gtFlags & GTF_ASG)
14689 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
14695 /* Special case: a cast that can be thrown away */
14697 // TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only
14698 // one cast and sometimes there is another one after it that gets removed by this
14699 // code. fgMorphSmp should be improved to remove all redundant casts so this code can be removed.
14702 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
14708 srct = op2->gtCast.CastOp()->TypeGet();
14709 cast = (var_types)op2->CastToType();
14710 dstt = op1->TypeGet();
14712 /* Make sure these are all ints and precision is not lost */
14714 if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT)
14716 op2 = tree->gtOp2 = op2->gtCast.CastOp();
14720 #ifdef LEGACY_BACKEND
14721 /* Make sure we have the operator range right */
14723 static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
14724 static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
14725 static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
14726 static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
14727 static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
14728 static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");
14730 static_assert(GT_OR == GT_ADD + 7, "bad oper value");
14731 static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
14732 static_assert(GT_AND == GT_ADD + 9, "bad oper value");
14734 static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
14735 static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
14736 static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");
14738 /* Check for a suitable operator on the RHS */
14740 cmop = op2->OperGet();
14745 // GT_CHS only supported for integer types
14746 if (varTypeIsFloating(tree->TypeGet()))
14754 // GT_ASG_MUL only supported for floating point types
14755 if (!varTypeIsFloating(tree->TypeGet()))
14764 if (op2->gtOverflow())
14766 /* Disable folding into "<op>=" if the result can be
14767 visible to anyone as <op> may throw an exception and
14768 the assignment should not proceed.
14769 We are safe with an assignment to a local variable.
14771 if (ehBlockHasExnFlowDsc(compCurBB))
14775 if (!dstIsSafeLclVar)
14780 #ifndef _TARGET_AMD64_
14781 // This is hard for byte-operations as we need to make
14782 // sure both operands are in RBM_BYTE_REGS.
14783 if (varTypeIsByte(op2->TypeGet()))
14785 #endif // _TARGET_AMD64_
14790 // GT_ASG_DIV only supported for floating point types
14791 if (!varTypeIsFloating(tree->TypeGet()))
14804 bool bReverse = false;
14805 bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
14806 if (bAsgOpFoldable)
14810 // We will transform this from "a = x <op> a" to "a <op>= x"
14811 // so we can now destroy the duplicate "a"
14812 DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
14813 op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
14816 /* Special case: "x |= -1" and "x &= 0" */
14817 if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
14818 ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
14820 /* Simply change to an assignment */
14821 tree->gtOp2 = op2->gtOp.gtOp2;
14825 if (cmop == GT_NEG)
14827 /* This is "x = -x;", use the flipsign operator */
14829 tree->ChangeOper(GT_CHS);
14831 if (op1->gtOper == GT_LCL_VAR)
14833 op1->gtFlags |= GTF_VAR_USEASG;
14836 tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
14841 if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
14843 // Changing from x = x op y to x op= y when x is a small integer type
14844 // makes the op size smaller (originally the op size was 32 bits, after
14845 // sign or zero extension of x, and there is an implicit truncation in the assignment).
14847 // This is ok in most cases because the upper bits were
14848 // lost when assigning the op result to a small type var,
14849 // but it may not be ok for the right shift operation where the higher bits
14850 // could be shifted into the lower bits and preserved.
14851 // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
14852 // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
14853 // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the wrong result:
14856 // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
14857 // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
14858 // The result becomes correct if we use >>unsigned instead of >>signed.
14859 noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
14863 /* Replace with an assignment operator */
14864 noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
14865 noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
14866 noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
14867 noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
14868 noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
14869 noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
14870 noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
14871 noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
14873 tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
14874 tree->gtOp2 = op2->gtOp.gtOp2;
14876 /* Propagate GTF_OVERFLOW */
14878 if (op2->gtOverflowEx())
14880 tree->gtType = op2->gtType;
14881 tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
14884 #if FEATURE_SET_FLAGS
14886 /* Propagate GTF_SET_FLAGS */
14887 if (op2->gtSetFlags())
14889 tree->gtRequestSetFlags();
14892 #endif // FEATURE_SET_FLAGS
14894 DEBUG_DESTROY_NODE(op2);
14897 /* The target is used as well as being defined */
14898 if (op1->OperIsLocal())
14900 op1->gtFlags |= GTF_VAR_USEASG;
14903 #if CPU_HAS_FP_SUPPORT
14904 /* Check for the special case "x += y * x;" */
14906 // GT_ASG_MUL only supported for floating point types
14907 if (cmop != GT_ADD && cmop != GT_SUB)
14912 if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
14914 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14916 /* Change "x += x * y" into "x *= (y + 1)" */
14918 op2 = op2->gtOp.gtOp2;
14920 else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
14922 /* Change "x += y * x" into "x *= (y + 1)" */
14924 op2 = op2->gtOp.gtOp1;
14931 op1 = gtNewDconNode(1.0);
14933 /* Now make the "*=" node */
14935 if (cmop == GT_ADD)
14937 /* Change "x += x * y" into "x *= (y + 1)" */
14939 tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
14943 /* Change "x -= x * y" into "x *= (1 - y)" */
14945 noway_assert(cmop == GT_SUB);
14946 tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
14948 tree->ChangeOper(GT_ASG_MUL);
14950 #endif // CPU_HAS_FP_SUPPORT
14958 /* Is the destination identical to the first RHS sub-operand? */
14960 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
14962 /* This is "x = ~x" which is the same as "x ^= -1"
14963 * Transform the node into a GT_ASG_XOR */
14965 noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
14967 op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
14977 #endif // LEGACY_BACKEND
14982 /* Check for the case "(val + icon) * icon" */
14984 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
14986 GenTree* add = op1->gtOp.gtOp2;
14988 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
14990 if (tree->gtOverflow() || op1->gtOverflow())
14995 ssize_t imul = op2->gtIntCon.gtIconVal;
14996 ssize_t iadd = add->gtIntCon.gtIconVal;
14998 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
15001 tree->ChangeOper(oper);
15003 op2->gtIntCon.gtIconVal = iadd * imul;
15005 op1->ChangeOper(GT_MUL);
15007 add->gtIntCon.gtIconVal = imul;
15008 #ifdef _TARGET_64BIT_
15009 if (add->gtType == TYP_INT)
15011 // we need to properly re-sign-extend or truncate after multiplying two int constants above
15012 add->AsIntCon()->TruncateOrSignExtend32();
15014 #endif //_TARGET_64BIT_
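// Illustrative note (not from the original source): e.g., with no overflow
// checks on either node (verified above):
//     (x + 2) * 3   ==>   (x * 3) + 6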
15022 /* For "val / 1", just return "val" */
15024 if (op2->IsIntegralConst(1))
15026 DEBUG_DESTROY_NODE(tree);
15034 /* Check for the case "(val + icon) << icon" */
15036 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
15038 GenTree* cns = op1->gtOp.gtOp2;
15040 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
15042 ssize_t ishf = op2->gtIntConCommon.IconValue();
15043 ssize_t iadd = cns->gtIntConCommon.IconValue();
15045 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
15047 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
15049 tree->ChangeOper(GT_ADD);
15050 ssize_t result = iadd << ishf;
15051 op2->gtIntConCommon.SetIconValue(result);
15052 #ifdef _TARGET_64BIT_
15053 if (op1->gtType == TYP_INT)
15055 op2->AsIntCon()->TruncateOrSignExtend32();
15057 #endif // _TARGET_64BIT_
15059 // we are reusing the shift amount node here, but the type we want is that of the shift result
15060 op2->gtType = op1->gtType;
15062 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
15063 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
15065 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
15066 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
15069 op1->ChangeOper(GT_LSH);
15071 cns->gtIntConCommon.SetIconValue(ishf);
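// Illustrative note (not from the original source): e.g.
//     (x + 2) << 3   ==>   (x << 3) + 16
// The old shift-amount constant is reused to hold the scaled addend (16),
// and the inner constant becomes the shift amount (3).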
15079 if (!optValnumCSE_phase)
15081 /* "x ^ -1" is "~x" */
15083 if (op2->IsIntegralConst(-1))
15085 tree->ChangeOper(GT_NOT);
15086 tree->gtOp2 = nullptr;
15087 DEBUG_DESTROY_NODE(op2);
15089 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
15091 /* "binaryVal ^ 1" is "!binaryVal" */
15092 gtReverseCond(op1);
15093 DEBUG_DESTROY_NODE(op2);
15094 DEBUG_DESTROY_NODE(tree);
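// Illustrative note (not from the original source): e.g.
//     x ^ -1        ==>   ~x
//     (a < b) ^ 1   ==>   (a >= b)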
15102 // Initialization values for initBlk have special semantics - their lower
15103 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
15104 // which enables them to get a VNForZero, and be propagated.
15105 if (op1->IsIntegralConst(0))
15117 //------------------------------------------------------------------------
15118 // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
15119 // (see ECMA III.3.55 and III.3.56).
15122 // Arguments: tree - The GT_MOD/GT_UMOD tree to morph
15125 // Returns: The morphed tree
15128 // Notes: For ARM64 we don't have a remainder instruction, so this transform is
15129 // always done. For XARCH this transform is done if we know that magic
15130 // division will be used; in that case this transform allows CSE to
15131 // eliminate the redundant div from code like "x = a / 3; y = a % 3;".
15133 // This method will produce the above expression if 'a' and 'b' are
15134 // leaf nodes; otherwise, if either of them is not a leaf, it will spill
15135 // its value into a temporary variable. An example:
15136 // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
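// Illustrative sketch (not from the original source): at the source level,
// assuming 'a' and 'b' are side-effect-free ints, the rewrite computes:
//
//     int q = a / b;     // this division is what CSE can share
//     int r = a - q * b; // equals a % b per ECMA III.3.55/III.3.56
//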
15138 GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
15140 if (tree->OperGet() == GT_MOD)
15142 tree->SetOper(GT_DIV);
15144 else if (tree->OperGet() == GT_UMOD)
15146 tree->SetOper(GT_UDIV);
15150 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
15153 var_types type = tree->gtType;
15154 GenTree* denominator = tree->gtOp2;
15155 GenTree* numerator = tree->gtOp1;
15157 if (!numerator->OperIsLeaf())
15159 numerator = fgMakeMultiUse(&tree->gtOp1);
15161 else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
15163 // Morphing introduces new lclVar references. Increase ref counts
15164 lvaIncRefCnts(numerator);
15167 if (!denominator->OperIsLeaf())
15169 denominator = fgMakeMultiUse(&tree->gtOp2);
15171 else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
15173 // Morphing introduces new lclVar references. Increase ref counts
15174 lvaIncRefCnts(denominator);
15177 // The numerator and denominator may have been assigned to temps, in which case
15178 // their defining assignments are in the current tree. Therefore, we need to
15179 // set the execution order accordingly on the nodes we create.
15180 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
15181 // be set to be evaluated in reverse order.
15183 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
15184 assert(!mul->IsReverseOp());
15185 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
15186 sub->gtFlags |= GTF_REVERSE_OPS;
15189 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15195 //------------------------------------------------------------------------------
15196 // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
15200 // Arguments: oper - Operation to check
15203 // Returns: True if the operation can be a root of a bitwise rotation tree; false otherwise.
15205 bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
15207 return (oper == GT_OR) || (oper == GT_XOR);
15210 //------------------------------------------------------------------------------
15211 // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
15212 // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
15215 // Arguments: tree - tree to check for a rotation pattern
15218 // Returns: An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
15221 // Assumptions: The input is a GT_OR or a GT_XOR tree.
15223 GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
15225 #ifndef LEGACY_BACKEND
15227 // Check for a rotation pattern.
15240 // The patterns recognized:
15241 // (x << (y & M)) op (x >>> ((-y + N) & M))
15242 // (x >>> ((-y + N) & M)) op (x << (y & M))
15244 // (x << y) op (x >>> (-y + N))
15245 // (x >>> (-y + N)) op (x << y)
15247 // (x >>> (y & M)) op (x << ((-y + N) & M))
15248 // (x << ((-y + N) & M)) op (x >>> (y & M))
15250 // (x >>> y) op (x << (-y + N))
15251 // (x << (-y + N)) op (x >>> y)
15253 // (x << c1) op (x >>> c2)
15254 // (x >>> c1) op (x << c2)
15257 // where c1 and c2 are const,
15258 // c1 + c2 == bitsize(x),
15261 // N == bitsize(x), M is const, M & (N - 1) == N - 1, and
15262 // op is either | or ^
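// Illustrative note (not from the original source): for a 32-bit unsigned x,
//     (x << c) | (x >> (32 - c))                 ==>   GT_ROL x, c
//     (x >> (y & 31)) | (x << ((32 - y) & 31))   ==>   GT_ROR x, y
// assuming the masks keep at least the low 5 bits, as required below.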
15264 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
15266 // We can't do anything if the tree has assignments, calls, or volatile
15267 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
15268 // thrown by the original tree will be thrown by the transformed tree as well.
15272 genTreeOps oper = tree->OperGet();
15273 assert(fgOperIsBitwiseRotationRoot(oper));
15275 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
15276 GenTree* op1 = tree->gtGetOp1();
15277 GenTree* op2 = tree->gtGetOp2();
15278 GenTree* leftShiftTree = nullptr;
15279 GenTree* rightShiftTree = nullptr;
15280 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
15282 leftShiftTree = op1;
15283 rightShiftTree = op2;
15285 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
15287 leftShiftTree = op2;
15288 rightShiftTree = op1;
15295 // Check if the trees representing the value to shift are identical.
15296 // We already checked that there are no side effects above.
15297 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
15299 GenTree* rotatedValue = leftShiftTree->gtGetOp1();
15300 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
15301 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
15302 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
15303 GenTree* leftShiftIndex = leftShiftTree->gtGetOp2();
15304 GenTree* rightShiftIndex = rightShiftTree->gtGetOp2();
15306 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
15307 // shouldn't be masked for the transformation to be valid. If additional
15308 // higher bits are not masked, the transformation is still valid since the result
15309 // of MSIL shift instructions is unspecified if the shift amount is greater or equal
15310 // than the width of the value being shifted.
15311 ssize_t minimalMask = rotatedValueBitSize - 1;
15312 ssize_t leftShiftMask = -1;
15313 ssize_t rightShiftMask = -1;
15315 if ((leftShiftIndex->OperGet() == GT_AND))
15317 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
15319 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
15320 leftShiftIndex = leftShiftIndex->gtGetOp1();
15328 if ((rightShiftIndex->OperGet() == GT_AND))
15330 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
15332 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
15333 rightShiftIndex = rightShiftIndex->gtGetOp1();
15341 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
15343 // The shift index is overmasked, e.g., we have
15344 // something like (x << y & 15) or
15345 // (x >> ((32 - y) & 15)) with 32-bit x.
15346 // The transformation is not valid.
15350 GenTree* shiftIndexWithAdd = nullptr;
15351 GenTree* shiftIndexWithoutAdd = nullptr;
15352 genTreeOps rotateOp = GT_NONE;
15353 GenTree* rotateIndex = nullptr;
15355 if (leftShiftIndex->OperGet() == GT_ADD)
15357 shiftIndexWithAdd = leftShiftIndex;
15358 shiftIndexWithoutAdd = rightShiftIndex;
15361 else if (rightShiftIndex->OperGet() == GT_ADD)
15363 shiftIndexWithAdd = rightShiftIndex;
15364 shiftIndexWithoutAdd = leftShiftIndex;
15368 if (shiftIndexWithAdd != nullptr)
15370 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
15372 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
15374 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
15376 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
15378 // We found one of these patterns:
15379 // (x << (y & M)) | (x >>> ((-y + N) & M))
15380 // (x << y) | (x >>> (-y + N))
15381 // (x >>> (y & M)) | (x << ((-y + N) & M))
15382 // (x >>> y) | (x << (-y + N))
15383 // where N == bitsize(x), M is const, and
15384 // M & (N - 1) == N - 1
15385 CLANG_FORMAT_COMMENT_ANCHOR;
15387 #ifndef _TARGET_64BIT_
15388 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
15390 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
15391 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
15392 // to add helpers for GT_ROL and GT_ROR.
15397 rotateIndex = shiftIndexWithoutAdd;
15403 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
15405 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
15407 // We found this pattern:
15408 // (x << c1) | (x >>> c2)
15409 // where c1 and c2 are const and c1 + c2 == bitsize(x)
15411 rotateIndex = leftShiftIndex;
15415 if (rotateIndex != nullptr)
15417 noway_assert(GenTree::OperIsRotate(rotateOp));
15419 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
15421 // We can use the same tree only during global morph; reusing the tree in a later morph
15422 // may invalidate value numbers.
15425 tree->gtOp.gtOp1 = rotatedValue;
15426 tree->gtOp.gtOp2 = rotateIndex;
15427 tree->ChangeOper(rotateOp);
15429 unsigned childFlags = 0;
15430 for (GenTree* op : tree->Operands())
15432 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
15435 // The parent's flags should be a superset of its operands' flags
15436 noway_assert((inputTreeEffects & childFlags) == childFlags);
15440 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
15441 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
15447 #endif // LEGACY_BACKEND
15451 #if !CPU_HAS_FP_SUPPORT
15452 GenTree* Compiler::fgMorphToEmulatedFP(GenTree* tree)
15455 genTreeOps oper = tree->OperGet();
15456 var_types typ = tree->TypeGet();
15457 GenTree* op1 = tree->gtOp.gtOp1;
15458 GenTree* op2 = tree->gtGetOp2IfPresent();
15461 We have to use helper calls for all FP operations:
15463 FP operators that operate on FP values
15464 casts to and from FP
15465 comparisons of FP values
15468 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
15472 size_t argc = genTypeStSz(typ);
15474 /* Not all FP operations need helper calls */
15488 /* If the result isn't FP, it better be a compare or cast */
15490 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
15493 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
15496 /* Keep track of how many arguments we're passing */
15498 fgPtrArgCntCur += argc;
15500 /* Is this a binary operator? */
15504 /* Add the second operand to the argument count */
15506 fgPtrArgCntCur += argc;
15509 /* What kind of an operator do we have? */
15514 helper = CPX_R4_ADD;
15517 helper = CPX_R4_SUB;
15520 helper = CPX_R4_MUL;
15523 helper = CPX_R4_DIV;
15525 // case GT_MOD: helper = CPX_R4_REM; break;
15528 helper = CPX_R4_EQ;
15531 helper = CPX_R4_NE;
15534 helper = CPX_R4_LT;
15537 helper = CPX_R4_LE;
15540 helper = CPX_R4_GE;
15543 helper = CPX_R4_GT;
15550 noway_assert(!"unexpected FP binary op");
15554 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
15564 noway_assert(!"FP cast");
15567 helper = CPX_R4_NEG;
15574 noway_assert(!"unexpected FP unary op");
15578 args = gtNewArgList(tree->gtOp.gtOp1);
15581 /* If we have double result/operands, modify the helper */
15583 if (typ == TYP_DOUBLE)
15585 static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG);
15586 static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD);
15587 static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB);
15588 static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL);
15589 static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV);
15595 noway_assert(tree->OperIsCompare());
15597 static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ);
15598 static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE);
15599 static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT);
15600 static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE);
15601 static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE);
15602 static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT);
15605 tree = fgMorphIntoHelperCall(tree, helper, args);
15607 if (fgPtrArgCntMax < fgPtrArgCntCur)
15609 JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
15610 fgPtrArgCntMax = fgPtrArgCntCur;
15613 fgPtrArgCntCur -= argc;
15621 if (compCurBB == genReturnBB)
15623 /* This is the 'exitCrit' call at the exit label */
15625 noway_assert(op1->gtType == TYP_VOID);
15626 noway_assert(op2 == 0);
15628 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
15633 /* This is a (real) return value -- check its type */
15634 CLANG_FORMAT_COMMENT_ANCHOR;
15637 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
15639 bool allowMismatch = false;
15641 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
15642 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
15643 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
15644 allowMismatch = true;
15646 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
15647 allowMismatch = true;
15649 if (!allowMismatch)
15650 NO_WAY("Return type mismatch");
15660 /*****************************************************************************
15662 * Transform the given tree for code generation and return an equivalent tree.
15665 GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
15668 assert(tree->gtOper != GT_STMT);
15673 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
15675 noway_assert(!"JitBreakMorphTree hit");
15681 int thisMorphNum = 0;
15682 if (verbose && treesBeforeAfterMorph)
15684 thisMorphNum = morphNum++;
15685 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
15692 // Apply any rewrites for implicit byref arguments before morphing the tree at the top level.
15695 if (fgMorphImplicitByRefArgs(tree))
15698 if (verbose && treesBeforeAfterMorph)
15700 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
15707 /*-------------------------------------------------------------------------
15708 * fgMorphTree() can potentially replace a tree with another, and the
15709 * caller has to store the return value correctly.
15710 * Turn this on to always make a copy of "tree" here to shake out
15711 * hidden/unupdated references.
15716 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
15720 #ifdef SMALL_TREE_NODES
15721 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
15723 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
15728 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
15731 copy->ReplaceWith(tree, this);
15733 #if defined(LATE_DISASM)
15734 // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields
15735 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
15737 copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
15741 DEBUG_DESTROY_NODE(tree);
15748 /* Ensure that we haven't morphed this node already */
15749 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15751 #if LOCAL_ASSERTION_PROP
15752 /* Before morphing the tree, we try to propagate any active assertions */
15753 if (optLocalAssertionProp)
15755 /* Do we have any active assertions? */
15757 if (optAssertionCount > 0)
15759 GenTree* newTree = tree;
15760 while (newTree != nullptr)
15763 /* newTree is non-Null if we propagated an assertion */
15764 newTree = optAssertionProp(apFull, tree, nullptr);
15766 assert(tree != nullptr);
15769 PREFAST_ASSUME(tree != nullptr);
15773 /* Save the original un-morphed tree for fgMorphTreeDone */
15775 GenTree* oldTree = tree;
15777 /* Figure out what kind of a node we have */
15779 unsigned kind = tree->OperKind();
15781 /* Is this a constant node? */
15783 if (kind & GTK_CONST)
15785 tree = fgMorphConst(tree);
15789 /* Is this a leaf node? */
15791 if (kind & GTK_LEAF)
15793 tree = fgMorphLeaf(tree);
15797 /* Is it a 'simple' unary/binary operator? */
15799 if (kind & GTK_SMPOP)
15801 tree = fgMorphSmpOp(tree, mac);
15805 /* See what kind of a special operator we have here */
15807 switch (tree->OperGet())
15810 tree = fgMorphField(tree, mac);
15814 if (tree->OperMayThrow(this))
15816 tree->gtFlags |= GTF_EXCEPT;
15820 tree->gtFlags &= ~GTF_EXCEPT;
15822 tree = fgMorphCall(tree->AsCall());
15825 case GT_ARR_BOUNDS_CHECK:
15826 #ifdef FEATURE_SIMD
15828 #endif // FEATURE_SIMD
15829 #ifdef FEATURE_HW_INTRINSICS
15830 case GT_HW_INTRINSIC_CHK:
15831 #endif // FEATURE_HW_INTRINSICS
15833 fgSetRngChkTarget(tree);
15835 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
15836 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
15837 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
15838 // If the index is a comma(throw, x), just return that.
15839 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
15841 tree = bndsChk->gtIndex;
15844 // Propagate effects flags upwards
15845 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
15846 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
15848 // Otherwise, we don't change the tree.
15853 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
15856 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15858 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
15861 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15863 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
15865 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
15870 fgSetRngChkTarget(tree, false);
15874 case GT_ARR_OFFSET:
15875 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
15876 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
15877 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
15879 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
15880 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
15881 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
15884 fgSetRngChkTarget(tree, false);
15889 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
15890 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
15891 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
15893 tree->gtFlags &= ~GTF_EXCEPT;
15895 tree->gtFlags |= tree->gtCmpXchg.gtOpLocation->gtFlags & GTF_ALL_EFFECT;
15896 tree->gtFlags |= tree->gtCmpXchg.gtOpValue->gtFlags & GTF_ALL_EFFECT;
15897 tree->gtFlags |= tree->gtCmpXchg.gtOpComparand->gtFlags & GTF_ALL_EFFECT;
15900 case GT_STORE_DYN_BLK:
15902 if (tree->OperGet() == GT_STORE_DYN_BLK)
15904 tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
15906 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
15907 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
15909 tree->gtFlags &= ~GTF_EXCEPT;
15910 tree->SetIndirExceptionFlags(this);
15912 if (tree->OperGet() == GT_STORE_DYN_BLK)
15914 tree->gtFlags |= tree->gtDynBlk.Data()->gtFlags & GTF_ALL_EFFECT;
15916 tree->gtFlags |= tree->gtDynBlk.Addr()->gtFlags & GTF_ALL_EFFECT;
15917 tree->gtFlags |= tree->gtDynBlk.gtDynamicSize->gtFlags & GTF_ALL_EFFECT;
15920 case GT_INDEX_ADDR:
15921 tree->AsIndexAddr()->Index() = fgMorphTree(tree->AsIndexAddr()->Index());
15922 tree->AsIndexAddr()->Arr() = fgMorphTree(tree->AsIndexAddr()->Arr());
15929 noway_assert(!"unexpected operator");
15933 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
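// Usage sketch (illustrative, not part of the original logic): fgMorphTree
// may return a node other than the one passed in, so callers must always
// store the result back into the parent link, e.g.
//
//     tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
//
// The STRESS_GENERIC_CHECK copy above exists precisely to shake out callers
// that keep a stale reference instead.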
15938 #if LOCAL_ASSERTION_PROP
15939 //------------------------------------------------------------------------
15940 // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
15943 // lclNum - The varNum of the lclVar for which we're killing assertions.
15944 // tree - (DEBUG only) the tree responsible for killing its assertions.
15946 void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree))
15948 /* All dependent assertions are killed here */
15950 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
15954 AssertionIndex index = optAssertionCount;
15955 while (killed && (index > 0))
15957 if (BitVecOps::IsMember(apTraits, killed, index - 1))
15960 AssertionDsc* curAssertion = optGetAssertion(index);
15961 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
15962 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
15965 printf("\nThe assignment ");
15967 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
15968 optPrintAssertion(curAssertion);
15971 // Remove this bit from the killed mask
15972 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
15974 optAssertionRemove(index);
15980 // killed mask should now be zero
15981 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
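// Sketch of the traversal pattern above using std::bitset in place of
// BitVecOps (hypothetical, for illustration only): the killed set is walked
// from the highest assertion index downwards, clearing each bit as the
// corresponding assertion is removed, until the set is empty.
//
//     std::bitset<64> killed = /* dependency set for lclNum */;
//     for (unsigned index = count; killed.any() && (index > 0); index--)
//     {
//         if (killed.test(index - 1))
//         {
//             killed.reset(index - 1);
//             RemoveAssertion(index); // stand-in for optAssertionRemove
//         }
//     }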
15984 //------------------------------------------------------------------------
15985 // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
15988 // lclNum - The varNum of the lclVar for which we're killing assertions.
15989 // tree - (DEBUG only) the tree responsible for killing its assertions.
15992 // For structs and struct fields, it will invalidate the children and parent, respectively.
15994 // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
15996 void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree))
15998 LclVarDsc* varDsc = &lvaTable[lclNum];
16000 if (varDsc->lvPromoted)
16002 noway_assert(varTypeIsStruct(varDsc));
16004 // Kill the field locals.
16005 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
16007 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
16010 // Kill the struct local itself.
16011 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
16013 else if (varDsc->lvIsStructField)
16015 // Kill the field local.
16016 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
16018 // Kill the parent struct.
16019 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
16023 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
16026 #endif // LOCAL_ASSERTION_PROP
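// Worked example (illustrative): suppose V01 is a promoted struct with field
// locals V02 and V03, and V05 is an ordinary int local. Then:
//   - an assignment to V01 kills assertions on V02 and V03, then on V01 itself;
//   - an assignment to V02 kills assertions on V02 and on its parent V01;
//   - an assignment to V05 kills assertions on V05 only.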
16028 /*****************************************************************************
16030 * This function is called to complete the morphing of a tree node.
16031 * It should only be called once for each node.
16032 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
16033 * to enforce the invariant that each node is only morphed once.
16034 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
16035 * by an equivalent tree.
16039 void Compiler::fgMorphTreeDone(GenTree* tree,
16040 GenTree* oldTree /* == NULL */
16041 DEBUGARG(int morphNum))
16044 if (verbose && treesBeforeAfterMorph)
16046 printf("\nfgMorphTree (after %d):\n", morphNum);
16048 printf(""); // in our logic this causes a flush
16052 if (!fgGlobalMorph)
16057 if ((oldTree != nullptr) && (oldTree != tree))
16059 /* Ensure that we have morphed this node */
16060 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
16063 TransferTestDataToNode(oldTree, tree);
16068 // Ensure that we haven't morphed this node already
16069 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
16072 if (tree->OperKind() & GTK_CONST)
16077 #if LOCAL_ASSERTION_PROP
16079 if (!optLocalAssertionProp)
16084 /* Do we have any active assertions? */
16086 if (optAssertionCount > 0)
16088 /* Is this an assignment to a local variable? */
16089 GenTreeLclVarCommon* lclVarTree = nullptr;
16090 if (tree->DefinesLocal(this, &lclVarTree))
16092 unsigned lclNum = lclVarTree->gtLclNum;
16093 noway_assert(lclNum < lvaCount);
16094 fgKillDependentAssertions(lclNum DEBUGARG(tree));
16098 /* If this tree makes a new assertion - make it available */
16099 optAssertionGen(tree);
16101 #endif // LOCAL_ASSERTION_PROP
16106 /* Mark this node as being morphed */
16107 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
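// Lifecycle sketch (illustrative): for a statement such as "V03 = 5", the
// code above first kills every assertion that mentions V03 (or a copy of it)
// via fgKillDependentAssertions, and only then lets optAssertionGen record
// the new fact "V03 == 5" for use by later statements in the same block.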
16111 /*****************************************************************************
16113 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
16114 * Returns true if we modified the flow graph
16117 bool Compiler::fgFoldConditional(BasicBlock* block)
16119 bool result = false;
16121 // We don't want to make any code unreachable
16122 if (opts.compDbgCode || opts.MinOpts())
16127 if (block->bbJumpKind == BBJ_COND)
16129 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
16131 GenTree* stmt = block->bbTreeList->gtPrev;
16133 noway_assert(stmt->gtNext == nullptr);
16135 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
16137 noway_assert(fgRemoveRestOfBlock);
16139 /* Unconditional throw - transform the basic block into a BBJ_THROW */
16140 fgConvertBBToThrowBB(block);
16142 /* Remove 'block' from the predecessor list of 'block->bbNext' */
16143 fgRemoveRefPred(block->bbNext, block);
16145 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
16146 fgRemoveRefPred(block->bbJumpDest, block);
16151 printf("\nConditional folded at BB%02u\n", block->bbNum);
16152 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
16158 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
16160 /* Did we fold the conditional? */
16162 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
16164 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
16166 if (cond->OperKind() & GTK_CONST)
16168 /* Yippee - we folded the conditional!
16169  * Remove the conditional statement */
16171 noway_assert(cond->gtOper == GT_CNS_INT);
16172 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
16174 /* Remove the statement from bbTreeList - no need to update
16175  * the reference counts since there are no lcl vars */
16176 fgRemoveStmt(block, stmt);
16178 // block is a BBJ_COND that we are folding the conditional for
16179 // bTaken is the path that will always be taken from block
16180 // bNotTaken is the path that will never be taken from block
16182 BasicBlock* bTaken;
16183 BasicBlock* bNotTaken;
16185 if (cond->gtIntCon.gtIconVal != 0)
16187 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
16188 block->bbJumpKind = BBJ_ALWAYS;
16189 bTaken = block->bbJumpDest;
16190 bNotTaken = block->bbNext;
16194 /* Unmark the loop if we are removing a backwards branch: the dest
16195  * block must also be marked as a loop head, and we must be able
16196  * to reach the backedge block */
16197 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
16198 fgReachable(block->bbJumpDest, block))
16200 optUnmarkLoopBlocks(block->bbJumpDest, block);
16203 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
16204 block->bbJumpKind = BBJ_NONE;
16205 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
16206 bTaken = block->bbNext;
16207 bNotTaken = block->bbJumpDest;
16210 if (fgHaveValidEdgeWeights)
16212 // We are removing an edge from block to bNotTaken
16213 // and we have already computed the edge weights, so
16214 // we will try to adjust some of the weights
16216 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
16217 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
16219 // We examine the taken edge (block -> bTaken):
16220 // if block has a valid profile weight and bTaken does not, we try to adjust bTaken's weight;
16221 // else if bTaken has a valid profile weight and block does not, we try to adjust block's weight.
16222 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken.
16224 if (block->hasProfileWeight())
16226 // The edge weights for (block -> bTaken) are 100% of block's weight
16227 edgeTaken->flEdgeWeightMin = block->bbWeight;
16228 edgeTaken->flEdgeWeightMax = block->bbWeight;
16230 if (!bTaken->hasProfileWeight())
16232 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
16234 // Update the weight of bTaken
16235 bTaken->inheritWeight(block);
16240 else if (bTaken->hasProfileWeight())
16242 if (bTaken->countOfInEdges() == 1)
16244 // There is only one in edge to bTaken
16245 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
16246 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
16248 // Update the weight of block
16249 block->inheritWeight(bTaken);
16254 if (bUpdated != nullptr)
16257 // Now fix the weights of the edges out of 'bUpdated'
16258 switch (bUpdated->bbJumpKind)
16261 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
16262 edge->flEdgeWeightMax = bUpdated->bbWeight;
16265 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
16266 edge->flEdgeWeightMax = bUpdated->bbWeight;
16269 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
16270 edge->flEdgeWeightMax = bUpdated->bbWeight;
16273 // We don't handle BBJ_SWITCH
16279 /* modify the flow graph */
16281 /* Remove 'block' from the predecessor list of 'bNotTaken' */
16282 fgRemoveRefPred(bNotTaken, block);
16287 printf("\nConditional folded at BB%02u\n", block->bbNum);
16288 printf("BB%02u becomes a %s", block->bbNum,
16289 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
16290 if (block->bbJumpKind == BBJ_ALWAYS)
16292 printf(" to BB%02u", block->bbJumpDest->bbNum);
16298 /* If the block was a loop condition, we may have to modify
16299  * the loop table */
16301 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
16303 /* Some loops may have already been removed by
16304  * loop unrolling or conditional folding */
16306 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
16311 /* We are only interested in the loop bottom */
16313 if (optLoopTable[loopNum].lpBottom == block)
16315 if (cond->gtIntCon.gtIconVal == 0)
16317 /* This was a bogus loop (condition always false)
16318 * Remove the loop from the table */
16320 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
16324 printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
16325 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
16335 else if (block->bbJumpKind == BBJ_SWITCH)
16337 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
16339 GenTree* stmt = block->bbTreeList->gtPrev;
16341 noway_assert(stmt->gtNext == nullptr);
16343 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
16345 noway_assert(fgRemoveRestOfBlock);
16347 /* Unconditional throw - transform the basic block into a BBJ_THROW */
16348 fgConvertBBToThrowBB(block);
16350 /* update the flow graph */
16352 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
16353 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
16355 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
16357 BasicBlock* curJump = *jumpTab;
16359 /* Remove 'block' from the predecessor list of 'curJump' */
16360 fgRemoveRefPred(curJump, block);
16366 printf("\nConditional folded at BB%02u\n", block->bbNum);
16367 printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
16373 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
16375 /* Did we fold the conditional? */
16377 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
16379 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
16381 if (cond->OperKind() & GTK_CONST)
16383 /* Yippee - we folded the conditional!
16384  * Remove the conditional statement */
16386 noway_assert(cond->gtOper == GT_CNS_INT);
16388 /* Remove the statement from bbTreeList - no need to update
16389  * the reference counts since there are no lcl vars */
16390 fgRemoveStmt(block, stmt);
16392 /* modify the flow graph */
16394 /* Find the actual jump target */
16395 unsigned switchVal;
16396 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
16398 jumpCnt = block->bbJumpSwt->bbsCount;
16399 BasicBlock** jumpTab;
16400 jumpTab = block->bbJumpSwt->bbsDstTab;
16404 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
16406 BasicBlock* curJump = *jumpTab;
16408 assert(curJump->countOfInEdges() > 0);
16410 // If val matches switchVal or we are at the last entry and
16411 // we never found the switch value, then set the new jump dest
16413 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
16415 if (curJump != block->bbNext)
16417 /* transform the basic block into a BBJ_ALWAYS */
16418 block->bbJumpKind = BBJ_ALWAYS;
16419 block->bbJumpDest = curJump;
16421 // If we are jumping backwards, keep the GC poll; only a forward jump (to a higher block number) clears the flag.
16422 if (curJump->bbNum > block->bbNum)
16424 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
16429 /* transform the basic block into a BBJ_NONE */
16430 block->bbJumpKind = BBJ_NONE;
16431 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
16437 /* Remove 'block' from the predecessor list of 'curJump' */
16438 fgRemoveRefPred(curJump, block);
16444 printf("\nConditional folded at BB%02u\n", block->bbNum);
16445 printf("BB%02u becomes a %s", block->bbNum,
16446 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
16447 if (block->bbJumpKind == BBJ_ALWAYS)
16449 printf(" to BB%02u", block->bbJumpDest->bbNum);
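// Minimal sketch of the folding decision for a BBJ_COND on a constant
// (hypothetical helper; the real code above also fixes up the pred lists,
// edge weights and the loop table):
//
//     void FoldJTrueOnConst(BasicBlock* block, ssize_t condVal)
//     {
//         if (condVal != 0)
//         {
//             block->bbJumpKind = BBJ_ALWAYS; // branch is always taken
//         }
//         else
//         {
//             block->bbJumpKind = BBJ_NONE;   // branch always falls through
//         }
//     }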
16461 //*****************************************************************************
16463 // Morphs a single statement in a block.
16464 // Can be called anytime, unlike fgMorphStmts() which should only be called once.
16466 // Returns true if 'stmt' was removed from the block.
16467 // Returns false if 'stmt' is still in the block (even if other statements were removed).
16470 bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
16472 assert(block != nullptr);
16473 assert(stmt != nullptr);
16476 compCurStmt = stmt;
16478 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
16480 // Bug 1106830 - During the CSE phase we can't just remove
16481 // morph->gtOp.gtOp2 as it could contain CSE expressions.
16482 // This leads to a noway_assert in OptCSE.cpp when
16483 // searching for the removed CSE ref. (using gtFindLink)
16485 if (!optValnumCSE_phase)
16487 // Check whether 'morph' is a GT_COMMA with an unconditional throw
16488 if (fgIsCommaThrow(morph, true))
16493 printf("Folding a top-level fgIsCommaThrow stmt\n");
16494 printf("Removing op2 as unreachable:\n");
16495 gtDispTree(morph->gtOp.gtOp2);
16499 // Use the call as the new stmt
16500 morph = morph->gtOp.gtOp1;
16501 noway_assert(morph->gtOper == GT_CALL);
16504 // we can get a throw as a statement root
16505 if (fgIsThrow(morph))
16510 printf("We have a top-level fgIsThrow stmt\n");
16511 printf("Removing the rest of block as unreachable:\n");
16514 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
16515 fgRemoveRestOfBlock = true;
16519 stmt->gtStmtExpr = morph;
16521 if (lvaLocalVarRefCounted)
16523 // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
16524 lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
16527 // Can the entire tree be removed?
16528 bool removedStmt = false;
16530 // Defer removing statements during CSE so we don't inadvertently remove any CSE defs.
16531 if (!optValnumCSE_phase)
16533 removedStmt = fgCheckRemoveStmt(block, stmt);
16536 // Or is this the last statement of a conditional branch that was just folded?
16537 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
16539 if (fgFoldConditional(block))
16541 if (block->bbJumpKind != BBJ_THROW)
16543 removedStmt = true;
16550 // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
16551 gtSetStmtInfo(stmt);
16553 // Have to re-link the nodes for this statement
16554 fgSetStmtSeq(stmt);
16560 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
16566 if (fgRemoveRestOfBlock)
16568 // Remove the rest of the stmts in the block
16569 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
16571 fgRemoveStmt(block, stmt);
16574 // The rest of the block has been removed, and we will always throw an exception.
16576 // Update the successors of block
16577 fgRemoveBlockAsPred(block);
16579 // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
16580 // We should not convert it to a ThrowBB.
16581 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
16583 // Convert block to a throw bb
16584 fgConvertBBToThrowBB(block);
16590 printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
16593 fgRemoveRestOfBlock = false;
16596 return removedStmt;
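// Caller sketch (illustrative): the return value says whether 'stmt' is
// still attached to 'block', so a typical caller looks like
//
//     if (fgMorphBlockStmt(block, stmt DEBUGARG("example")))
//     {
//         // 'stmt' was removed from the block; don't touch it again.
//     }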
16599 /*****************************************************************************
16601 * Morph the statements of the given block.
16602 * This function should be called just once for a block. Use fgMorphBlockStmt()
16603 * for reentrant calls.
16606 #ifdef LEGACY_BACKEND
16607 void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
16609 void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw)
16612 fgRemoveRestOfBlock = false;
16614 /* Make the current basic block address available globally */
16618 *lnot = *loadw = false;
16619 #ifdef LEGACY_BACKEND
16623 fgCurrentlyInUseArgTemps = hashBv::Create(this);
16625 GenTreeStmt* stmt = block->firstStmt();
16626 GenTree* prev = nullptr;
16627 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
16629 assert(stmt->gtOper == GT_STMT);
16631 if (fgRemoveRestOfBlock)
16633 fgRemoveStmt(block, stmt);
16636 #ifdef FEATURE_SIMD
16637 if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
16639 fgMorphCombineSIMDFieldAssignments(block, stmt);
16643 fgMorphStmt = stmt;
16644 compCurStmt = stmt;
16645 GenTree* tree = stmt->gtStmtExpr;
16649 if (stmt == block->bbTreeList)
16651 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
16654 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
16658 printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
16663 /* Morph this statement tree */
16665 GenTree* morph = fgMorphTree(tree);
16667 // mark any outgoing arg temps as free so we can reuse them in the next statement.
16669 fgCurrentlyInUseArgTemps->ZeroAll();
16671 // Has fgMorphStmt been sneakily changed?
16673 if (stmt->gtStmtExpr != tree)
16675 /* This must be a tail call. Ignore 'morph' and carry on with
16676    the tail-call node */
16678 morph = stmt->gtStmtExpr;
16679 noway_assert(compTailCallUsed);
16680 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
16681 noway_assert(stmt->gtNextStmt == nullptr);
16683 GenTreeCall* call = morph->AsCall();
16685 // - a tail call dispatched via helper, in which case the block will end with BBJ_THROW, or
16686 // - a fast call made as a jmp, in which case the block will end with BBJ_RETURN and be marked as containing a jmp.
16688 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
16689 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
16690 (compCurBB->bbFlags & BBF_HAS_JMP)));
16692 else if (block != compCurBB)
16694 /* This must be a tail call that caused a GCPoll to get
16695 injected. We haven't actually morphed the call yet
16696 but the flag still got set, clear it here... */
16697 CLANG_FORMAT_COMMENT_ANCHOR;
16700 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
16703 noway_assert(compTailCallUsed);
16704 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
16705 noway_assert(stmt->gtNextStmt == nullptr);
16707 GenTreeCall* call = morph->AsCall();
16710 // - a tail call dispatched via helper, in which case the block will end with BBJ_THROW, or
16711 // - a fast call made as a jmp, in which case the block will end with BBJ_RETURN and be marked as containing a jmp.
16713 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
16714 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
16715 (compCurBB->bbFlags & BBF_HAS_JMP)));
16719 if (compStressCompile(STRESS_CLONE_EXPR, 30))
16721 // Clone all the trees to stress gtCloneExpr()
16725 printf("\nfgMorphTree (stressClone from):\n");
16729 morph = gtCloneExpr(morph);
16730 noway_assert(morph);
16734 printf("\nfgMorphTree (stressClone to):\n");
16739 /* If the hash value changed, we modified the tree during morphing */
16742 unsigned newHash = gtHashValue(morph);
16743 if (newHash != oldHash)
16745 printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
16751 /* Check for morph as a GT_COMMA with an unconditional throw */
16752 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
16754 /* Use the call as the new stmt */
16755 morph = morph->gtOp.gtOp1;
16756 noway_assert(morph->gtOper == GT_CALL);
16757 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
16759 fgRemoveRestOfBlock = true;
16762 stmt->gtStmtExpr = tree = morph;
16764 noway_assert(fgPtrArgCntCur == 0);
16766 if (fgRemoveRestOfBlock)
16771 /* Has the statement been optimized away */
16773 if (fgCheckRemoveStmt(block, stmt))
16778 /* Check if this block ends with a conditional branch that can be folded */
16780 if (fgFoldConditional(block))
16785 if (ehBlockHasExnFlowDsc(block))
16790 #ifdef LEGACY_BACKEND
16791 /* Note whether we have two or more +=/-= operators in a row */
16793 if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
16795 if (prev && prev->gtOper == tree->gtOper)
16801 /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
16803 if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
16807 #endif // LEGACY_BACKEND
16810 if (fgRemoveRestOfBlock)
16812 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
16814 GenTree* first = block->bbTreeList;
16815 noway_assert(first);
16816 GenTree* last = first->gtPrev;
16817 noway_assert(last && last->gtNext == nullptr);
16818 GenTree* lastStmt = last->gtStmt.gtStmtExpr;
16820 if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
16821 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
16823 GenTree* op1 = lastStmt->gtOp.gtOp1;
16825 if (op1->OperKind() & GTK_RELOP)
16827 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
16828 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
16831 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
16835 /* Mark block as a BBJ_THROW block */
16836 fgConvertBBToThrowBB(block);
16839 #if FEATURE_FASTTAILCALL
16840 GenTree* recursiveTailCall = nullptr;
16841 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
16843 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
16848 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
16851 // Reset this back so that it doesn't leak out and impact other blocks
16852 fgRemoveRestOfBlock = false;
16855 /*****************************************************************************
16857 * Morph the blocks of the method.
16858 * The basic block list may be modified in the process.
16859 * This function should be called just once.
16862 void Compiler::fgMorphBlocks()
16867 printf("\n*************** In fgMorphBlocks()\n");
16871 /* Since fgMorphTree can be called after various optimizations to re-arrange
16872  * the nodes, we need a global flag to signal whether we are in the one-pass
16873  * global morphing phase */
16875 fgGlobalMorph = true;
16877 #if LOCAL_ASSERTION_PROP
16879 // Local assertion prop is enabled if we are optimizing
16881 optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
16883 if (optLocalAssertionProp)
16886 // Initialize for local assertion prop
16888 optAssertionInit(true);
16890 #elif ASSERTION_PROP
16892 // If LOCAL_ASSERTION_PROP is not set
16893 // and we have global assertion prop
16894 // then local assertion prop is always off
16896 optLocalAssertionProp = false;
16900 /*-------------------------------------------------------------------------
16901 * Process all basic blocks in the function
16904 BasicBlock* block = fgFirstBB;
16905 noway_assert(block);
16908 compCurStmtNum = 0;
16913 #ifdef LEGACY_BACKEND
16921 bool loadw = false;
16926 printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
16930 #if LOCAL_ASSERTION_PROP
16931 if (optLocalAssertionProp)
16934 // Clear out any currently recorded assertion candidates
16935 // before processing each basic block,
16936 // also we must handle QMARK-COLON specially
16938 optAssertionReset(0);
16942 /* Process all statement trees in the basic block */
16944 #ifndef LEGACY_BACKEND
16945 fgMorphStmts(block, &lnot, &loadw);
16947 fgMorphStmts(block, &mult, &lnot, &loadw);
16949 if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
16951 for (GenTree* tree = block->bbTreeList; tree; tree = tree->gtNext)
16953 assert(tree->gtOper == GT_STMT);
16954 GenTree* last = tree->gtStmt.gtStmtExpr;
16956 if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
16961 GenTree* dst1 = last->gtOp.gtOp1;
16962 GenTree* src1 = last->gtOp.gtOp2;
16964 if (!last->IsCnsIntOrI())
16969 if (dst1->gtOper != GT_LCL_VAR)
16973 if (!src1->IsCnsIntOrI())
16983 /* Look at the next statement */
16985 temp = tree->gtNext;
16991 noway_assert(temp->gtOper == GT_STMT);
16992 next = temp->gtStmt.gtStmtExpr;
16994 if (next->gtOper != last->gtOper)
16998 if (next->gtType != last->gtType)
17003 dst2 = next->gtOp.gtOp1;
17004 src2 = next->gtOp.gtOp2;
17006 if (dst2->gtOper != GT_LCL_VAR)
17010 if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
17015 if (!src2->IsCnsIntOrI())
17020 if (last->gtOverflow() != next->gtOverflow())
17025 const ssize_t i1 = src1->gtIntCon.gtIconVal;
17026 const ssize_t i2 = src2->gtIntCon.gtIconVal;
17027 const ssize_t itemp = i1 + i2;
17029 /* If the operators are overflow-checking, verify that folding the constant operands would not overflow */
17031 if (next->gtOverflow())
17033 if (next->TypeGet() == TYP_LONG)
17035 if (next->gtFlags & GTF_UNSIGNED)
17037 ClrSafeInt<UINT64> si1(i1);
17038 if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
17045 ClrSafeInt<INT64> si1(i1);
17046 if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
17052 else if (next->gtFlags & GTF_UNSIGNED)
17054 ClrSafeInt<UINT32> si1(i1);
17055 if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
17062 ClrSafeInt<INT32> si1(i1);
17063 if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
17070 /* Fold the two increments/decrements into one */
17072 src1->gtIntCon.gtIconVal = itemp;
17073 #ifdef _TARGET_64BIT_
17074 if (src1->gtType == TYP_INT)
17076 src1->AsIntCon()->TruncateOrSignExtend32();
17078 #endif //_TARGET_64BIT_
17080 /* Remove the second statement completely */
17082 noway_assert(tree->gtNext == temp);
17083 noway_assert(temp->gtPrev == tree);
17087 noway_assert(temp->gtNext->gtPrev == temp);
17089 temp->gtNext->gtPrev = tree;
17090 tree->gtNext = temp->gtNext;
17094 tree->gtNext = nullptr;
17096 noway_assert(block->bbTreeList->gtPrev == temp);
17098 block->bbTreeList->gtPrev = tree;
17107 #endif // LEGACY_BACKEND
17109 /* Are we using a single return block? */
17111 if (block->bbJumpKind == BBJ_RETURN)
17113 if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
17116 // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
17117 // For example, a method returning void could have an empty block with jump kind BBJ_RETURN.
17118 // Such blocks do materialize as part of inlining.
17120 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
17121 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
17122 // For now it is safe to explicitly check whether the last stmt is GT_RETURN if genReturnLocal is BAD_VAR_NUM.
17125 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
17127 GenTree* last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
17128 GenTree* ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
17130 if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0))
17132 // This return was generated during epilog merging, so leave it alone
17136 /* We'll jump to the genReturnBB */
17137 CLANG_FORMAT_COMMENT_ANCHOR;
17139 #if !defined(_TARGET_X86_)
17140 if (info.compFlags & CORINFO_FLG_SYNCH)
17142 fgConvertSyncReturnToLeave(block);
17145 #endif // !_TARGET_X86_
17147 block->bbJumpKind = BBJ_ALWAYS;
17148 block->bbJumpDest = genReturnBB;
17151 if (genReturnLocal != BAD_VAR_NUM)
17153 // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
17155 // Method must be returning a value other than TYP_VOID.
17156 noway_assert(compMethodHasRetVal());
17158 // This block must end with a GT_RETURN
17159 noway_assert(last != nullptr);
17160 noway_assert(last->gtOper == GT_STMT);
17161 noway_assert(last->gtNext == nullptr);
17162 noway_assert(ret != nullptr);
17164 // GT_RETURN must have a non-null operand as the method is returning the value assigned to genReturnLocal.
17166 noway_assert(ret->OperGet() == GT_RETURN);
17167 noway_assert(ret->gtGetOp1() != nullptr);
17169 GenTree* tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
17171 last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
17173 // Make sure that copy-prop ignores this assignment.
17174 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
17176 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
17178 // This block ends with a GT_RETURN
17179 noway_assert(last != nullptr);
17180 noway_assert(last->gtOper == GT_STMT);
17181 noway_assert(last->gtNext == nullptr);
17183 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn
17185 noway_assert(ret->TypeGet() == TYP_VOID);
17186 noway_assert(ret->gtGetOp1() == nullptr);
17188 fgRemoveStmt(block, last);
17193 printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
17194 fgTableDispBasicBlock(block);
17200 block = block->bbNext;
17203 /* We are done with the global morphing phase */
17205 fgGlobalMorph = false;
17210 fgDispBasicBlocks(true);
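// Sketch of the overflow guard used when folding two constant increments in
// the LEGACY_BACKEND path above (illustrative; uses GCC/Clang's
// __builtin_add_overflow for the 32-bit signed case instead of ClrSafeInt):
//
//     bool TryFoldIncrements(int32_t i1, int32_t i2, bool overflowChecked, int32_t* folded)
//     {
//         int32_t sum;
//         bool overflowed = __builtin_add_overflow(i1, i2, &sum);
//         if (overflowChecked && overflowed)
//         {
//             return false; // keep both increments so the check can fire at run time
//         }
//         *folded = sum;
//         return true;
//     }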
17215 //------------------------------------------------------------------------
17216 // fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
17219 // fgPtrArgCntMax records the maximum number of pushed arguments.
17220 // Depending upon this maximum number of pushed arguments,
17221 // we may need to use an EBP frame or be partially interruptible.
17222 // This functionality has been factored out of fgSetOptions() because
17223 // the Rationalizer can create new calls.
17226 // This must be called before isFramePointerRequired() is called, because it is a
17227 // phased variable (can only be written before it has been read).
17229 void Compiler::fgCheckArgCnt()
17231 if (!compCanEncodePtrArgCntMax())
17236 printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
17237 "interruptible\n");
17240 genInterruptible = false;
17242 if (fgPtrArgCntMax >= sizeof(unsigned))
17247 printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
17250 codeGen->setFramePointerRequired(true);
17254 /*****************************************************************************
17256 * Make some decisions about the kind of code to generate.
17259 void Compiler::fgSetOptions()
17262 /* Should we force fully interruptible code? */
17263 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
17265 noway_assert(!codeGen->isGCTypeFixed());
17266 genInterruptible = true;
17270 if (opts.compDbgCode)
17272 assert(!codeGen->isGCTypeFixed());
17273 genInterruptible = true; // debugging is easier this way ...
17276 /* Assume we won't need an explicit stack frame if this is allowed */
17278 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
17279 // the callee-saved registers.
17280 noway_assert(!compTailCallUsed || !compLocallocUsed);
17282 if (compLocallocUsed)
17284 codeGen->setFramePointerRequired(true);
17287 #ifdef _TARGET_X86_
17289 if (compTailCallUsed)
17290 codeGen->setFramePointerRequired(true);
17292 #endif // _TARGET_X86_
17294 if (!opts.genFPopt)
17296 codeGen->setFramePointerRequired(true);
17299 // Assert that the EH table has been initialized by now. Note that
17300 // compHndBBtabAllocCount never decreases; it is a high-water mark
17301 // of table allocation. In contrast, compHndBBtabCount does shrink
17302 // if we delete a dead EH region, and if it shrinks to zero, the
17303 // table pointer compHndBBtab is unreliable.
17304 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
17306 #ifdef _TARGET_X86_
17308 // Note: this case, and the !X86 case below, should both use the
17309 // !X86 path. This would require a few more changes for X86 to use
17310 // compHndBBtabCount (the current number of EH clauses) instead of
17311 // info.compXcptnsCount (the number of EH clauses in IL), such as
17312 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
17313 // an EH clause that we delete as statically dead code before we
17314 // get here, leaving no EH clauses left, and thus no requirement
17315 // to use a frame pointer because of EH. But until all the code uses
17316 // the same test, leave info.compXcptnsCount here.
17317 if (info.compXcptnsCount > 0)
17319 codeGen->setFramePointerRequiredEH(true);
17322 #else // !_TARGET_X86_
17324 if (compHndBBtabCount > 0)
17326 codeGen->setFramePointerRequiredEH(true);
17329 #endif // _TARGET_X86_
17331 #ifdef UNIX_X86_ABI
17332 if (info.compXcptnsCount > 0)
17334 assert(!codeGen->isGCTypeFixed());
17335 // Enforce fully interruptible codegen for funclet unwinding
17336 genInterruptible = true;
17338 #endif // UNIX_X86_ABI
17342 if (info.compCallUnmanaged)
17344 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
17347 if (info.compPublishStubParam)
17349 codeGen->setFramePointerRequiredGCInfo(true);
17352 if (opts.compNeedSecurityCheck)
17354 codeGen->setFramePointerRequiredGCInfo(true);
17356 #ifndef JIT32_GCENCODER
17358 // The decoder only reports objects in frames with exceptions if the frame
17359 // is fully interruptible.
17360 // Even if there is no catch or other way to resume execution in this frame,
17361 // the VM requires the security object to remain alive until later, so
17362 // frames with security objects must be fully interruptible.
17363 genInterruptible = true;
17365 #endif // JIT32_GCENCODER
17368 if (compIsProfilerHookNeeded())
17370 codeGen->setFramePointerRequired(true);
17373 if (info.compIsVarArgs)
17375 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
17376 codeGen->setFramePointerRequiredGCInfo(true);
17379 if (lvaReportParamTypeArg())
17381 codeGen->setFramePointerRequiredGCInfo(true);
17384 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
17387 /*****************************************************************************/
17389 GenTree* Compiler::fgInitThisClass()
17391 noway_assert(!compIsForInlining());
17393 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
17395 if (!kind.needsRuntimeLookup)
17397 return fgGetSharedCCtor(info.compClassHnd);
17401 #ifdef FEATURE_READYTORUN_COMPILER
17402 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
17403 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
17405 CORINFO_RESOLVED_TOKEN resolvedToken;
17406 memset(&resolvedToken, 0, sizeof(resolvedToken));
17408 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
17409 // This covers the case of a generic method on a non-generic type.
17410 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
17412 resolvedToken.hClass = info.compClassHnd;
17413 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
17416 // We need a runtime lookup.
17417 GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
17419 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
17420 // base of the class that owns the method being compiled". If we're in this method, it means we're not
17421 // inlining and there's no ambiguity.
17422 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
17423 gtNewArgList(ctxTree), &kind);
17427 // Collectible types require that, for shared generic code, if we use the generic context parameter,
17428 // we report it. (This is a conservative approach; we could detect some cases, particularly when the
17429 // context parameter is 'this', where we don't need the eager reporting logic.)
17430 lvaGenericsContextUseCount++;
17432 switch (kind.runtimeLookupKind)
17434 case CORINFO_LOOKUP_THISOBJ:
17435 // This code takes a this pointer, but we need to pass the static method desc to get the right point in the hierarchy.
17438 GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
17439 // Vtable pointer of this object
17440 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
17441 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
17442 GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
17444 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewArgList(vtTree, methodHnd));
17447 case CORINFO_LOOKUP_CLASSPARAM:
17449 GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
17450 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewArgList(vtTree));
17453 case CORINFO_LOOKUP_METHODPARAM:
17455 GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
17456 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID,
17457 gtNewArgList(gtNewIconNode(0), methHndTree));
17462 noway_assert(!"Unknown LOOKUP_KIND");
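// Recap (illustrative summary of the switch above): the three runtime lookup
// kinds map to helper calls roughly as follows:
//   CORINFO_LOOKUP_THISOBJ     -> CORINFO_HELP_INITINSTCLASS(vtable-of-this, methodHnd)
//   CORINFO_LOOKUP_CLASSPARAM  -> CORINFO_HELP_INITCLASS(classCtxArg)
//   CORINFO_LOOKUP_METHODPARAM -> CORINFO_HELP_INITINSTCLASS(0, methodCtxArg)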
17467 /*****************************************************************************
17469 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
17470 * except for the allowed ? 1 : 0; pattern.
17472 Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data)
17474 if ((*tree)->OperGet() == GT_QMARK)
17476 fgCheckQmarkAllowedForm(*tree);
17478 return WALK_CONTINUE;
17481 void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
17483 assert(tree->OperGet() == GT_QMARK);
17484 #ifndef LEGACY_BACKEND
17485 assert(!"Qmarks beyond morph disallowed.");
17486 #else // LEGACY_BACKEND
17487 GenTree* colon = tree->gtOp.gtOp2;
17489 assert(colon->gtOp.gtOp1->IsIntegralConst(0));
17490 assert(colon->gtOp.gtOp2->IsIntegralConst(1));
17491 #endif // LEGACY_BACKEND
17494 /*****************************************************************************
17496 * Verify that the importer has created GT_QMARK nodes in a way we can
17497 * process them. The following is allowed:
17499 * 1. A top level qmark. Top level qmark is of the form:
17500 * a) (bool) ? (void) : (void) OR
17501 * b) V0N = (bool) ? (type) : (type)
17503 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
17504 * of either op1 of colon or op2 of colon, but not a child of any other operator.
17507 void Compiler::fgPreExpandQmarkChecks(GenTree* expr)
17509 GenTree* topQmark = fgGetTopLevelQmark(expr);
17511 // If the top level Qmark is null, then scan the tree to make sure
17512 // there are no qmarks within it.
17513 if (topQmark == nullptr)
17515 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
17519 // We could probably expand the cond node also, but we don't think the extra effort is necessary,
17520 // so let's just assert that the cond node of a top level qmark doesn't have further top level qmarks.
17521 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
17523 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
17524 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
17529 /*****************************************************************************
17531 * Get the top level GT_QMARK node in a given "expr", return NULL if such a
17532 * node is not present. If the top level GT_QMARK node is assigned to a
17533 * GT_LCL_VAR, then return the lcl node in ppDst.
17536 GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */)
17538 if (ppDst != nullptr)
17543 GenTree* topQmark = nullptr;
17544 if (expr->gtOper == GT_QMARK)
17548 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17550 topQmark = expr->gtOp.gtOp2;
17551 if (ppDst != nullptr)
17553 *ppDst = expr->gtOp.gtOp1;
17559 /*********************************************************************************
17561 * For a castclass helper call,
17562 * Importer creates the following tree:
17563 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
17565 * This method splits the qmark expression created by the importer into the
17566 * following blocks: (block, asg, cond1, cond2, helper, remainder)
17567 * Notice that op1 is the result for both conditions. So we coalesce these
17568 * assignments into a single block instead of two blocks, which would result in a nested diamond.
17570 * +---------->-----------+
17574 * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
17576 * We expect to achieve the following codegen:
17577 * mov rsi, rdx tmp = op1 // asgBlock
17578 * test rsi, rsi goto skip if tmp == null ? // cond1Block
17580 * mov rcx, 0x76543210 cns = op2 // cond2Block
17581 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
17583 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
17585 * SKIP: // remainderBlock
17586 * tmp has the result.
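 *
 *  Source-level analogue of the expansion (illustrative; 'ChkCastHelper' and
 *  'clsHnd' are stand-ins, not actual JIT identifiers):
 *      tmp = op1;                                    // asgBlock
 *      if (tmp != nullptr)                           // cond1Block
 *          if (*(void**)tmp != clsHnd)               // cond2Block
 *              tmp = ChkCastHelper(clsHnd, tmp);     // helperBlock
 *      // remainderBlock: tmp holds the result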
17589 void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTree* stmt)
17594 printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
17595 fgDispBasicBlocks(block, block, true);
17599 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17601 GenTree* dst = nullptr;
17602 GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
17603 noway_assert(dst != nullptr);
17605 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
17607 // Get cond, true, false exprs for the qmark.
17608 GenTree* condExpr = qmark->gtGetOp1();
17609 GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
17610 GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
17612 // Get cond, true, false exprs for the nested qmark.
17613 GenTree* nestedQmark = falseExpr;
17614 GenTree* cond2Expr;
17615 GenTree* true2Expr;
17616 GenTree* false2Expr;
17618 if (nestedQmark->gtOper == GT_QMARK)
17620 cond2Expr = nestedQmark->gtGetOp1();
17621 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
17622 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
17624 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
17625 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
17629 // This is a rare case that arises when we are doing minopts and encounter isinst of null.
17630 // gtFoldExpr was still able to optimize away part of the tree (but not all).
17631 // That means it does not match our pattern.
17633 // Rather than write code to handle this case, just fake up some nodes to make it match the common
17634 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
17635 // entire subtree we expected to be the nested question op.
17637 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
17638 true2Expr = nestedQmark;
17639 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
17641 assert(false2Expr->OperGet() == trueExpr->OperGet());
17643 // Clear flags as they are now going to be part of JTRUE.
17644 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
17645 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
17647 // Create the chain of blocks. See method header comment.
17648 // The order of blocks after this is the following:
17649 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
17651 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
17652 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
17653 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
17654 // remainderBlock will still be GC safe.
17655 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
17656 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
17657 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
17659 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
17660 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
17661 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
17662 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
17664 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
17666 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
17667 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
17668 if ((block->bbFlags & BBF_INTERNAL) == 0)
17670 helperBlock->bbFlags &= ~BBF_INTERNAL;
17671 cond2Block->bbFlags &= ~BBF_INTERNAL;
17672 cond1Block->bbFlags &= ~BBF_INTERNAL;
17673 asgBlock->bbFlags &= ~BBF_INTERNAL;
17674 helperBlock->bbFlags |= BBF_IMPORTED;
17675 cond2Block->bbFlags |= BBF_IMPORTED;
17676 cond1Block->bbFlags |= BBF_IMPORTED;
17677 asgBlock->bbFlags |= BBF_IMPORTED;
17680 // Chain the flow correctly.
17681 fgAddRefPred(asgBlock, block);
17682 fgAddRefPred(cond1Block, asgBlock);
17683 fgAddRefPred(cond2Block, cond1Block);
17684 fgAddRefPred(helperBlock, cond2Block);
17685 fgAddRefPred(remainderBlock, helperBlock);
17686 fgAddRefPred(remainderBlock, cond1Block);
17687 fgAddRefPred(remainderBlock, cond2Block);
17689 cond1Block->bbJumpDest = remainderBlock;
17690 cond2Block->bbJumpDest = remainderBlock;
17692 // Set the weights; some are guesses.
17693 asgBlock->inheritWeight(block);
17694 cond1Block->inheritWeight(block);
17695 cond2Block->inheritWeightPercentage(cond1Block, 50);
17696 helperBlock->inheritWeightPercentage(cond2Block, 50);
17698 // Append cond1 as JTRUE to cond1Block
17699 GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
17700 GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17701 fgInsertStmtAtEnd(cond1Block, jmpStmt);
17703 // Append cond2 as JTRUE to cond2Block
17704 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
17705 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17706 fgInsertStmtAtEnd(cond2Block, jmpStmt);
17708 // asgBlock should get the "tmp = op1" assignment.
17709 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
17710 GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17711 fgInsertStmtAtEnd(asgBlock, trueStmt);
17713 // Since we are adding the helper in the JTRUE false path, reverse cond2 and add the helper.
17714 gtReverseCond(cond2Expr);
17715 GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
17716 GenTree* helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
17717 fgInsertStmtAtEnd(helperBlock, helperStmt);
17719 // Finally remove the nested qmark stmt.
17720 fgRemoveStmt(block, stmt);
17725 printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
17726 fgDispBasicBlocks(block, remainderBlock, true);
17731 /*****************************************************************************
17733 * Expand a statement with a top level qmark node. There are three cases, based
17734 * on whether the qmark has both "true" and "false" arms, or just one of them.
17745 * S0 -->-- ~C -->-- T F -->-- S1
17750 * -----------------------------------------
17759 * S0 -->-- ~C -->-- T -->-- S1
17761 * +-->-------------+
17764 * -----------------------------------------
17773 * S0 -->-- C -->-- F -->-- S1
17775 * +-->------------+
17778 * If the qmark assigns to a variable, then create tmps for "then"
17779 * and "else" results and assign the temp to the variable as a writeback step.
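 *
 *  Source-level analogue (illustrative): for "dst = C ? T : F" the expansion
 *  behaves like
 *      if (C) { dst = T; } else { dst = F; }    // both arms present
 *      if (C) { dst = T; }                      // "false" arm is a GT_NOP
 *      if (!C) { dst = F; }                     // "true" arm is a GT_NOP
 *  with fresh basic blocks wired up as shown in the diagrams above.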
17781 void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTree* stmt)
17783 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17785 // Retrieve the Qmark node to be expanded.
17786 GenTree* dst = nullptr;
17787 GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
17788 if (qmark == nullptr)
17793 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
17795 fgExpandQmarkForCastInstOf(block, stmt);
17802 printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
17803 fgDispBasicBlocks(block, block, true);
17807 // Retrieve the operands.
17808 GenTree* condExpr = qmark->gtGetOp1();
17809 GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
17810 GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
17812 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
17813 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
17815 assert(!varTypeIsFloating(condExpr->TypeGet()));
17817 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
17818 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
17819 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
17821 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
17822 // block ... condBlock ... elseBlock ... remainderBlock
17824 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
17825 // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
17826 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
17827 // remainderBlock will still be GC safe.
17828 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
17829 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
17830 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
17832 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
17833 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
17835 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
17836 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
17837 if ((block->bbFlags & BBF_INTERNAL) == 0)
17839 condBlock->bbFlags &= ~BBF_INTERNAL;
17840 elseBlock->bbFlags &= ~BBF_INTERNAL;
17841 condBlock->bbFlags |= BBF_IMPORTED;
17842 elseBlock->bbFlags |= BBF_IMPORTED;
17845 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
17847 condBlock->inheritWeight(block);
17849 fgAddRefPred(condBlock, block);
17850 fgAddRefPred(elseBlock, condBlock);
17851 fgAddRefPred(remainderBlock, elseBlock);
17853 BasicBlock* thenBlock = nullptr;
17854 if (hasTrueExpr && hasFalseExpr)
17859 // S0 -->-- ~C -->-- T F -->-- S1
17864 gtReverseCond(condExpr);
17865 condBlock->bbJumpDest = elseBlock;
17867 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
17868 thenBlock->bbJumpDest = remainderBlock;
17869 if ((block->bbFlags & BBF_INTERNAL) == 0)
17871 thenBlock->bbFlags &= ~BBF_INTERNAL;
17872 thenBlock->bbFlags |= BBF_IMPORTED;
17875 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
17877 fgAddRefPred(thenBlock, condBlock);
17878 fgAddRefPred(remainderBlock, thenBlock);
17880 thenBlock->inheritWeightPercentage(condBlock, 50);
17881 elseBlock->inheritWeightPercentage(condBlock, 50);
17883 else if (hasTrueExpr)
17886 // S0 -->-- ~C -->-- T -->-- S1
17888 // +-->-------------+
17891 gtReverseCond(condExpr);
17892 condBlock->bbJumpDest = remainderBlock;
17893 fgAddRefPred(remainderBlock, condBlock);
17894 // Since we have no false expr, use the one we'd already created.
17895 thenBlock = elseBlock;
17896 elseBlock = nullptr;
17898 thenBlock->inheritWeightPercentage(condBlock, 50);
17900 else if (hasFalseExpr)
17903 // S0 -->-- C -->-- F -->-- S1
17905 // +-->------------+
17908 condBlock->bbJumpDest = remainderBlock;
17909 fgAddRefPred(remainderBlock, condBlock);
17911 elseBlock->inheritWeightPercentage(condBlock, 50);
17914 GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
17915 GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
17916 fgInsertStmtAtEnd(condBlock, jmpStmt);
17918 // Remove the original qmark statement.
17919 fgRemoveStmt(block, stmt);
17921 // Since we have a top-level qmark, we either have a dst for it, in which case
17922 // we need to create tmps for the true and false exprs; otherwise we just don't bother assigning.
17924 unsigned lclNum = BAD_VAR_NUM;
17925 if (dst != nullptr)
17927 assert(dst->gtOper == GT_LCL_VAR);
17928 lclNum = dst->gtLclVar.gtLclNum;
17932 assert(qmark->TypeGet() == TYP_VOID);
17937 if (dst != nullptr)
17939 trueExpr = gtNewTempAssign(lclNum, trueExpr);
17941 GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
17942 fgInsertStmtAtEnd(thenBlock, trueStmt);
17945 // Assign the falseExpr into the dst or tmp, insert in elseBlock
17948 if (dst != nullptr)
17950 falseExpr = gtNewTempAssign(lclNum, falseExpr);
17952 GenTree* falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
17953 fgInsertStmtAtEnd(elseBlock, falseStmt);
17959 printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
17960 fgDispBasicBlocks(block, remainderBlock, true);
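// Editorial illustration (not from the original sources): for a top-level qmark such as
// the C# statement
//     x = cond ? a : b;   // both arms present
// the expansion above yields the block shape
//     block:          ...  JTRUE(!cond) --> elseBlock
//     thenBlock:      x = a;  JMP --> remainderBlock
//     elseBlock:      x = b;
//     remainderBlock: ...
// with the true/false assignments inserted once the dst temp is known.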
17965 /*****************************************************************************
17967 * Expand GT_QMARK nodes in the flow graph into basic blocks.
17971 void Compiler::fgExpandQmarkNodes()
17975 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
17977 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
17979 GenTree* expr = stmt->gtStmt.gtStmtExpr;
17981 fgPreExpandQmarkChecks(expr);
17983 fgExpandQmarkStmt(block, stmt);
17987 fgPostExpandQmarkChecks();
17990 compQmarkRationalized = true;
17994 /*****************************************************************************
17996 * Make sure we don't have any more GT_QMARK nodes.
17999 void Compiler::fgPostExpandQmarkChecks()
18001 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
18003 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
18005 GenTree* expr = stmt->gtStmt.gtStmtExpr;
18006 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
18012 /*****************************************************************************
18014 * Transform all basic blocks for codegen.
18017 void Compiler::fgMorph()
18019 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
18021 fgOutgoingArgTemps = nullptr;
18026 printf("*************** In fgMorph()\n");
18030 fgDispBasicBlocks(true);
18034 // Insert call to class constructor as the first basic block if
18035 // we were asked to do so.
18036 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
18037 impTokenLookupContextHandle /* context */) &
18038 CORINFO_INITCLASS_USE_HELPER)
18040 fgEnsureFirstBBisScratch();
18041 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
18045 if (opts.compGcChecks)
18047 for (unsigned i = 0; i < info.compArgsCount; i++)
18049 if (lvaTable[i].TypeGet() == TYP_REF)
18051 // confirm that the argument is a GC pointer (for debugging (GC stress))
18052 GenTree* op = gtNewLclvNode(i, TYP_REF);
18053 GenTreeArgList* args = gtNewArgList(op);
18054 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args);
18056 fgEnsureFirstBBisScratch();
18057 fgInsertStmtAtEnd(fgFirstBB, op);
18062 if (opts.compStackCheckOnRet)
18064 lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
18065 lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
18068 if (opts.compStackCheckOnCall)
18070 lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
18071 lvaTable[lvaCallEspCheck].lvType = TYP_INT;
18075 /* Filter out unimported BBs */
18077 fgRemoveEmptyBlocks();
18080 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
18081 fgDebugCheckBBlist(false, false);
18084 EndPhase(PHASE_MORPH_INIT);
18089 JITDUMP("trees after inlining\n");
18090 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
18093 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
18095 EndPhase(PHASE_MORPH_INLINE);
18097 /* Add any internal blocks/trees we may need */
18102 fgMultipleNots = false;
18106 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
18107 fgDebugCheckBBlist(false, false);
18108 /* Inliner could clone some trees. */
18109 fgDebugCheckNodesUniqueness();
18112 fgRemoveEmptyTry();
18114 EndPhase(PHASE_EMPTY_TRY);
18116 fgRemoveEmptyFinally();
18118 EndPhase(PHASE_EMPTY_FINALLY);
18120 fgMergeFinallyChains();
18122 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
18126 EndPhase(PHASE_CLONE_FINALLY);
18128 fgUpdateFinallyTargetFlags();
18130 /* For x64 and ARM64 we need to mark irregular parameters */
18131 fgMarkImplicitByRefArgs();
18133 /* Promote struct locals if necessary */
18134 fgPromoteStructs();
18136 /* Now is the time to figure out which locals have their address taken. */
18137 fgMarkAddressExposedLocals();
18139 EndPhase(PHASE_STR_ADRLCL);
18141 /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
18142 analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
18143 fgRetypeImplicitByRefArgs();
18146 /* Now that address-taken locals and implicit byrefs have been marked, we can safely apply stress. */
18148 fgStress64RsltMul();
18151 EndPhase(PHASE_MORPH_IMPBYREF);
18153 /* Morph the trees in all the blocks of the method */
18157 /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
18158 fgMarkDemotedImplicitByRefArgs();
18160 EndPhase(PHASE_MORPH_GLOBAL);
18163 JITDUMP("trees after fgMorphBlocks\n");
18164 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
18167 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
18168 if (fgNeedToAddFinallyTargetBits)
18170 // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back.
18171 fgAddFinallyTargetFlags();
18172 fgNeedToAddFinallyTargetBits = false;
18174 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
18176 /* Decide the kind of code we want to generate */
18180 fgExpandQmarkNodes();
18183 compCurBB = nullptr;
18187 /*****************************************************************************
18189 * Promoting struct locals
18191 void Compiler::fgPromoteStructs()
18196 printf("*************** In fgPromoteStructs()\n");
18200 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
18202 JITDUMP(" promotion opt flag not enabled\n");
18206 if (fgNoStructPromotion)
18208 JITDUMP(" promotion disabled by JitNoStructPromotion\n");
18213 // The code in this #if has been useful in debugging struct promotion issues, by
18214 // allowing selective enablement of the struct promotion optimization according to
18215 // method hash.
18217 unsigned methHash = info.compMethodHash();
18218 char* lostr = getenv("structpromohashlo");
18219 unsigned methHashLo = 0;
18222 sscanf_s(lostr, "%x", &methHashLo);
18224 char* histr = getenv("structpromohashhi");
18225 unsigned methHashHi = UINT32_MAX;
18228 sscanf_s(histr, "%x", &methHashHi);
18230 if (methHash < methHashLo || methHash > methHashHi)
18236 printf("Promoting structs for method %s, hash = 0x%x.\n",
18237 info.compFullName, info.compMethodHash());
18238 printf(""); // in our logic this causes a flush
18243 if (info.compIsVarArgs)
18245 JITDUMP(" promotion disabled because of varargs\n");
18252 printf("\nlvaTable before fgPromoteStructs\n");
18257 // The lvaTable might grow as we grab temps. Make a local copy here.
18258 unsigned startLvaCount = lvaCount;
18261 // Loop through the original lvaTable, looking for struct locals to promote.
18263 lvaStructPromotionInfo structPromotionInfo;
18264 bool tooManyLocals = false;
18266 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
18268 // Whether this var got promoted
18269 bool promotedVar = false;
18270 LclVarDsc* varDsc = &lvaTable[lclNum];
18272 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
18273 // its fields. Instead, we will attempt to enregister the entire struct.
18274 if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
18276 varDsc->lvRegStruct = true;
18278 // Don't promote if we have reached the tracking limit.
18279 else if (lvaHaveManyLocals())
18281 // Print the message the first time we detect this condition
18282 if (!tooManyLocals)
18284 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
18286 tooManyLocals = true;
18288 else if (varTypeIsStruct(varDsc))
18290 bool shouldPromote;
18292 lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
18293 if (structPromotionInfo.canPromote)
18295 shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
18299 shouldPromote = false;
18303 // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
18304 // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
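// For example (hypothetical ordinals), structpromovarnumlo=2 and structpromovarnumhi=2
// would promote only the second candidate var in the method.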
18305 static int structPromoVarNum = 0;
18306 structPromoVarNum++;
18307 char* varNumLo = getenv("structpromovarnumlo");
18308 char* varNumHi = getenv("structpromovarnumhi");
18309 if ((varNumLo != nullptr) && (varNumHi != nullptr) && (atoi(varNumLo) <= structPromoVarNum) && (structPromoVarNum <= atoi(varNumHi)))
18312 // Promote this struct local var.
18313 lvaPromoteStructVar(lclNum, &structPromotionInfo);
18314 promotedVar = true;
18316 #ifdef _TARGET_ARM_
18317 if (structPromotionInfo.requiresScratchVar)
18319 // Ensure that the scratch variable is allocated, in case we
18320 // pass a promoted struct as an argument.
18321 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
18323 lvaPromotedStructAssemblyScratchVar =
18324 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
18325 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
18328 #endif // _TARGET_ARM_
18332 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
18334 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
18335 // we will treat it as a reg struct.
18336 varDsc->lvRegStruct = true;
18343 printf("\nlvaTable after fgPromoteStructs\n");
18349 Compiler::fgWalkResult Compiler::fgMorphStructField(GenTree* tree, fgWalkData* fgWalkPre)
18351 noway_assert(tree->OperGet() == GT_FIELD);
18353 GenTree* objRef = tree->gtField.gtFldObj;
18354 GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
18355 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
18357 /* Is this an instance data member? */
18359 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
18361 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
18362 LclVarDsc* varDsc = &lvaTable[lclNum];
18364 if (varTypeIsStruct(obj))
18366 if (varDsc->lvPromoted)
18369 unsigned fldOffset = tree->gtField.gtFldOffset;
18370 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
18371 noway_assert(fieldLclIndex != BAD_VAR_NUM);
18373 if (lvaIsImplicitByRefLocal(lclNum))
18375 // Keep track of the number of appearances of each promoted implicit
18376 // byref (here during struct promotion, which happens during address-exposed
18377 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
18378 // byref params when deciding if it's legal to elide certain copies of them.
18379 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
18380 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
18381 // chance, so we have to check now.
18383 "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
18384 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
18385 varDsc->lvRefCnt++;
18388 tree->SetOper(GT_LCL_VAR);
18389 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
18390 tree->gtType = lvaTable[fieldLclIndex].TypeGet();
18391 tree->gtFlags &= GTF_NODE_MASK;
18392 tree->gtFlags &= ~GTF_GLOB_REF;
18394 GenTree* parent = fgWalkPre->parentStack->Index(1);
18395 if (parent->gtOper == GT_ASG)
18397 if (parent->gtOp.gtOp1 == tree)
18399 tree->gtFlags |= GTF_VAR_DEF;
18400 tree->gtFlags |= GTF_DONT_CSE;
18403 // Promotion of struct containing struct fields where the field
18404 // is a struct with a single pointer sized scalar type field: in
18405 // this case struct promotion uses the type of the underlying
18406 // scalar field as the type of the struct field instead of recursively
18407 // promoting. This can lead to a case where we have a block-asgn
18408 // with its RHS replaced with a scalar type. Mark RHS value as
18409 // DONT_CSE so that assertion prop will not do const propagation.
18410 // The reason this is required is that if the RHS of a block-asgn is a
18411 // constant, then it is interpreted as an init-block incorrectly.
18413 // TODO - This can also be avoided if we implement recursive struct
18414 // promotion.
18415 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
18417 tree->gtFlags |= GTF_DONT_CSE;
18423 printf("Replacing the field in promoted struct with a local var:\n");
18424 fgWalkPre->printModified = true;
18427 return WALK_SKIP_SUBTREES;
18433 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
18434 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
18435 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
18436 // there is one extremely rare case where that won't be true. An enum type is a special value type
18437 // that contains exactly one element of a primitive integer type (that, for CLS programs, is named
18438 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
18439 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
18440 // ldfld. For example:
18442 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
18444 // .field public specialname rtspecialname int16 value__
18445 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
18447 // .method public hidebysig static void Main() cil managed
18449 // .locals init (valuetype mynamespace.e_t V_0)
18452 // ldflda int16 mynamespace.e_t::value__
18456 // Normally, compilers will not generate the ldflda, since it is superfluous.
18458 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
18459 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
18460 // mismatch like this, don't do this morphing. The local var may end up getting marked as
18461 // address taken, and the appropriate SHORT load will be done from memory in that case.
18463 if (tree->TypeGet() == obj->TypeGet())
18465 if (lvaIsImplicitByRefLocal(lclNum))
18467 // Keep track of the number of appearances of each promoted implicit
18468 // byref (here during struct promotion, which happens during address-exposed
18469 // analysis); fgMakeOutgoingStructArgCopy checks the ref counts for implicit
18470 // byref params when deciding if it's legal to elide certain copies of them.
18471 // Normally fgMarkAddrTakenLocalsPreCB (which calls this method) flags the
18472 // lclVars, but here we're about to return SKIP_SUBTREES and rob it of the
18473 // chance, so we have to check now.
18474 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
18475 varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
18476 varDsc->lvRefCnt++;
18479 tree->ChangeOper(GT_LCL_VAR);
18480 tree->gtLclVarCommon.SetLclNum(lclNum);
18481 tree->gtFlags &= GTF_NODE_MASK;
18483 GenTree* parent = fgWalkPre->parentStack->Index(1);
18484 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
18486 tree->gtFlags |= GTF_VAR_DEF;
18487 tree->gtFlags |= GTF_DONT_CSE;
18492 printf("Replacing the field in normed struct with the local var:\n");
18493 fgWalkPre->printModified = true;
18496 return WALK_SKIP_SUBTREES;
18501 return WALK_CONTINUE;
18504 Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTree* tree, fgWalkData* fgWalkPre)
18506 noway_assert(tree->OperGet() == GT_LCL_FLD);
18508 unsigned lclNum = tree->gtLclFld.gtLclNum;
18509 LclVarDsc* varDsc = &lvaTable[lclNum];
18511 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
18514 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
18515 unsigned fieldLclIndex = 0;
18516 LclVarDsc* fldVarDsc = nullptr;
18518 if (fldOffset != BAD_VAR_NUM)
18520 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
18521 noway_assert(fieldLclIndex != BAD_VAR_NUM);
18522 fldVarDsc = &lvaTable[fieldLclIndex];
18525 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
18526 #ifdef _TARGET_X86_
18527 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
18531 // There is an existing sub-field we can use.
18532 tree->gtLclFld.SetLclNum(fieldLclIndex);
18534 // The field must be an enregisterable type; otherwise it would not be a promoted field.
18535 // The tree type may not match, e.g. for return types that have been morphed, but both
18536 // must be enregisterable types.
18537 // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but
18538 // there may be places where that would violate existing assumptions.
18539 var_types treeType = tree->TypeGet();
18540 var_types fieldType = fldVarDsc->TypeGet();
18541 assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) &&
18542 (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType)));
18544 tree->ChangeOper(GT_LCL_VAR);
18545 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
18546 tree->gtType = fldVarDsc->TypeGet();
18550 printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
18551 fgWalkPre->printModified = true;
18555 GenTree* parent = fgWalkPre->parentStack->Index(1);
18556 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
18558 tree->gtFlags |= GTF_VAR_DEF;
18559 tree->gtFlags |= GTF_DONT_CSE;
18564 // There is no existing field that has all the parts that we need,
18565 // so we must ensure that the struct lives in memory.
18566 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
18569 // We can't convert this guy to a float because he really does have his
18570 // address taken, so we can't kill his type.
18571 varDsc->lvKeepType = 1;
18575 return WALK_SKIP_SUBTREES;
18578 return WALK_CONTINUE;
18581 //------------------------------------------------------------------------
18582 // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
18583 // i.e. which the ABI requires to be passed by making a copy in the caller and
18584 // passing its address to the callee. Mark their `LclVarDsc`s such that
18585 // `lvaIsImplicitByRefLocal` will return true for them.
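// For example (editorial illustration, Windows x64 ABI): a by-value parameter of type
//     struct S { __int64 a, b, c; };   // 24 bytes: larger than a register and not a power of two
// is really passed as a pointer to a caller-allocated copy, so it is marked here and
// rewritten by the phases below.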
18587 void Compiler::fgMarkImplicitByRefArgs()
18589 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18593 printf("\n*************** In fgMarkImplicitByRefs()\n");
18597 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18599 LclVarDsc* varDsc = &lvaTable[lclNum];
18601 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
18605 if (varDsc->lvSize() > REGSIZE_BYTES)
18607 size = varDsc->lvSize();
18611 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
18612 size = info.compCompHnd->getClassSize(typeHnd);
18615 #if defined(_TARGET_AMD64_)
18616 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
18617 #elif defined(_TARGET_ARM64_)
18618 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
18621 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
18622 // So I am now using it to indicate that this is one of the weird implicit
18623 // byref parameters.
18624 // The address taken cleanup will look for references to locals marked like
18625 // this, and transform them appropriately.
18626 varDsc->lvIsTemp = 1;
18628 // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
18629 // appearance of implicit-by-ref param so that call arg morphing can do an
18630 // optimization for single-use implicit-by-ref params whose single use is as
18631 // an outgoing call argument.
18632 varDsc->lvRefCnt = 0;
18637 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18640 //------------------------------------------------------------------------
18641 // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
18642 // struct to pointer). Also choose (based on address-exposed analysis)
18643 // which struct promotions of implicit byrefs to keep or discard.
18644 // For those which are kept, insert the appropriate initialization code.
18645 // For those which are to be discarded, annotate the promoted field locals
18646 // so that fgMorphImplicitByRefArgs will know to rewrite their appearances
18647 // using indirections off the pointer parameters.
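// Sketch of the rewrite (editorial illustration, hypothetical local numbers): for a
// promoted implicit-byref parameter V00, a new struct temp V03 takes over the promotion
// annotations and V00 is retyped to TYP_BYREF; unless the promotion is discarded, the
// method entry gains
//     V03 = BLK(V00)   // initialize the promoted temp from the caller's copy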
18649 void Compiler::fgRetypeImplicitByRefArgs()
18651 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18655 printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
18659 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18661 LclVarDsc* varDsc = &lvaTable[lclNum];
18663 if (lvaIsImplicitByRefLocal(lclNum))
18667 if (varDsc->lvSize() > REGSIZE_BYTES)
18669 size = varDsc->lvSize();
18673 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
18674 size = info.compCompHnd->getClassSize(typeHnd);
18677 if (varDsc->lvPromoted)
18679 // This implicit-by-ref was promoted; create a new temp to represent the
18680 // promoted struct before rewriting this parameter as a pointer.
18681 unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
18682 lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
18683 // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
18684 varDsc = &lvaTable[lclNum];
18686 // Copy the struct promotion annotations to the new temp.
18687 LclVarDsc* newVarDsc = &lvaTable[newLclNum];
18688 newVarDsc->lvPromoted = true;
18689 newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
18690 newVarDsc->lvFieldCnt = varDsc->lvFieldCnt;
18691 newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
18692 newVarDsc->lvCustomLayout = varDsc->lvCustomLayout;
18694 newVarDsc->lvKeepType = true;
18697 // Propagate address-taken-ness and do-not-enregister-ness.
18698 newVarDsc->lvAddrExposed = varDsc->lvAddrExposed;
18699 newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
18701 newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr;
18702 newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr;
18703 newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
18704 newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
18705 newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
18708 // If the promotion is dependent, the promoted temp would just be committed
18709 // to memory anyway, so we'll rewrite its appearances to be indirections
18710 // through the pointer parameter, the same as we'd do for this
18711 // parameter if it weren't promoted at all (otherwise the initialization
18712 // of the new temp would just be a needless memcpy at method entry).
18713 bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
18714 (varDsc->lvRefCnt <= varDsc->lvFieldCnt);
18716 if (!undoPromotion)
18718 // Insert IR that initializes the temp from the parameter.
18719 // LHS is a simple reference to the temp.
18720 fgEnsureFirstBBisScratch();
18721 GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
18722 // RHS is an indirection (using GT_OBJ) off the parameter.
18723 GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF);
18724 GenTree* rhs = gtNewBlockVal(addr, (unsigned)size);
18725 GenTree* assign = gtNewAssignNode(lhs, rhs);
18726 fgInsertStmtAtBeg(fgFirstBB, assign);
18729 // Update the locals corresponding to the promoted fields.
18730 unsigned fieldLclStart = varDsc->lvFieldLclStart;
18731 unsigned fieldCount = varDsc->lvFieldCnt;
18732 unsigned fieldLclStop = fieldLclStart + fieldCount;
18734 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
18736 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
18740 // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
18741 // will know to rewrite appearances of this local.
18742 assert(fieldVarDsc->lvParentLcl == lclNum);
18746 // Set the new parent.
18747 fieldVarDsc->lvParentLcl = newLclNum;
18748 // Clear the ref count field; it is used to communicate the number of references
18749 // to the implicit byref parameter when morphing calls that pass the implicit byref
18750 // out as an outgoing argument value, but that doesn't pertain to this field local
18751 // which is now a field of a non-arg local.
18752 fieldVarDsc->lvRefCnt = 0;
18755 fieldVarDsc->lvIsParam = false;
18756 // The fields shouldn't inherit any register preferences from
18757 // the parameter which is really a pointer to the struct.
18758 fieldVarDsc->lvIsRegArg = false;
18759 fieldVarDsc->lvIsMultiRegArg = false;
18760 fieldVarDsc->lvSetIsHfaRegArg(false);
18761 fieldVarDsc->lvArgReg = REG_NA;
18762 #if FEATURE_MULTIREG_ARGS
18763 fieldVarDsc->lvOtherArgReg = REG_NA;
18765 fieldVarDsc->lvPrefReg = 0;
18768 // Hijack lvFieldLclStart to record the new temp number.
18769 // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
18770 varDsc->lvFieldLclStart = newLclNum;
18771 // Go ahead and clear lvFieldCnt -- either we're promoting
18772 // a replacement temp or we're not promoting this arg, and
18773 // in either case the parameter is now a pointer that doesn't
18774 // have these fields.
18775 varDsc->lvFieldCnt = 0;
18777 // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
18778 // whether references to the struct should be rewritten as
18779 // indirections off the pointer (not promoted) or references
18780 // to the new struct local (promoted).
18781 varDsc->lvPromoted = !undoPromotion;
18785 // The "undo promotion" path above clears lvPromoted for args that struct
18786 // promotion wanted to promote but that aren't considered profitable to
18787 // rewrite. It hijacks lvFieldLclStart to communicate to
18788 // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
18789 // on such args for fgMorphImplicitByRefArgs to consult in the interim.
18790 // Here we have an arg that was simply never promoted, so make sure it doesn't
18791 // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
18792 // and fgMarkDemotedImplicitByRefArgs.
18793 assert(varDsc->lvFieldLclStart == 0);
18796 // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
18797 varDsc->lvType = TYP_BYREF;
18799 // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
18800 // make sure that the following flag is not set, as it would force SSA to
18801 // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
18803 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
18805 // The struct parameter may have had its address taken, but the pointer parameter
18806 // cannot -- any uses of the struct parameter's address are uses of the pointer
18807 // parameter's value, and there's no way for the MSIL to reference the pointer
18808 // parameter's address. So clear the address-taken bit for the parameter.
18809 varDsc->lvAddrExposed = 0;
18810 varDsc->lvDoNotEnregister = 0;
18813 // This should not be converted to a double in stress mode,
18814 // because it is really a pointer
18815 varDsc->lvKeepType = 1;
18819 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
18825 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18828 //------------------------------------------------------------------------
18829 // fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
18830 // asked to promote. Appearances of these have now been rewritten
18831 // (by fgMorphImplicitByRefArgs) using indirections from the pointer
18832 // parameter or references to the promotion temp, as appropriate.
18834 void Compiler::fgMarkDemotedImplicitByRefArgs()
18836 #if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
18838 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
18840 LclVarDsc* varDsc = &lvaTable[lclNum];
18842 if (lvaIsImplicitByRefLocal(lclNum))
18844 if (varDsc->lvPromoted)
18846 // The parameter is simply a pointer now, so clear lvPromoted. It was left set
18847 // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
18848 // appearances of this arg needed to be rewritten to a new promoted struct local.
18849 varDsc->lvPromoted = false;
18851 // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
18852 // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
18853 varDsc->lvFieldLclStart = 0;
18855 else if (varDsc->lvFieldLclStart != 0)
18857 // We created new temps to represent a promoted struct corresponding to this
18858 // parameter, but decided not to go through with the promotion and have
18859 // rewritten all uses as indirections off the pointer parameter.
18860 // We stashed the pointer to the new struct temp in lvFieldLclStart; make
18861 // note of that and clear the annotation.
18862 unsigned structLclNum = varDsc->lvFieldLclStart;
18863 varDsc->lvFieldLclStart = 0;
18865 // Clear the arg's ref count; this was set during address-taken analysis so that
18866 // call morphing could identify single-use implicit byrefs; we're done with
18867 // that, and want it to be in its default state of zero when we go to set
18868 // real ref counts for all variables.
18869 varDsc->lvRefCnt = 0;
18871 // The temp struct is now unused; set flags appropriately so that we
18872 // won't allocate space for it on the stack.
18873 LclVarDsc* structVarDsc = &lvaTable[structLclNum];
18874 structVarDsc->lvRefCnt = 0;
18875 structVarDsc->lvAddrExposed = false;
18877 structVarDsc->lvUnusedStruct = true;
18880 unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
18881 unsigned fieldCount = structVarDsc->lvFieldCnt;
18882 unsigned fieldLclStop = fieldLclStart + fieldCount;
18884 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
18886 // Fix the pointer to the parent local.
18887 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
18888 assert(fieldVarDsc->lvParentLcl == lclNum);
18889 fieldVarDsc->lvParentLcl = structLclNum;
18891 // The field local is now unused; set flags appropriately so that
18892 // we won't allocate stack space for it.
18893 fieldVarDsc->lvRefCnt = 0;
18894 fieldVarDsc->lvAddrExposed = false;
18900 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18903 /*****************************************************************************
19905 * Morph irregular parameters:
19906 * for x64 and ARM64 this means turning them into byrefs and adding extra indirections.
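 *  For example (editorial illustration): once parameter V00 has been retyped to
 *  TYP_BYREF, a value use `V00` of the struct becomes `OBJ(V00)`, and `ADDR(V00)`
 *  becomes simply `V00`.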
18908 bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree)
18910 #if (!defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) && !defined(_TARGET_ARM64_)
18914 #else // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18916 bool changed = false;
18918 // Implicit byref morphing needs to know if the reference to the parameter is a
18919 // child of GT_ADDR or not, so this method looks one level down and does the
18920 // rewrite whenever a child is a reference to an implicit byref parameter.
18921 if (tree->gtOper == GT_ADDR)
18923 if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
18925 GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true);
18926 changed = (morphedTree != nullptr);
18927 assert(!changed || (morphedTree == tree));
18932 for (GenTree** pTree : tree->UseEdges())
18934 GenTree* childTree = *pTree;
18935 if (childTree->gtOper == GT_LCL_VAR)
18937 GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false);
18938 if (newChildTree != nullptr)
18941 *pTree = newChildTree;
18948 #endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
18951 GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr)
18953 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
18954 assert(isAddr == (tree->gtOper == GT_ADDR));
18956 GenTree* lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
18957 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
18958 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
18960 CORINFO_FIELD_HANDLE fieldHnd;
18961 unsigned fieldOffset = 0;
18962 var_types fieldRefType = TYP_UNKNOWN;
18964 if (lvaIsImplicitByRefLocal(lclNum))
18966 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
18967 // re-invoke the traversal to mark address-taken locals.
18968 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
18969 // If we do, leave it as-is.
18970 if (!varTypeIsStruct(lclVarTree))
18972 assert(lclVarTree->TypeGet() == TYP_BYREF);
18976 else if (lclVarDsc->lvPromoted)
18978 // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
18979 // arg. Rewrite this to refer to the new local.
18980 assert(lclVarDsc->lvFieldLclStart != 0);
18981 lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
18985 fieldHnd = nullptr;
18987 else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
18989 // This was a field reference to an implicit-by-reference struct parameter that was
18990 // dependently promoted; update it to a field reference off the pointer.
18991 // Grab the field handle from the struct field lclVar.
18992 fieldHnd = lclVarDsc->lvFieldHnd;
18993 fieldOffset = lclVarDsc->lvFldOffset;
18994 assert(fieldHnd != nullptr);
18995 // Update lclNum/lclVarDsc to refer to the parameter
18996 lclNum = lclVarDsc->lvParentLcl;
18997 lclVarDsc = &lvaTable[lclNum];
18998 fieldRefType = lclVarTree->TypeGet();
19002 // We only need to transform the 'marked' implicit byref parameters
19006 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
19007 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
19011 if (fieldHnd == nullptr)
19013 // change &X into just plain X
19014 tree->ReplaceWith(lclVarTree, this);
19015 tree->gtType = TYP_BYREF;
19019 // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
19020 // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
19021 lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
19022 lclVarTree->gtType = TYP_BYREF;
19023 tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
19029 printf("Replacing address of implicit by ref struct parameter with byref:\n");
19035 // Change X into OBJ(X) or FIELD(X, f)
19036 var_types structType = tree->gtType;
19037 tree->gtType = TYP_BYREF;
19041 tree->gtLclVarCommon.SetLclNum(lclNum);
19042 tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
19046 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
19049 if (structType == TYP_STRUCT)
19051 gtSetObjGcInfo(tree->AsObj());
19054 // TODO-CQ: If the VM ever stops violating the ABI by passing heap references here,
19055 // we could remove TGTANYWHERE
19056 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
19061 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
19076 // An "AddrExposedContext" expresses the calling context in which an address expression occurs.
19077 enum AddrExposedContext
19079 AXC_None, // None of the below seen yet.
19080 AXC_Ind, // The address being computed is to be dereferenced.
19081 AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
19082 AXC_IndWide, // A block operation dereferenced an address referencing more bytes than the address
19083 // addresses -- if the address addresses a field of a struct local, we need to consider
19084 // the entire local address taken (not just the field).
19085 AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
19086 // on more bytes than the width of the storage location addressed. If this is a
19087 // field of a promoted struct local, declare the entire struct local address-taken.
19088 AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
19089 // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
19092 typedef ArrayStack<AddrExposedContext> AXCStack;
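// Illustrative walk (editorial): for `*(&i) = 0` the GT_IND pushes AXC_Ind and the
// GT_ADDR beneath it then pushes AXC_None -- the address is consumed by the indirection,
// so `i` need not be marked address-exposed. If instead the address escapes, as in
// `p = &i`, the GT_ADDR is reached in AXC_None and pushes AXC_Addr, so the GT_LCL_VAR
// beneath it is marked via lvaSetVarAddrExposed.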
19094 // We use the pre- and post-order callbacks to simulate passing an argument down the recursion, via a stack.
19095 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTree** pTree, fgWalkData* fgWalkPre)
19097 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
19098 (void)axcStack->Pop();
19099 return WALK_CONTINUE;
19102 Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTree** pTree, fgWalkData* fgWalkPre)
19104 GenTree* tree = *pTree;
19105 Compiler* comp = fgWalkPre->compiler;
19106 AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
19107 AddrExposedContext axc = axcStack->Top();
19109 // In some situations, we have to figure out what the effective context is in which to
19110 // evaluate the current tree, depending on which argument position it is in its parent.
19117 GenTree* parent = fgWalkPre->parentStack->Index(1);
19118 assert(parent->OperGet() == GT_ADD);
19119 // Is one of the args a constant representing a field offset,
19120 // and is this the other? If so, Ind context.
19121 if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
19125 else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
19140 // Now recurse properly for the tree.
19141 switch (tree->gtOper)
19144 if (axc != AXC_Addr)
19146 axcStack->Push(AXC_Ind);
19150 axcStack->Push(AXC_None);
19152 return WALK_CONTINUE;
19156 if (axc == AXC_Addr)
19158 axcStack->Push(AXC_None);
19160 else if (tree->TypeGet() == TYP_STRUCT)
19162 // The block operation will dereference its argument(s) -- usually. If the size of the initblk
19163 // or copyblk exceeds the size of a storage location whose address is used as one of the
19164 // arguments, then we have to consider that storage location (indeed, its underlying containing
19165 // location) to be address taken. So get the width of the initblk or copyblk.
19167 GenTree* parent = fgWalkPre->parentStack->Index(1);
19168 GenTreeBlk* blk = tree->AsBlk();
19169 unsigned width = blk->gtBlkSize;
19170 noway_assert(width != 0);
19172 GenTree* addr = blk->Addr();
19173 if (addr->OperGet() == GT_ADDR)
19175 if (parent->gtOper == GT_ASG)
19177 if ((tree == parent->gtOp.gtOp1) &&
19178 ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
19185 assert(parent->gtOper == GT_CALL);
19188 axcStack->Push(axc);
19192 // This is like a regular GT_IND.
19193 axcStack->Push(AXC_Ind);
19195 return WALK_CONTINUE;
19198 // Assume maximal width.
19199 axcStack->Push(AXC_IndWide);
19200 return WALK_CONTINUE;
19203 case GT_FIELD_LIST:
19204 axcStack->Push(AXC_None);
19205 return WALK_CONTINUE;
19208 // Taking the address of an array element never takes the address of a local.
19209 axcStack->Push(AXC_None);
19210 return WALK_CONTINUE;
19213 #ifdef FEATURE_SIMD
19214 if (tree->gtOp.gtOp1->OperIsSIMDorSimdHWintrinsic())
19216 axcStack->Push(AXC_None);
19219 #endif // FEATURE_SIMD
19220 if (axc == AXC_Ind)
19222 axcStack->Push(AXC_None);
19224 else if (axc == AXC_IndWide)
19226 axcStack->Push(AXC_AddrWide);
19230 assert(axc == AXC_None);
19231 axcStack->Push(AXC_Addr);
19233 return WALK_CONTINUE;
19236 // First, handle a couple of special cases: field of promoted struct local, field
19237 // of "normed" struct.
19238 if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
19240 // It (may have) replaced the field with a local var or local field. If we're in an addr context,
19241 // label it addr-taken.
19242 if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
19244 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19245 comp->lvaSetVarAddrExposed(lclNum);
19246 if (axc == AXC_AddrWide)
19248 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19249 if (varDsc->lvIsStructField)
19251 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
19255 // Push something to keep the PostCB, which will pop it, happy.
19256 axcStack->Push(AXC_None);
19257 return WALK_SKIP_SUBTREES;
19261 // GT_FIELD is an implicit deref.
19262 if (axc == AXC_Addr)
19264 axcStack->Push(AXC_None);
19266 else if (axc == AXC_AddrWide)
19268 axcStack->Push(AXC_IndWide);
19272 axcStack->Push(AXC_Ind);
19274 return WALK_CONTINUE;
19279 assert(axc != AXC_Addr);
19280 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19281 if (comp->lvaIsImplicitByRefLocal(lclNum))
19283 // Keep track of the number of appearances of each promoted implicit
19284 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
19285 // checks the ref counts for implicit byref params when deciding if it's legal
19286 // to elide certain copies of them.
19287 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19288 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField\n", varDsc->lvRefCnt,
19289 varDsc->lvRefCnt + 1, lclNum);
19291 varDsc->lvRefCnt++;
19293 // This recognizes certain forms, and does all the work. In that case, returns WALK_SKIP_SUBTREES,
19294 // else WALK_CONTINUE. We do the same here.
19295 fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
19296 if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
19298 comp->lvaSetVarAddrExposed(lclNum);
19299 if (axc == AXC_AddrWide)
19301 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19302 if (varDsc->lvIsStructField)
19304 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
19308 // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
19309 // what, but something to be popped by the post callback. If we're going
19310 // to analyze children, the LCL_FLD creates an Ind context, so use that.
19311 axcStack->Push(AXC_Ind);
19317 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19318 LclVarDsc* varDsc = &comp->lvaTable[lclNum];
19320 if (comp->lvaIsImplicitByRefLocal(lclNum))
19322 // Keep track of the number of appearances of each promoted implicit
19323 // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
19324 // checks the ref counts for implicit byref params when deciding if it's legal
19325 // to elide certain copies of them.
19326 JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField\n", varDsc->lvRefCnt,
19327 varDsc->lvRefCnt + 1, lclNum);
19329 varDsc->lvRefCnt++;
19332 if (axc == AXC_Addr || axc == AXC_AddrWide)
19334 comp->lvaSetVarAddrExposed(lclNum);
19335 if (axc == AXC_AddrWide)
19337 if (varDsc->lvIsStructField)
19339 comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
19343 // We may need to Quirk the storage size for this LCL_VAR:
19344 // some PInvoke signatures incorrectly specify a ByRef to an INT32
19345 // when they actually write a SIZE_T or INT64.
19346 comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
19349 // Push something to keep the PostCB, which will pop it, happy.
19350 axcStack->Push(AXC_None);
19351 // The tree is a leaf.
19352 return WALK_SKIP_SUBTREES;
19356 assert(axc != AXC_Addr);
19357 // See below about treating pointer operations as wider indirection.
19358 if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
19360 axcStack->Push(AXC_IndWide);
19362 else if (axc == AXC_Ind)
19364 // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
19365 // If it's an add of a constant and an address, and the constant represents a field,
19366 // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
19367 axcStack->Push(AXC_IndAdd);
19371 axcStack->Push(axc);
19373 return WALK_CONTINUE;
19375 // !!! Treat Pointer Operations as Wider Indirection
19377 // If we are performing pointer operations, make sure we treat that as equivalent to a wider
19378 // indirection. This is because the pointers could be pointing to the address of struct fields
19379 // and could be used to perform operations on the whole struct or passed to another method.
19381 // When visiting a node in this pre-order walk, we do not know if we would in the future
19382 // encounter a GT_ADDR of a GT_FIELD below.
19384 // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
19385 // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
19386 // wider indirection context down the expr tree.
19388 // Example, in unsafe code,
19390 // IL_000e 12 00 ldloca.s 0x0
19391 // IL_0010 7c 02 00 00 04 ldflda 0x4000002
19392 // IL_0015 12 00 ldloca.s 0x0
19393 // IL_0017 7c 01 00 00 04 ldflda 0x4000001
19396 // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then
19397 // consider GT_SUB to be equivalent of an AXC_IndWide.
19399 // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
19400 // them as AXC_IndWide.
19405 // Scan for byref args
19406 GenTreeCall* const call = tree->AsCall();
19407 for (GenTree* args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
19409 if (args->gtOp.gtOp1->gtType == TYP_BYREF)
19411 axcStack->Push(AXC_IndWide);
19412 return WALK_CONTINUE;
19441 if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
19442 (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
19444 axcStack->Push(AXC_IndWide);
19445 return WALK_CONTINUE;
19453 // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
19454 // handle the "Ind" propogation explicitly above.
19455 if (axc == AXC_Addr || axc == AXC_AddrWide)
19457 axcStack->Push(axc);
19461 axcStack->Push(AXC_None);
19463 return WALK_CONTINUE;
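//------------------------------------------------------------------------
// fgFitsInOrNotLoc: (editorial summary) return true if an access of `width` bytes
//    stays within the storage location denoted by `tree`. Callers above treat a block
//    operation that does not fit as widely address-exposing the underlying local.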
19466 bool Compiler::fgFitsInOrNotLoc(GenTree* tree, unsigned width)
19468 if (tree->TypeGet() != TYP_STRUCT)
19470 return width <= genTypeSize(tree->TypeGet());
19472 else if (tree->OperGet() == GT_LCL_VAR)
19474 assert(tree->TypeGet() == TYP_STRUCT);
19475 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
19476 return width <= lvaTable[lclNum].lvExactSize;
19478 else if (tree->OperGet() == GT_FIELD)
19480 CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
19481 return width <= info.compCompHnd->getClassSize(fldClass);
19483 else if (tree->OperGet() == GT_INDEX)
19485 return width <= tree->gtIndex.gtIndElemSize;
19493 void Compiler::fgAddFieldSeqForZeroOffset(GenTree* op1, FieldSeqNode* fieldSeq)
19495 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
19497 switch (op1->OperGet())
19500 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
19502 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
19503 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
19508 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
19510 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
19511 if (op1Fs != nullptr)
19513 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
19514 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
19517 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
19519 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
19520 if (op2Fs != nullptr)
19522 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
19523 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
19530 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
19531 if (op1Fs != nullptr)
19533 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
19534 op1->gtIntCon.gtFieldSeq = op1Fs;
19540 // Record in the general zero-offset map.
19541 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
19546 /*****************************************************************************
19548 * Mark address-taken locals.
19551 void Compiler::fgMarkAddressExposedLocals()
19556 printf("\n*************** In fgMarkAddressExposedLocals()\n");
19560 BasicBlock* block = fgFirstBB;
19561 noway_assert(block);
19565 /* Make the current basic block address available globally */
19571 for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
19573 // Call Compiler::fgMarkAddrTakenLocalsPreCB/PostCB on each node
19574 AXCStack stk(this);
19575 stk.Push(AXC_None); // We start in neither an addr or ind context.
19576 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
19579 block = block->bbNext;
19584 // fgNodesMayInterfere:
19585 // return true if moving nodes relative to each other can change the result of a computation
19588 // write: a node which writes; read: a node which reads
19591 bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
19593 LclVarDsc* srcVar = nullptr;
19595 bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
19596 bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
19598 if (read->OperIsLocal())
19600 srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
19605 if (srcVar && srcVar->lvAddrExposed)
19609 else if (readIsIndir)
19615 else if (write->OperIsLocal())
19617 LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
19620 return dstVar->lvAddrExposed;
19622 else if (read->OperIsLocal())
19624 if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
19641 #ifdef LEGACY_BACKEND
19642 /** This predicate decides whether we will fold a tree with the structure:
19643 * x = x <op> y where x could be any arbitrary expression into
19644 * x <op>= y
19646 * This modification is only performed when the target architecture supports
19647 * complex addressing modes. In the case of ARM for example, this transformation
19648 * yields no benefit.
19650 * In case this function decides we can proceed to fold into an assignment operator,
19651 * we also need to inspect whether the operator is commutative, to tell fgMorph whether
19652 * it needs to reverse the tree: we may have seen x = y <op> x and want to fold that into
19653 * x <op>= y, which is only valid when the operator is commutative.
19655 bool Compiler::fgShouldCreateAssignOp(GenTree* tree, bool* bReverse)
19657 #if CPU_LOAD_STORE_ARCH
19658 /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
19661 GenTree* op1 = tree->gtOp.gtOp1;
19662 GenTree* op2 = tree->gtGetOp2();
19663 genTreeOps cmop = op2->OperGet();
19665 /* Is the destination identical to the first RHS sub-operand? */
19666 if (GenTree::Compare(op1, op2->gtOp.gtOp1))
19669 Do not transform the following tree
19671 [0024CFA4] ----------- const int 1
19672 [0024CFDC] ----G------ | int
19673 [0024CF5C] ----------- lclVar ubyte V01 tmp0
19674 [0024D05C] -A--G------ = ubyte
19675 [0024D014] D------N--- lclVar ubyte V01 tmp0
19679 [0024CFA4] ----------- const int 1
19680 [0024D05C] -A--G------ |= ubyte
19681 [0024D014] U------N--- lclVar ubyte V01 tmp0
19683 , when V01 is a struct field local.
19686 if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
19688 unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
19689 LclVarDsc* varDsc = lvaTable + lclNum;
19691 if (varDsc->lvIsStructField)
19700 else if (GenTree::OperIsCommutative(cmop))
19702 /* For commutative ops only, check for "a = x <op> a" */
19704 /* Should we be doing this at all? */
19705 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
19710 /* Can we swap the operands to cmop ... */
19711 if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
19713 // Both sides have side effects, which prevents the swap; bail.
19717 /* Is the destination identical to the second RHS sub-operand? */
19718 if (GenTree::Compare(op1, op2->gtOp.gtOp2))
19725 #endif // !CPU_LOAD_STORE_ARCH
19727 #endif // LEGACY_BACKEND
19729 #ifdef FEATURE_SIMD
19731 //-----------------------------------------------------------------------------------
19732 // fgMorphCombineSIMDFieldAssignments:
19733 // If the RHS of the input stmt is a read of a SIMD vector's X field, then this function
19734 // will keep reading the next few stmts, based on the vector size (2, 3 or 4).
19735 // If those stmts' LHS locations are contiguous, and their RHS locations are also
19736 // contiguous, then we replace the statements with a single copyblk.
19739 // block - BasicBlock*. block which stmt belongs to
19740 // stmt - GenTreeStmt*. the stmt node we want to check
19743 // if this function successfully optimized the stmts, then return true; otherwise return false.
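// Editorial illustration (assumed C#-level source): a Vector4 copied field by field,
//     dst.X = src.X;  dst.Y = src.Y;  dst.Z = src.Z;  dst.W = src.W;
// has contiguous locations on both the LHS and RHS, so the four float assignments
// collapse into a single 16-byte copyblk of the whole vector.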
19746 bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* stmt)
19749 noway_assert(stmt->gtOper == GT_STMT);
19750 GenTree* tree = stmt->gtStmt.gtStmtExpr;
19751 assert(tree->OperGet() == GT_ASG);
19753 GenTree* originalLHS = tree->gtOp.gtOp1;
19754 GenTree* prevLHS = tree->gtOp.gtOp1;
19755 GenTree* prevRHS = tree->gtOp.gtOp2;
19756 unsigned index = 0;
19757 var_types baseType = TYP_UNKNOWN;
19758 unsigned simdSize = 0;
19759 GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
19761 if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
19763 // if the RHS is not from a SIMD vector field X, then there is no need to check further.
19767 var_types simdType = getSIMDTypeForSize(simdSize);
19768 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
19769 int remainingAssignments = assignmentsCount;
19770 GenTree* curStmt = stmt->gtNext;
19771 GenTree* lastStmt = stmt;
19773 while (curStmt != nullptr && remainingAssignments > 0)
19775 GenTree* exp = curStmt->gtStmt.gtStmtExpr;
19776 if (exp->OperGet() != GT_ASG)
19780 GenTree* curLHS = exp->gtGetOp1();
19781 GenTree* curRHS = exp->gtGetOp2();
19783 if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
19788 remainingAssignments--;
19792 lastStmt = curStmt;
19793 curStmt = curStmt->gtNext;
19796 if (remainingAssignments > 0)
19798 // if the number of remaining assignments is greater than zero, then the
19799 // assignments are not assigning to contiguous memory locations from the
19800 // same vector.
19806 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
19807 printf("From BB%02u, stmt", block->bbNum);
19809 printf(" to stmt");
19810 printTreeID(lastStmt);
19815 for (int i = 0; i < assignmentsCount; i++)
19817 fgRemoveStmt(block, stmt->gtNext);
19820 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
19821 if (simdStructNode->OperIsLocal())
19823 setLclRelatedToSIMDIntrinsic(simdStructNode);
19825 GenTree* copyBlkAddr = copyBlkDst;
19826 if (copyBlkAddr->gtOper == GT_LEA)
19828 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
19830 GenTreeLclVarCommon* localDst = nullptr;
19831 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
19833 setLclRelatedToSIMDIntrinsic(localDst);
19836 if (simdStructNode->TypeGet() == TYP_BYREF)
19838 assert(simdStructNode->OperIsLocal());
19839 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
19840 simdStructNode = gtNewIndir(simdType, simdStructNode);
19844 assert(varTypeIsSIMD(simdStructNode));
19850 printf("\nBB%02u stmt", block->bbNum);
19852 printf("(before)\n");
19857 // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
19858 GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
19859 blkNode->gtType = simdType;
19860 tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
19861 false, // not volatile
19862 true); // copyBlock
19864 stmt->gtStmt.gtStmtExpr = tree;
19866 // Since we generated a new address node which didn't exist before,
19867 // we should expose this address manually here.
19868 AXCStack stk(this);
19869 stk.Push(AXC_None);
19870 fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
19875 printf("\nReplaced BB%02u stmt", block->bbNum);
19877 printf("(after)\n");
19884 #endif // FEATURE_SIMD
19886 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
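// SkipNopStmts: (editorial summary) advance past any no-op statements, returning the
// first meaningful statement, or nullptr if none remains.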
19887 GenTreeStmt* SkipNopStmts(GenTreeStmt* stmt)
19889 while ((stmt != nullptr) && stmt->gtStmtExpr->IsNothingNode()) // skip while the stmt is a no-op
19891 stmt = stmt->gtNextStmt;
19896 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
19898 //------------------------------------------------------------------------
19899 // fgCheckStmtAfterTailCall: check that statements after the tail call stmt
19900 // candidate are in one of the expected forms described below.
19903 // 'true' if stmts are in the expected form, else 'false'.
19905 bool Compiler::fgCheckStmtAfterTailCall()
19908 // For void calls, we would have created a GT_CALL in the stmt list.
19909 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
19910 // For calls returning structs, we would have a void call, followed by a void return.
19911 // For debuggable code, it would be an assignment of the call to a temp.
19912 // We want to get rid of any of these extra trees, and just leave
19913 // the call.
19914 GenTreeStmt* callStmt = fgMorphStmt;
19916 GenTreeStmt* nextMorphStmt = callStmt->gtNextStmt;
19918 #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
19919 // Legacy Jit64 Compat:
19920 // There could be any number of GT_NOPs between tail call and GT_RETURN.
19921 // That is, the tail call pattern could be one of the following:
19922 // 1) tail.call, nop*, ret
19923 // 2) tail.call, nop*, pop, nop*, ret
19924 // 3) var=tail.call, nop*, ret(var)
19925 // 4) var=tail.call, nop*, pop, ret
19926 // 5) comma(tail.call, nop), nop*, ret
19928 // See impIsTailCallILPattern() for details on tail call IL patterns
19929 // that are supported.
19930 GenTree* callExpr = callStmt->gtStmtExpr;
19932 if (callExpr->gtOper != GT_RETURN)
19934 // First skip all GT_NOPs after the call
19935 nextMorphStmt = SkipNopStmts(nextMorphStmt);
19937 // Check to see if there is a pop.
19938 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
19939 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
19941 // Note that pop opcode may or may not result in a new stmt (for details see
19942 // impImportBlockCode()). Hence, it is not possible to assert about the IR
19943 // form generated by pop but pop tree must be side-effect free so that we can
19944 // delete it safely.
19945 GenTreeStmt* popStmt = nextMorphStmt;
19947 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
19948 // the constituent nodes.
19949 GenTree* popExpr = popStmt->gtStmtExpr;
19950 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
19951 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
19953 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
19954 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
19956 noway_assert(isSideEffectFree);
19958 nextMorphStmt = popStmt->gtNextStmt;
19961 // Next skip any GT_NOP nodes after the pop
19962 nextMorphStmt = SkipNopStmts(nextMorphStmt);
19964 #endif // !FEATURE_CORECLR && _TARGET_AMD64_
19966 // Check that the remaining stmts in the block match one of the following patterns:
19968 // 2) ret(cast*(callResultLclVar))
19969 // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block
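// For example (editorial illustration of pattern 2): a tail call whose long result the
// caller narrows may leave
//     t = tail.call(...)
//     return (int) t;    // ret(cast(callResultLclVar))
// and the cast chain is unwound below to verify that `t` is the call result local.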
19970 if (nextMorphStmt != nullptr)
19972 GenTree* callExpr = callStmt->gtStmtExpr;
19973 if (callExpr->gtOper != GT_ASG)
19975 // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar),
19976 // where lclVar was the return buffer in the call for structs or simd.
19977 GenTreeStmt* retStmt = nextMorphStmt;
19978 GenTree* retExpr = retStmt->gtStmtExpr;
19979 noway_assert(retExpr->gtOper == GT_RETURN);
19981 nextMorphStmt = retStmt->gtNextStmt;
19985 noway_assert(callExpr->gtGetOp1()->OperIsLocal());
19986 unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
19988 #if FEATURE_TAILCALL_OPT_SHARED_RETURN
19990 // We can have a move from the call result to an lvaInlineeReturnSpillTemp.
19991 // However, we can't check that this assignment was created there.
19992 if (nextMorphStmt->gtStmtExpr->gtOper == GT_ASG)
19994 GenTreeStmt* moveStmt = nextMorphStmt;
19995 GenTree* moveExpr = nextMorphStmt->gtStmtExpr;
19996 noway_assert(moveExpr->gtGetOp1()->OperIsLocal() && moveExpr->gtGetOp2()->OperIsLocal());
19998 unsigned srcLclNum = moveExpr->gtGetOp2()->AsLclVarCommon()->gtLclNum;
19999 noway_assert(srcLclNum == callResultLclNumber);
20000 unsigned dstLclNum = moveExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
20001 callResultLclNumber = dstLclNum;
20003 nextMorphStmt = moveStmt->gtNextStmt;
20005 if (nextMorphStmt != nullptr)
20008 GenTreeStmt* retStmt = nextMorphStmt;
20009 GenTree* retExpr = nextMorphStmt->gtStmtExpr;
20010 noway_assert(retExpr->gtOper == GT_RETURN);
20012 GenTree* treeWithLcl = retExpr->gtGetOp1();
20013 while (treeWithLcl->gtOper == GT_CAST)
20015 noway_assert(!treeWithLcl->gtOverflow());
20016 treeWithLcl = treeWithLcl->gtGetOp1();
20019 noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->gtLclNum);
20021 nextMorphStmt = retStmt->gtNextStmt;
20025 return nextMorphStmt == nullptr;